diff --git a/.gitmodules b/.gitmodules index e69de29bb..2f8fffee2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "mlpf/data/key4hep/gen"] + path = mlpf/data/key4hep/gen + url = https://github.com/HEP-KBFI/key4hep-sim.git diff --git a/README.md b/README.md index 9f93b9c01..4810c0f61 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,28 @@ Please ensure you use the correct version of the `jpata/particleflow` software w --- -### **Instructions: Citations and Reuse** +## Running the workflow with Snakemake +The full event reconstruction and model training workflow can be managed using [Snakemake](https://snakemake.readthedocs.io/). Snakemake must be available on both the interactive and worker nodes. + +### 1. Generate the Snakefile +Use the provided script to generate a `Snakefile` for a specific production campaign and, when the `train` step is requested, a model. +```bash +python3 mlpf/produce_snakemake.py --production clic_2025_edm4hep --steps gen,post,tfds +``` + +### 2. Execute the workflow +Run Snakemake using the generated `Snakefile`. The following example uses SLURM and Apptainer: +```bash +snakemake -s snakemake_jobs/clic_2025_edm4hep/Snakefile --executor slurm --jobs 100 --use-apptainer +``` + +To include model training: +```bash +python3 mlpf/produce_snakemake.py --production clic_2025_edm4hep --steps train --model pyg-clic-v1 +snakemake -s snakemake_jobs/clic_2025_edm4hep/Snakefile --executor slurm --jobs 1 --use-apptainer --apptainer-args "--nv" +``` + +## Citations and reuse You are welcome to reuse the code in accordance with the [LICENSE](https://github.com/jpata/particleflow/blob/main/LICENSE). 
diff --git a/mlpf/data/cms/check_file.py b/mlpf/data/cms/check_file.py deleted file mode 100644 index 93a433e40..000000000 --- a/mlpf/data/cms/check_file.py +++ /dev/null @@ -1,8 +0,0 @@ -import pickle -import sys -import bz2 - -try: - data = pickle.load(bz2.BZ2File(sys.argv[1], "rb"), encoding="iso-8859-1") -except Exception: - print(sys.argv[1]) diff --git a/mlpf/data/cms/genjob.sh b/mlpf/data/cms/genjob.sh deleted file mode 100644 index c082fdc41..000000000 --- a/mlpf/data/cms/genjob.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -#SBATCH --partition main -#SBATCH --cpus-per-task 1 -#SBATCH --mem-per-cpu 6G -#SBATCH -o slurm-%x-%j-%N.out - -scripts/tallinn/cmssw-el8.sh $@ diff --git a/mlpf/data/cms/genjob_nopu.sh b/mlpf/data/cms/genjob_nopu.sh deleted file mode 100755 index 453822d26..000000000 --- a/mlpf/data/cms/genjob_nopu.sh +++ /dev/null @@ -1,84 +0,0 @@ -#!/bin/bash -set -e -set -x - -OUTDIR=/local/joosep/mlpf/cms/20250508_cmssw_15_0_5_d3c6d1/nopu/ -CMSSWDIR=/scratch/persistent/joosep/CMSSW_15_0_5/ -MLPF_PATH=/home/joosep/particleflow/ - -#seed must be greater than 0 -SAMPLE=$1 -SEED=$2 - -WORKDIR=/scratch/local/joosep/$SLURM_JOBID/$SAMPLE/$SEED -#WORKDIR=`pwd`/$SAMPLE/$SEED -mkdir -p $WORKDIR -mkdir -p $OUTDIR - -PILEUP=NoPileUp -PILEUP_INPUT= - -N=100 - -env -source /cvmfs/cms.cern.ch/cmsset_default.sh - -cd $CMSSWDIR -eval `scramv1 runtime -sh` -which python -which python3 - -env - -cd $WORKDIR - -#Generate the MC -cmsDriver.py $SAMPLE \ - --conditions auto:phase1_2023_realistic \ - --beamspot Realistic25ns13p6TeVEarly2023Collision \ - -n $N \ - --era Run3_2023 \ - --eventcontent FEVTDEBUGHLT \ - -s GEN,SIM,DIGI:pdigi_valid,L1,DIGI2RAW,HLT:@relval2023 \ - --datatier GEN-SIM \ - --geometry DB:Extended \ - --pileup $PILEUP \ - --no_exec \ - --fileout step2_phase1_new.root \ - --customise Validation/RecoParticleFlow/customize_pfanalysis.customize_step2 \ - --python_filename=step2_phase1_new.py - -#Run the reco sequences -cmsDriver.py step3 \ - --conditions 
auto:phase1_2023_realistic \ - --beamspot Realistic25ns13p6TeVEarly2023Collision \ - --era Run3_2023 \ - -n -1 \ - --eventcontent FEVTDEBUGHLT \ - --runUnscheduled \ - -s RAW2DIGI,L1Reco,RECO,RECOSIM \ - --datatier GEN-SIM-RECO \ - --geometry DB:Extended \ - --no_exec \ - --filein file:step2_phase1_new.root \ - --fileout step3_phase1_new.root \ - --customise Validation/RecoParticleFlow/customize_pfanalysis.customize_step3 \ - --python_filename=step3_phase1_new.py - -pwd -ls -lrt - -echo "process.RandomNumberGeneratorService.generator.initialSeed = $SEED" >> step2_phase1_new.py -cmsRun step2_phase1_new.py > /dev/null -#cp step2_phase1_new.root $OUTDIR/$SAMPLE/root/step2_${SEED}.root - -cmsRun step3_phase1_new.py > /dev/null -#cp step3_phase1_new.root $OUTDIR/$SAMPLE/root/step3_${SEED}.root -mv pfntuple.root pfntuple_${SEED}.root -cp pfntuple_${SEED}.root $OUTDIR/$SAMPLE/root/ - -# python3 ${MLPF_PATH}/mlpf/data/cms/postprocessing2.py --input pfntuple_${SEED}.root --outpath ./ -# bzip2 -z pfntuple_${SEED}.pkl -# cp *.pkl.bz2 $OUTDIR/$SAMPLE/raw/ - -rm -Rf /scratch/local/joosep/$SLURM_JOBID diff --git a/mlpf/data/cms/genjob_nopu_val.sh b/mlpf/data/cms/genjob_nopu_val.sh deleted file mode 100755 index 16887701d..000000000 --- a/mlpf/data/cms/genjob_nopu_val.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -set -e -set -x - -OUTDIR=/local/joosep/mlpf/cms/20250618_cmssw_15_0_5_f8ae2f/nopu_val/ -CMSSWDIR=/scratch/persistent/joosep/CMSSW_15_0_5/ -MLPF_PATH=/home/joosep/particleflow/ - -#seed must be greater than 0 -SAMPLE=$1 -SEED=$2 - -WORKDIR=/scratch/local/joosep/$SLURM_JOBID/$SAMPLE/$SEED -#WORKDIR=`pwd`/$SAMPLE/$SEED -mkdir -p $WORKDIR -mkdir -p $OUTDIR/$SAMPLE/root - -PILEUP=NoPileUp -PILEUP_INPUT= - -N=100 - -env -source /cvmfs/cms.cern.ch/cmsset_default.sh - -cd $CMSSWDIR -eval `scramv1 runtime -sh` -which python -which python3 - -env - -cd $WORKDIR - -#Generate the MC -cmsDriver.py $SAMPLE \ - --conditions auto:phase1_2024_realistic \ - --beamspot 
Realistic2024ppRefCollision \ - -n $N \ - --era Run3_2024 \ - --eventcontent FEVTDEBUGHLT \ - -s GEN,SIM,DIGI:pdigi_valid,L1,DIGI2RAW,HLT:@relval2024 \ - --datatier GEN-SIM-DIGI-RAW \ - --geometry DB:Extended \ - --pileup $PILEUP \ - --no_exec \ - --fileout step2_phase1_new.root \ - --python_filename=step2_phase1_new.py - -pwd -ls -lrt - -echo "process.RandomNumberGeneratorService.generator.initialSeed = $SEED" >> step2_phase1_new.py -cmsRun step2_phase1_new.py > /dev/null -cp step2_phase1_new.root $OUTDIR/$SAMPLE/root/step2_${SEED}.root - -rm -Rf /scratch/local/joosep/$SLURM_JOBID diff --git a/mlpf/data/cms/genjob_pu.sh b/mlpf/data/cms/genjob_pu.sh new file mode 100755 index 000000000..8a0aaf4e2 --- /dev/null +++ b/mlpf/data/cms/genjob_pu.sh @@ -0,0 +1,121 @@ +#!/bin/bash +set -e +set -x +unset PYTHONPATH + +#seed must be greater than 0 +SAMPLE=$1 +SEED=$2 +PU_TYPE=${3:-nopu} +COPY_STEP2=${4:-false} + +CMSSWDIR=${CMSSWDIR:-/scratch/persistent/joosep/CMSSW_15_0_5/} +CONFIG_DIR=${CONFIG_DIR:-/home/joosep/particleflow/} +OUTDIR_DEFAULT_BASE=/local/joosep/mlpf/cms/20250508_cmssw_15_0_5_d3c6d1 + +if [ "$PU_TYPE" == "nopu" ]; then + PILEUP=NoPileUp + PILEUP_INPUT= + NEV_DEFAULT=100 + OUTDIR_SUFFIX="nopu" +elif [ "$PU_TYPE" == "pu55to75" ]; then + PILEUP=Run3_Flat55To75_PoissonOOTPU + #classical mixing for Summer24 /MinBias_TuneCP5_13p6TeV-pythia8/RunIII2024Summer24GS-140X_mcRun3_2024_realistic_v20-v1/GEN-SIM + PILEUP_INPUT=filelist:${CONFIG_DIR}/mlpf/data/cms/pu_files_local.txt + NEV_DEFAULT=50 + OUTDIR_SUFFIX="pu55to75" +else + echo "Unknown PU_TYPE: $PU_TYPE" + exit 1 +fi + +OUTDIR=${OUTDIR:-${OUTDIR_DEFAULT_BASE}/${OUTDIR_SUFFIX}/} +N=${NEV:-$NEV_DEFAULT} + +if [ -z "$WORKDIR" ]; then + if [ ! 
-z "$SLURM_JOBID" ]; then + WORKDIR=/scratch/local/joosep/$SLURM_JOBID/$SAMPLE/$SEED + CLEANUP_DIR=/scratch/local/joosep/$SLURM_JOBID + else + WORKDIR=/scratch/local/joosep/job_${SAMPLE}_${SEED} + CLEANUP_DIR=$WORKDIR + fi +else + CLEANUP_DIR=$WORKDIR +fi + +mkdir -p "$WORKDIR" +mkdir -p "$OUTDIR/$SAMPLE/root/" + +# Ensure cleanup on exit, even if the job fails: the EXIT trap below removes CLEANUP_DIR +cleanup() { + # Safety check: never delete the root scratch directory; the rm target is quoted so spaces or glob characters in the path cannot expand to other paths + if [ ! -z "$CLEANUP_DIR" ] && [ "$CLEANUP_DIR" != "/scratch/local/joosep" ] && [ "$CLEANUP_DIR" != "/scratch/local/joosep/" ]; then + echo "Cleaning up scratch directory $CLEANUP_DIR" + rm -Rf -- "$CLEANUP_DIR" + fi +} +trap cleanup EXIT + +env +source /cvmfs/cms.cern.ch/cmsset_default.sh + +cd "$CMSSWDIR" +eval `scramv1 runtime -sh` + +env + +cd "$WORKDIR" + +# PU arguments: --pileup is always passed, --pileup_input only when PILEUP_INPUT is non-empty +PU_ARGS=("--pileup" "$PILEUP") +if [ ! -z "$PILEUP_INPUT" ]; then + PU_ARGS+=("--pileup_input" "$PILEUP_INPUT") +fi + +# Generate the MC config (step2_phase1_new.py); --no_exec is passed here and the config is run later with cmsRun +cmsDriver.py "$SAMPLE" \ + --conditions 140X_mcRun3_2024_realistic_v26 \ + --beamspot DBrealistic \ + -n "$N" \ + --era Run3_2024 \ + --eventcontent FEVTDEBUGHLT \ + -s GEN,SIM,DIGI:pdigi_valid,L1,DIGI2RAW,HLT:@relval2023 \ + --datatier GEN-SIM \ + --geometry DB:Extended \ + "${PU_ARGS[@]}" \ + --no_exec \ + --fileout step2_phase1_new.root \ + --customise Validation/RecoParticleFlow/customize_pfanalysis.customize_step2 \ + --python_filename=step2_phase1_new.py + +# Generate the reco config (step3_phase1_new.py), which reads step2_phase1_new.root as input +cmsDriver.py step3 \ + --conditions 140X_mcRun3_2024_realistic_v26 \ + --beamspot DBrealistic \ + -n -1 \ + --era Run3_2024 \ + --eventcontent FEVTDEBUGHLT \ + --runUnscheduled \ + -s RAW2DIGI,L1Reco,RECO,RECOSIM \ + --datatier GEN-SIM-RECO \ + --geometry DB:Extended \ + --no_exec \ + --filein file:step2_phase1_new.root \ + --fileout step3_phase1_new.root \ + --customise Validation/RecoParticleFlow/customize_pfanalysis.customize_step3 \ + --python_filename=step3_phase1_new.py + +pwd +ls -lrt + +echo "process.RandomNumberGeneratorService.generator.initialSeed = 
$SEED" >> step2_phase1_new.py +cmsRun step2_phase1_new.py > /dev/null +if [ "$COPY_STEP2" == "true" ]; then + cp step2_phase1_new.root $OUTDIR/$SAMPLE/root/step2_${SEED}.root +fi + +cmsRun step3_phase1_new.py > /dev/null +#cp step3_phase1_new.root $OUTDIR/$SAMPLE/root/step3_${SEED}.root +mv pfntuple.root pfntuple_${SEED}.root +cp pfntuple_${SEED}.root $OUTDIR/$SAMPLE/root/ diff --git a/mlpf/data/cms/genjob_pu0to150.sh b/mlpf/data/cms/genjob_pu0to150.sh deleted file mode 100755 index 7c493dbd4..000000000 --- a/mlpf/data/cms/genjob_pu0to150.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash -set -e -set -x - -OUTDIR=/local/joosep/mlpf/cms/20250508_cmssw_15_0_5_d3c6d1/pu0to150/ -CMSSWDIR=/scratch/persistent/joosep/CMSSW_15_0_5/ -MLPF_PATH=/home/joosep/particleflow/ - -#seed must be greater than 0 -SAMPLE=$1 -SEED=$2 - -WORKDIR=/scratch/local/joosep/$SLURM_JOBID/$SAMPLE/$SEED -#WORKDIR=`pwd`/$SAMPLE/$SEED -mkdir -p $WORKDIR -mkdir -p $OUTDIR - -PILEUP=Run3_Flat0To150_PoissonOOTPU -PILEUP_INPUT=filelist:${MLPF_PATH}/mlpf/data/cms/pu_files_local.txt - -N=50 - -env -source /cvmfs/cms.cern.ch/cmsset_default.sh - -cd $CMSSWDIR -eval `scramv1 runtime -sh` -which python -which python3 - -env - -cd $WORKDIR - -#Generate the MC -cmsDriver.py $SAMPLE \ - --conditions auto:phase1_2023_realistic \ - --beamspot Realistic25ns13p6TeVEarly2023Collision \ - -n $N \ - --era Run3_2023 \ - --eventcontent FEVTDEBUGHLT \ - -s GEN,SIM,DIGI:pdigi_valid,L1,DIGI2RAW,HLT:@relval2023 \ - --datatier GEN-SIM \ - --geometry DB:Extended \ - --pileup $PILEUP \ - --pileup_input $PILEUP_INPUT \ - --no_exec \ - --fileout step2_phase1_new.root \ - --customise Validation/RecoParticleFlow/customize_pfanalysis.customize_step2 \ - --python_filename=step2_phase1_new.py - -#Run the reco sequences -cmsDriver.py step3 \ - --conditions auto:phase1_2023_realistic \ - --beamspot Realistic25ns13p6TeVEarly2023Collision \ - --era Run3_2023 \ - -n -1 \ - --eventcontent FEVTDEBUGHLT \ - --runUnscheduled \ - -s 
RAW2DIGI,L1Reco,RECO,RECOSIM \ - --datatier GEN-SIM-RECO \ - --geometry DB:Extended \ - --no_exec \ - --filein file:step2_phase1_new.root \ - --fileout step3_phase1_new.root \ - --customise Validation/RecoParticleFlow/customize_pfanalysis.customize_step3 \ - --python_filename=step3_phase1_new.py - -pwd -ls -lrt - -echo "process.RandomNumberGeneratorService.generator.initialSeed = $SEED" >> step2_phase1_new.py -cmsRun step2_phase1_new.py > /dev/null -#cp step2_phase1_new.root $OUTDIR/$SAMPLE/root/step2_${SEED}.root - -cmsRun step3_phase1_new.py > /dev/null -#cp step3_phase1_new.root $OUTDIR/$SAMPLE/root/step3_${SEED}.root -mv pfntuple.root pfntuple_${SEED}.root -cp pfntuple_${SEED}.root $OUTDIR/$SAMPLE/root/ - -# python3 ${MLPF_PATH}/mlpf/data/cms/postprocessing2.py --input pfntuple_${SEED}.root --outpath ./ -# bzip2 -z pfntuple_${SEED}.pkl -# cp *.pkl.bz2 $OUTDIR/$SAMPLE/raw/ - -rm -Rf /scratch/local/joosep/$SLURM_JOBID diff --git a/mlpf/data/cms/genjob_pu55to75.sh b/mlpf/data/cms/genjob_pu55to75.sh deleted file mode 100755 index 3887d94b0..000000000 --- a/mlpf/data/cms/genjob_pu55to75.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash -set -e -set -x - -OUTDIR=/local/joosep/mlpf/cms/20250508_cmssw_15_0_5_d3c6d1/pu55to75/ -CMSSWDIR=/scratch/persistent/joosep/CMSSW_15_0_5/ -MLPF_PATH=/home/joosep/particleflow/ - -#seed must be greater than 0 -SAMPLE=$1 -SEED=$2 - -WORKDIR=/scratch/local/joosep/$SLURM_JOBID/$SAMPLE/$SEED -#WORKDIR=`pwd`/$SAMPLE/$SEED -mkdir -p $WORKDIR -mkdir -p $OUTDIR - -PILEUP=Run3_Flat55To75_PoissonOOTPU -PILEUP_INPUT=filelist:${MLPF_PATH}/mlpf/data/cms/pu_files_local.txt - -N=50 - -env -source /cvmfs/cms.cern.ch/cmsset_default.sh - -cd $CMSSWDIR -eval `scramv1 runtime -sh` -which python -which python3 - -env - -cd $WORKDIR - -#Generate the MC -cmsDriver.py $SAMPLE \ - --conditions auto:phase1_2023_realistic \ - --beamspot Realistic25ns13p6TeVEarly2023Collision \ - -n $N \ - --era Run3_2023 \ - --eventcontent FEVTDEBUGHLT \ - -s 
GEN,SIM,DIGI:pdigi_valid,L1,DIGI2RAW,HLT:@relval2023 \ - --datatier GEN-SIM \ - --geometry DB:Extended \ - --pileup $PILEUP \ - --pileup_input $PILEUP_INPUT \ - --no_exec \ - --fileout step2_phase1_new.root \ - --customise Validation/RecoParticleFlow/customize_pfanalysis.customize_step2 \ - --python_filename=step2_phase1_new.py - -#Run the reco sequences -cmsDriver.py step3 \ - --conditions auto:phase1_2023_realistic \ - --beamspot Realistic25ns13p6TeVEarly2023Collision \ - --era Run3_2023 \ - -n -1 \ - --eventcontent FEVTDEBUGHLT \ - --runUnscheduled \ - -s RAW2DIGI,L1Reco,RECO,RECOSIM \ - --datatier GEN-SIM-RECO \ - --geometry DB:Extended \ - --no_exec \ - --filein file:step2_phase1_new.root \ - --fileout step3_phase1_new.root \ - --customise Validation/RecoParticleFlow/customize_pfanalysis.customize_step3 \ - --python_filename=step3_phase1_new.py - -pwd -ls -lrt - -echo "process.RandomNumberGeneratorService.generator.initialSeed = $SEED" >> step2_phase1_new.py -cmsRun step2_phase1_new.py > /dev/null -#cp step2_phase1_new.root $OUTDIR/$SAMPLE/root/step2_${SEED}.root - -cmsRun step3_phase1_new.py > /dev/null -#cp step3_phase1_new.root $OUTDIR/$SAMPLE/root/step3_${SEED}.root -mv pfntuple.root pfntuple_${SEED}.root -cp pfntuple_${SEED}.root $OUTDIR/$SAMPLE/root/ - -# python3 ${MLPF_PATH}/mlpf/data/cms/postprocessing2.py --input pfntuple_${SEED}.root --outpath ./ -# bzip2 -z pfntuple_${SEED}.pkl -# cp *.pkl.bz2 $OUTDIR/$SAMPLE/raw/ - -rm -Rf /scratch/local/joosep/$SLURM_JOBID diff --git a/mlpf/data/cms/genjob_pu55to75_val.sh b/mlpf/data/cms/genjob_pu55to75_val.sh deleted file mode 100755 index 819eabf85..000000000 --- a/mlpf/data/cms/genjob_pu55to75_val.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/bash -set -e -set -x - -OUTDIR=/local/joosep/mlpf/cms/20250630_cmssw_15_0_5_f8ae2f/pu55to75_val/ -CMSSWDIR=/scratch/persistent/joosep/CMSSW_15_0_5/ -MLPF_PATH=/home/joosep/particleflow/ - -#seed must be greater than 0 -SAMPLE=$1 -SEED=$2 - 
-WORKDIR=/scratch/local/joosep/$SLURM_JOBID/$SAMPLE/$SEED -#WORKDIR=`pwd`/$SAMPLE/$SEED -mkdir -p $WORKDIR -mkdir -p $OUTDIR/$SAMPLE/root - -PILEUP=Run3_Flat55To75_PoissonOOTPU -PILEUP_INPUT=filelist:${MLPF_PATH}/mlpf/data/cms/pu_files_local_val.txt - -N=50 - -env -source /cvmfs/cms.cern.ch/cmsset_default.sh - -cd $CMSSWDIR -eval `scramv1 runtime -sh` -which python -which python3 - -env - -cd $WORKDIR - -#Generate the MC -cmsDriver.py $SAMPLE \ - --conditions auto:phase1_2024_realistic \ - --beamspot Realistic2024ppRefCollision \ - -n $N \ - --era Run3_2024 \ - --eventcontent FEVTDEBUGHLT \ - -s GEN,SIM,DIGI:pdigi_valid,L1,DIGI2RAW,HLT:@relval2024 \ - --datatier GEN-SIM-DIGI-RAW \ - --geometry DB:Extended \ - --pileup $PILEUP \ - --pileup_input $PILEUP_INPUT \ - --no_exec \ - --fileout step2_phase1_new.root \ - --customise Validation/RecoParticleFlow/customize_pfanalysis.customize_step2 \ - --python_filename=step2_phase1_new.py - -cmsDriver.py step3 \ - --conditions auto:phase1_2024_realistic \ - --beamspot Realistic2024ppRefCollision \ - --era Run3_2024 \ - -n -1 \ - --eventcontent FEVTDEBUGHLT \ - -s RAW2DIGI,L1Reco,RECO,RECOSIM \ - --datatier GEN-SIM-RECO \ - --geometry DB:Extended \ - --no_exec \ - --filein file:step2_phase1_new.root \ - --fileout step3_phase1_new.root \ - --customise Validation/RecoParticleFlow/customize_pfanalysis.customize_step3 \ - --python_filename=step3_phase1_new.py - -pwd -ls -lrt - -echo "process.RandomNumberGeneratorService.generator.initialSeed = $SEED" >> step2_phase1_new.py -cmsRun step2_phase1_new.py > /dev/null -cp step2_phase1_new.root $OUTDIR/$SAMPLE/root/step2_${SEED}.root - -cmsRun step3_phase1_new.py > /dev/null -#cp step3_phase1_new.root $OUTDIR/$SAMPLE/root/step3_${SEED}.root -cp pfntuple.root $OUTDIR/$SAMPLE/root/pfntuple_${SEED}.root - -rm -Rf /scratch/local/joosep/$SLURM_JOBID diff --git a/mlpf/data/cms/genjob_pu55to75_val_v2.sh b/mlpf/data/cms/genjob_pu55to75_val_v2.sh deleted file mode 100755 index 
50fbc51f2..000000000 --- a/mlpf/data/cms/genjob_pu55to75_val_v2.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/bash -set -e -set -x - -OUTDIR=/local/joosep/mlpf/cms/20251001_cmssw_15_0_5_e42b72/pu55to75_val/ -CMSSWDIR=/scratch/persistent/joosep/CMSSW_15_0_5/ -MLPF_PATH=/home/joosep/particleflow/ - -#seed must be greater than 0 -SAMPLE=$1 -SEED=$2 - -WORKDIR=/scratch/local/joosep/$SLURM_JOBID/$SAMPLE/$SEED -#WORKDIR=`pwd`/$SAMPLE/$SEED -mkdir -p $WORKDIR -mkdir -p $OUTDIR/$SAMPLE/root - -PILEUP=Run3_Flat55To75_PoissonOOTPU -PILEUP_INPUT=filelist:${MLPF_PATH}/mlpf/data/cms/pu_files_local.txt - -N=50 - -env -source /cvmfs/cms.cern.ch/cmsset_default.sh - -cd $CMSSWDIR -eval `scramv1 runtime -sh` -which python -which python3 - -env - -cd $WORKDIR - -#Generate the MC -cmsDriver.py $SAMPLE \ - --conditions auto:phase1_2023_realistic \ - --beamspot Realistic25ns13p6TeVEarly2023Collision \ - -n $N \ - --era Run3_2023 \ - --eventcontent FEVTDEBUGHLT \ - -s GEN,SIM,DIGI:pdigi_valid,L1,DIGI2RAW,HLT:@relval2023 \ - --datatier GEN-SIM \ - --geometry DB:Extended \ - --pileup $PILEUP \ - --pileup_input $PILEUP_INPUT \ - --no_exec \ - --fileout step2_phase1_new.root \ - --customise Validation/RecoParticleFlow/customize_pfanalysis.customize_step2 \ - --python_filename=step2_phase1_new.py - -#Run the reco sequences -cmsDriver.py step3 \ - --conditions auto:phase1_2023_realistic \ - --beamspot Realistic25ns13p6TeVEarly2023Collision \ - --era Run3_2023 \ - -n -1 \ - --eventcontent FEVTDEBUGHLT \ - --runUnscheduled \ - -s RAW2DIGI,L1Reco,RECO,RECOSIM \ - --datatier GEN-SIM-RECO \ - --geometry DB:Extended \ - --no_exec \ - --filein file:step2_phase1_new.root \ - --fileout step3_phase1_new.root \ - --customise Validation/RecoParticleFlow/customize_pfanalysis.customize_step3 \ - --python_filename=step3_phase1_new.py - -pwd -ls -lrt - -echo "process.RandomNumberGeneratorService.generator.initialSeed = $SEED" >> step2_phase1_new.py -cmsRun step2_phase1_new.py > /dev/null -cp step2_phase1_new.root 
$OUTDIR/$SAMPLE/root/step2_${SEED}.root - -cmsRun step3_phase1_new.py > /dev/null -cp pfntuple.root $OUTDIR/$SAMPLE/root/pfntuple_${SEED}.root - -rm -Rf /scratch/local/joosep/$SLURM_JOBID diff --git a/mlpf/data/cms/genjob_pu55to75_val_v3.sh b/mlpf/data/cms/genjob_pu55to75_val_v3.sh deleted file mode 100755 index efe291062..000000000 --- a/mlpf/data/cms/genjob_pu55to75_val_v3.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/bash -set -e -set -x - -OUTDIR=/local/joosep/mlpf/cms/20251125_cmssw_15_0_5_117d32/pu55to75_val/ -CMSSWDIR=/scratch/persistent/joosep/CMSSW_15_0_5/ -MLPF_PATH=/home/joosep/particleflow/ - -#seed must be greater than 0 -SAMPLE=$1 -SEED=$2 - -WORKDIR=/scratch/local/joosep/$SLURM_JOBID/$SAMPLE/$SEED -#WORKDIR=`pwd`/$SAMPLE/$SEED -mkdir -p $WORKDIR -mkdir -p $OUTDIR/$SAMPLE/root - -PILEUP=Run3_Flat55To75_PoissonOOTPU -PILEUP_INPUT=filelist:${MLPF_PATH}/mlpf/data/cms/pu_files_local_val2.txt - -N=50 - -env -source /cvmfs/cms.cern.ch/cmsset_default.sh - -cd $CMSSWDIR -eval `scramv1 runtime -sh` -which python -which python3 - -env - -cd $WORKDIR - -#Generate the MC -cmsDriver.py $SAMPLE \ - --conditions 140X_mcRun3_2024_realistic_v26 \ - --beamspot DBrealistic \ - -n $N \ - --era Run3_2024 \ - --eventcontent FEVTDEBUGHLT \ - -s GEN,SIM,DIGI:pdigi_valid,L1,DIGI2RAW,HLT:@relval2023 \ - --datatier GEN-SIM \ - --geometry DB:Extended \ - --pileup $PILEUP \ - --pileup_input $PILEUP_INPUT \ - --no_exec \ - --fileout step2_phase1_new.root \ - --customise Validation/RecoParticleFlow/customize_pfanalysis.customize_step2 \ - --python_filename=step2_phase1_new.py - -# --customise_commands "process.mix.input.nbPileupEvents.probFunctionVariable = 
cms.vint32(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120) \n process.mix.input.nbPileupEvents.probValue = cms.vdouble(0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446,0.00826446)" - -#Run the reco sequences -cmsDriver.py step3 \ - --conditions 140X_mcRun3_2024_realistic_v26 \ - --beamspot DBrealistic \ - --era Run3_2024 \ - -n -1 \ - --eventcontent FEVTDEBUGHLT \ - --runUnscheduled \ - -s 
RAW2DIGI,L1Reco,RECO,RECOSIM \ - --datatier GEN-SIM-RECO \ - --geometry DB:Extended \ - --no_exec \ - --filein file:step2_phase1_new.root \ - --fileout step3_phase1_new.root \ - --customise Validation/RecoParticleFlow/customize_pfanalysis.customize_step3 \ - --python_filename=step3_phase1_new.py - -pwd -ls -lrt - -echo "process.RandomNumberGeneratorService.generator.initialSeed = $SEED" >> step2_phase1_new.py -cmsRun step2_phase1_new.py > /dev/null -cp step2_phase1_new.root $OUTDIR/$SAMPLE/root/step2_${SEED}.root - -cmsRun step3_phase1_new.py > /dev/null -cp pfntuple.root $OUTDIR/$SAMPLE/root/pfntuple_${SEED}.root - -rm -Rf /scratch/local/joosep/$SLURM_JOBID diff --git a/mlpf/data/cms/multicrab.py b/mlpf/data/cms/multicrab.py deleted file mode 100644 index 9e4dafad7..000000000 --- a/mlpf/data/cms/multicrab.py +++ /dev/null @@ -1,72 +0,0 @@ -import os - -from CRABAPI.RawCommand import crabCommand -from CRABClient.UserUtilities import config - - -def submit(config): - crabCommand("submit", config=config) - # save crab config for the future - with open( - config.General.workArea + "/crab_" + config.General.requestName + "/crab_config.py", - "w", - ) as fi: - fi.write(config.pythonise_()) - - -# https://cmsweb.cern.ch/das/request?view=plain&limit=50&instance=prod%2Fglobal&input=%2FRelVal*%2FCMSSW_11_0_0_pre4*%2FGEN-SIM-DIGI-RAW -samples = [ - ( - "/RelValQCD_FlatPt_15_3000HS_14/CMSSW_11_0_0_pre12-PU_110X_mcRun3_2021_realistic_v5-v1/GEN-SIM-DIGI-RAW", - "QCD_run3", - ), - ( - "/RelValNuGun/CMSSW_11_0_0_pre12-PU_110X_mcRun3_2021_realistic_v5-v1/GEN-SIM-DIGI-RAW", - "NuGun_run3", - ), - ( - "/RelValTTbar_14TeV/CMSSW_11_0_0_pre12-PU_110X_mcRun3_2021_realistic_v5-v1/GEN-SIM-DIGI-RAW", - "TTbar_run3", - ), - # ("/RelValTTbar_14TeV/CMSSW_11_0_0_pre12-PU25ns_110X_mcRun4_realistic_v2_2026D41PU140-v1/GEN-SIM-DIGI-RAW", - # "TTbar_run4_pu140"), - # ("/RelValTTbar_14TeV/CMSSW_11_0_0_pre12-PU25ns_110X_mcRun4_realistic_v2_2026D41PU200-v1/GEN-SIM-DIGI-RAW", - # "TTbar_run4_pu200") -] 
- -if __name__ == "__main__": - for dataset, name in samples: - - if os.path.isfile("step3_dump.pyc"): - os.remove("step3_dump.pyc") - - conf = config() - - conf.General.requestName = name - conf.General.transferLogs = True - conf.General.workArea = "crab_projects" - conf.JobType.pluginName = "Analysis" - conf.JobType.psetName = "step3_dump.py" - conf.JobType.maxJobRuntimeMin = 8 * 60 - conf.JobType.allowUndistributedCMSSW = True - conf.JobType.outputFiles = [ - "step3_inMINIAODSIM.root", - "step3_AOD.root", - ] - conf.JobType.maxMemoryMB = 6000 - conf.JobType.numCores = 2 - - conf.Data.inputDataset = dataset - conf.Data.splitting = "LumiBased" - conf.Data.unitsPerJob = 2 - # conf.Data.totalUnits = 50 - conf.Data.publication = False - conf.Data.outputDatasetTag = "pfvalidation" - # conf.Data.ignoreLocality = True - - # Where the output files will be transmitted to - # conf.Site.storageSite = 'T3_US_Baylor' - conf.Site.storageSite = "T2_US_Caltech" - # conf.Site.whitelist = ["T2_US_Caltech", "T2_CH_CERN"] - - submit(conf) diff --git a/mlpf/data/cms/postprocessing2.py b/mlpf/data/cms/postprocessing2.py index fe6437c02..13878ea6a 100644 --- a/mlpf/data/cms/postprocessing2.py +++ b/mlpf/data/cms/postprocessing2.py @@ -981,12 +981,12 @@ def parse_args(): parser.add_argument("--input", type=str, help="Input file from PFAnalysis", required=True) parser.add_argument("--outpath", type=str, default="raw", help="output path") parser.add_argument( - "--save-full-graph", + "--save_full_graph", action="store_true", help="save the full event graph", ) parser.add_argument( - "--num-events", + "--num_events", type=int, help="number of events to process", default=-1, diff --git a/mlpf/data/cms/postprocessing_jobs.py b/mlpf/data/cms/postprocessing_jobs.py deleted file mode 100644 index 9d41e4098..000000000 --- a/mlpf/data/cms/postprocessing_jobs.py +++ /dev/null @@ -1,62 +0,0 @@ -import os -import glob - - -def chunks(lst, n): - """Yield successive n-sized chunks from lst.""" - for i 
in range(0, len(lst), n): - yield lst[i : i + n] - - -def write_script(infiles, outfiles): - s = [] - s += ["#!/bin/bash"] - s += ["#SBATCH --partition short"] - s += ["#SBATCH --cpus-per-task 1"] - s += ["#SBATCH --mem-per-cpu 4G"] - s += ["#SBATCH -o logs/slurm-%x-%j-%N.out"] - s += ["set -e"] - - for inf, outf in zip(infiles, outfiles): - outpath = os.path.dirname(outf) - - outf_no_bzip = outf.replace(".pkl.bz2", ".pkl") - s += [f"if [ ! -f {outf} ]; then"] - s += [ - f" echo 'trying {inf}'", - " singularity exec -B /local /home/software/singularity/pytorch.simg:2024-08-18" - + f" python3 mlpf/data/cms/postprocessing2.py --input {inf} --outpath {outpath} && bzip2 -z {outf_no_bzip} || echo 'FAIL {inf}'", - ] - s += ["fi"] - ret = "\n".join(s) - return ret - - -samples = [ - # PU - "/local/joosep/mlpf/cms/20250508_cmssw_15_0_5_d3c6d1/pu55to75/TTbar_14TeV_TuneCUETP8M1_cfi/", - "/local/joosep/mlpf/cms/20250508_cmssw_15_0_5_d3c6d1/pu55to75/QCDForPF_14TeV_TuneCUETP8M1_cfi", - "/local/joosep/mlpf/cms/20250508_cmssw_15_0_5_d3c6d1/pu55to75/ZTT_All_hadronic_14TeV_TuneCUETP8M1_cfi", - # NoPU - "/local/joosep/mlpf/cms/20250508_cmssw_15_0_5_d3c6d1/nopu/TTbar_14TeV_TuneCUETP8M1_cfi", - "/local/joosep/mlpf/cms/20250508_cmssw_15_0_5_d3c6d1/nopu/QCDForPF_14TeV_TuneCUETP8M1_cfi", - "/local/joosep/mlpf/cms/20250508_cmssw_15_0_5_d3c6d1/nopu/ZTT_All_hadronic_14TeV_TuneCUETP8M1_cfi", -] - - -def inf_to_outf(inf): - return inf.replace(".root", ".pkl.bz2").replace("/root/", "/raw/") - - -ichunk = 1 -for sample in samples: - infiles = sorted(list(glob.glob(f"{sample}/root/pfntuple*.root"))) - infiles = [inf for inf in infiles if not os.path.isfile(inf_to_outf(inf))] - for infiles_chunk in chunks(infiles, 10): - outfiles_chunk = [inf_to_outf(inf) for inf in infiles_chunk] - os.makedirs(os.path.dirname(outfiles_chunk[0]), exist_ok=True) - scr = write_script(infiles_chunk, outfiles_chunk) - ofname = f"jobscripts/postproc_{ichunk}.sh" - with open(ofname, "w") as outfi: - outfi.write(scr) - 
ichunk += 1 diff --git a/mlpf/data/cms/pu_files.txt b/mlpf/data/cms/pu_files.txt index bf38307a3..f335d860a 100644 --- a/mlpf/data/cms/pu_files.txt +++ b/mlpf/data/cms/pu_files.txt @@ -1,26 +1,100 @@ -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/1e1225c4-2461-41b9-85be-db2fdd24f004.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/20394926-521a-4e8f-ad9a-4be041a29895.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/270df9d2-8a37-4f79-8c66-c7d4a4103d30.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/30a9eac8-f576-4658-9a7e-fc7644428d3c.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/45019cf6-efe6-4ec9-94e9-529c437524f9.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/550a00d5-8a2f-4ed5-a9f2-8a9a7ac46230.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/5603cd43-2f98-464a-8ae1-e3ee11baa295.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/6a093d4b-6102-4b86-ba7c-fed41bf51093.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/6d6a6fa0-457f-428e-bc20-ff78e40ec0b4.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/72284c20-70b7-4e67-80a2-522986e59443.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/73916dee-4245-4b93-be51-4438ddeab67c.root 
-/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/73e9fa89-e75d-46c2-92c4-47c288da9cf1.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/78690f43-ec22-49a7-8889-40743b53d2b8.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/7a7dbc11-8fe1-4f95-8eef-31ce7b8981d1.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/7aeb6826-1bd2-44fa-aa31-f30496c01613.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/7f2cafa1-00ed-441a-92c7-57394c0f2cd0.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/86e83280-5c20-4231-aba2-ce2439f20a1c.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/985202c3-c1f2-48a0-be06-f7107719b85f.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/9c21174b-b205-4309-9793-a840dfc06ce6.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/ae524eae-0c04-49d6-ab27-944efe81f04f.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/af366b17-a172-436f-925a-8d7829a8cd8f.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/b5afd1ed-fbbd-4713-a3b5-dab9fed963fe.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/bafb8604-1d7a-4420-81aa-398c0d5db308.root 
-/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/c45dbf7f-5ba8-475b-889f-bea59e966f1b.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/ebf10c30-184c-44b7-b433-19fff9299248.root -/store/relval/CMSSW_14_0_6/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v10_STD_2024_MinBias-v2/2590000/f3e6930e-d2ed-475a-967e-168a71a694eb.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/868bd0fc-ab87-4836-987a-009a89334314.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/254b8626-0a60-4f76-84bc-528ed9bf4028.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6c7034d9-bab5-499f-ac2b-0d2dad0fc831.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4c4502f0-4622-44c7-b30e-d39fe3e1bbd5.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/eb4ea6a0-53b3-41fa-bb26-4694612bd38e.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/abe960a7-cc73-4c2d-8dd0-7a34610fecfd.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6bcbcb1a-7736-4c7b-b925-7aedf087fd0c.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e3b50d04-c783-4d10-9412-0b862fff9397.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4553d463-7ab4-4174-9a49-606e084fb6d7.root 
+/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/7f5fdf57-d9f0-4ab8-8d27-c55cc05f6a10.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/20e67f0e-7c7d-4ab1-8ee8-83f3054a9b64.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/ca9cbc2f-ecf8-4e4c-96ca-8db9376b667a.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/7cdca843-6e7c-427a-996c-dbc2c93a7cbe.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/cd37e663-8809-45b4-956d-86ab9faf15f8.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/c1ec3eed-ae1d-4112-bdf5-93124e0058d0.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/3ca37924-5eb9-4681-8fed-59ea7af93004.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/f0397197-d51c-419e-8d79-40b8a38ae84f.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/3afb2c5d-6048-41c4-b509-1c588ff22c4a.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e55fda6e-f4d0-4b2e-a9be-fd2dc6b0d54f.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/284f4d3b-7959-4070-a004-24f28f186a29.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/eed965e8-c16e-4ca7-95f4-7f9244ebd7f0.root 
+/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/f76535f4-1e3d-4538-bcf9-3be725ad4c89.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/92f58fd0-cc55-4cc0-8130-041f6ae97221.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/0b908e5a-bccf-40d3-acf8-4995e08bb931.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/a27f46f2-424f-4ea1-98b0-d8626711195a.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e2fda9eb-8cda-42e2-960b-e67b8398ad38.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6068febc-17b6-4c7d-b1c2-3ef5e455cf47.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/5642b79a-32f3-4b6d-9be2-640bd2a5e8a2.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/2a3a0054-e344-4f21-a564-cb370801ddf7.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/569bf4c4-e68c-4361-bc96-f0ddf0d790a1.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4941d073-9cd1-49b9-9d45-eb320b063c14.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e54b0c5d-6111-4767-9408-da23a689217f.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e351a3be-8316-48aa-82a3-f6c2605a8fc4.root 
+/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/8599020b-2e11-4599-a187-98010da229cb.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/9ef49fc3-b6aa-471c-8959-e5fbf737f4f4.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/ea5a885c-371f-4e0d-b88c-a503b6ae9639.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/a8b37365-46c8-499a-9e81-ebaa445a05c5.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/7a5e7c7c-35a7-4287-9538-1aa4e9aa7254.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/adbb2885-f619-4a2e-807e-3e5691151577.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/aa33c777-c202-4285-bc91-73c69fa6755f.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4a3c24f8-a4e0-46a0-84cb-57d67efb7111.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/ace54945-77a7-4ffd-a024-f2f237ccd021.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6afe44e0-690b-4634-a32c-ebd705cbfd07.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/44c88040-2361-43a9-bbe4-a5b014f4f2e5.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6a18dbd4-1741-40fa-bca4-7fe021832d52.root 
+/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1f141624-c992-443e-a328-31b5cf75e18e.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/06241e2f-9c12-4d18-9aa7-e60b1ece1cee.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/d54ba98f-990a-43eb-9137-80a46570d398.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/517694e4-12d7-44ec-98ac-85414f9d9b26.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/8ec67618-7494-43ca-992d-d738dc904e5c.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/473acffa-0691-4d57-b354-1b18fc80da49.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/b23f0226-8424-49cf-bd19-9eaf50030c2e.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/39ea49f7-6464-4864-a889-eb2eb688f9da.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/498fd9e9-bc25-42d8-a32f-d613965d85f1.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/c39c4355-95c3-42dc-a8e5-f047a5eb4a3e.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/2f05df1d-91b2-4ae2-be05-e93d4e780cd1.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/ab1ebb2e-a10e-405a-a732-be8bb9413212.root 
+/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/41d2d282-cced-4197-a0ef-9a51cc9f036d.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1d5248aa-c06a-4246-9aaa-7877d26f7770.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/2e9ab8db-fa66-4660-836c-6e6cd6420862.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4f40ca1c-6634-43c4-a1c1-61e5bc8450c6.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/363d5f69-3331-404d-a2aa-24277a5b62bd.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/ceb90e16-3231-48f0-8a79-d9ae00200672.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e928d6a2-5952-45cf-ac72-a2cc5cd5f16e.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/c9bdc242-b9c8-43fa-b3df-a67dbd958e30.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/d15a342a-9303-4697-ab2b-903514be1f73.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/16494d37-5116-4faa-b7d5-b8b8cd8476a7.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1e5ec87c-83a0-4144-b451-b6efe00e2a2f.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/c9f047d1-bdba-4050-9b57-79cd9b727720.root 
+/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/049294f5-fd84-4270-991d-0c0ab4efe65f.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6a5ba4cb-6010-4b9b-ac2d-78c699dc6579.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4dbc3bb2-da2e-451b-9fa4-6de8920c0385.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e2bf6238-b81a-4bc6-bad7-4e76d5877793.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/8949ce13-468c-4a4e-a1bc-84060a9798b8.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e11018c2-1a8f-4e58-b29a-0c751687e9c8.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/d52c2280-e636-4e64-b941-db16379b7131.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1c9f9ce0-dca1-47d1-a267-4369a06e8b21.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1af3d728-9a4e-4b13-867e-941f3a44af5e.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/b31c6e0e-4192-4767-bd26-802c94f31f86.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/635c713a-0a53-46d0-9c84-2ecdf55a84a9.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/9d8d134e-8caf-4300-8571-400efdbadc56.root 
+/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/f1dca930-ee74-41cd-9b24-c64c662b5443.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/c374f9d8-ef17-4cde-8096-1744166aeb97.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/d16b02b3-a586-458b-b6ad-75fc8b982ce9.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/8f52e12e-a328-4f38-a18a-0b2461efbb5d.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/89aa992b-9137-4f26-9ac0-6f2419a42525.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/a3d432d0-360f-4ccd-a1b3-540a8246a74c.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/36abce8c-280e-42cc-813a-946e581719e1.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/33f8bbfc-8463-4ad3-8e49-ab45aaaf70de.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/bc118829-386b-4acd-9dbb-0fb06f4eb1c3.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/0e917484-df18-4c46-a0b6-3732d9e8b327.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/a447a799-e2d0-46bc-ae96-2d28ceeb85b1.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/3f0a96c3-ae9a-42f3-b580-6b9d2ed2c3a9.root 
+/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/f28a4189-6027-4bd6-b228-faac9834258a.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/8327e4f9-ad29-40c3-a192-def18506baa2.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1e3e734f-b28f-4a71-8ec1-b5a88c780f00.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/600431e5-05a0-47f5-b429-9141ebd817b9.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6deeba91-1221-43ac-a2a0-2dc16bdaed1a.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/58a44ed4-2500-4553-9539-801abf929ecc.root +/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/02813942-aba2-41a6-9427-31db9b0374cf.root diff --git a/mlpf/data/cms/pu_files_local.txt b/mlpf/data/cms/pu_files_local.txt index 383b75f9f..b253b8762 100644 --- a/mlpf/data/cms/pu_files_local.txt +++ b/mlpf/data/cms/pu_files_local.txt @@ -1,47 +1,100 @@ -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/04b0e2da-5bcc-42f3-9129-bf07f3868439.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/05509c9d-f1b4-4400-8ebc-80c3d3d15c34.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/06c1dd3b-2d68-45ff-a1ee-7309e0475839.root 
-file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/0d8d8b1f-a913-4b6a-bb99-d681a5a2d111.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/0dbd501f-1489-4b58-a300-add86c78b616.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/1c66de5b-3602-42c8-aadf-811138206c1a.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/20018e67-c181-4abf-8d7e-e00125fc6b1c.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/26431b82-a782-4ac2-aba2-53f86bc48141.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/34a60ad4-ba90-4a26-b2ba-d82af7022f69.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/3848635d-2064-42ed-9a58-245212af57ce.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/3b62e708-4290-4b68-96bd-ac162bdc213c.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/3e53f47d-2785-4b40-832f-976e5c7f99cc.root 
-file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/3e7a94b0-3a38-467e-847f-130476108f33.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/436d27b4-1911-419a-9488-7c5ef907cbf8.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/45da01a0-2342-4497-bd64-7a4b68fcab42.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/463e0371-c068-49a3-9492-0440db62ec24.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/46d19ab7-bd34-43f3-88c4-50fce465d387.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/470844c5-72fa-4146-9fde-9ef0002a88e5.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/48897e7e-95a0-4af4-a84d-6dc18abca201.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/535eb495-c419-41ae-b931-db4993dc3cff.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/5723dc2f-91c3-4352-8226-d15f6463bc43.root 
-file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/5bf7051d-ca34-469d-8750-d4af7791405d.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/624d7332-1b40-4213-8216-3b866a75f238.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/7808cc97-eaf7-463d-a22d-94c448510613.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/78d278b7-03e2-4f80-b77d-0cc79e4f0787.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/7dabdf75-0061-4071-ba86-76aa9d86f34c.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/8263abd0-a8f0-41f5-89f5-1e957defd854.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/851ba514-0a10-4049-abff-80a8e6adf734.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/85540035-313e-481d-8e2d-6ff82ec6b90f.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/941c7020-0ff7-449a-9048-c364b346c1e5.root 
-file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/992e9de5-68b3-4dbd-bdbb-4a0e595d7bd0.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/9bf620e9-ba5c-47af-9119-498800633d7d.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/a0029411-37e6-4a4f-a114-47446ff5a5c7.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/a3f4dd02-18ed-417b-a8c0-c2c6cf95ae0c.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/af8551e6-fc4e-4c5c-aaa9-445a72ed2858.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/b9a9a06e-2220-4e27-b991-fe9e14735702.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/bb552f54-0acd-45a1-9abc-6ead9a93fcce.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/bed3eb75-f466-4997-a07c-a03f2f33546b.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/bf53eb3e-7d4e-4b1e-b74a-44b086dfa6d4.root 
-file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/c075c80e-f227-4ee0-8901-7262f35771db.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/c3cac888-03a9-423e-92bf-05d99256b183.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/c495c184-ab30-4871-8be0-a6478293fd86.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/c6487285-8cb6-43db-9dce-4bdc8d78e28f.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/cd6a8dcd-d132-49f9-a988-bd3110793212.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/d45182bc-5952-4b75-8749-13df1e68d52e.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/db18d422-199c-425b-9c9a-fd2bdf5d6724.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/e8512379-6c42-4678-9b18-9a3ea9faabc5.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/868bd0fc-ab87-4836-987a-009a89334314.root 
+file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/254b8626-0a60-4f76-84bc-528ed9bf4028.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6c7034d9-bab5-499f-ac2b-0d2dad0fc831.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4c4502f0-4622-44c7-b30e-d39fe3e1bbd5.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/eb4ea6a0-53b3-41fa-bb26-4694612bd38e.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/abe960a7-cc73-4c2d-8dd0-7a34610fecfd.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6bcbcb1a-7736-4c7b-b925-7aedf087fd0c.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e3b50d04-c783-4d10-9412-0b862fff9397.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4553d463-7ab4-4174-9a49-606e084fb6d7.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/7f5fdf57-d9f0-4ab8-8d27-c55cc05f6a10.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/20e67f0e-7c7d-4ab1-8ee8-83f3054a9b64.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/ca9cbc2f-ecf8-4e4c-96ca-8db9376b667a.root 
+file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/7cdca843-6e7c-427a-996c-dbc2c93a7cbe.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/cd37e663-8809-45b4-956d-86ab9faf15f8.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/c1ec3eed-ae1d-4112-bdf5-93124e0058d0.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/3ca37924-5eb9-4681-8fed-59ea7af93004.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/f0397197-d51c-419e-8d79-40b8a38ae84f.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/3afb2c5d-6048-41c4-b509-1c588ff22c4a.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e55fda6e-f4d0-4b2e-a9be-fd2dc6b0d54f.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/284f4d3b-7959-4070-a004-24f28f186a29.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/eed965e8-c16e-4ca7-95f4-7f9244ebd7f0.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/f76535f4-1e3d-4538-bcf9-3be725ad4c89.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/92f58fd0-cc55-4cc0-8130-041f6ae97221.root 
+file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/0b908e5a-bccf-40d3-acf8-4995e08bb931.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/a27f46f2-424f-4ea1-98b0-d8626711195a.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e2fda9eb-8cda-42e2-960b-e67b8398ad38.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6068febc-17b6-4c7d-b1c2-3ef5e455cf47.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/5642b79a-32f3-4b6d-9be2-640bd2a5e8a2.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/2a3a0054-e344-4f21-a564-cb370801ddf7.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/569bf4c4-e68c-4361-bc96-f0ddf0d790a1.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4941d073-9cd1-49b9-9d45-eb320b063c14.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e54b0c5d-6111-4767-9408-da23a689217f.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e351a3be-8316-48aa-82a3-f6c2605a8fc4.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/8599020b-2e11-4599-a187-98010da229cb.root 
+file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/9ef49fc3-b6aa-471c-8959-e5fbf737f4f4.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/ea5a885c-371f-4e0d-b88c-a503b6ae9639.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/a8b37365-46c8-499a-9e81-ebaa445a05c5.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/7a5e7c7c-35a7-4287-9538-1aa4e9aa7254.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/adbb2885-f619-4a2e-807e-3e5691151577.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/aa33c777-c202-4285-bc91-73c69fa6755f.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4a3c24f8-a4e0-46a0-84cb-57d67efb7111.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/ace54945-77a7-4ffd-a024-f2f237ccd021.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6afe44e0-690b-4634-a32c-ebd705cbfd07.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/44c88040-2361-43a9-bbe4-a5b014f4f2e5.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6a18dbd4-1741-40fa-bca4-7fe021832d52.root 
+file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1f141624-c992-443e-a328-31b5cf75e18e.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/06241e2f-9c12-4d18-9aa7-e60b1ece1cee.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/d54ba98f-990a-43eb-9137-80a46570d398.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/517694e4-12d7-44ec-98ac-85414f9d9b26.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/8ec67618-7494-43ca-992d-d738dc904e5c.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/473acffa-0691-4d57-b354-1b18fc80da49.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/b23f0226-8424-49cf-bd19-9eaf50030c2e.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/39ea49f7-6464-4864-a889-eb2eb688f9da.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/498fd9e9-bc25-42d8-a32f-d613965d85f1.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/c39c4355-95c3-42dc-a8e5-f047a5eb4a3e.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/2f05df1d-91b2-4ae2-be05-e93d4e780cd1.root 
+file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/ab1ebb2e-a10e-405a-a732-be8bb9413212.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/41d2d282-cced-4197-a0ef-9a51cc9f036d.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1d5248aa-c06a-4246-9aaa-7877d26f7770.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/2e9ab8db-fa66-4660-836c-6e6cd6420862.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4f40ca1c-6634-43c4-a1c1-61e5bc8450c6.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/363d5f69-3331-404d-a2aa-24277a5b62bd.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/ceb90e16-3231-48f0-8a79-d9ae00200672.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e928d6a2-5952-45cf-ac72-a2cc5cd5f16e.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/c9bdc242-b9c8-43fa-b3df-a67dbd958e30.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/d15a342a-9303-4697-ab2b-903514be1f73.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/16494d37-5116-4faa-b7d5-b8b8cd8476a7.root 
+file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1e5ec87c-83a0-4144-b451-b6efe00e2a2f.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/c9f047d1-bdba-4050-9b57-79cd9b727720.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/049294f5-fd84-4270-991d-0c0ab4efe65f.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6a5ba4cb-6010-4b9b-ac2d-78c699dc6579.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4dbc3bb2-da2e-451b-9fa4-6de8920c0385.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e2bf6238-b81a-4bc6-bad7-4e76d5877793.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/8949ce13-468c-4a4e-a1bc-84060a9798b8.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e11018c2-1a8f-4e58-b29a-0c751687e9c8.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/d52c2280-e636-4e64-b941-db16379b7131.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1c9f9ce0-dca1-47d1-a267-4369a06e8b21.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1af3d728-9a4e-4b13-867e-941f3a44af5e.root 
+file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/b31c6e0e-4192-4767-bd26-802c94f31f86.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/635c713a-0a53-46d0-9c84-2ecdf55a84a9.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/9d8d134e-8caf-4300-8571-400efdbadc56.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/f1dca930-ee74-41cd-9b24-c64c662b5443.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/c374f9d8-ef17-4cde-8096-1744166aeb97.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/d16b02b3-a586-458b-b6ad-75fc8b982ce9.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/8f52e12e-a328-4f38-a18a-0b2461efbb5d.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/89aa992b-9137-4f26-9ac0-6f2419a42525.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/a3d432d0-360f-4ccd-a1b3-540a8246a74c.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/36abce8c-280e-42cc-813a-946e581719e1.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/33f8bbfc-8463-4ad3-8e49-ab45aaaf70de.root 
+file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/bc118829-386b-4acd-9dbb-0fb06f4eb1c3.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/0e917484-df18-4c46-a0b6-3732d9e8b327.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/a447a799-e2d0-46bc-ae96-2d28ceeb85b1.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/3f0a96c3-ae9a-42f3-b580-6b9d2ed2c3a9.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/f28a4189-6027-4bd6-b228-faac9834258a.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/8327e4f9-ad29-40c3-a192-def18506baa2.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1e3e734f-b28f-4a71-8ec1-b5a88c780f00.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/600431e5-05a0-47f5-b429-9141ebd817b9.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6deeba91-1221-43ac-a2a0-2dc16bdaed1a.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/58a44ed4-2500-4553-9539-801abf929ecc.root +file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/02813942-aba2-41a6-9427-31db9b0374cf.root diff --git a/mlpf/data/cms/pu_files_local_val.txt 
b/mlpf/data/cms/pu_files_local_val.txt deleted file mode 100644 index cd3bae04e..000000000 --- a/mlpf/data/cms/pu_files_local_val.txt +++ /dev/null @@ -1,47 +0,0 @@ -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/20018e67-c181-4abf-8d7e-e00125fc6b1c.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/c495c184-ab30-4871-8be0-a6478293fd86.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/c075c80e-f227-4ee0-8901-7262f35771db.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/c3cac888-03a9-423e-92bf-05d99256b183.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/06c1dd3b-2d68-45ff-a1ee-7309e0475839.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/cd6a8dcd-d132-49f9-a988-bd3110793212.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/624d7332-1b40-4213-8216-3b866a75f238.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/26431b82-a782-4ac2-aba2-53f86bc48141.root 
-file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/992e9de5-68b3-4dbd-bdbb-4a0e595d7bd0.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/7808cc97-eaf7-463d-a22d-94c448510613.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/a0029411-37e6-4a4f-a114-47446ff5a5c7.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/470844c5-72fa-4146-9fde-9ef0002a88e5.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/535eb495-c419-41ae-b931-db4993dc3cff.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/48897e7e-95a0-4af4-a84d-6dc18abca201.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/d45182bc-5952-4b75-8749-13df1e68d52e.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/bf53eb3e-7d4e-4b1e-b74a-44b086dfa6d4.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/9bf620e9-ba5c-47af-9119-498800633d7d.root 
-file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/0d8d8b1f-a913-4b6a-bb99-d681a5a2d111.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/7dabdf75-0061-4071-ba86-76aa9d86f34c.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/46d19ab7-bd34-43f3-88c4-50fce465d387.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/3b62e708-4290-4b68-96bd-ac162bdc213c.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/45da01a0-2342-4497-bd64-7a4b68fcab42.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/e8512379-6c42-4678-9b18-9a3ea9faabc5.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/436d27b4-1911-419a-9488-7c5ef907cbf8.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/5723dc2f-91c3-4352-8226-d15f6463bc43.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/941c7020-0ff7-449a-9048-c364b346c1e5.root 
-file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/78d278b7-03e2-4f80-b77d-0cc79e4f0787.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/c6487285-8cb6-43db-9dce-4bdc8d78e28f.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/3e7a94b0-3a38-467e-847f-130476108f33.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/3e53f47d-2785-4b40-832f-976e5c7f99cc.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/05509c9d-f1b4-4400-8ebc-80c3d3d15c34.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/db18d422-199c-425b-9c9a-fd2bdf5d6724.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/b9a9a06e-2220-4e27-b991-fe9e14735702.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/af8551e6-fc4e-4c5c-aaa9-445a72ed2858.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/3848635d-2064-42ed-9a58-245212af57ce.root 
-file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/04b0e2da-5bcc-42f3-9129-bf07f3868439.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/8263abd0-a8f0-41f5-89f5-1e957defd854.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/34a60ad4-ba90-4a26-b2ba-d82af7022f69.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/bed3eb75-f466-4997-a07c-a03f2f33546b.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/463e0371-c068-49a3-9492-0440db62ec24.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/0dbd501f-1489-4b58-a300-add86c78b616.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/85540035-313e-481d-8e2d-6ff82ec6b90f.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/5bf7051d-ca34-469d-8750-d4af7791405d.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/a3f4dd02-18ed-417b-a8c0-c2c6cf95ae0c.root 
-file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/bb552f54-0acd-45a1-9abc-6ead9a93fcce.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/1c66de5b-3602-42c8-aadf-811138206c1a.root -file:/scratch/persistent/joosep/cms/store/relval/CMSSW_14_1_0_pre7/RelValMinBias_14TeV/GEN-SIM/140X_mcRun3_2024_realistic_v21_STD_MinBias_2026D110_GenSim-v1/2580000/851ba514-0a10-4049-abff-80a8e6adf734.root diff --git a/mlpf/data/cms/pu_files_local_val2.txt b/mlpf/data/cms/pu_files_local_val2.txt deleted file mode 100644 index b253b8762..000000000 --- a/mlpf/data/cms/pu_files_local_val2.txt +++ /dev/null @@ -1,100 +0,0 @@ -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/868bd0fc-ab87-4836-987a-009a89334314.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/254b8626-0a60-4f76-84bc-528ed9bf4028.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6c7034d9-bab5-499f-ac2b-0d2dad0fc831.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4c4502f0-4622-44c7-b30e-d39fe3e1bbd5.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/eb4ea6a0-53b3-41fa-bb26-4694612bd38e.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/abe960a7-cc73-4c2d-8dd0-7a34610fecfd.root 
-file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6bcbcb1a-7736-4c7b-b925-7aedf087fd0c.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e3b50d04-c783-4d10-9412-0b862fff9397.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4553d463-7ab4-4174-9a49-606e084fb6d7.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/7f5fdf57-d9f0-4ab8-8d27-c55cc05f6a10.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/20e67f0e-7c7d-4ab1-8ee8-83f3054a9b64.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/ca9cbc2f-ecf8-4e4c-96ca-8db9376b667a.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/7cdca843-6e7c-427a-996c-dbc2c93a7cbe.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/cd37e663-8809-45b4-956d-86ab9faf15f8.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/c1ec3eed-ae1d-4112-bdf5-93124e0058d0.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/3ca37924-5eb9-4681-8fed-59ea7af93004.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/f0397197-d51c-419e-8d79-40b8a38ae84f.root 
-file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/3afb2c5d-6048-41c4-b509-1c588ff22c4a.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e55fda6e-f4d0-4b2e-a9be-fd2dc6b0d54f.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/284f4d3b-7959-4070-a004-24f28f186a29.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/eed965e8-c16e-4ca7-95f4-7f9244ebd7f0.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/f76535f4-1e3d-4538-bcf9-3be725ad4c89.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/92f58fd0-cc55-4cc0-8130-041f6ae97221.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/0b908e5a-bccf-40d3-acf8-4995e08bb931.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/a27f46f2-424f-4ea1-98b0-d8626711195a.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e2fda9eb-8cda-42e2-960b-e67b8398ad38.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6068febc-17b6-4c7d-b1c2-3ef5e455cf47.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/5642b79a-32f3-4b6d-9be2-640bd2a5e8a2.root 
-file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/2a3a0054-e344-4f21-a564-cb370801ddf7.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/569bf4c4-e68c-4361-bc96-f0ddf0d790a1.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4941d073-9cd1-49b9-9d45-eb320b063c14.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e54b0c5d-6111-4767-9408-da23a689217f.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e351a3be-8316-48aa-82a3-f6c2605a8fc4.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/8599020b-2e11-4599-a187-98010da229cb.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/9ef49fc3-b6aa-471c-8959-e5fbf737f4f4.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/ea5a885c-371f-4e0d-b88c-a503b6ae9639.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/a8b37365-46c8-499a-9e81-ebaa445a05c5.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/7a5e7c7c-35a7-4287-9538-1aa4e9aa7254.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/adbb2885-f619-4a2e-807e-3e5691151577.root 
-file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/aa33c777-c202-4285-bc91-73c69fa6755f.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4a3c24f8-a4e0-46a0-84cb-57d67efb7111.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/ace54945-77a7-4ffd-a024-f2f237ccd021.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6afe44e0-690b-4634-a32c-ebd705cbfd07.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/44c88040-2361-43a9-bbe4-a5b014f4f2e5.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6a18dbd4-1741-40fa-bca4-7fe021832d52.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1f141624-c992-443e-a328-31b5cf75e18e.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/06241e2f-9c12-4d18-9aa7-e60b1ece1cee.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/d54ba98f-990a-43eb-9137-80a46570d398.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/517694e4-12d7-44ec-98ac-85414f9d9b26.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/8ec67618-7494-43ca-992d-d738dc904e5c.root 
-file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/473acffa-0691-4d57-b354-1b18fc80da49.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/b23f0226-8424-49cf-bd19-9eaf50030c2e.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/39ea49f7-6464-4864-a889-eb2eb688f9da.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/498fd9e9-bc25-42d8-a32f-d613965d85f1.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/c39c4355-95c3-42dc-a8e5-f047a5eb4a3e.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/2f05df1d-91b2-4ae2-be05-e93d4e780cd1.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/ab1ebb2e-a10e-405a-a732-be8bb9413212.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/41d2d282-cced-4197-a0ef-9a51cc9f036d.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1d5248aa-c06a-4246-9aaa-7877d26f7770.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/2e9ab8db-fa66-4660-836c-6e6cd6420862.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4f40ca1c-6634-43c4-a1c1-61e5bc8450c6.root 
-file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/363d5f69-3331-404d-a2aa-24277a5b62bd.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/ceb90e16-3231-48f0-8a79-d9ae00200672.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e928d6a2-5952-45cf-ac72-a2cc5cd5f16e.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/c9bdc242-b9c8-43fa-b3df-a67dbd958e30.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/d15a342a-9303-4697-ab2b-903514be1f73.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/16494d37-5116-4faa-b7d5-b8b8cd8476a7.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1e5ec87c-83a0-4144-b451-b6efe00e2a2f.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/c9f047d1-bdba-4050-9b57-79cd9b727720.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/049294f5-fd84-4270-991d-0c0ab4efe65f.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6a5ba4cb-6010-4b9b-ac2d-78c699dc6579.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/4dbc3bb2-da2e-451b-9fa4-6de8920c0385.root 
-file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e2bf6238-b81a-4bc6-bad7-4e76d5877793.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/8949ce13-468c-4a4e-a1bc-84060a9798b8.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/e11018c2-1a8f-4e58-b29a-0c751687e9c8.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/d52c2280-e636-4e64-b941-db16379b7131.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1c9f9ce0-dca1-47d1-a267-4369a06e8b21.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1af3d728-9a4e-4b13-867e-941f3a44af5e.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/b31c6e0e-4192-4767-bd26-802c94f31f86.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/635c713a-0a53-46d0-9c84-2ecdf55a84a9.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/9d8d134e-8caf-4300-8571-400efdbadc56.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/f1dca930-ee74-41cd-9b24-c64c662b5443.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/c374f9d8-ef17-4cde-8096-1744166aeb97.root 
-file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/d16b02b3-a586-458b-b6ad-75fc8b982ce9.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/8f52e12e-a328-4f38-a18a-0b2461efbb5d.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/89aa992b-9137-4f26-9ac0-6f2419a42525.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/a3d432d0-360f-4ccd-a1b3-540a8246a74c.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/36abce8c-280e-42cc-813a-946e581719e1.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/33f8bbfc-8463-4ad3-8e49-ab45aaaf70de.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/bc118829-386b-4acd-9dbb-0fb06f4eb1c3.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/0e917484-df18-4c46-a0b6-3732d9e8b327.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/a447a799-e2d0-46bc-ae96-2d28ceeb85b1.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/3f0a96c3-ae9a-42f3-b580-6b9d2ed2c3a9.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/f28a4189-6027-4bd6-b228-faac9834258a.root 
-file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/8327e4f9-ad29-40c3-a192-def18506baa2.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/1e3e734f-b28f-4a71-8ec1-b5a88c780f00.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/600431e5-05a0-47f5-b429-9141ebd817b9.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/6deeba91-1221-43ac-a2a0-2dc16bdaed1a.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/58a44ed4-2500-4553-9539-801abf929ecc.root -file:/local/joosep/cms/store/mc/RunIII2024Summer24GS/MinBias_TuneCP5_13p6TeV-pythia8/GEN-SIM/140X_mcRun3_2024_realistic_v20-v1/120000/02813942-aba2-41a6-9427-31db9b0374cf.root diff --git a/mlpf/data/cms/run_gen.sh b/mlpf/data/cms/run_gen.sh deleted file mode 100755 index 23a0dd29c..000000000 --- a/mlpf/data/cms/run_gen.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -#seq 1 10 | parallel -j10 ./genjob_pu.sh TTbar_14TeV_TuneCUETP8M1_cfi {} -#seq 1 10 | parallel -j10 ./genjob_pu.sh ZTT_All_hadronic_14TeV_TuneCUETP8M1_cfi {} -#seq 1 100 | parallel -j12 ./genjob.sh SingleElectronFlatPt1To100_pythia8_cfi {} -#seq 1 100 | parallel -j12 ./genjob.sh SinglePiFlatPt0p7To10_cfi {} -#seq 1 100 | parallel -j12 ./genjob.sh SingleTauFlatPt2To150_cfi {} - -#./genjob.sh SingleNeutronFlatPt0p7To1000_cfi 1 & -#./genjob.sh SingleProtonPlusFlatPt0p7To1000_cfi 1 & -#./genjob.sh SingleProtonMinusFlatPt0p7To1000_cfi 1 & -#./genjob.sh SinglePiPlusFlatPt0p7To1000_cfi 1 & -#./genjob.sh SinglePiMinusFlatPt0p7To1000_cfi 1 & -#./genjob.sh SingleGammaFlatPt1To1000_pythia8_cfi 1 & -#./genjob.sh 
SingleElectronFlatPt1To1000_pythia8_cfi 1 -#./genjob.sh SingleTauFlatPt1To1000_cfi 1 & -#./genjob.sh SinglePi0Pt1To1000_pythia8_cfi 1 & - -./genjob_pu.sh TTbar_14TeV_TuneCUETP8M1_cfi 1 -./genjob_pu.sh ZTT_All_hadronic_14TeV_TuneCUETP8M1_cfi 1 -#./genjob_pu.sh TTbar_14TeV_TuneCUETP8M1_cfi 2 diff --git a/mlpf/data/key4hep/gen b/mlpf/data/key4hep/gen new file mode 160000 index 000000000..819e4e9e8 --- /dev/null +++ b/mlpf/data/key4hep/gen @@ -0,0 +1 @@ +Subproject commit 819e4e9e87f0087167ca5257cee2d1c3b085f9a5 diff --git a/mlpf/data/key4hep/postprocessing.py b/mlpf/data/key4hep/postprocessing.py index e4b9fc9a1..31df4400a 100644 --- a/mlpf/data/key4hep/postprocessing.py +++ b/mlpf/data/key4hep/postprocessing.py @@ -125,8 +125,7 @@ def weighted_avg_and_std(values, weights): def track_pt(omega): a = 3 * 10**-4 - b = 4 # B-field in tesla, from clicRec_e4h_input - + b = 4 # B-field in tesla return a * np.abs(b / omega) @@ -233,7 +232,7 @@ def hits_to_features(hit_data, iev, coll, feats): return awkward.Record(feat_arr) -def get_calohit_matrix_and_genadj(dataset, hit_data, calohit_links, iev, collectionIDs): +def get_calohit_matrix_and_genadj(hit_data, calohit_links, iev, collectionIDs): feats = ["type", "cellID", "energy", "energyError", "time", "position.x", "position.y", "position.z"] hit_idx_global = 0 @@ -253,18 +252,10 @@ def get_calohit_matrix_and_genadj(dataset, hit_data, calohit_links, iev, collect # add all edges from genparticle to calohit calohit_to_gen_weight = calohit_links["CalohitMCTruthLink.weight"][iev] - if dataset == "clic": - calohit_to_gen_calo_colid = calohit_links["CalohitMCTruthLink#0.collectionID"][iev] - calohit_to_gen_gen_colid = calohit_links["CalohitMCTruthLink#1.collectionID"][iev] - calohit_to_gen_calo_idx = calohit_links["CalohitMCTruthLink#0.index"][iev] - calohit_to_gen_gen_idx = calohit_links["CalohitMCTruthLink#1.index"][iev] - elif dataset == "fcc": - calohit_to_gen_calo_colid = 
calohit_links["_CalohitMCTruthLink_from/_CalohitMCTruthLink_from.collectionID"][iev] - calohit_to_gen_gen_colid = calohit_links["_CalohitMCTruthLink_to/_CalohitMCTruthLink_to.collectionID"][iev] - calohit_to_gen_calo_idx = calohit_links["_CalohitMCTruthLink_from/_CalohitMCTruthLink_from.index"][iev] - calohit_to_gen_gen_idx = calohit_links["_CalohitMCTruthLink_to/_CalohitMCTruthLink_to.index"][iev] - else: - raise Exception("--dataset provided is not supported. Only 'fcc' or 'clic' are supported atm.") + calohit_to_gen_calo_colid = calohit_links["_CalohitMCTruthLink_from/_CalohitMCTruthLink_from.collectionID"][iev] + calohit_to_gen_gen_colid = calohit_links["_CalohitMCTruthLink_to/_CalohitMCTruthLink_to.collectionID"][iev] + calohit_to_gen_calo_idx = calohit_links["_CalohitMCTruthLink_from/_CalohitMCTruthLink_from.index"][iev] + calohit_to_gen_gen_idx = calohit_links["_CalohitMCTruthLink_to/_CalohitMCTruthLink_to.index"][iev] genparticle_to_hit_matrix_coo0 = [] genparticle_to_hit_matrix_coo1 = [] @@ -291,20 +282,12 @@ def get_calohit_matrix_and_genadj(dataset, hit_data, calohit_links, iev, collect ) -def hit_cluster_adj(dataset, prop_data, hit_idx_local_to_global, iev): +def hit_cluster_adj(prop_data, hit_idx_local_to_global, iev): - if dataset == "clic": - coll_arr = prop_data["PandoraClusters#1"]["PandoraClusters#1.collectionID"][iev] - idx_arr = prop_data["PandoraClusters#1"]["PandoraClusters#1.index"][iev] - hits_begin = prop_data["PandoraClusters"]["PandoraClusters.hits_begin"][iev] - hits_end = prop_data["PandoraClusters"]["PandoraClusters.hits_end"][iev] - elif dataset == "fcc": - coll_arr = prop_data["_PandoraClusters_hits/_PandoraClusters_hits.collectionID"][iev] - idx_arr = prop_data["_PandoraClusters_hits/_PandoraClusters_hits.index"][iev] - hits_begin = prop_data["PandoraClusters"]["PandoraClusters.hits_begin"][iev] - hits_end = prop_data["PandoraClusters"]["PandoraClusters.hits_end"][iev] - else: - raise Exception("--dataset provided is not supported. 
Only 'fcc' or 'clic' are supported atm.") + coll_arr = prop_data["_PandoraClusters_hits/_PandoraClusters_hits.collectionID"][iev] + idx_arr = prop_data["_PandoraClusters_hits/_PandoraClusters_hits.index"][iev] + hits_begin = prop_data["PandoraClusters"]["PandoraClusters.hits_begin"][iev] + hits_end = prop_data["PandoraClusters"]["PandoraClusters.hits_end"][iev] # index in the array of all hits hit_to_cluster_matrix_coo0 = [] @@ -331,14 +314,9 @@ def hit_cluster_adj(dataset, prop_data, hit_idx_local_to_global, iev): return hit_to_cluster_matrix_coo0, hit_to_cluster_matrix_coo1, hit_to_cluster_matrix_w -def gen_to_features(dataset, prop_data, iev): +def gen_to_features(prop_data, iev): - if dataset == "clic": - gen_arr = prop_data[iev] - elif dataset == "fcc": - gen_arr = prop_data[mc_coll][iev] - else: - raise Exception("--dataset provided is not supported. Only 'fcc' or 'clic' are supported atm.") + gen_arr = prop_data[mc_coll][iev] gen_arr = {k.replace(mc_coll + ".", ""): gen_arr[k] for k in gen_arr.fields} @@ -374,27 +352,15 @@ def gen_to_features(dataset, prop_data, iev): "daughters_end": gen_arr["daughters_end"], } - if dataset == "clic": - ret["index"] = prop_data["MCParticles#1.index"][iev] - elif dataset == "fcc": - ret["index"] = prop_data["_MCParticles_daughters/_MCParticles_daughters.index"][iev] - else: - raise Exception("--dataset provided is not supported. 
Only 'fcc' or 'clic' are supported atm.") + ret["index"] = prop_data["_MCParticles_daughters/_MCParticles_daughters.index"][iev] return ret -def genparticle_track_adj(dataset, sitrack_links, iev): - - if dataset == "clic": - trk_to_gen_trkidx = sitrack_links["SiTracksMCTruthLink#0.index"][iev] - trk_to_gen_genidx = sitrack_links["SiTracksMCTruthLink#1.index"][iev] - elif dataset == "fcc": - trk_to_gen_trkidx = sitrack_links["_SiTracksMCTruthLink_from/_SiTracksMCTruthLink_from.index"][iev] - trk_to_gen_genidx = sitrack_links["_SiTracksMCTruthLink_to/_SiTracksMCTruthLink_to.index"][iev] - else: - raise Exception("--dataset provided is not supported. Only 'fcc' or 'clic' are supported atm.") +def genparticle_track_adj(sitrack_links, iev): + trk_to_gen_trkidx = sitrack_links["_SiTracksMCTruthLink_from/_SiTracksMCTruthLink_from.index"][iev] + trk_to_gen_genidx = sitrack_links["_SiTracksMCTruthLink_to/_SiTracksMCTruthLink_to.index"][iev] trk_to_gen_w = sitrack_links["SiTracksMCTruthLink.weight"][iev] genparticle_to_track_matrix_coo0 = awkward.to_numpy(trk_to_gen_genidx) @@ -476,47 +442,38 @@ def cluster_to_features(prop_data, hit_features, hit_to_cluster, iev): return awkward.Record(ret) -def track_to_features(dataset, prop_data, iev): - if dataset == "clic": - track_arr = prop_data[track_coll][iev] - feats_from_track = ["type", "chi2", "ndf", "dEdx", "dEdxError", "radiusOfInnermostHit"] - ret = {feat: track_arr[track_coll + "." 
+ feat] for feat in feats_from_track} - - elif dataset == "fcc": - track_arr = prop_data[track_coll][iev] - # the following are needed since they are no longer defined under SiTracks_Refitted - track_arr_dQdx = prop_data["SiTracks_Refitted_dQdx"][iev] - track_arr_trackStates = prop_data["_SiTracks_Refitted_trackStates"][iev] +def track_to_features(prop_data, iev): + track_arr = prop_data[track_coll][iev] + # the following are needed since they are no longer defined under SiTracks_Refitted + track_arr_dQdx = prop_data["SiTracks_Refitted_dQdx"][iev] + track_arr_trackStates = prop_data["_SiTracks_Refitted_trackStates"][iev] - feats_from_track = ["type", "chi2", "ndf"] - ret = {feat: track_arr[track_coll + "." + feat] for feat in feats_from_track} + feats_from_track = ["type", "chi2", "ndf"] + ret = {feat: track_arr[track_coll + "." + feat] for feat in feats_from_track} - ret["dEdx"] = track_arr_dQdx["SiTracks_Refitted_dQdx.dQdx.value"] - ret["dEdxError"] = track_arr_dQdx["SiTracks_Refitted_dQdx.dQdx.error"] + ret["dEdx"] = track_arr_dQdx["SiTracks_Refitted_dQdx.dQdx.value"] + ret["dEdxError"] = track_arr_dQdx["SiTracks_Refitted_dQdx.dQdx.error"] - # build the radiusOfInnermostHit variable - num_tracks = len(ret["dEdx"]) - innermost_radius = [] - for itrack in range(num_tracks): + # build the radiusOfInnermostHit variable + num_tracks = len(ret["dEdx"]) + innermost_radius = [] + for itrack in range(num_tracks): - # select the track states corresponding to itrack - # pick the state AtFirstHit - # https://github.com/key4hep/EDM4hep/blob/fe5a54046a91a7e648d0b588960db7841aebc670/edm4hep.yaml#L220 - ibegin = track_arr[track_coll + "." + "trackStates_begin"][itrack] - iend = track_arr[track_coll + "." + "trackStates_end"][itrack] + # select the track states corresponding to itrack + # pick the state AtFirstHit + # https://github.com/key4hep/EDM4hep/blob/fe5a54046a91a7e648d0b588960db7841aebc670/edm4hep.yaml#L220 + ibegin = track_arr[track_coll + "." 
+ "trackStates_begin"][itrack] + iend = track_arr[track_coll + "." + "trackStates_end"][itrack] - refX = track_arr_trackStates["_SiTracks_Refitted_trackStates" + "." + "referencePoint.x"][ibegin:iend] - refY = track_arr_trackStates["_SiTracks_Refitted_trackStates" + "." + "referencePoint.y"][ibegin:iend] - location = track_arr_trackStates["_SiTracks_Refitted_trackStates" + "." + "location"][ibegin:iend] + refX = track_arr_trackStates["_SiTracks_Refitted_trackStates" + "." + "referencePoint.x"][ibegin:iend] + refY = track_arr_trackStates["_SiTracks_Refitted_trackStates" + "." + "referencePoint.y"][ibegin:iend] + location = track_arr_trackStates["_SiTracks_Refitted_trackStates" + "." + "location"][ibegin:iend] - istate = np.argmax(location == 2) # 2 refers to AtFirstHit + istate = np.argmax(location == 2) # 2 refers to AtFirstHit - innermost_radius.append(math.sqrt(refX[istate] ** 2 + refY[istate] ** 2)) + innermost_radius.append(math.sqrt(refX[istate] ** 2 + refY[istate] ** 2)) - ret["radiusOfInnermostHit"] = np.array(innermost_radius) - - else: - raise Exception("--dataset provided is not supported. Only 'fcc' or 'clic' are supported atm.") + ret["radiusOfInnermostHit"] = np.array(innermost_radius) n_tr = len(ret["type"]) @@ -524,14 +481,7 @@ def track_to_features(dataset, prop_data, iev): trackstate_idx = prop_data[track_coll][track_coll + ".trackStates_begin"][iev] # get the properties of the track at the first track state (at the origin) for k in ["tanLambda", "D0", "phi", "omega", "Z0", "time"]: - - if dataset == "clic": - ret[k] = awkward.to_numpy(prop_data["SiTracks_1"]["SiTracks_1." + k][iev][trackstate_idx]) - elif dataset == "fcc": - ret[k] = awkward.to_numpy(prop_data["_SiTracks_Refitted_trackStates"]["_SiTracks_Refitted_trackStates." + k][iev][trackstate_idx]) - - else: - raise Exception("--dataset provided is not supported. 
Only 'fcc' or 'clic' are supported atm.") + ret[k] = awkward.to_numpy(prop_data["_SiTracks_Refitted_trackStates"]["_SiTracks_Refitted_trackStates." + k][iev][trackstate_idx]) ret["pt"] = awkward.to_numpy(track_pt(ret["omega"])) ret["px"] = awkward.to_numpy(np.cos(ret["phi"])) * ret["pt"] @@ -615,18 +565,18 @@ def add_daughters_to_status1(gen_features, genparticle_to_hit, genparticle_to_tr return genparticle_to_hit, genparticle_to_trk -def get_genparticles_and_adjacencies(dataset, prop_data, hit_data, calohit_links, sitrack_links, iev, collectionIDs): - gen_features = gen_to_features(dataset, prop_data, iev) - hit_features, genparticle_to_hit, hit_idx_local_to_global = get_calohit_matrix_and_genadj(dataset, hit_data, calohit_links, iev, collectionIDs) - hit_to_cluster = hit_cluster_adj(dataset, prop_data, hit_idx_local_to_global, iev) +def get_genparticles_and_adjacencies(prop_data, hit_data, calohit_links, sitrack_links, iev, collectionIDs): + gen_features = gen_to_features(prop_data, iev) + hit_features, genparticle_to_hit, hit_idx_local_to_global = get_calohit_matrix_and_genadj(hit_data, calohit_links, iev, collectionIDs) + hit_to_cluster = hit_cluster_adj(prop_data, hit_idx_local_to_global, iev) cluster_features = cluster_to_features(prop_data, hit_features, hit_to_cluster, iev) - track_features = track_to_features(dataset, prop_data, iev) - genparticle_to_trk = genparticle_track_adj(dataset, sitrack_links, iev) + track_features = track_to_features(prop_data, iev) + genparticle_to_trk = genparticle_track_adj(sitrack_links, iev) # collect hits of st=1 daughters to the st=1 particles mask_status1 = gen_features["generatorStatus"] == 1 - if gen_features["index"] is not None: # if there are even daughters + if gen_features["index"] is not None: # if there are daughters genparticle_to_hit, genparticle_to_trk = add_daughters_to_status1(gen_features, genparticle_to_hit, genparticle_to_trk) n_gp = awkward.count(gen_features["PDG"]) @@ -867,16 +817,10 @@ def 
get_recoptcl_to_obj(n_rps, reco_arr, idx_rp_to_track, idx_rp_to_cluster): return track_to_rp, cluster_to_rp -def get_reco_properties(dataset, prop_data, iev): +def get_reco_properties(prop_data, iev): - if dataset == "clic": - reco_arr = prop_data["MergedRecoParticles"][iev] - reco_arr = {k.replace("MergedRecoParticles.", ""): reco_arr[k] for k in reco_arr.fields} - elif dataset == "fcc": - reco_arr = prop_data["PandoraPFOs"][iev] - reco_arr = {k.replace("PandoraPFOs.", ""): reco_arr[k] for k in reco_arr.fields} - else: - raise Exception("--dataset provided is not supported. Only 'fcc' or 'clic' are supported atm.") + reco_arr = prop_data["PandoraPFOs"][iev] + reco_arr = {k.replace("PandoraPFOs.", ""): reco_arr[k] for k in reco_arr.fields} reco_p4 = vector.awk( awkward.zip({"mass": reco_arr["mass"], "x": reco_arr["momentum.x"], "y": reco_arr["momentum.y"], "z": reco_arr["momentum.z"]}) @@ -886,12 +830,7 @@ def get_reco_properties(dataset, prop_data, iev): reco_arr["phi"] = reco_p4.phi reco_arr["energy"] = reco_p4.energy - if dataset == "clic": - msk = reco_arr["type"] != 0 - elif dataset == "fcc": - msk = reco_arr["PDG"] != 0 - else: - raise Exception("--dataset provided is not supported. 
Only 'fcc' or 'clic' are supported atm.") + msk = reco_arr["PDG"] != 0 reco_arr = awkward.Record({k: reco_arr[k][msk] for k in reco_arr.keys()}) return reco_arr @@ -959,7 +898,7 @@ def compute_jets(particles_p4, min_pt=jet_ptcut, with_indices=False): return ret -def process_one_file(fn, ofn, dataset): +def process_one_file(fn, ofn): # output exists, do not recreate if os.path.isfile(ofn): @@ -970,132 +909,74 @@ def process_one_file(fn, ofn, dataset): fi = uproot.open(fn) arrs = fi["events"] - if dataset == "clic": - collectionIDs = { - k: v - for k, v in zip( - fi.get("metadata").arrays("CollectionIDs")["CollectionIDs"]["m_names"][0], - fi.get("metadata").arrays("CollectionIDs")["CollectionIDs"]["m_collectionIDs"][0], - ) - } - prop_data = arrs.arrays( - [ - "MCParticles.PDG", - "MCParticles.momentum.x", - "MCParticles.momentum.y", - "MCParticles.momentum.z", - "MCParticles.mass", - "MCParticles.charge", - "MCParticles.generatorStatus", - "MCParticles.simulatorStatus", - "MCParticles.daughters_begin", - "MCParticles.daughters_end", - "MCParticles#1.index", - track_coll, - "SiTracks_1", - "PandoraClusters", - "PandoraClusters#1", - "PandoraClusters#0", - "MergedRecoParticles", - ] - ) - calohit_links = arrs.arrays( - [ - "CalohitMCTruthLink.weight", - "CalohitMCTruthLink#0.index", - "CalohitMCTruthLink#0.collectionID", - "CalohitMCTruthLink#1.index", - "CalohitMCTruthLink#1.collectionID", - ] - ) - sitrack_links = arrs.arrays( - [ - "SiTracksMCTruthLink.weight", - "SiTracksMCTruthLink#0.index", - "SiTracksMCTruthLink#0.collectionID", - "SiTracksMCTruthLink#1.index", - "SiTracksMCTruthLink#1.collectionID", - ] - ) - # maps the recoparticle track/cluster index (in tracks_begin,end and clusters_begin,end) - # to the index in the track/cluster collection - idx_rp_to_cluster = arrs["MergedRecoParticles#0/MergedRecoParticles#0.index"].array() - idx_rp_to_track = arrs["MergedRecoParticles#1/MergedRecoParticles#1.index"].array() - - hit_data = { - "ECALBarrel": 
arrs["ECALBarrel"].array(), - "ECALEndcap": arrs["ECALEndcap"].array(), - "ECALOther": arrs["ECALOther"].array(), - "HCALBarrel": arrs["HCALBarrel"].array(), - "HCALEndcap": arrs["HCALEndcap"].array(), - "HCALOther": arrs["HCALOther"].array(), - "MUON": arrs["MUON"].array(), - } - elif dataset == "fcc": - collectionIDs = { - k: v - for k, v in zip( - fi.get("podio_metadata").arrays("events___idTable/m_names")["events___idTable/m_names"][0], - fi.get("podio_metadata").arrays("events___idTable/m_collectionIDs")["events___idTable/m_collectionIDs"][0], - ) - } - prop_data = arrs.arrays( - [ - mc_coll, - "MCParticles.PDG", - "MCParticles.momentum.x", - "MCParticles.momentum.y", - "MCParticles.momentum.z", - "MCParticles.mass", - "MCParticles.charge", - "MCParticles.generatorStatus", - "MCParticles.simulatorStatus", - "MCParticles.daughters_begin", - "MCParticles.daughters_end", - "_MCParticles_daughters/_MCParticles_daughters.index", # similar to "MCParticles#1.index" in clic - track_coll, - "_SiTracks_Refitted_trackStates", - "PandoraClusters", - "_PandoraClusters_hits/_PandoraClusters_hits.index", - "_PandoraClusters_hits/_PandoraClusters_hits.collectionID", - "PandoraPFOs", - "SiTracks_Refitted_dQdx", - ] - ) - calohit_links = arrs.arrays( - [ - "CalohitMCTruthLink.weight", - "_CalohitMCTruthLink_to/_CalohitMCTruthLink_to.collectionID", - "_CalohitMCTruthLink_to/_CalohitMCTruthLink_to.index", - "_CalohitMCTruthLink_from/_CalohitMCTruthLink_from.collectionID", - "_CalohitMCTruthLink_from/_CalohitMCTruthLink_from.index", - ] - ) - sitrack_links = arrs.arrays( - [ - "SiTracksMCTruthLink.weight", - "_SiTracksMCTruthLink_to/_SiTracksMCTruthLink_to.collectionID", - "_SiTracksMCTruthLink_to/_SiTracksMCTruthLink_to.index", - "_SiTracksMCTruthLink_from/_SiTracksMCTruthLink_from.collectionID", - "_SiTracksMCTruthLink_from/_SiTracksMCTruthLink_from.index", - ] + collectionIDs = { + k: v + for k, v in zip( + 
fi.get("podio_metadata").arrays("events___CollectionTypeInfo/events___CollectionTypeInfo.name")[ + "events___CollectionTypeInfo/events___CollectionTypeInfo.name" + ][0], + fi.get("podio_metadata").arrays("events___CollectionTypeInfo/events___CollectionTypeInfo.collectionID")[ + "events___CollectionTypeInfo/events___CollectionTypeInfo.collectionID" + ][0], ) + } - # maps the recoparticle track/cluster index (in tracks_begin,end and clusters_begin,end) - # to the index in the track/cluster collection - idx_rp_to_cluster = arrs["_PandoraPFOs_clusters/_PandoraPFOs_clusters.index"].array() - idx_rp_to_track = arrs["_PandoraPFOs_tracks/_PandoraPFOs_tracks.index"].array() - - hit_data = { - "ECALBarrel": arrs["ECALBarrel"].array(), - "ECALEndcap": arrs["ECALEndcap"].array(), - "HCALBarrel": arrs["HCALBarrel"].array(), - "HCALEndcap": arrs["HCALEndcap"].array(), - "HCALOther": arrs["HCALOther"].array(), - "MUON": arrs["MUON"].array(), - } - else: - raise Exception("--dataset provided is not supported. 
Only 'fcc' or 'clic' are supported atm.") + prop_data = arrs.arrays( + [ + mc_coll, + "MCParticles.PDG", + "MCParticles.momentum.x", + "MCParticles.momentum.y", + "MCParticles.momentum.z", + "MCParticles.mass", + "MCParticles.charge", + "MCParticles.generatorStatus", + "MCParticles.simulatorStatus", + "MCParticles.daughters_begin", + "MCParticles.daughters_end", + "_MCParticles_daughters/_MCParticles_daughters.index", + track_coll, + "_SiTracks_Refitted_trackStates", + "PandoraClusters", + "_PandoraClusters_hits/_PandoraClusters_hits.index", + "_PandoraClusters_hits/_PandoraClusters_hits.collectionID", + "PandoraPFOs", + "SiTracks_Refitted_dQdx", + ] + ) + calohit_links = arrs.arrays( + [ + "CalohitMCTruthLink.weight", + "_CalohitMCTruthLink_to/_CalohitMCTruthLink_to.collectionID", + "_CalohitMCTruthLink_to/_CalohitMCTruthLink_to.index", + "_CalohitMCTruthLink_from/_CalohitMCTruthLink_from.collectionID", + "_CalohitMCTruthLink_from/_CalohitMCTruthLink_from.index", + ] + ) + sitrack_links = arrs.arrays( + [ + "SiTracksMCTruthLink.weight", + "_SiTracksMCTruthLink_to/_SiTracksMCTruthLink_to.collectionID", + "_SiTracksMCTruthLink_to/_SiTracksMCTruthLink_to.index", + "_SiTracksMCTruthLink_from/_SiTracksMCTruthLink_from.collectionID", + "_SiTracksMCTruthLink_from/_SiTracksMCTruthLink_from.index", + ] + ) + + # maps the recoparticle track/cluster index (in tracks_begin,end and clusters_begin,end) + # to the index in the track/cluster collection + idx_rp_to_cluster = arrs["_PandoraPFOs_clusters/_PandoraPFOs_clusters.index"].array() + idx_rp_to_track = arrs["_PandoraPFOs_tracks/_PandoraPFOs_tracks.index"].array() + + hit_data = { + "ECALOther": arrs["ECALOther"].array(), + "ECALBarrel": arrs["ECALBarrel"].array(), + "ECALEndcap": arrs["ECALEndcap"].array(), + "HCALBarrel": arrs["HCALBarrel"].array(), + "HCALEndcap": arrs["HCALEndcap"].array(), + "HCALOther": arrs["HCALOther"].array(), + "MUON": arrs["MUON"].array(), + } # Compute truth MET and jets from status=1 pythia 
particles mc_pdg = np.abs(prop_data["MCParticles.PDG"]) @@ -1117,14 +998,9 @@ def process_one_file(fn, ofn, dataset): for iev in tqdm.tqdm(range(arrs.num_entries), total=arrs.num_entries): # get the reco particles - reco_arr = get_reco_properties(dataset, prop_data, iev) + reco_arr = get_reco_properties(prop_data, iev) - if dataset == "clic": - reco_type = np.abs(reco_arr["type"]) - elif dataset == "fcc": - reco_type = np.abs(reco_arr["PDG"]) - else: - raise Exception("--dataset provided is not supported. Only 'fcc' or 'clic' are supported atm.") + reco_type = np.abs(reco_arr["PDG"]) n_rps = len(reco_type) reco_features = awkward.Record( @@ -1147,7 +1023,6 @@ def process_one_file(fn, ofn, dataset): # get the genparticles and the links between genparticles and tracks/clusters gpdata = get_genparticles_and_adjacencies( - dataset, prop_data, hit_data, calohit_links, @@ -1280,7 +1155,6 @@ def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--input", type=str, help="Input file ROOT file", required=True) parser.add_argument("--outpath", type=str, default="raw", help="output path") - parser.add_argument("--dataset", type=str, help="Which detector dataset?", required=True, choices=["clic", "fcc"]) args = parser.parse_args() return args @@ -1294,11 +1168,11 @@ def process(args): flist = glob.glob(args.input + "/*.root") for infile in flist: outfile = os.path.join(args.outpath, os.path.basename(infile).split(".")[0] + ".parquet") - process_one_file(infile, outfile, args.dataset) + process_one_file(infile, outfile) else: infile = args.input outfile = os.path.join(args.outpath, os.path.basename(infile).split(".")[0] + ".parquet") - process_one_file(infile, outfile, args.dataset) + process_one_file(infile, outfile) if __name__ == "__main__": diff --git a/mlpf/heptfds/clic_pf_edm4hep/ttbar_pu10.py b/mlpf/heptfds/cld_pf_edm4hep/qq.py similarity index 66% rename from mlpf/heptfds/clic_pf_edm4hep/ttbar_pu10.py rename to mlpf/heptfds/cld_pf_edm4hep/qq.py index 
e511bf064..8eb8f3f81 100644 --- a/mlpf/heptfds/clic_pf_edm4hep/ttbar_pu10.py +++ b/mlpf/heptfds/cld_pf_edm4hep/qq.py @@ -1,7 +1,10 @@ from pathlib import Path +import os import numpy as np +import tensorflow_datasets as tfds from utils_edm import ( + NUM_SPLITS, X_FEATURES_CL, X_FEATURES_TRK, Y_FEATURES, @@ -9,40 +12,36 @@ split_sample, ) -import tensorflow_datasets as tfds - _DESCRIPTION = """ -CLIC EDM4HEP dataset with ee -> ttbar + PU10 at 380 GeV. -PU is generated with ee->gg, overlaying random events from Poisson(10). +CLD EDM4HEP dataset with ee -> qq at 365 GeV. - X: reconstructed tracks and clusters, variable number N per event - ygen: stable generator particles, zero-padded to N per event - ycand: baseline particle flow particles, zero-padded to N per event """ _CITATION = """ -Pata, Joosep, Wulff, Eric, Duarte, Javier, Mokhtar, Farouk, Zhang, Mengke, Girone, Maria, & Southwick, David. (2023). -Simulated datasets for detector and particle flow reconstruction: CLIC detector (1.1) [Data set]. -Zenodo. https://doi.org/10.5281/zenodo.8260741 +FIXME """ -class ClicEdmTtbarPu10Pf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.5.0") +class CldEdmQqPf(tfds.core.GeneratorBasedBuilder): + VERSION = tfds.core.Version(os.environ.get("TFDS_VERSION", "UNDEFINED")) RELEASE_NOTES = { - "1.3.0": "Update stats to ~1M events", - "1.4.0": "Fix ycand matching", - "1.5.0": "Regenerate with ARRAY_RECORD", + "2.6.0": "New generation with v1.2.2_key4hep_2025-05-29_CLD_3edac3", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ For the raw input files in ROOT EDM4HEP format, please see the citation above. The processed tensorflow_dataset can also be downloaded from: - rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic_edm4hep/ ./ + rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/cld_edm4hep/ ./ """ + # create configs 1 ... 
NUM_SPLITS + 1 that allow to parallelize the dataset building + BUILDER_CONFIGS = [tfds.core.BuilderConfig(name=str(group)) for group in range(1, NUM_SPLITS + 1)] + def __init__(self, *args, **kwargs): kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD - super(ClicEdmTtbarPu10Pf, self).__init__(*args, **kwargs) + super(CldEdmQqPf, self).__init__(*args, **kwargs) def _info(self) -> tfds.core.DatasetInfo: """Returns the dataset metadata.""" @@ -58,8 +57,11 @@ def _info(self) -> tfds.core.DatasetInfo: ), dtype=np.float32, ), - "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), + "ytarget": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), + "genmet": tfds.features.Scalar(dtype=np.float32), + "genjets": tfds.features.Tensor(shape=(None, 4), dtype=np.float32), + "targetjets": tfds.features.Tensor(shape=(None, 4), dtype=np.float32), } ), supervised_keys=None, @@ -74,7 +76,7 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): path = dl_manager.manual_dir - return split_sample(Path(path / "p8_ee_tt_ecm380_PU10/")) + return split_sample(Path(path / "p8_ee_qq_ecm365"), self.builder_config, num_splits=NUM_SPLITS) def _generate_examples(self, files): return generate_examples(files) diff --git a/mlpf/heptfds/cld_pf_edm4hep/ttbar.py b/mlpf/heptfds/cld_pf_edm4hep/ttbar.py index 3b94be629..5c5c84f39 100644 --- a/mlpf/heptfds/cld_pf_edm4hep/ttbar.py +++ b/mlpf/heptfds/cld_pf_edm4hep/ttbar.py @@ -1,5 +1,6 @@ from pathlib import Path +import os import numpy as np import tensorflow_datasets as tfds from utils_edm import ( @@ -24,11 +25,12 @@ class CldEdmTtbarPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("2.5.0") + VERSION = tfds.core.Version(os.environ.get("TFDS_VERSION", "UNDEFINED")) RELEASE_NOTES = { "2.0.0": "Initial release", "2.3.0": "Fix target/truth momentum, 
st=1 more inclusive: PR352", "2.5.0": "Use 10 splits, skip 2.4.0 to unify with CMS datasets", + "2.6.0": "New generation with v1.2.2_key4hep_2025-05-29_CLD_3edac3", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ For the raw input files in ROOT EDM4HEP format, please see the citation above. @@ -77,7 +79,7 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): path = dl_manager.manual_dir - return split_sample(Path(path / "p8_ee_tt_ecm365"), self.builder_config, num_splits=NUM_SPLITS) + return split_sample(Path(path / "p8_ee_ttbar_ecm365"), self.builder_config, num_splits=NUM_SPLITS) def _generate_examples(self, files): return generate_examples(files) diff --git a/mlpf/heptfds/cld_pf_edm4hep/utils_edm.py b/mlpf/heptfds/cld_pf_edm4hep/utils_edm.py index 9cf0828e1..447052a36 100644 --- a/mlpf/heptfds/cld_pf_edm4hep/utils_edm.py +++ b/mlpf/heptfds/cld_pf_edm4hep/utils_edm.py @@ -90,6 +90,8 @@ def split_sample(path, builder_config, num_splits=NUM_SPLITS, test_frac=0.9): split_index = int(builder_config.name) - 1 files_train_split = split_list(files_train, num_splits) files_test_split = split_list(files_test, num_splits) + assert len(files_train_split[split_index]) > 0 + assert len(files_test_split[split_index]) > 0 return { "train": generate_examples(files_train_split[split_index]), @@ -208,6 +210,7 @@ def prepare_data_clic(fn): def generate_examples(files): for fi in files: Xs, ytargets, ycands, genmets, genjets, targetjets = prepare_data_clic(fi) + print(fi, [len(x) for x in Xs]) for iev in range(len(Xs)): gm = genmets[iev][0] gj = genjets[iev] diff --git a/mlpf/heptfds/clic_pf_edm4hep_hits/ttbar.py b/mlpf/heptfds/cld_pf_edm4hep/ww.py similarity index 61% rename from mlpf/heptfds/clic_pf_edm4hep_hits/ttbar.py rename to mlpf/heptfds/cld_pf_edm4hep/ww.py index 5f837b5a4..9266b62e2 100644 --- a/mlpf/heptfds/clic_pf_edm4hep_hits/ttbar.py +++ b/mlpf/heptfds/cld_pf_edm4hep/ww.py @@ -1,50 +1,47 @@ from pathlib import Path 
+import os import numpy as np +import tensorflow_datasets as tfds from utils_edm import ( - X_FEATURES_CH, + NUM_SPLITS, + X_FEATURES_CL, X_FEATURES_TRK, Y_FEATURES, generate_examples, split_sample, ) -import tensorflow_datasets as tfds - _DESCRIPTION = """ -CLIC EDM4HEP dataset with ttbar with raw hits. - - X: reconstructed tracks and calorimeter hits, variable number N per event +CLD EDM4HEP dataset with ee -> WW at 365 GeV. + - X: reconstructed tracks and clusters, variable number N per event - ygen: stable generator particles, zero-padded to N per event - ycand: baseline particle flow particles, zero-padded to N per event """ _CITATION = """ -Pata, Joosep, Wulff, Eric, Duarte, Javier, Mokhtar, Farouk, Zhang, Mengke, Girone, Maria, & Southwick, David. (2023). -Simulated datasets for detector and particle flow reconstruction: CLIC detector (1.1) [Data set]. -Zenodo. https://doi.org/10.5281/zenodo.8260741 +FIXME """ -class ClicEdmTtbarHitsPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.7.0") +class CldEdmWwPf(tfds.core.GeneratorBasedBuilder): + VERSION = tfds.core.Version(os.environ.get("TFDS_VERSION", "UNDEFINED")) RELEASE_NOTES = { - "0.9.0": "Small stats", - "1.0.0": "Initial release", - "1.1.0": "Remove track referencepoint feature", - "1.2.0": "Keep all interacting genparticles", - "1.5.0": "Regenerate with ARRAY_RECORD", - "1.7.0": "Update track features", + "2.6.0": "New generation with v1.2.2_key4hep_2025-05-29_CLD_3edac3", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ For the raw input files in ROOT EDM4HEP format, please see the citation above. The processed tensorflow_dataset can also be downloaded from: - FIXME + rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/cld_edm4hep/ ./ """ + # create configs 1 ... 
NUM_SPLITS + 1 that allow to parallelize the dataset building + BUILDER_CONFIGS = [tfds.core.BuilderConfig(name=str(group)) for group in range(1, NUM_SPLITS + 1)] + def __init__(self, *args, **kwargs): kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD - super(ClicEdmTtbarHitsPf, self).__init__(*args, **kwargs) + super(CldEdmWwPf, self).__init__(*args, **kwargs) def _info(self) -> tfds.core.DatasetInfo: """Returns the dataset metadata.""" @@ -56,12 +53,15 @@ def _info(self) -> tfds.core.DatasetInfo: "X": tfds.features.Tensor( shape=( None, - max(len(X_FEATURES_TRK), len(X_FEATURES_CH)), + max(len(X_FEATURES_TRK), len(X_FEATURES_CL)), ), dtype=np.float32, ), - "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), + "ytarget": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), + "genmet": tfds.features.Scalar(dtype=np.float32), + "genjets": tfds.features.Tensor(shape=(None, 4), dtype=np.float32), + "targetjets": tfds.features.Tensor(shape=(None, 4), dtype=np.float32), } ), supervised_keys=None, @@ -69,14 +69,14 @@ def _info(self) -> tfds.core.DatasetInfo: citation=_CITATION, metadata=tfds.core.MetadataDict( x_features_track=X_FEATURES_TRK, - x_features_calohit=X_FEATURES_CH, + x_features_cluster=X_FEATURES_CL, y_features=Y_FEATURES, ), ) def _split_generators(self, dl_manager: tfds.download.DownloadManager): path = dl_manager.manual_dir - return split_sample(Path(path / "p8_ee_tt_ecm380/")) + return split_sample(Path(path / "p8_ee_WW_ecm365"), self.builder_config, num_splits=NUM_SPLITS) def _generate_examples(self, files): return generate_examples(files) diff --git a/mlpf/heptfds/clic_pf_edm4hep/gamma.py b/mlpf/heptfds/cld_pf_edm4hep/zz.py similarity index 80% rename from mlpf/heptfds/clic_pf_edm4hep/gamma.py rename to mlpf/heptfds/cld_pf_edm4hep/zz.py index a94c286ba..428240de7 100644 --- a/mlpf/heptfds/clic_pf_edm4hep/gamma.py +++ 
b/mlpf/heptfds/cld_pf_edm4hep/zz.py @@ -1,5 +1,6 @@ from pathlib import Path +import os import numpy as np import tensorflow_datasets as tfds from utils_edm import ( @@ -12,29 +13,30 @@ ) _DESCRIPTION = """ -CLIC EDM4HEP dataset with single photon gun samples. +CLD EDM4HEP dataset with ee -> ZZ at 365 GeV. - X: reconstructed tracks and clusters, variable number N per event - ygen: stable generator particles, zero-padded to N per event - ycand: baseline particle flow particles, zero-padded to N per event """ _CITATION = """ -Pata, Joosep, Wulff, Eric, Duarte, Javier, Mokhtar, Farouk, Zhang, Mengke, Girone, Maria, & Southwick, David. (2023). -Simulated datasets for detector and particle flow reconstruction: CLIC detector (1.1) [Data set]. -Zenodo. https://doi.org/10.5281/zenodo.8260741 +FIXME """ -class ClicEdmGamma(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("2.5.0") +class CldEdmZzPf(tfds.core.GeneratorBasedBuilder): + VERSION = tfds.core.Version(os.environ.get("TFDS_VERSION", "UNDEFINED")) RELEASE_NOTES = { + "2.0.0": "Initial release", + "2.3.0": "Fix target/truth momentum, st=1 more inclusive: PR352", "2.5.0": "Use 10 splits, skip 2.4.0 to unify with CMS datasets", + "2.6.0": "New generation with v1.2.2_key4hep_2025-05-29_CLD_3edac3", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ For the raw input files in ROOT EDM4HEP format, please see the citation above. The processed tensorflow_dataset can also be downloaded from: - rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/clic_edm4hep/ ./ + rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/cld_edm4hep/ ./ """ # create configs 1 ... 
NUM_SPLITS + 1 that allow to parallelize the dataset building @@ -42,7 +44,7 @@ class ClicEdmGamma(tfds.core.GeneratorBasedBuilder): def __init__(self, *args, **kwargs): kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD - super(ClicEdmGamma, self).__init__(*args, **kwargs) + super(CldEdmZzPf, self).__init__(*args, **kwargs) def _info(self) -> tfds.core.DatasetInfo: """Returns the dataset metadata.""" @@ -65,7 +67,8 @@ def _info(self) -> tfds.core.DatasetInfo: "targetjets": tfds.features.Tensor(shape=(None, 4), dtype=np.float32), } ), - homepage="https://github.com/jpata/particleflow", + supervised_keys=None, + homepage="", citation=_CITATION, metadata=tfds.core.MetadataDict( x_features_track=X_FEATURES_TRK, @@ -76,7 +79,7 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): path = dl_manager.manual_dir - return split_sample(Path(path / "gamma//"), self.builder_config, num_splits=NUM_SPLITS) + return split_sample(Path(path / "p8_ee_ZZ_ecm365"), self.builder_config, num_splits=NUM_SPLITS) def _generate_examples(self, files): return generate_examples(files) diff --git a/mlpf/heptfds/cld_pf_edm4hep_hits/utils_edm.py b/mlpf/heptfds/cld_pf_edm4hep_hits/utils_edm.py deleted file mode 100644 index 517b085c5..000000000 --- a/mlpf/heptfds/cld_pf_edm4hep_hits/utils_edm.py +++ /dev/null @@ -1,172 +0,0 @@ -import awkward as ak -import numpy as np -import tqdm -import random - -# from fcc/postprocessing_hits.py -X_FEATURES_TRK = [ - "elemtype", - "pt", - "eta", - "sin_phi", - "cos_phi", - "p", - "chi2", - "ndf", - "dEdx", - "dEdxError", - "radiusOfInnermostHit", - "tanLambda", - "D0", - "omega", - "Z0", - "time", -] -X_FEATURES_CH = [ - "elemtype", - "et", - "eta", - "sin_phi", - "cos_phi", - "energy", - "position.x", - "position.y", - "position.z", - "time", - "subdetector", - "type", -] -X_FEAT_NUM = max(len(X_FEATURES_TRK), len(X_FEATURES_CH)) - -Y_FEATURES = ["PDG", "charge", "pt", "eta", "sin_phi", 
"cos_phi", "energy"] -labels = [0, 211, 130, 22, 11, 13] - - -def split_sample(path, test_frac=0.8, max_files=0): - files = sorted(list(path.glob("*.parquet"))) - if max_files > 0: - files = files[:max_files] - print("Found {} files in {}".format(len(files), path)) - assert len(files) > 0 - idx_split = int(test_frac * len(files)) - files_train = files[:idx_split] - files_test = files[idx_split:] - assert len(files_train) > 0 - assert len(files_test) > 0 - return { - "train": generate_examples(files_train), - "test": generate_examples(files_test), - } - - -def split_sample_several(paths, test_frac=0.8): - files = sum([list(path.glob("*.parquet")) for path in paths], []) - random.shuffle(files) - print("Found {} files".format(len(files))) - assert len(files) > 0 - idx_split = int(test_frac * len(files)) - files_train = files[:idx_split] - files_test = files[idx_split:] - assert len(files_train) > 0 - assert len(files_test) > 0 - return { - "train": generate_examples(files_train), - "test": generate_examples(files_test), - } - - -def prepare_data_cld(fn): - ret = ak.from_parquet(fn) - - X_track = ret["X_track"] - X_hit = ret["X_hit"] - tracks_assoc_mats = ret["gp_to_track"] - - assert len(X_track) == len(X_hit) - nev = len(X_track) - - Xs = [] - ygens = [] - ycands = [] - gp_to_tracks = [] - gp_to_hits = [] - for iev in range(nev): - - X1 = ak.to_numpy(X_track[iev]) - X2 = ak.to_numpy(X_hit[iev]) - - if len(X1) == 0 and len(X2) == 0: - continue - - ygen_track = ak.to_numpy(ret["ygen_track"][iev]) - ygen_hit = ak.to_numpy(ret["ygen_hit"][iev]) - ycand_track = ak.to_numpy(ret["ycand_track"][iev]) - ycand_hit = ak.to_numpy(ret["ycand_hit"][iev]) - - if tracks_assoc_mats is not None: - gp_to_track = ak.to_numpy(ret["gp_to_track"][iev]) - gp_to_calohit = ak.to_numpy(ret["gp_to_calohit"][iev]) - - gp_to_tracks.append(gp_to_track) - gp_to_hits.append(gp_to_calohit) - - if ygen_track.shape[0] == 0: - ygen_track = np.zeros((0, 7), dtype=np.float32) - if ycand_track.shape[0] == 
0: - ycand_track = np.zeros((0, 7), dtype=np.float32) - if ygen_hit.shape[0] == 0: - ygen_hit = np.zeros((0, 7), dtype=np.float32) - if ycand_hit.shape[0] == 0: - ycand_hit = np.zeros((0, 7), dtype=np.float32) - - if len(ygen_track) == 0 and len(ygen_hit) == 0: - continue - if len(ycand_track) == 0 and len(ycand_hit) == 0: - continue - - # pad feature dim between tracks and hits to the same size - X1 = np.pad(X1, [[0, 0], [0, X_FEAT_NUM - X1.shape[1]]]) - X2 = np.pad(X2, [[0, 0], [0, X_FEAT_NUM - X2.shape[1]]]) - - # concatenate tracks and hits in features and targets - X = np.concatenate([X1, X2]) - ygen = np.concatenate([ygen_track, ygen_hit]) - ycand = np.concatenate([ycand_track, ycand_hit]) - assert ygen.shape[0] == X.shape[0] - assert ycand.shape[0] == X.shape[0] - - # replace PID with index in labels array - arr = np.array([labels.index(p) for p in ygen[:, 0]]) - ygen[:, 0][:] = arr[:] - arr = np.array([labels.index(p) for p in ycand[:, 0]]) - ycand[:, 0][:] = arr[:] - Xs.append(X) - ygens.append(ygen) - ycands.append(ycand) - - return Xs, ygens, ycands, gp_to_tracks, gp_to_hits - - -def generate_examples(files): - for fi in tqdm.tqdm(files): - Xs, ygens, ycands, gp_to_tracks, gp_to_hits = prepare_data_cld(fi) - for iev in range(len(Xs)): - if gp_to_tracks == []: - yield str(fi) + "_" + str(iev), { - "X": Xs[iev].astype(np.float32), - "ygen": ygens[iev].astype(np.float32), - "ycand": ycands[iev].astype(np.float32), - } - else: - yield str(fi) + "_" + str(iev), { - "X": Xs[iev].astype(np.float32), - "ygen": ygens[iev].astype(np.float32), - "ycand": ycands[iev].astype(np.float32), - "gp_to_tracks": gp_to_tracks[iev].astype(np.float32), - "gp_to_hits": gp_to_hits[iev].astype(np.float32), - } - - -if __name__ == "__main__": - fn = "/local/joosep/mlpf_hits/clic_edm4hep_2023_02_27/p8_ee_qq_ecm380/reco_p8_ee_qq_ecm380_111398.parquet" - ret = prepare_data_cld(fn) diff --git a/mlpf/heptfds/clic_pf_edm4hep/qq.py b/mlpf/heptfds/clic_pf_edm4hep/qq.py index 
ccd8817f3..6c78ff57f 100644 --- a/mlpf/heptfds/clic_pf_edm4hep/qq.py +++ b/mlpf/heptfds/clic_pf_edm4hep/qq.py @@ -1,3 +1,4 @@ +import os from pathlib import Path import numpy as np @@ -20,7 +21,7 @@ class ClicEdmQqPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("2.5.0") + VERSION = tfds.core.Version(os.environ.get("TFDS_VERSION", "3.0.0")) RELEASE_NOTES = { "1.0.0": "Initial release.", "1.1.0": "update stats, move to 380 GeV", @@ -34,6 +35,7 @@ class ClicEdmQqPf(tfds.core.GeneratorBasedBuilder): "2.2.0": "New target definition, fix truth jets, add targetjets and jet idx", "2.3.0": "Fix target/truth momentum, st=1 more inclusive: PR352", "2.5.0": "Use 10 splits, skip 2.4.0 to unify with CMS datasets", + "3.0.0": "New generation with v1.2.4_key4hep_2025-05-29_CLIC_819e4e", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ For the raw input files in ROOT EDM4HEP format, please see the citation above. diff --git a/mlpf/heptfds/clic_pf_edm4hep/ttbar.py b/mlpf/heptfds/clic_pf_edm4hep/ttbar.py index 5443bbe39..b82041d9e 100644 --- a/mlpf/heptfds/clic_pf_edm4hep/ttbar.py +++ b/mlpf/heptfds/clic_pf_edm4hep/ttbar.py @@ -1,3 +1,4 @@ +import os from pathlib import Path import numpy as np @@ -20,7 +21,7 @@ class ClicEdmTtbarPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("2.5.0") + VERSION = tfds.core.Version(os.environ.get("TFDS_VERSION", "3.0.0")) RELEASE_NOTES = { "1.0.0": "Initial release.", "1.1.0": "update stats, move to 380 GeV", @@ -33,6 +34,7 @@ class ClicEdmTtbarPf(tfds.core.GeneratorBasedBuilder): "2.2.0": "New target definition, fix truth jets, add targetjets and jet idx", "2.3.0": "Fix target/truth momentum, st=1 more inclusive: PR352", "2.5.0": "Use 10 splits, skip 2.4.0 to unify with CMS datasets", + "3.0.0": "New generation with v1.2.4_key4hep_2025-05-29_CLIC_819e4e", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ For the raw input files in ROOT EDM4HEP format, please see the citation above. 
@@ -80,7 +82,7 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): path = dl_manager.manual_dir - return split_sample(Path(path / "p8_ee_tt_ecm380/"), self.builder_config, num_splits=NUM_SPLITS) + return split_sample(Path(path / "p8_ee_ttbar_ecm380/"), self.builder_config, num_splits=NUM_SPLITS) def _generate_examples(self, files): return generate_examples(files) diff --git a/mlpf/heptfds/clic_pf_edm4hep/ww_fullhad.py b/mlpf/heptfds/clic_pf_edm4hep/ww_fullhad.py index 9245f14aa..aa8d4826d 100644 --- a/mlpf/heptfds/clic_pf_edm4hep/ww_fullhad.py +++ b/mlpf/heptfds/clic_pf_edm4hep/ww_fullhad.py @@ -1,3 +1,4 @@ +import os from pathlib import Path import numpy as np @@ -20,7 +21,7 @@ class ClicEdmWwFullhadPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("2.5.0") + VERSION = tfds.core.Version(os.environ.get("TFDS_VERSION", "2.5.0")) RELEASE_NOTES = { "1.3.0": "Update stats to ~1M events", "1.4.0": "Fix ycand matching", @@ -29,6 +30,7 @@ class ClicEdmWwFullhadPf(tfds.core.GeneratorBasedBuilder): "2.2.0": "New target definition, fix truth jets, add targetjets and jet idx", "2.3.0": "Fix target/truth momentum, st=1 more inclusive: PR352", "2.5.0": "Use 10 splits, skip 2.4.0 to unify with CMS datasets", + "3.0.0": "New generation with v1.2.4_key4hep_2025-05-29_CLIC_819e4e", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ For the raw input files in ROOT EDM4HEP format, please see the citation above. diff --git a/mlpf/heptfds/clic_pf_edm4hep_hits/qq.py b/mlpf/heptfds/clic_pf_edm4hep_hits/qq.py deleted file mode 100644 index fbaf112a4..000000000 --- a/mlpf/heptfds/clic_pf_edm4hep_hits/qq.py +++ /dev/null @@ -1,82 +0,0 @@ -from pathlib import Path - -import numpy as np -from utils_edm import ( - X_FEATURES_CH, - X_FEATURES_TRK, - Y_FEATURES, - generate_examples, - split_sample, -) - -import tensorflow_datasets as tfds - -_DESCRIPTION = """ -CLIC EDM4HEP dataset with qq with raw calorimeter hits. 
- - X: reconstructed tracks and calorimeter hits, variable number N per event - - ygen: stable generator particles, zero-padded to N per event - - ycand: baseline particle flow particles, zero-padded to N per event -""" - -_CITATION = """ -Pata, Joosep, Wulff, Eric, Duarte, Javier, Mokhtar, Farouk, Zhang, Mengke, Girone, Maria, & Southwick, David. (2023). -Simulated datasets for detector and particle flow reconstruction: CLIC detector (1.1) [Data set]. -Zenodo. https://doi.org/10.5281/zenodo.8260741 -""" - - -class ClicEdmQqHitsPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.7.0") - RELEASE_NOTES = { - "0.9.0": "Small stats", - "1.0.0": "Initial release", - "1.1.0": "Remove track referencepoint feature", - "1.2.0": "Keep all interacting genparticles", - "1.5.0": "Regenerate with ARRAY_RECORD", - "1.7.0": "Update track features", - } - MANUAL_DOWNLOAD_INSTRUCTIONS = """ - For the raw input files in ROOT EDM4HEP format, please see the citation above. - - The processed tensorflow_dataset can also be downloaded from: - FIXME - """ - - def __init__(self, *args, **kwargs): - kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD - super(ClicEdmQqHitsPf, self).__init__(*args, **kwargs) - - def _info(self) -> tfds.core.DatasetInfo: - """Returns the dataset metadata.""" - return tfds.core.DatasetInfo( - builder=self, - description=_DESCRIPTION, - features=tfds.features.FeaturesDict( - { - "X": tfds.features.Tensor( - shape=( - None, - max(len(X_FEATURES_TRK), len(X_FEATURES_CH)), - ), - dtype=np.float32, - ), - "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - } - ), - supervised_keys=None, - homepage="", - citation=_CITATION, - metadata=tfds.core.MetadataDict( - x_features_track=X_FEATURES_TRK, - x_features_calohit=X_FEATURES_CH, - y_features=Y_FEATURES, - ), - ) - - def _split_generators(self, dl_manager: tfds.download.DownloadManager): 
- path = dl_manager.manual_dir - return split_sample(Path(path / "p8_ee_qq_ecm380/")) - - def _generate_examples(self, files): - return generate_examples(files) diff --git a/mlpf/heptfds/clic_pf_edm4hep_hits/qq_10k.py b/mlpf/heptfds/clic_pf_edm4hep_hits/qq_10k.py deleted file mode 100644 index 29a721727..000000000 --- a/mlpf/heptfds/clic_pf_edm4hep_hits/qq_10k.py +++ /dev/null @@ -1,66 +0,0 @@ -from pathlib import Path - -import numpy as np -from utils_edm import ( - X_FEATURES_CH, - X_FEATURES_TRK, - Y_FEATURES, - generate_examples, - split_sample, -) - -import tensorflow_datasets as tfds - -from qq import _DESCRIPTION, _CITATION - - -class ClicEdmQqHitsPf10k(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.7.0") - RELEASE_NOTES = { - "1.5.0": "Regenerate with ARRAY_RECORD", - "1.7.0": "Update track features", - } - MANUAL_DOWNLOAD_INSTRUCTIONS = """ - For the raw input files in ROOT EDM4HEP format, please see the citation above. - - The processed tensorflow_dataset can also be downloaded from: https://zenodo.org/record/8414225 - """ - - def __init__(self, *args, **kwargs): - kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD - super(ClicEdmQqHitsPf10k, self).__init__(*args, **kwargs) - - def _info(self) -> tfds.core.DatasetInfo: - """Returns the dataset metadata.""" - return tfds.core.DatasetInfo( - builder=self, - description=_DESCRIPTION, - features=tfds.features.FeaturesDict( - { - "X": tfds.features.Tensor( - shape=( - None, - max(len(X_FEATURES_TRK), len(X_FEATURES_CH)), - ), - dtype=np.float32, - ), - "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - } - ), - supervised_keys=None, - homepage="", - citation=_CITATION, - metadata=tfds.core.MetadataDict( - x_features_track=X_FEATURES_TRK, - x_features_calohit=X_FEATURES_CH, - y_features=Y_FEATURES, - ), - ) - - def _split_generators(self, dl_manager: 
tfds.download.DownloadManager): - path = dl_manager.manual_dir - return split_sample(Path(path / "p8_ee_qq_ecm380/"), max_files=100) - - def _generate_examples(self, files): - return generate_examples(files) diff --git a/mlpf/heptfds/clic_pf_edm4hep_hits/single_ele.py b/mlpf/heptfds/clic_pf_edm4hep_hits/single_ele.py deleted file mode 100644 index 04812410c..000000000 --- a/mlpf/heptfds/clic_pf_edm4hep_hits/single_ele.py +++ /dev/null @@ -1,80 +0,0 @@ -from pathlib import Path - -import numpy as np -from utils_edm import ( - X_FEATURES_CH, - X_FEATURES_TRK, - Y_FEATURES, - generate_examples, - split_sample_several, -) - -import tensorflow_datasets as tfds - -_DESCRIPTION = """ -CLIC EDM4HEP dataset with single electron with raw calorimeter hits. - - X: reconstructed tracks and calorimeter hits, variable number N per event - - ygen: stable generator particles, zero-padded to N per event - - ycand: baseline particle flow particles, zero-padded to N per event -""" - -_CITATION = """ -Pata, Joosep, Wulff, Eric, Duarte, Javier, Mokhtar, Farouk, Zhang, Mengke, Girone, Maria, & Southwick, David. (2023). -Simulated datasets for detector and particle flow reconstruction: CLIC detector (1.1) [Data set]. -Zenodo. https://doi.org/10.5281/zenodo.8260741 -""" - - -class ClicEdmSingleElectronHitsPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.7.0") - RELEASE_NOTES = { - "1.1.0": "Remove track referencepoint feature", - "1.2.0": "Keep all interacting genparticels", - "1.5.0": "Regenerate with ARRAY_RECORD", - "1.7.0": "Update track features", - } - MANUAL_DOWNLOAD_INSTRUCTIONS = """ - For the raw input files in ROOT EDM4HEP format, please see the citation above. 
- - The processed tensorflow_dataset can also be downloaded from: - FIXME - """ - - def __init__(self, *args, **kwargs): - kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD - super(ClicEdmSingleElectronHitsPf, self).__init__(*args, **kwargs) - - def _info(self) -> tfds.core.DatasetInfo: - """Returns the dataset metadata.""" - return tfds.core.DatasetInfo( - builder=self, - description=_DESCRIPTION, - features=tfds.features.FeaturesDict( - { - "X": tfds.features.Tensor( - shape=( - None, - max(len(X_FEATURES_TRK), len(X_FEATURES_CH)), - ), - dtype=np.float32, - ), - "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - } - ), - supervised_keys=None, - homepage="", - citation=_CITATION, - metadata=tfds.core.MetadataDict( - x_features_track=X_FEATURES_TRK, - x_features_calohit=X_FEATURES_CH, - y_features=Y_FEATURES, - ), - ) - - def _split_generators(self, dl_manager: tfds.download.DownloadManager): - path = dl_manager.manual_dir - return split_sample_several([Path(path / "e-/"), Path(path / "e+/")]) - - def _generate_examples(self, files): - return generate_examples(files) diff --git a/mlpf/heptfds/clic_pf_edm4hep_hits/single_gamma.py b/mlpf/heptfds/clic_pf_edm4hep_hits/single_gamma.py deleted file mode 100644 index 43e6c6738..000000000 --- a/mlpf/heptfds/clic_pf_edm4hep_hits/single_gamma.py +++ /dev/null @@ -1,80 +0,0 @@ -from pathlib import Path - -import numpy as np -from utils_edm import ( - X_FEATURES_CH, - X_FEATURES_TRK, - Y_FEATURES, - generate_examples, - split_sample, -) - -import tensorflow_datasets as tfds - -_DESCRIPTION = """ -CLIC EDM4HEP dataset with single gamma with raw calorimeter hits. 
- - X: reconstructed tracks and calorimeter hits, variable number N per event - - ygen: stable generator particles, zero-padded to N per event - - ycand: baseline particle flow particles, zero-padded to N per event -""" - -_CITATION = """ -Pata, Joosep, Wulff, Eric, Duarte, Javier, Mokhtar, Farouk, Zhang, Mengke, Girone, Maria, & Southwick, David. (2023). -Simulated datasets for detector and particle flow reconstruction: CLIC detector (1.1) [Data set]. -Zenodo. https://doi.org/10.5281/zenodo.8260741 -""" - - -class ClicEdmSingleGammaHitsPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.7.0") - RELEASE_NOTES = { - "1.1.0": "Remove track referencepoint feature", - "1.2.0": "Keep all interacting genparticles", - "1.5.0": "Regenerate with ARRAY_RECORD", - "1.7.0": "Update track features", - } - MANUAL_DOWNLOAD_INSTRUCTIONS = """ - For the raw input files in ROOT EDM4HEP format, please see the citation above. - - The processed tensorflow_dataset can also be downloaded from: - FIXME - """ - - def __init__(self, *args, **kwargs): - kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD - super(ClicEdmSingleGammaHitsPf, self).__init__(*args, **kwargs) - - def _info(self) -> tfds.core.DatasetInfo: - """Returns the dataset metadata.""" - return tfds.core.DatasetInfo( - builder=self, - description=_DESCRIPTION, - features=tfds.features.FeaturesDict( - { - "X": tfds.features.Tensor( - shape=( - None, - max(len(X_FEATURES_TRK), len(X_FEATURES_CH)), - ), - dtype=np.float32, - ), - "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - } - ), - supervised_keys=None, - homepage="", - citation=_CITATION, - metadata=tfds.core.MetadataDict( - x_features_track=X_FEATURES_TRK, - x_features_calohit=X_FEATURES_CH, - y_features=Y_FEATURES, - ), - ) - - def _split_generators(self, dl_manager: tfds.download.DownloadManager): - path = dl_manager.manual_dir - 
return split_sample(Path(path / "gamma/")) - - def _generate_examples(self, files): - return generate_examples(files) diff --git a/mlpf/heptfds/clic_pf_edm4hep_hits/single_kaon0L.py b/mlpf/heptfds/clic_pf_edm4hep_hits/single_kaon0L.py deleted file mode 100644 index 89533e2a1..000000000 --- a/mlpf/heptfds/clic_pf_edm4hep_hits/single_kaon0L.py +++ /dev/null @@ -1,80 +0,0 @@ -from pathlib import Path - -import numpy as np -from utils_edm import ( - X_FEATURES_CH, - X_FEATURES_TRK, - Y_FEATURES, - generate_examples, - split_sample, -) - -import tensorflow_datasets as tfds - -_DESCRIPTION = """ -CLIC EDM4HEP dataset with single kaon0L with raw calorimeter hits. - - X: reconstructed tracks and calorimeter hits, variable number N per event - - ygen: stable generator particles, zero-padded to N per event - - ycand: baseline particle flow particles, zero-padded to N per event -""" - -_CITATION = """ -Pata, Joosep, Wulff, Eric, Duarte, Javier, Mokhtar, Farouk, Zhang, Mengke, Girone, Maria, & Southwick, David. (2023). -Simulated datasets for detector and particle flow reconstruction: CLIC detector (1.1) [Data set]. -Zenodo. https://doi.org/10.5281/zenodo.8260741 -""" - - -class ClicEdmSingleKaon0lHitsPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.7.0") - RELEASE_NOTES = { - "1.1.0": "Remove track referencepoint feature", - "1.2.0": "Keep all interacting genparticles", - "1.5.0": "Regenerate with ARRAY_RECORD", - "1.7.0": "Update track features", - } - MANUAL_DOWNLOAD_INSTRUCTIONS = """ - For the raw input files in ROOT EDM4HEP format, please see the citation above. 
- - The processed tensorflow_dataset can also be downloaded from: - FIXME - """ - - def __init__(self, *args, **kwargs): - kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD - super(ClicEdmSingleKaon0lHitsPf, self).__init__(*args, **kwargs) - - def _info(self) -> tfds.core.DatasetInfo: - """Returns the dataset metadata.""" - return tfds.core.DatasetInfo( - builder=self, - description=_DESCRIPTION, - features=tfds.features.FeaturesDict( - { - "X": tfds.features.Tensor( - shape=( - None, - max(len(X_FEATURES_TRK), len(X_FEATURES_CH)), - ), - dtype=np.float32, - ), - "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - } - ), - supervised_keys=None, - homepage="", - citation=_CITATION, - metadata=tfds.core.MetadataDict( - x_features_track=X_FEATURES_TRK, - x_features_calohit=X_FEATURES_CH, - y_features=Y_FEATURES, - ), - ) - - def _split_generators(self, dl_manager: tfds.download.DownloadManager): - path = dl_manager.manual_dir - return split_sample(Path(path / "kaon0L/")) - - def _generate_examples(self, files): - return generate_examples(files) diff --git a/mlpf/heptfds/clic_pf_edm4hep_hits/single_mu.py b/mlpf/heptfds/clic_pf_edm4hep_hits/single_mu.py deleted file mode 100644 index 03235a5df..000000000 --- a/mlpf/heptfds/clic_pf_edm4hep_hits/single_mu.py +++ /dev/null @@ -1,80 +0,0 @@ -from pathlib import Path - -import numpy as np -from utils_edm import ( - X_FEATURES_CH, - X_FEATURES_TRK, - Y_FEATURES, - generate_examples, - split_sample_several, -) - -import tensorflow_datasets as tfds - -_DESCRIPTION = """ -CLIC EDM4HEP dataset with single muon with raw hits. 
- - X: reconstructed tracks and calorimeter hits, variable number N per event - - ygen: stable generator particles, zero-padded to N per event - - ycand: baseline particle flow particles, zero-padded to N per event -""" - -_CITATION = """ -Pata, Joosep, Wulff, Eric, Duarte, Javier, Mokhtar, Farouk, Zhang, Mengke, Girone, Maria, & Southwick, David. (2023). -Simulated datasets for detector and particle flow reconstruction: CLIC detector (1.1) [Data set]. -Zenodo. https://doi.org/10.5281/zenodo.8260741 -""" - - -class ClicEdmSingleMuonHitsPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.7.0") - RELEASE_NOTES = { - "1.1.0": "Remove track referencepoint feature", - "1.2.0": "Keep all interacting genparticles", - "1.5.0": "Regenerate with ARRAY_RECORD", - "1.7.0": "Update track features", - } - MANUAL_DOWNLOAD_INSTRUCTIONS = """ - For the raw input files in ROOT EDM4HEP format, please see the citation above. - - The processed tensorflow_dataset can also be downloaded from: - FIXME - """ - - def __init__(self, *args, **kwargs): - kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD - super(ClicEdmSingleMuonHitsPf, self).__init__(*args, **kwargs) - - def _info(self) -> tfds.core.DatasetInfo: - """Returns the dataset metadata.""" - return tfds.core.DatasetInfo( - builder=self, - description=_DESCRIPTION, - features=tfds.features.FeaturesDict( - { - "X": tfds.features.Tensor( - shape=( - None, - max(len(X_FEATURES_TRK), len(X_FEATURES_CH)), - ), - dtype=np.float32, - ), - "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - } - ), - supervised_keys=None, - homepage="", - citation=_CITATION, - metadata=tfds.core.MetadataDict( - x_features_track=X_FEATURES_TRK, - x_features_calohit=X_FEATURES_CH, - y_features=Y_FEATURES, - ), - ) - - def _split_generators(self, dl_manager: tfds.download.DownloadManager): - path = dl_manager.manual_dir - return 
split_sample_several([Path(path / "mu-/"), Path(path / "mu+/")]) - - def _generate_examples(self, files): - return generate_examples(files) diff --git a/mlpf/heptfds/clic_pf_edm4hep_hits/single_neutron.py b/mlpf/heptfds/clic_pf_edm4hep_hits/single_neutron.py deleted file mode 100644 index 046396aab..000000000 --- a/mlpf/heptfds/clic_pf_edm4hep_hits/single_neutron.py +++ /dev/null @@ -1,80 +0,0 @@ -from pathlib import Path - -import numpy as np -from utils_edm import ( - X_FEATURES_CH, - X_FEATURES_TRK, - Y_FEATURES, - generate_examples, - split_sample, -) - -import tensorflow_datasets as tfds - -_DESCRIPTION = """ -CLIC EDM4HEP dataset with single neutron with raw hits. - - X: reconstructed tracks and calorimeter hits, variable number N per event - - ygen: stable generator particles, zero-padded to N per event - - ycand: baseline particle flow particles, zero-padded to N per event -""" - -_CITATION = """ -Pata, Joosep, Wulff, Eric, Duarte, Javier, Mokhtar, Farouk, Zhang, Mengke, Girone, Maria, & Southwick, David. (2023). -Simulated datasets for detector and particle flow reconstruction: CLIC detector (1.1) [Data set]. -Zenodo. https://doi.org/10.5281/zenodo.8260741 -""" - - -class ClicEdmSingleNeutronHitsPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.7.0") - RELEASE_NOTES = { - "1.1.0": "Remove track referencepoint feature", - "1.2.0": "Keep all interacting genparticles", - "1.5.0": "Regenerate with ARRAY_RECORD", - "1.7.0": "Update track features", - } - MANUAL_DOWNLOAD_INSTRUCTIONS = """ - For the raw input files in ROOT EDM4HEP format, please see the citation above. 
- - The processed tensorflow_dataset can also be downloaded from: - FIXME - """ - - def __init__(self, *args, **kwargs): - kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD - super(ClicEdmSingleNeutronHitsPf, self).__init__(*args, **kwargs) - - def _info(self) -> tfds.core.DatasetInfo: - """Returns the dataset metadata.""" - return tfds.core.DatasetInfo( - builder=self, - description=_DESCRIPTION, - features=tfds.features.FeaturesDict( - { - "X": tfds.features.Tensor( - shape=( - None, - max(len(X_FEATURES_TRK), len(X_FEATURES_CH)), - ), - dtype=np.float32, - ), - "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - } - ), - supervised_keys=None, - homepage="", - citation=_CITATION, - metadata=tfds.core.MetadataDict( - x_features_track=X_FEATURES_TRK, - x_features_calohit=X_FEATURES_CH, - y_features=Y_FEATURES, - ), - ) - - def _split_generators(self, dl_manager: tfds.download.DownloadManager): - path = dl_manager.manual_dir - return split_sample(Path(path / "neutron/")) - - def _generate_examples(self, files): - return generate_examples(files) diff --git a/mlpf/heptfds/clic_pf_edm4hep_hits/single_pi.py b/mlpf/heptfds/clic_pf_edm4hep_hits/single_pi.py deleted file mode 100644 index cd12b49d1..000000000 --- a/mlpf/heptfds/clic_pf_edm4hep_hits/single_pi.py +++ /dev/null @@ -1,80 +0,0 @@ -from pathlib import Path - -import numpy as np -from utils_edm import ( - X_FEATURES_CH, - X_FEATURES_TRK, - Y_FEATURES, - generate_examples, - split_sample_several, -) - -import tensorflow_datasets as tfds - -_DESCRIPTION = """ -CLIC EDM4HEP dataset with single pi- with raw hits. 
- - X: reconstructed tracks and calorimeter hits, variable number N per event - - ygen: stable generator particles, zero-padded to N per event - - ycand: baseline particle flow particles, zero-padded to N per event -""" - -_CITATION = """ -Pata, Joosep, Wulff, Eric, Duarte, Javier, Mokhtar, Farouk, Zhang, Mengke, Girone, Maria, & Southwick, David. (2023). -Simulated datasets for detector and particle flow reconstruction: CLIC detector (1.1) [Data set]. -Zenodo. https://doi.org/10.5281/zenodo.8260741 -""" - - -class ClicEdmSinglePiHitsPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.7.0") - RELEASE_NOTES = { - "1.1.0": "Remove track referencepoint feature", - "1.2.0": "Keep all interacting genparticles", - "1.5.0": "Regenerate with ARRAY_RECORD", - "1.7.0": "Update track features", - } - MANUAL_DOWNLOAD_INSTRUCTIONS = """ - For the raw input files in ROOT EDM4HEP format, please see the citation above. - - The processed tensorflow_dataset can also be downloaded from: - FIXME - """ - - def __init__(self, *args, **kwargs): - kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD - super(ClicEdmSinglePiHitsPf, self).__init__(*args, **kwargs) - - def _info(self) -> tfds.core.DatasetInfo: - """Returns the dataset metadata.""" - return tfds.core.DatasetInfo( - builder=self, - description=_DESCRIPTION, - features=tfds.features.FeaturesDict( - { - "X": tfds.features.Tensor( - shape=( - None, - max(len(X_FEATURES_TRK), len(X_FEATURES_CH)), - ), - dtype=np.float32, - ), - "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - } - ), - supervised_keys=None, - homepage="", - citation=_CITATION, - metadata=tfds.core.MetadataDict( - x_features_track=X_FEATURES_TRK, - x_features_calohit=X_FEATURES_CH, - y_features=Y_FEATURES, - ), - ) - - def _split_generators(self, dl_manager: tfds.download.DownloadManager): - path = dl_manager.manual_dir - return 
split_sample_several([Path(path / "pi-/"), Path(path / "pi+/")]) - - def _generate_examples(self, files): - return generate_examples(files) diff --git a/mlpf/heptfds/clic_pf_edm4hep_hits/single_pi0.py b/mlpf/heptfds/clic_pf_edm4hep_hits/single_pi0.py deleted file mode 100644 index 92cf40cf2..000000000 --- a/mlpf/heptfds/clic_pf_edm4hep_hits/single_pi0.py +++ /dev/null @@ -1,80 +0,0 @@ -from pathlib import Path - -import numpy as np -from utils_edm import ( - X_FEATURES_CH, - X_FEATURES_TRK, - Y_FEATURES, - generate_examples, - split_sample, -) - -import tensorflow_datasets as tfds - -_DESCRIPTION = """ -CLIC EDM4HEP dataset with single pi0 with raw hits. - - X: reconstructed tracks and calorimeter hits, variable number N per event - - ygen: stable generator particles, zero-padded to N per event - - ycand: baseline particle flow particles, zero-padded to N per event -""" - -_CITATION = """ -Pata, Joosep, Wulff, Eric, Duarte, Javier, Mokhtar, Farouk, Zhang, Mengke, Girone, Maria, & Southwick, David. (2023). -Simulated datasets for detector and particle flow reconstruction: CLIC detector (1.1) [Data set]. -Zenodo. https://doi.org/10.5281/zenodo.8260741 -""" - - -class ClicEdmSinglePi0HitsPf(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.7.0") - RELEASE_NOTES = { - "1.1.0": "Remove track referencepoint feature", - "1.2.0": "Keep all interacting genparticles", - "1.5.0": "Regenerate with ARRAY_RECORD", - "1.7.0": "Update track features", - } - MANUAL_DOWNLOAD_INSTRUCTIONS = """ - For the raw input files in ROOT EDM4HEP format, please see the citation above. 
- - The processed tensorflow_dataset can also be downloaded from: - FIXME - """ - - def __init__(self, *args, **kwargs): - kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD - super(ClicEdmSinglePi0HitsPf, self).__init__(*args, **kwargs) - - def _info(self) -> tfds.core.DatasetInfo: - """Returns the dataset metadata.""" - return tfds.core.DatasetInfo( - builder=self, - description=_DESCRIPTION, - features=tfds.features.FeaturesDict( - { - "X": tfds.features.Tensor( - shape=( - None, - max(len(X_FEATURES_TRK), len(X_FEATURES_CH)), - ), - dtype=np.float32, - ), - "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - } - ), - supervised_keys=None, - homepage="", - citation=_CITATION, - metadata=tfds.core.MetadataDict( - x_features_track=X_FEATURES_TRK, - x_features_calohit=X_FEATURES_CH, - y_features=Y_FEATURES, - ), - ) - - def _split_generators(self, dl_manager: tfds.download.DownloadManager): - path = dl_manager.manual_dir - return split_sample(Path(path / "pi0/")) - - def _generate_examples(self, files): - return generate_examples(files) diff --git a/mlpf/heptfds/clic_pf_edm4hep_hits/ttbar_10k.py b/mlpf/heptfds/clic_pf_edm4hep_hits/ttbar_10k.py deleted file mode 100644 index 1d79c5f4d..000000000 --- a/mlpf/heptfds/clic_pf_edm4hep_hits/ttbar_10k.py +++ /dev/null @@ -1,66 +0,0 @@ -from pathlib import Path - -import numpy as np -from utils_edm import ( - X_FEATURES_CH, - X_FEATURES_TRK, - Y_FEATURES, - generate_examples, - split_sample, -) - -import tensorflow_datasets as tfds - -from ttbar import _DESCRIPTION, _CITATION - - -class ClicEdmTtbarHitsPf10k(tfds.core.GeneratorBasedBuilder): - VERSION = tfds.core.Version("1.7.0") - RELEASE_NOTES = { - "1.5.0": "Regenerate with ARRAY_RECORD", - "1.7.0": "Update track features", - } - MANUAL_DOWNLOAD_INSTRUCTIONS = """ - For the raw input files in ROOT EDM4HEP format, please see the citation above. 
- - The processed tensorflow dataset can also be downloaded from: https://zenodo.org/record/8414225 - """ - - def __init__(self, *args, **kwargs): - kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD - super(ClicEdmTtbarHitsPf10k, self).__init__(*args, **kwargs) - - def _info(self) -> tfds.core.DatasetInfo: - """Returns the dataset metadata.""" - return tfds.core.DatasetInfo( - builder=self, - description=_DESCRIPTION, - features=tfds.features.FeaturesDict( - { - "X": tfds.features.Tensor( - shape=( - None, - max(len(X_FEATURES_TRK), len(X_FEATURES_CH)), - ), - dtype=np.float32, - ), - "ygen": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - } - ), - supervised_keys=None, - homepage="", - citation=_CITATION, - metadata=tfds.core.MetadataDict( - x_features_track=X_FEATURES_TRK, - x_features_calohit=X_FEATURES_CH, - y_features=Y_FEATURES, - ), - ) - - def _split_generators(self, dl_manager: tfds.download.DownloadManager): - path = dl_manager.manual_dir - return split_sample(Path(path / "p8_ee_tt_ecm380/"), max_files=100) - - def _generate_examples(self, files): - return generate_examples(files) diff --git a/mlpf/heptfds/clic_pf_edm4hep_hits/utils_edm.py b/mlpf/heptfds/clic_pf_edm4hep_hits/utils_edm.py deleted file mode 100644 index 84309c462..000000000 --- a/mlpf/heptfds/clic_pf_edm4hep_hits/utils_edm.py +++ /dev/null @@ -1,151 +0,0 @@ -import awkward as ak -import numpy as np -import tqdm -import random - -# from fcc/postprocessing_hits.py -X_FEATURES_TRK = [ - "elemtype", - "pt", - "eta", - "sin_phi", - "cos_phi", - "p", - "chi2", - "ndf", - "dEdx", - "dEdxError", - "radiusOfInnermostHit", - "tanLambda", - "D0", - "omega", - "Z0", - "time", -] -X_FEATURES_CH = [ - "elemtype", - "et", - "eta", - "sin_phi", - "cos_phi", - "energy", - "position.x", - "position.y", - "position.z", - "time", - "subdetector", - "type", -] -X_FEAT_NUM = max(len(X_FEATURES_TRK), 
len(X_FEATURES_CH)) - -Y_FEATURES = ["PDG", "charge", "pt", "eta", "sin_phi", "cos_phi", "energy"] -labels = [0, 211, 130, 22, 11, 13] - - -def split_sample(path, test_frac=0.8, max_files=0): - files = sorted(list(path.glob("*.parquet"))) - if max_files > 0: - files = files[:max_files] - print("Found {} files in {}".format(len(files), path)) - assert len(files) > 0 - idx_split = int(test_frac * len(files)) - files_train = files[:idx_split] - files_test = files[idx_split:] - assert len(files_train) > 0 - assert len(files_test) > 0 - return { - "train": generate_examples(files_train), - "test": generate_examples(files_test), - } - - -def split_sample_several(paths, test_frac=0.8): - files = sum([list(path.glob("*.parquet")) for path in paths], []) - random.shuffle(files) - print("Found {} files".format(len(files))) - assert len(files) > 0 - idx_split = int(test_frac * len(files)) - files_train = files[:idx_split] - files_test = files[idx_split:] - assert len(files_train) > 0 - assert len(files_test) > 0 - return { - "train": generate_examples(files_train), - "test": generate_examples(files_test), - } - - -def prepare_data_clic(fn): - ret = ak.from_parquet(fn) - - X_track = ret["X_track"] - X_hit = ret["X_hit"] - - assert len(X_track) == len(X_hit) - nev = len(X_track) - - Xs = [] - ygens = [] - ycands = [] - for iev in range(nev): - - X1 = ak.to_numpy(X_track[iev]) - X2 = ak.to_numpy(X_hit[iev]) - - if len(X1) == 0 and len(X2) == 0: - continue - - ygen_track = ak.to_numpy(ret["ygen_track"][iev]) - ygen_hit = ak.to_numpy(ret["ygen_hit"][iev]) - ycand_track = ak.to_numpy(ret["ycand_track"][iev]) - ycand_hit = ak.to_numpy(ret["ycand_hit"][iev]) - if ygen_track.shape[0] == 0: - ygen_track = np.zeros((0, 7), dtype=np.float32) - if ycand_track.shape[0] == 0: - ycand_track = np.zeros((0, 7), dtype=np.float32) - if ygen_hit.shape[0] == 0: - ygen_hit = np.zeros((0, 7), dtype=np.float32) - if ycand_hit.shape[0] == 0: - ycand_hit = np.zeros((0, 7), dtype=np.float32) - - if 
len(ygen_track) == 0 and len(ygen_hit) == 0: - continue - if len(ycand_track) == 0 and len(ycand_hit) == 0: - continue - - # pad feature dim between tracks and hits to the same size - X1 = np.pad(X1, [[0, 0], [0, X_FEAT_NUM - X1.shape[1]]]) - X2 = np.pad(X2, [[0, 0], [0, X_FEAT_NUM - X2.shape[1]]]) - - # concatenate tracks and hits in features and targets - X = np.concatenate([X1, X2]) - ygen = np.concatenate([ygen_track, ygen_hit]) - ycand = np.concatenate([ycand_track, ycand_hit]) - assert ygen.shape[0] == X.shape[0] - assert ycand.shape[0] == X.shape[0] - - # replace PID with index in labels array - arr = np.array([labels.index(p) for p in ygen[:, 0]]) - ygen[:, 0][:] = arr[:] - arr = np.array([labels.index(p) for p in ycand[:, 0]]) - ycand[:, 0][:] = arr[:] - Xs.append(X) - ygens.append(ygen) - ycands.append(ycand) - return Xs, ygens, ycands - - -def generate_examples(files): - for fi in tqdm.tqdm(files): - Xs, ygens, ycands = prepare_data_clic(fi) - for iev in range(len(Xs)): - yield str(fi) + "_" + str(iev), { - "X": Xs[iev].astype(np.float32), - "ygen": ygens[iev].astype(np.float32), - "ycand": ycands[iev].astype(np.float32), - } - - -if __name__ == "__main__": - fn = "/local/joosep/mlpf_hits/clic_edm4hep_2023_02_27/p8_ee_qq_ecm380/reco_p8_ee_qq_ecm380_111398.parquet" - ret = prepare_data_clic(fn) diff --git a/mlpf/heptfds/cms_pf/cms_utils.py b/mlpf/heptfds/cms_pf/cms_utils.py index 4b60a8709..5d5ba7172 100644 --- a/mlpf/heptfds/cms_pf/cms_utils.py +++ b/mlpf/heptfds/cms_pf/cms_utils.py @@ -243,6 +243,8 @@ def split_sample(path, builder_config, num_splits=NUM_SPLITS, train_frac=0.9): split_index = int(builder_config.name) - 1 files_train_split = split_list(files_train, num_splits) files_test_split = split_list(files_test, num_splits) + assert len(files_train_split[split_index]) > 0 + assert len(files_test_split[split_index]) > 0 return { "train": generate_examples(files_train_split[split_index]), "test": generate_examples(files_test_split[split_index]), diff 
--git a/mlpf/heptfds/cms_pf/qcd.py b/mlpf/heptfds/cms_pf/qcd.py index 0044d37e9..768759680 100644 --- a/mlpf/heptfds/cms_pf/qcd.py +++ b/mlpf/heptfds/cms_pf/qcd.py @@ -1,5 +1,7 @@ """CMS PF QCD dataset.""" +import os + import cms_utils import numpy as np import tensorflow_datasets as tfds @@ -21,7 +23,7 @@ class CmsPfQcd(tfds.core.GeneratorBasedBuilder): """DatasetBuilder for cms_pf_qcd dataset.""" - VERSION = tfds.core.Version("2.8.0") + VERSION = tfds.core.Version(os.environ.get("TFDS_VERSION", "3.0.0")) RELEASE_NOTES = { "1.3.0": "12_2_0_pre2 generation with updated caloparticle/trackingparticle", "1.3.1": "Remove PS again", @@ -41,6 +43,7 @@ class CmsPfQcd(tfds.core.GeneratorBasedBuilder): "2.7.0": "Remove split_caloparticle", "2.7.1": "Use fixed split_caloparticle", "2.8.0": "Add Pythia", + "3.0.0": "updated beamspot, 13.6 TeV", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_qcd ~/tensorflow_datasets/ @@ -77,8 +80,8 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" path = dl_manager.manual_dir - sample_dir = "QCDForPF_14TeV_TuneCUETP8M1_cfi" - return cms_utils.split_sample(path / sample_dir / "raw", self.builder_config, num_splits=cms_utils.NUM_SPLITS) + sample_dir = "QCDForPF_13p6TeV_TuneCUETP8M1_cfi" + return cms_utils.split_sample(path / sample_dir, self.builder_config, num_splits=cms_utils.NUM_SPLITS) def _generate_examples(self, files): return cms_utils.generate_examples(files) diff --git a/mlpf/heptfds/cms_pf/qcd_13p6.py b/mlpf/heptfds/cms_pf/qcd_13p6.py deleted file mode 100644 index 236c627e2..000000000 --- a/mlpf/heptfds/cms_pf/qcd_13p6.py +++ /dev/null @@ -1,66 +0,0 @@ -"""CMS PF QCD 13p6 dataset.""" - -import cms_utils -import numpy as np -import tensorflow_datasets as tfds - -X_FEATURES = cms_utils.X_FEATURES -Y_FEATURES = cms_utils.Y_FEATURES - -_DESCRIPTION = """ -Dataset 
generated with CMSSW and full detector sim. -QCD events with PU 55~75 in a Run3 setup, 13.6 TeV. -""" - -# TODO(cms_pf): BibTeX citation -_CITATION = """ -""" - - -class CmsPfQcd13p6(tfds.core.GeneratorBasedBuilder): - """DatasetBuilder for cms_pf_qcd dataset.""" - - VERSION = tfds.core.Version("2.8.0") - RELEASE_NOTES = { - "2.6.0": "First version", - "2.8.0": "Add Pythia", - } - MANUAL_DOWNLOAD_INSTRUCTIONS = """ - rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_qcd ~/tensorflow_datasets/ - """ - - # create configs 1 ... NUM_SPLITS + 1 that allow to parallelize the dataset building - BUILDER_CONFIGS = [tfds.core.BuilderConfig(name=str(group)) for group in range(1, cms_utils.NUM_SPLITS + 1)] - - def __init__(self, *args, **kwargs): - kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD - super(CmsPfQcd13p6, self).__init__(*args, **kwargs) - - def _info(self) -> tfds.core.DatasetInfo: - """Returns the dataset metadata.""" - return tfds.core.DatasetInfo( - builder=self, - description=_DESCRIPTION, - features=tfds.features.FeaturesDict( - { - "X": tfds.features.Tensor(shape=(None, len(X_FEATURES)), dtype=np.float32), - "ytarget": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - "genmet": tfds.features.Scalar(dtype=np.float32), - "genjets": tfds.features.Tensor(shape=(None, 4), dtype=np.float32), - "targetjets": tfds.features.Tensor(shape=(None, 4), dtype=np.float32), - } - ), - homepage="https://github.com/jpata/particleflow", - citation=_CITATION, - metadata=tfds.core.MetadataDict(x_features=X_FEATURES, y_features=Y_FEATURES), - ) - - def _split_generators(self, dl_manager: tfds.download.DownloadManager): - """Returns SplitGenerators.""" - path = dl_manager.manual_dir - sample_dir = "QCDForPF_13p6TeV_TuneCUETP8M1_cfi" - return cms_utils.split_sample(path / sample_dir / "raw", self.builder_config, 
num_splits=cms_utils.NUM_SPLITS) - - def _generate_examples(self, files): - return cms_utils.generate_examples(files) diff --git a/mlpf/heptfds/cms_pf/qcd_nopu.py b/mlpf/heptfds/cms_pf/qcd_nopu.py index b089a79a1..49624391b 100644 --- a/mlpf/heptfds/cms_pf/qcd_nopu.py +++ b/mlpf/heptfds/cms_pf/qcd_nopu.py @@ -1,5 +1,7 @@ """CMS PF TTbar dataset.""" +import os + import cms_utils import numpy as np @@ -22,7 +24,7 @@ class CmsPfQcdNopu(tfds.core.GeneratorBasedBuilder): """DatasetBuilder for cms_pf_qcd_nopu dataset.""" - VERSION = tfds.core.Version("2.8.0") + VERSION = tfds.core.Version(os.environ.get("TFDS_VERSION", "3.0.0")) RELEASE_NOTES = { "2.0.0": "New truth def based primarily on CaloParticles", "2.4.0": "Add gp_to_track, gp_to_cluster, jet_idx", @@ -33,13 +35,15 @@ class CmsPfQcdNopu(tfds.core.GeneratorBasedBuilder): "2.7.1": "Use fixed split_caloparticle", "2.7.2": "Bump stats to 20M", "2.8.0": "Add Pythia", + "3.0.0": "updated beamspot, 13.6 TeV", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_qcd_nopu ~/tensorflow_datasets/ """ # create configs 1 ... 
NUM_SPLITS + 1 that allow to parallelize the dataset building - BUILDER_CONFIGS = [tfds.core.BuilderConfig(name=str(group)) for group in range(1, 40 + 1)] + NUM_SPLITS = 10 + BUILDER_CONFIGS = [tfds.core.BuilderConfig(name=str(group)) for group in range(1, NUM_SPLITS + 1)] def __init__(self, *args, **kwargs): kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD @@ -69,8 +73,8 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" path = dl_manager.manual_dir - sample_dir = "QCDForPF_14TeV_TuneCUETP8M1_cfi" - return cms_utils.split_sample(path / sample_dir / "raw", self.builder_config, num_splits=40) + sample_dir = "QCDForPF_13p6TeV_TuneCUETP8M1_cfi" + return cms_utils.split_sample(path / sample_dir, self.builder_config, num_splits=self.NUM_SPLITS) def _generate_examples(self, files): return cms_utils.generate_examples(files) diff --git a/mlpf/heptfds/cms_pf/singleele.py b/mlpf/heptfds/cms_pf/singleele.py deleted file mode 100644 index e0ef36b97..000000000 --- a/mlpf/heptfds/cms_pf/singleele.py +++ /dev/null @@ -1,66 +0,0 @@ -"""CMS PF SingleEle dataset.""" - -import cms_utils -import numpy as np -import tensorflow_datasets as tfds - -X_FEATURES = cms_utils.X_FEATURES -Y_FEATURES = cms_utils.Y_FEATURES - -_DESCRIPTION = """ -Dataset generated with CMSSW and full detector sim. - -""" - -# TODO(cms_pf): BibTeX citation -_CITATION = """ -""" - - -class CmsPfSingleEle(tfds.core.GeneratorBasedBuilder, skip_registration=True): - """DatasetBuilder for cms_pf_ttbar dataset.""" - - VERSION = tfds.core.Version("2.5.0") - RELEASE_NOTES = { - "2.5.0": "First version", - } - MANUAL_DOWNLOAD_INSTRUCTIONS = """ - rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_ttbar ~/tensorflow_datasets/ - """ - - # create configs 1 ... 
NUM_SPLITS + 1 that allow to parallelize the dataset building - BUILDER_CONFIGS = [tfds.core.BuilderConfig(name=str(1))] - - def __init__(self, *args, **kwargs): - kwargs["file_format"] = tfds.core.FileFormat.ARRAY_RECORD - super(CmsPfSingleEle, self).__init__(*args, **kwargs) - - def _info(self) -> tfds.core.DatasetInfo: - """Returns the dataset metadata.""" - return tfds.core.DatasetInfo( - builder=self, - description=_DESCRIPTION, - features=tfds.features.FeaturesDict( - { - "X": tfds.features.Tensor(shape=(None, len(X_FEATURES)), dtype=np.float32), - "ytarget": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - "ycand": tfds.features.Tensor(shape=(None, len(Y_FEATURES)), dtype=np.float32), - "genmet": tfds.features.Scalar(dtype=np.float32), - "genjets": tfds.features.Tensor(shape=(None, 4), dtype=np.float32), - "targetjets": tfds.features.Tensor(shape=(None, 4), dtype=np.float32), - "pythia": tfds.features.Tensor(shape=(None, 5), dtype=np.float32), - } - ), - homepage="https://github.com/jpata/particleflow", - citation=_CITATION, - metadata=tfds.core.MetadataDict(x_features=X_FEATURES, y_features=Y_FEATURES), - ) - - def _split_generators(self, dl_manager: tfds.download.DownloadManager): - """Returns SplitGenerators.""" - path = dl_manager.manual_dir - sample_dir = "SingleElectronFlatPt1To1000_pythia8_cfi" - return cms_utils.split_sample(path / sample_dir / "raw", self.builder_config, num_splits=1, train_frac=0.1) - - def _generate_examples(self, files): - return cms_utils.generate_examples(files) diff --git a/mlpf/heptfds/cms_pf/ttbar.py b/mlpf/heptfds/cms_pf/ttbar.py index f95215f68..dfd00620b 100644 --- a/mlpf/heptfds/cms_pf/ttbar.py +++ b/mlpf/heptfds/cms_pf/ttbar.py @@ -1,5 +1,6 @@ """CMS PF TTbar dataset.""" +import os import cms_utils import numpy as np import tensorflow_datasets as tfds @@ -21,7 +22,7 @@ class CmsPfTtbar(tfds.core.GeneratorBasedBuilder, skip_registration=True): """DatasetBuilder for cms_pf_ttbar dataset.""" - 
VERSION = tfds.core.Version("2.8.0") + VERSION = tfds.core.Version(os.environ.get("TFDS_VERSION", "3.0.0")) RELEASE_NOTES = { "1.0.0": "Initial release.", "1.1.0": "Add muon type, fix electron GSF association", @@ -46,6 +47,7 @@ class CmsPfTtbar(tfds.core.GeneratorBasedBuilder, skip_registration=True): "2.7.0": "Remove split_caloparticle", "2.7.1": "Use fixed split_caloparticle", "2.8.0": "Add Pythia", + "3.0.0": "updated beamspot, 13.6 TeV", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_ttbar ~/tensorflow_datasets/ @@ -82,8 +84,8 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" path = dl_manager.manual_dir - sample_dir = "TTbar_14TeV_TuneCUETP8M1_cfi" - return cms_utils.split_sample(path / sample_dir / "raw", self.builder_config, num_splits=cms_utils.NUM_SPLITS) + sample_dir = "TTbar_13p6TeV_TuneCUETP8M1_cfi" + return cms_utils.split_sample(path / sample_dir, self.builder_config, num_splits=cms_utils.NUM_SPLITS) def _generate_examples(self, files): return cms_utils.generate_examples(files) diff --git a/mlpf/heptfds/cms_pf/ttbar_nopu.py b/mlpf/heptfds/cms_pf/ttbar_nopu.py index 25ac8ea60..21cc5b081 100644 --- a/mlpf/heptfds/cms_pf/ttbar_nopu.py +++ b/mlpf/heptfds/cms_pf/ttbar_nopu.py @@ -1,5 +1,7 @@ """CMS PF TTbar dataset.""" +import os + import cms_utils import numpy as np @@ -22,7 +24,7 @@ class CmsPfTtbarNopu(tfds.core.GeneratorBasedBuilder): """DatasetBuilder for cms_pf_ttbar_nopu dataset.""" - VERSION = tfds.core.Version("2.8.0") + VERSION = tfds.core.Version(os.environ.get("TFDS_VERSION", "3.0.0")) RELEASE_NOTES = { "1.7.1": "First version", "1.8.0": "Add ispu, genjets, genmet; disable genjet_idx; improved merging", @@ -36,6 +38,7 @@ class CmsPfTtbarNopu(tfds.core.GeneratorBasedBuilder): "2.7.0": "Remove split_caloparticle", "2.7.1": "Use fixed split_caloparticle", "2.8.0": "Add 
Pythia", + "3.0.0": "updated beamspot, 13.6 TeV", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_ttbar_nopu ~/tensorflow_datasets/ @@ -72,8 +75,8 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" path = dl_manager.manual_dir - sample_dir = "TTbar_14TeV_TuneCUETP8M1_cfi" - return cms_utils.split_sample(path / sample_dir / "raw", self.builder_config, num_splits=cms_utils.NUM_SPLITS) + sample_dir = "TTbar_13p6TeV_TuneCUETP8M1_cfi" + return cms_utils.split_sample(path / sample_dir, self.builder_config, num_splits=cms_utils.NUM_SPLITS) def _generate_examples(self, files): return cms_utils.generate_examples(files) diff --git a/mlpf/heptfds/cms_pf/ztt.py b/mlpf/heptfds/cms_pf/ztt.py index 702635b7c..3dc840f1e 100644 --- a/mlpf/heptfds/cms_pf/ztt.py +++ b/mlpf/heptfds/cms_pf/ztt.py @@ -1,5 +1,7 @@ """CMS PF TTbar dataset.""" +import os + import cms_utils import numpy as np import tensorflow_datasets as tfds @@ -21,7 +23,7 @@ class CmsPfZtt(tfds.core.GeneratorBasedBuilder, skip_registration=True): """DatasetBuilder for cms_pf_ztt dataset.""" - VERSION = tfds.core.Version("2.8.0") + VERSION = tfds.core.Version(os.environ.get("TFDS_VERSION", "3.0.0")) RELEASE_NOTES = { "2.5.0": "Remove neutrinos from genjets, split to 10", "2.5.1": "Associate ele with GSF first", @@ -29,6 +31,7 @@ class CmsPfZtt(tfds.core.GeneratorBasedBuilder, skip_registration=True): "2.7.0": "Remove split_caloparticle", "2.7.1": "Use fixed split_caloparticle", "2.8.0": "Add Pythia", + "3.0.0": "updated beamspot, 13.6 TeV", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_ztt ~/tensorflow_datasets/ @@ -65,8 +68,8 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" 
path = dl_manager.manual_dir - sample_dir = "ZTT_All_hadronic_14TeV_TuneCUETP8M1_cfi" - return cms_utils.split_sample(path / sample_dir / "raw", self.builder_config, num_splits=cms_utils.NUM_SPLITS) + sample_dir = "ZTT_All_hadronic_13p6TeV_TuneCUETP8M1_cfi" + return cms_utils.split_sample(path / sample_dir, self.builder_config, num_splits=cms_utils.NUM_SPLITS) def _generate_examples(self, files): return cms_utils.generate_examples(files) diff --git a/mlpf/heptfds/cms_pf/ztt_nopu.py b/mlpf/heptfds/cms_pf/ztt_nopu.py index de2f29240..58ae0b158 100644 --- a/mlpf/heptfds/cms_pf/ztt_nopu.py +++ b/mlpf/heptfds/cms_pf/ztt_nopu.py @@ -1,5 +1,7 @@ """CMS PF TTbar dataset.""" +import os + import cms_utils import numpy as np import tensorflow_datasets as tfds @@ -21,7 +23,7 @@ class CmsPfZttNopu(tfds.core.GeneratorBasedBuilder): """DatasetBuilder for cms_pf_ztt_nopu dataset.""" - VERSION = tfds.core.Version("2.8.0") + VERSION = tfds.core.Version(os.environ.get("TFDS_VERSION", "3.0.0")) RELEASE_NOTES = { "2.5.0": "Remove neutrinos from genjets, split to 10", "2.5.1": "Associate ele with GSF first", @@ -29,6 +31,7 @@ class CmsPfZttNopu(tfds.core.GeneratorBasedBuilder): "2.7.0": "Remove split_caloparticle", "2.7.1": "Use fixed split_caloparticle", "2.8.0": "Add Pythia", + "3.0.0": "updated beamspot, 13.6 TeV", } MANUAL_DOWNLOAD_INSTRUCTIONS = """ rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_ztt ~/tensorflow_datasets/ @@ -65,8 +68,8 @@ def _info(self) -> tfds.core.DatasetInfo: def _split_generators(self, dl_manager: tfds.download.DownloadManager): """Returns SplitGenerators.""" path = dl_manager.manual_dir - sample_dir = "ZTT_All_hadronic_14TeV_TuneCUETP8M1_cfi" - return cms_utils.split_sample(path / sample_dir / "raw", self.builder_config, num_splits=cms_utils.NUM_SPLITS) + sample_dir = "ZTT_All_hadronic_13p6TeV_TuneCUETP8M1_cfi" + return cms_utils.split_sample(path / sample_dir, self.builder_config, num_splits=cms_utils.NUM_SPLITS) 
def _generate_examples(self, files): return cms_utils.generate_examples(files) diff --git a/mlpf/model/inference.py b/mlpf/model/inference.py index f8884cdee..83a69c33a 100644 --- a/mlpf/model/inference.py +++ b/mlpf/model/inference.py @@ -178,7 +178,7 @@ def run_predictions(world_size, rank, model, loader, sample, outpath, jetdef, je _logger.info(f"Time taken to make predictions on device {rank} is: {time_total_min:.2f} min") -def make_plots(outpath, sample, dataset, dir_name="", ntest_files=-1): +def make_plots(outpath, sample, dataset, dir_name="", num_test_events=None): """Uses the predictions stored as .parquet files from run_predictions to make plots.""" import matplotlib.pyplot as plt @@ -190,7 +190,7 @@ def make_plots(outpath, sample, dataset, dir_name="", ntest_files=-1): plots_path = Path(f"{outpath}/plots{dir_name}/{sample}/") pred_path = Path(f"{outpath}/preds{dir_name}/{sample}/") - yvals, X, _ = load_eval_data(str(pred_path / "*.parquet"), ntest_files) + yvals, X, _ = load_eval_data(str(pred_path / "*.parquet"), num_test_events) _logger.info(f"Loaded data for plotting from {pred_path}") plot_num_elements(X, cp_dir=plots_path) diff --git a/mlpf/model/training.py b/mlpf/model/training.py index 3e441e74d..ff996e129 100644 --- a/mlpf/model/training.py +++ b/mlpf/model/training.py @@ -697,7 +697,7 @@ def run_test(rank, world_size, config, outdir, model, sample, testdir_name, dtyp torch.cuda.empty_cache() # FIXME: import this from a central place - if config["dataset"] == "clic": + if config["dataset"] == "clic" or config["dataset"] == "cld": import fastjet jetdef = fastjet.JetDefinition(fastjet.ee_genkt_algorithm, 0.4, -1.0) @@ -708,7 +708,7 @@ def run_test(rank, world_size, config, outdir, model, sample, testdir_name, dtyp jetdef = fastjet.JetDefinition(fastjet.antikt_algorithm, 0.4) jet_ptcut = 3 else: - raise Exception("not implemented") + raise Exception("jet configuration for dataset {} not implemented".format(config["dataset"])) device_type = "cuda" 
if isinstance(rank, int) else "cpu" with torch.autocast(device_type=device_type, dtype=dtype, enabled=device_type == "cuda"): diff --git a/mlpf/model/utils.py b/mlpf/model/utils.py index 70453b15e..1c07974f2 100644 --- a/mlpf/model/utils.py +++ b/mlpf/model/utils.py @@ -14,33 +14,39 @@ ELEM_TYPES = { "cms": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], "clic": [0, 1, 2], + "cld": [0, 1, 2], } # Some element types are defined, but do not exist in the dataset at all ELEM_TYPES_NONZERO = { "cms": [1, 4, 5, 6, 8, 9, 10, 11], "clic": [1, 2], + "cld": [1, 2], } CLASS_LABELS = { "cms": [0, 211, 130, 1, 2, 22, 11, 13, 15], # we never actually predict 15/taus (not there in targets) "clic": [0, 211, 130, 22, 11, 13], + "cld": [0, 211, 130, 22, 11, 13], "clic_hits": [0, 211, 130, 22, 11, 13], } CLASS_NAMES_LATEX = { "cms": ["none", "Charged Hadron", "Neutral Hadron", "HFEM", "HFHAD", r"$\gamma$", r"$e^\pm$", r"$\mu^\pm$", r"$\tau$"], "clic": ["none", "Charged Hadron", "Neutral Hadron", r"$\gamma$", r"$e^\pm$", r"$\mu^\pm$"], + "cld": ["none", "Charged Hadron", "Neutral Hadron", r"$\gamma$", r"$e^\pm$", r"$\mu^\pm$"], "clic_hits": ["none", "Charged Hadron", "Neutral Hadron", r"$\gamma$", r"$e^\pm$", r"$\mu^\pm$"], } CLASS_NAMES = { "cms": ["none", "chhad", "nhad", "HFEM", "HFHAD", "gamma", "ele", "mu", "tau"], "clic": ["none", "chhad", "nhad", "gamma", "ele", "mu"], + "cld": ["none", "chhad", "nhad", "gamma", "ele", "mu"], "clic_hits": ["none", "chhad", "nhad", "gamma", "ele", "mu"], } CLASS_NAMES_CAPITALIZED = { "cms": ["none", "Charged hadron", "Neutral hadron", "HFEM", "HFHAD", "Photon", "Electron", "Muon", "Tau"], "clic": ["none", "Charged hadron", "Neutral hadron", "Photon", "Electron", "Muon"], + "cld": ["none", "Charged hadron", "Neutral hadron", "Photon", "Electron", "Muon"], "clic_hits": ["none", "Charged hadron", "Neutral hadron", "Photon", "Electron", "Muon"], } @@ -121,6 +127,25 @@ "time | sigma_y", "Null | sigma_z", ], + "cld": [ + "type", + "pt | et", + "eta", +
"sin_phi", + "cos_phi", + "p | energy", + "chi2 | position.x", + "ndf | position.y", + "dEdx | position.z", + "dEdxError | iTheta", + "radiusOfInnermostHit | energy_ecal", + "tanLambda | energy_hcal", + "D0 | energy_other", + "omega | num_hits", + "Z0 | sigma_x", + "time | sigma_y", + "Null | sigma_z", + ], "clic_hits": [ "elemtype", "pt | et", diff --git a/mlpf/pipeline.py b/mlpf/pipeline.py index 159eced77..a6d9cd59c 100644 --- a/mlpf/pipeline.py +++ b/mlpf/pipeline.py @@ -23,7 +23,7 @@ from mlpf.model.training import device_agnostic_run, override_config from mlpf.model.distributed_ray import run_hpo, run_ray_training from mlpf.model.PFDataset import SHARING_STRATEGY -from mlpf.utils import create_experiment_dir +from mlpf.utils import create_experiment_dir, load_spec, resolve_path def get_parser(): @@ -31,7 +31,10 @@ def get_parser(): parser = argparse.ArgumentParser() # --- Define top-level, global arguments --- - parser.add_argument("--config", type=str, required=True, help="Path to the yaml config file") + parser.add_argument("--spec-file", type=str, required=True, help="Path to the yaml spec file (particleflow_spec.yaml)") + parser.add_argument("--model-name", type=str, required=True, help="Model name from spec file to train") + parser.add_argument("--production-name", type=str, required=True, help="Production name from spec file") + parser.add_argument("--experiment-dir", type=str, help="The directory where to save the weights and configs. If None, create a new one.") parser.add_argument("--prefix", type=str, help="Prefix prepended to the experiment dir name") parser.add_argument("--data-dir", type=str, help="Path to the `tensorflow_datasets/` directory") @@ -43,7 +46,7 @@ def get_parser(): # --- 'train' command parser --- parser_train = subparsers.add_parser("train", help="Run standard training on a single node (CPU, single-GPU, or DDP)") - parser_train.add_argument("--gpus", type=int, default=0, help="Number of GPUs to use. 
Set to 0 for CPU.") + parser_train.add_argument("--gpus", type=int, default=None, help="Number of GPUs to use. Set to 0 for CPU.") parser_train.add_argument("--gpu-batch-multiplier", type=int, default=None, help="Increase batch size per GPU by this constant factor") parser_train.add_argument("--num-workers", type=int, default=None, help="Number of processes to load data") parser_train.add_argument("--prefetch-factor", type=int, default=None, help="Number of samples to fetch & prefetch per worker") @@ -73,7 +76,7 @@ def get_parser(): # --- 'test' command parser --- parser_test = subparsers.add_parser("test", help="Run evaluation on a trained model") parser_test.add_argument("--load", type=str, required=True, help="Path to a saved model checkpoint to test") - parser_test.add_argument("--gpus", type=int, default=0, help="Number of GPUs to use. Set to 0 for CPU.") + parser_test.add_argument("--gpus", type=int, default=None, help="Number of GPUs to use. Set to 0 for CPU.") parser_test.add_argument("--gpu-batch-multiplier", type=int, default=None, help="Increase batch size per GPU by this constant factor") parser_test.add_argument("--num-workers", type=int, default=None, help="Number of processes to load data") parser_test.add_argument("--prefetch-factor", type=int, default=None, help="Number of samples to fetch & prefetch per worker") @@ -115,6 +118,122 @@ def get_parser(): return parser +def build_config_from_spec(spec, model_name, production_name): + if model_name not in spec["models"]: + raise ValueError(f"Model {model_name} not found in spec") + if production_name not in spec["productions"]: + raise ValueError(f"Production {production_name} not found in spec") + + model_config = spec["models"][model_name] + prod_config = spec["productions"][production_name] + + # Initialize config with model parameters + config = {} + config["load"] = None + config["num_steps"] = 100000 + config["comet"] = False + config["comet_step_freq"] = 10000 + config["ntrain"] = None + 
config["ntest"] = 1000 + config["nvalid"] = None + config["sort_data"] = False + config["num_workers"] = 1 + config["prefetch_factor"] = 1 + config["patience"] = 10000 + config["checkpoint_freq"] = 10000 + config["val_freq"] = 1000 + + # Copy hyperparameters and other top-level settings + for k, v in model_config.items(): + if k not in ["architecture", "train_datasets", "validation_datasets", "test_datasets"]: + print(k, v) + config[k] = v + + # Handle hyperparameters specifically if they are nested + if "hyperparameters" in model_config: + for k, v in model_config["hyperparameters"].items(): + config[k] = v + + # Model Architecture + config["model"] = model_config["architecture"] + config["conv_type"] = config["model"]["type"] + + if "gnn_lsh" in config["model"]: + config["model"]["gnn_lsh"]["conv_type"] = "gnn_lsh" + if "attention" in config["model"]: + config["model"]["attention"]["conv_type"] = "attention" + config["model"]["trainable"] = "all" + config["model"]["learned_representation_mode"] = "last" + config["model"]["input_encoding"] = "split" + config["model"]["pt_mode"] = "direct-elemtype-split" + config["model"]["eta_mode"] = "linear" + config["model"]["sin_phi_mode"] = "linear" + config["model"]["cos_phi_mode"] = "linear" + config["model"]["energy_mode"] = "direct-elemtype-split" + + # Dataset and Production + config["dataset"] = model_config.get("dataset", prod_config.get("type")) + + workspace_dir = resolve_path(prod_config["workspace_dir"], spec) + config["data_dir"] = os.path.join(workspace_dir, "tfds") + + def build_dataset_config(dataset_input): + ds_config = {} + ds_config[config["dataset"]] = {} + dataset_groups = dataset_input + + for phys_key, phys_val in dataset_groups.items(): + ds_config[config["dataset"]][phys_key] = { + "batch_size": phys_val.get("batch_size", config.get("batch_size", 1)), + "samples": {}, + } + target_dict = ds_config[config["dataset"]][phys_key]["samples"] + + for ds_item in phys_val["samples"]: + name = ds_item["name"] 
+ + entry = {} + if "version" in ds_item: + entry["version"] = ds_item["version"] + + if "splits" in ds_item: + entry["splits"] = ds_item["splits"] + + # Copy batch size if specific + if "batch_size" in ds_item: + entry["batch_size"] = ds_item["batch_size"] + + target_dict[name] = entry + + return ds_config + + if "train_datasets" in model_config: + config["train_dataset"] = build_dataset_config(model_config["train_datasets"]) + + if "validation_datasets" in model_config: + config["valid_dataset"] = build_dataset_config(model_config["validation_datasets"]) + + if "test_datasets" in model_config: + config["test_dataset"] = {} + for ds_item in model_config.get("test_datasets", []): + name = ds_item["name"] + entry = {} + entry["version"] = ds_item.get("version") + entry["splits"] = ds_item.get("splits", ["test"]) + entry["batch_size"] = ds_item.get("batch_size", 1) + config["test_dataset"][name] = entry + + # Ensure some defaults for testing/validation if not present + if "test_dataset" not in config: + config["test_dataset"] = {} + + # Default fields expected by pipeline/training + if "comet_name" not in config: + config["comet_name"] = "particleflow" + print(config) + return config + + def main(): # https://github.com/pytorch/pytorch/issues/11201#issuecomment-895047235 import torch @@ -124,10 +243,11 @@ def main(): parser = get_parser() args = parser.parse_args() - logging.basicConfig(level=logging.INFO) + logging.basicConfig(level=logging.DEBUG) - with open(args.config, "r") as stream: # load config (includes: which physics samples, model params) - config = yaml.safe_load(stream) + # Load Spec and Build Config + spec = load_spec(args.spec_file) + config = build_config_from_spec(spec, args.model_name, args.production_name) # --- Manually set action flags based on the command, for override_config --- if args.command == "train": @@ -155,26 +275,29 @@ def main(): # override some options for the pipeline test if args.pipeline: + if "gnn_lsh" not in config["model"]: + 
config["model"]["gnn_lsh"] = {} config["model"]["gnn_lsh"]["num_convs"] = 1 config["model"]["gnn_lsh"]["width"] = 32 config["model"]["gnn_lsh"]["embedding_dim"] = 32 + if "attention" not in config["model"]: + config["model"]["attention"] = {} config["model"]["attention"]["num_convs"] = 1 config["model"]["attention"]["num_heads"] = 2 config["model"]["attention"]["head_dim"] = 2 if config["dataset"] == "cms": for ds in ["train_dataset", "valid_dataset"]: - config[ds]["cms"] = { - "physical_pu": { - "batch_size": config[ds]["cms"]["physical_pu"]["batch_size"], - "samples": {"cms_pf_ttbar": config[ds]["cms"]["physical_pu"]["samples"]["cms_pf_ttbar"]}, + if ds in config: + config[ds]["cms"] = { + "physical_pu": { + "batch_size": config[ds]["cms"]["physical_pu"]["batch_size"], + "samples": {"cms_pf_ttbar": {"splits": ["10"], "version": "3.0.0"}}, + } } - } - # load only the last config split - config[ds]["cms"]["physical_pu"]["samples"]["cms_pf_ttbar"]["splits"] = ["10"] - config["test_dataset"] = {"cms_pf_ttbar": config["test_dataset"]["cms_pf_ttbar"]} - config["test_dataset"]["cms_pf_ttbar"]["splits"] = ["10"] + config["test_dataset"] = {"cms_pf_ttbar": config["test_dataset"]["cms_pf_ttbar"]} + config["test_dataset"]["cms_pf_ttbar"]["splits"] = ["10"] # override loaded config with values from command line args config = override_config(config, args) @@ -185,20 +308,29 @@ def main(): else: experiment_dir = args.experiment_dir if experiment_dir is None: + # Use model_name and production_name for prefix if available + prefix = (args.prefix or "") + f"{args.model_name}_{args.production_name}_" experiment_dir = create_experiment_dir( - prefix=(args.prefix or "") + Path(args.config).stem + "_", + prefix=prefix, experiments_dir=args.experiments_dir if args.experiments_dir else "experiments", ) - # Save config for later reference. Note that saving happens after parameters are overwritten by cmd line args. + # Save config for later reference. 
config_filename = f"{args.command}-config.yaml" with open((Path(experiment_dir) / config_filename), "w") as file: yaml.dump(config, file) + # Also save the spec file for reproducibility + with open((Path(experiment_dir) / "particleflow_spec.yaml"), "w") as file: + yaml.dump(spec, file) + if args.command == "ray-train": run_ray_training(config, args, experiment_dir) elif args.command in ["train", "test"]: - world_size = args.gpus if args.gpus > 0 else 1 + if args.gpus is not None: + config["gpus"] = args.gpus + gpus = config.get("gpus", 0) + world_size = gpus if gpus > 0 else 1 device_agnostic_run(config, world_size, experiment_dir) diff --git a/mlpf/plotting/plot_utils.py b/mlpf/plotting/plot_utils.py index 8663e070b..3bde9b86c 100644 --- a/mlpf/plotting/plot_utils.py +++ b/mlpf/plotting/plot_utils.py @@ -124,6 +124,8 @@ def get_class_names(sample_name): # overline results in misalignment EVALUATION_DATASET_NAMES = { "cld_edm_ttbar_pf": r"$e^+e^- \rightarrow \mathrm{t}\bar{\mathrm{t}}$", + "cld_edm_qq_pf": r"$e^+e^- \rightarrow \mathrm{q}\bar{\mathrm{q}}$", + "cld_edm_ww_pf": r"$e^+e^- \rightarrow WW \rightarrow \mathrm{hadrons}$", "clic_edm_ttbar_pf": r"$e^+e^- \rightarrow \mathrm{t}\bar{\mathrm{t}}$", "clic_edm_ttbar_pu10_pf": r"$e^+e^- \rightarrow \mathrm{t}\bar{\mathrm{t}}$, PU10", "clic_edm_ttbar_hits_pf": r"$e^+e^- \rightarrow \mathrm{t}\bar{\mathrm{t}}$", @@ -311,25 +313,40 @@ def particle_label(ax, pid): ) -def load_eval_data(path, max_files=None): +def load_eval_data(path, max_events=None): yvals = [] filenames = [] print("path", path) - filelist = list(glob.glob(path)) - - if max_files is not None: - filelist = filelist[:max_files] + filelist = sorted(list(glob.glob(path))) assert len(filelist) > 0 is_interactive = sys.stdout.isatty() + iterator = filelist if is_interactive: - filelist = tqdm.tqdm(filelist, total=len(filelist), desc="Loading eval data") + iterator = tqdm.tqdm(filelist, desc="Loading eval data") - for fi in filelist: + total_events = 0 + 
for fi in iterator: dd = awkward.from_parquet(fi) + num_in_file = len(dd) + print(fi, num_in_file, total_events, max_events) + + if max_events is not None and max_events > 0: + if total_events + num_in_file > max_events: + to_take = max_events - total_events + yvals.append(dd[:to_take]) + filenames.append(fi) + total_events += to_take + break + yvals.append(dd) filenames.append(fi) + total_events += num_in_file + + if max_events is not None and max_events > 0 and total_events >= max_events: + break + assert len(yvals) > 0 data = awkward.concatenate(yvals, axis=0) diff --git a/mlpf/produce_snakemake.py b/mlpf/produce_snakemake.py new file mode 100644 index 000000000..2e64bab0b --- /dev/null +++ b/mlpf/produce_snakemake.py @@ -0,0 +1,461 @@ +import os +import stat +import argparse +from mlpf.utils import load_spec, resolve_path + +# Configuration +CHUNK_SIZE = 1 +LOCAL_JOBS_DIR = "snakemake_jobs" +SPEC_FILE = "particleflow_spec.yaml" + + +def ensure_dir(d): + os.makedirs(d, exist_ok=True) + + +def write_bash_script(path, content): + with open(path, "w") as f: + f.write("#!/bin/bash\n") + f.write("set -e\n") + f.write(content) + os.chmod(path, os.stat(path).st_mode | stat.S_IEXEC) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--production", type=str, default="cms_2025_main", help="Production name from spec file") + parser.add_argument("--model", type=str, help="Model name from spec file to train") + parser.add_argument("--ignore-failures", action="store_true", help="Ignore failures in gen/post steps") + parser.add_argument("--steps", type=str, default="gen,post,tfds,train", help="Comma-separated steps to run: gen,post,tfds,train") + args = parser.parse_args() + + req_steps = args.steps.split(",") + + spec = load_spec(SPEC_FILE) + + # Target specific production + if args.production not in spec["productions"]: + raise ValueError(f"Production {args.production} not found in {SPEC_FILE}") + + prod_config = spec["productions"][args.production] 
+ prod_type = prod_config.get("type", "cms") + + cpu_partition = prod_config.get("slurm_partition", "main") + cpu_runtime = prod_config.get("slurm_runtime", "120m") + + memory_config = prod_config.get("memory", {}) + mem_gen = memory_config.get("gen", 2000) + mem_post = memory_config.get("post", 2000) + mem_tfds = memory_config.get("tfds", 4000) + mem_train = memory_config.get("train", 8000) + + # Resolve workspace dir and TFDS dir + workspace_dir = resolve_path(prod_config["workspace_dir"], spec) + # Unify TFDS output directory to be within the workspace + tfds_root_dir = os.path.join(workspace_dir, "tfds") + + # Apptainer/Singularity configuration + main_container_img = spec["project"].get("container") + gen_container_img = prod_config.get("gen_container", main_container_img) + + bind_mounts = spec["project"].get("bind_mounts", []) + bind_args = "" + for bm in bind_mounts: + bind_args += f" -B {bm}" + + # Get postprocessing script from spec + postproc_script = prod_config["postprocessing"]["script"] + postproc_extra_args = prod_config["postprocessing"].get("args", {}) + + config_dir = prod_config.get("config_dir", "") + + scratch_root = resolve_path(spec["project"]["paths"]["scratch_root"], spec) + + samples = prod_config["samples"] + tfds_mappings = prod_config.get("tfds_mapping", {}) + + # Update jobs dir to include production name to avoid conflicts + jobs_dir = f"{LOCAL_JOBS_DIR}/{args.production}" + + ensure_dir(f"{jobs_dir}/gen") + ensure_dir(f"{jobs_dir}/post") + ensure_dir(f"{jobs_dir}/tfds") + ensure_dir(tfds_root_dir) + + snakefile_content = "rule all:\n input:\n" + + # Track completion files for all chunks of all samples + all_sample_post_sentinels = {} + + # Track targets for rule all + final_targets = [] + + rules_content = "" + + # ------------------------------------------------------------------------- + # PART 1: Generation & Postprocessing (Per Chunk) + # ------------------------------------------------------------------------- + for sample_key, 
sample_data in samples.items(): + process_name = sample_data["process_name"] + seed_start, seed_end = sample_data["seed_range"] + gen_script = sample_data["gen_script"] + output_subdir = sample_data.get("output_subdir", process_name) + events_per_job = sample_data.get("events_per_job", 100) + pu_type = sample_data.get("pu_type", "nopu") + copy_step2 = sample_data.get("copy_step2", False) + + # Unified Directory Structure + if prod_type == "cms": + # CMS: workspace/gen/subdir/process + sample_gen_dir = os.path.join(workspace_dir, "gen", output_subdir, process_name) + sample_post_dir = os.path.join(workspace_dir, "post", output_subdir, process_name) + else: + # Key4Hep/Simple: workspace/gen/process + sample_gen_dir = os.path.join(workspace_dir, "gen", process_name) + sample_post_dir = os.path.join(workspace_dir, "post", process_name) + + # Convention: generated files go into a 'root' subdirectory + sample_gen_root_dir = os.path.join(sample_gen_dir, "root") + + ensure_dir(sample_gen_dir) + ensure_dir(sample_gen_root_dir) + ensure_dir(sample_post_dir) + + sample_post_sentinels = [] + + # Iterate in chunks + for chunk_start in range(seed_start, seed_end, CHUNK_SIZE): + chunk_end = min(chunk_start + CHUNK_SIZE, seed_end) + chunk_id = f"{sample_key}_{chunk_start}" + + # Sentinels + gen_sentinel = f"{jobs_dir}/gen/gen_{chunk_id}.done" + post_sentinel = f"{jobs_dir}/post/post_{chunk_id}.done" + + # 1. 
Generation + gen_script_path = f"{jobs_dir}/gen/gen_{chunk_id}.sh" + gen_cmd_lines = [] + + for seed in range(chunk_start, chunk_end): + # Prepare common variables + if prod_type == "cms": + gen_base_dir = os.path.join(workspace_dir, "gen", output_subdir) + root_file = os.path.join(sample_gen_root_dir, f"pfntuple_{seed}.root") + elif prod_type == "key4hep": + gen_base_dir = os.path.join(workspace_dir, "gen") + root_file = os.path.join(sample_gen_root_dir, f"reco_{process_name}_{seed}.root") + + exports = ( + f"export OUTDIR={gen_base_dir}/" + + f" && export CONFIG_DIR={config_dir}" + + f" && export WORKDIR={scratch_root}/{process_name}_{seed}" + + f" && export NEV={events_per_job}" + ) + gen_cmd = f"bash {gen_script} {process_name} {seed} {pu_type}" + if copy_step2: + gen_cmd += " true" + + if args.ignore_failures: + gen_cmd += " || echo 'WARNING: Generation failed'" + + cmd = f""" +if [ ! -f {root_file} ]; then + echo "Generating {root_file}" + {exports} + {gen_cmd} +else + echo "Skipping {root_file}, already exists" +fi +""" + gen_cmd_lines.append(cmd) + + write_bash_script(gen_script_path, "\n".join(gen_cmd_lines)) + + if "gen" in req_steps: + final_targets.append(gen_sentinel) + + rules_content += f""" +rule gen_{chunk_id}: + output: + "{gen_sentinel}" + resources: + mem_mb_per_cpu={mem_gen}, + slurm_partition="{cpu_partition}", + runtime="{cpu_runtime}" + container: + "{gen_container_img}" + shell: + "{gen_script_path} && touch {{output}}" +""" + + # 2. 
Postprocessing + post_script_path = f"{jobs_dir}/post/post_{chunk_id}.sh" + post_cmd_lines = [] + + for seed in range(chunk_start, chunk_end): + if prod_type == "cms": + root_file = os.path.join(sample_gen_root_dir, f"pfntuple_{seed}.root") + post_file_final = os.path.join(sample_post_dir, f"pfntuple_{seed}.pkl.bz2") + post_file_inter = os.path.join(sample_post_dir, f"pfntuple_{seed}.pkl") + elif prod_type == "key4hep": + root_file = os.path.join(sample_gen_root_dir, f"reco_{process_name}_{seed}.root") + post_file_final = os.path.join(sample_post_dir, f"reco_{process_name}_{seed}.parquet") + post_file_inter = post_file_final + + args_str = f"--input {root_file} --outpath {sample_post_dir}" + for k, v in postproc_extra_args.items(): + if isinstance(v, bool): + if v: + args_str += f" --{k}" + else: + args_str += f" --{k} {v}" + + postproc_cmd = f"python3 {postproc_script} {args_str}" + + exit_cmd = "exit 1" + if args.ignore_failures: + postproc_cmd += " || echo 'WARNING: Postprocessing failed'" + exit_cmd = "echo 'Ignoring failure'; true" + + if prod_type == "cms": + cmd = f""" +if [ ! -f {post_file_final} ]; then + if [ -f {root_file} ]; then + echo "Postprocessing {root_file}" + {postproc_cmd} + if [ -f {post_file_inter} ]; then + bzip2 -z {post_file_inter} + else + echo "Error: Postprocessing failed to produce {post_file_inter}" + {exit_cmd} + fi + else + echo "Error: Input file {root_file} missing for postprocessing" + {exit_cmd} + fi +else + echo "Skipping {post_file_final}, already exists" +fi +""" + else: # key4hep / parquet + cmd = f""" +if [ ! 
-f {post_file_final} ]; then + if [ -f {root_file} ]; then + echo "Postprocessing {root_file}" + {postproc_cmd} + else + echo "Error: Input file {root_file} missing for postprocessing" + {exit_cmd} + fi +else + echo "Skipping {post_file_final}, already exists" +fi +""" + + post_cmd_lines.append(cmd) + + write_bash_script(post_script_path, "\n".join(post_cmd_lines)) + + sample_post_sentinels.append(post_sentinel) + if "post" in req_steps and (sample_key in tfds_mappings): + final_targets.append(post_sentinel) + + post_rule_input = "" + if "gen" in req_steps: + post_rule_input = f'\n input:\n "{gen_sentinel}"' + + rules_content += f""" +rule post_{chunk_id}:{post_rule_input} + output: + "{post_sentinel}" + resources: + mem_mb_per_cpu={mem_post}, + slurm_partition="{cpu_partition}", + runtime="{cpu_runtime}" + container: + "{main_container_img}" + shell: + "{post_script_path} && touch {{output}}" +""" + + all_sample_post_sentinels[sample_key] = sample_post_sentinels + + # ------------------------------------------------------------------------- + # PART 2: TFDS Conversion (Per Config ID / Split) + # ------------------------------------------------------------------------- + tfds_sentinels = [] + + for sample_key, mapping in tfds_mappings.items(): + if sample_key not in samples: + print(f"Warning: TFDS mapping found for {sample_key} but no sample definition.") + continue + + sample_data = samples[sample_key] + builder_path = mapping["builder_path"] + config_ids = mapping.get("config_ids", [1]) + + process_name = sample_data["process_name"] + output_subdir = sample_data.get("output_subdir", process_name) + + # Determine manual_dir for TFDS + if prod_type == "cms": + # For CMS, data is in workspace/post/subdir/process + # TFDS builder expects workspace/post/subdir (containing process folder) + manual_dir = os.path.join(workspace_dir, "post", output_subdir) + else: + # For Key4Hep, data is in workspace/post/process + # TFDS builder expects workspace/post (containing 
process folder) + manual_dir = os.path.join(workspace_dir, "post") + + for config_id in config_ids: + tfds_id = f"{sample_key}_tfds_{config_id}" + tfds_script_path = f"{jobs_dir}/tfds/tfds_{tfds_id}.sh" + tfds_sentinel = f"{jobs_dir}/tfds/tfds_{tfds_id}.done" + + # Use a scratch directory for TFDS generation to avoid IO bottleneck on shared storage + job_scratch_dir = os.path.join(scratch_root, "tfds_tmp", tfds_id) + + version = mapping.get("version") + tfds_build_cmd = f"tfds build {builder_path} --config {config_id} --data_dir {job_scratch_dir} --manual_dir {manual_dir} --overwrite" + + cmd = f""" +export PYTHONPATH=$(pwd):$PYTHONPATH +export KERAS_BACKEND=torch +hostname +{f'export TFDS_VERSION={version}' if version else ''} +env + +echo "Building TFDS for {builder_path} config {config_id}" +echo "Manual dir: {manual_dir}" +echo "Scratch dir: {job_scratch_dir}" + +mkdir -p {job_scratch_dir} + +# Ensure cleanup on exit, even if the job fails +cleanup() {{ + if [ ! -z "{job_scratch_dir}" ] && [ "{job_scratch_dir}" != "{scratch_root}" ]; then + echo "Cleaning up scratch directory {job_scratch_dir}" + rm -Rf {job_scratch_dir} + fi +}} +trap cleanup EXIT + +{tfds_build_cmd} + +echo "Copying from {job_scratch_dir} to {tfds_root_dir}" +cp -r {job_scratch_dir}/* {tfds_root_dir}/ +""" + write_bash_script(tfds_script_path, cmd) + + tfds_sentinels.append(tfds_sentinel) + if "tfds" in req_steps: + final_targets.append(tfds_sentinel) + + input_sentinels_str = ",\n ".join([f'"{s}"' for s in all_sample_post_sentinels.get(sample_key, [])]) + tfds_rule_input = "" + if "post" in req_steps: + tfds_rule_input = f"\n input:\n {input_sentinels_str}" + + rules_content += f""" +rule tfds_{tfds_id}:{tfds_rule_input} + output: + "{tfds_sentinel}" + resources: + mem_mb_per_cpu={mem_tfds}, + slurm_partition="{cpu_partition}", + runtime="{cpu_runtime}" + container: + "{main_container_img}" + shell: + "{tfds_script_path} && touch {{output}}" +""" + + # 
------------------------------------------------------------------------- + # PART 3: Model Training + # ------------------------------------------------------------------------- + if args.model: + if args.model not in spec["models"]: + raise ValueError(f"Model {args.model} not found in {SPEC_FILE}") + + ensure_dir(f"{jobs_dir}/train") + + model_spec = spec["models"][args.model] + gpu_count = model_spec.get("gpus", 0) + gpu_type = model_spec.get("gpu_type", "") + mem_per_gpu_mb = model_spec.get("mem_per_gpu_mb", 8000) + gpu_partition = model_spec.get("slurm_partition", "gpu") + gpu_runtime = model_spec.get("slurm_runtime", "120m") + + exp_name = f"{args.model}_{args.production}" + + train_script_path = f"{jobs_dir}/train/train_{exp_name}.sh" + train_sentinel = f"{jobs_dir}/train/train_{exp_name}.done" + + train_cmd = f"python3 mlpf/pipeline.py --spec-file {SPEC_FILE} --model-name {args.model} --production-name {args.production} train" + + cmd = f""" +export PYTHONPATH=$(pwd):$PYTHONPATH +export TFDS_DATA_DIR={tfds_root_dir} +env +nvidia-smi +{train_cmd} +""" + write_bash_script(train_script_path, cmd) + + input_sentinels_str = " ,\n ".join([f'"{s}"' for s in tfds_sentinels]) + train_rule_input = "" + if "tfds" in req_steps: + train_rule_input = f"\n input:\n {input_sentinels_str}" + + # snakemake rule names cannot contain hyphens + rule_model_name = args.model.replace("-", "_") + + # Constructing the resources string + resources_str = f'mem_mb_per_cpu={mem_train}, slurm_partition="{gpu_partition}", runtime="{gpu_runtime}"' + if gpu_count > 0: + if gpu_type: + resources_str += f',gres="gpu:{gpu_type}:{gpu_count}"' + else: + resources_str += f", gpu={gpu_count}" + if mem_per_gpu_mb > 0 and gpu_count > 0: + resources_str += f",mem_per_gpu={mem_per_gpu_mb}" + + if "train" in req_steps: + final_targets.append(train_sentinel) + + rules_content += f""" +rule train_{rule_model_name}:{train_rule_input} + output: + "{train_sentinel}" + resources: + {resources_str} + 
container: + "{main_container_img}" + shell: + "{train_script_path} && touch {{output}}" +""" + + elif "train" in req_steps: + raise ValueError("A model must be specified with --model for the 'train' step.") + + # ------------------------------------------------------------------------- + # Finalize Snakefile + # ------------------------------------------------------------------------- + + def fmt_list(lst): + return "[" + ", ".join([f'"{x}"' for x in lst]) + "]" + + snakefile_content += " " + fmt_list(final_targets) + "\n" + snakefile_content += rules_content + + snakefile_path = f"{jobs_dir}/Snakefile" + with open(snakefile_path, "w") as f: + f.write(snakefile_content) + + print(f"Generated Snakemake workflow in {snakefile_path}") + print(f"Generated {len(final_targets)} target jobs.") + print(f'Run with: snakemake --snakefile {snakefile_path} --cores 1 --use-apptainer --apptainer-args "{bind_args} --nv"') + + +if __name__ == "__main__": + main() diff --git a/mlpf/utils.py b/mlpf/utils.py index 82ba15d99..883720cca 100644 --- a/mlpf/utils.py +++ b/mlpf/utils.py @@ -1,10 +1,32 @@ import datetime import logging import platform +import re +import yaml from pathlib import Path from comet_ml import OfflineExperiment, Experiment # isort:skip +def load_spec(spec_file): + with open(spec_file, "r") as f: + spec = yaml.safe_load(f) + return spec + + +def resolve_path(path, spec): + # Single-pass substitution of ${dotted.key} placeholders with values looked up in spec (substituted values are not re-expanded) + def replace(match): + key_path = match.group(1).split(".") + val = spec + for k in key_path: + val = val.get(k) + if val is None: + return match.group(0) # key not found: leave the placeholder text unchanged + return str(val) + + return re.sub(r"\$\{(.+?)\}", replace, path) + + def create_experiment_dir(prefix=None, suffix=None, experiments_dir="experiments"): if prefix is None: train_dir = Path(experiments_dir) / datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f") diff --git a/notebooks/clic/clic-target-plots.ipynb b/notebooks/clic/clic-target-plots.ipynb index d684e5635..3460a7cde
100644 --- a/notebooks/clic/clic-target-plots.ipynb +++ b/notebooks/clic/clic-target-plots.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "651a0557-169b-4fd5-b723-f5a0fe790d14", "metadata": {}, "outputs": [], @@ -24,13 +24,13 @@ "mplhep.style.use(mplhep.style.CMS)\n", "\n", "import sys\n", - "sys.path.append(\"mlpf\")\n", + "sys.path.append(\"../../mlpf\")\n", "from plotting.plot_utils import pid_to_text, EVALUATION_DATASET_NAMES, save_img, EXPERIMENT_LABELS, labels, sample_label, med_iqr, compute_jet_ratio" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "574605f7-0371-4ddf-b709-e87c32fad93d", "metadata": {}, "outputs": [], @@ -70,48 +70,92 @@ "\n", " for fi in tqdm.tqdm(filelist):\n", " dd = awkward.from_parquet(fi)\n", - " yvals.append(dd)\n", + " yvals.append([{k: dd[k] for k in dd.fields}])\n", " filenames.append(fi)\n", "\n", " data = awkward.concatenate(yvals, axis=0)\n", - " X = data[\"inputs\"]\n", - "\n", - " yvals = {}\n", - " for typ in [\"target\", \"cand\", \"pred\"]:\n", - " for k in data[\"particles\"][typ].fields:\n", - " yvals[\"{}_{}\".format(typ, k)] = data[\"particles\"][typ][k]\n", - "\n", - " for typ in [\"target\", \"cand\", \"pred\"]:\n", - " # Compute phi, px, py, pz\n", - " yvals[typ + \"_phi\"] = np.arctan2(yvals[typ + \"_sin_phi\"], yvals[typ + \"_cos_phi\"])\n", - " yvals[typ + \"_px\"] = yvals[typ + \"_pt\"] * yvals[typ + \"_cos_phi\"]\n", - " yvals[typ + \"_py\"] = yvals[typ + \"_pt\"] * yvals[typ + \"_sin_phi\"]\n", - " yvals[typ + \"_pz\"] = yvals[typ + \"_pt\"] * np.sinh(yvals[typ + \"_eta\"])\n", - "\n", - " for typ in [\"gen\", \"cand\", \"pred\", \"target\"]:\n", - " # Get the jet vectors\n", - " jetvec = vector.awk(data[\"jets\"][typ])\n", - " jetvec = awkward.Array(jetvec, with_name=\"Momentum4D\")\n", - " for k in [\"pt\", \"eta\", \"phi\", \"energy\"]:\n", - " yvals[\"jets_{}_{}\".format(typ, k)] = getattr(jetvec, k)\n", 
- "\n", - " for typ in [\"target\", \"cand\", \"pred\"]:\n", - " for val in [\"pt\", \"eta\", \"sin_phi\", \"cos_phi\", \"energy\"]:\n", - " yvals[\"{}_{}\".format(typ, val)] = yvals[\"{}_{}\".format(typ, val)] * (yvals[\"{}_cls_id\".format(typ)] != 0)\n", - "\n", - " yvals.update(compute_jet_ratio(data, yvals))\n", - " yvals[\"gen_met\"] = data[\"genmet\"]\n", - " return yvals, X, filenames" + " # X = data[\"inputs\"]\n", + "\n", + " # yvals = {}\n", + " # for typ in [\"target\", \"cand\", \"pred\"]:\n", + " # for k in data[\"particles\"][typ].fields:\n", + " # yvals[\"{}_{}\".format(typ, k)] = data[\"particles\"][typ][k]\n", + "\n", + " # for typ in [\"target\", \"cand\", \"pred\"]:\n", + " # # Compute phi, px, py, pz\n", + " # yvals[typ + \"_phi\"] = np.arctan2(yvals[typ + \"_sin_phi\"], yvals[typ + \"_cos_phi\"])\n", + " # yvals[typ + \"_px\"] = yvals[typ + \"_pt\"] * yvals[typ + \"_cos_phi\"]\n", + " # yvals[typ + \"_py\"] = yvals[typ + \"_pt\"] * yvals[typ + \"_sin_phi\"]\n", + " # yvals[typ + \"_pz\"] = yvals[typ + \"_pt\"] * np.sinh(yvals[typ + \"_eta\"])\n", + "\n", + " # for typ in [\"gen\", \"cand\", \"pred\", \"target\"]:\n", + " # # Get the jet vectors\n", + " # jetvec = vector.awk(data[\"jets\"][typ])\n", + " # jetvec = awkward.Array(jetvec, with_name=\"Momentum4D\")\n", + " # for k in [\"pt\", \"eta\", \"phi\", \"energy\"]:\n", + " # yvals[\"jets_{}_{}\".format(typ, k)] = getattr(jetvec, k)\n", + "\n", + " # for typ in [\"target\", \"cand\", \"pred\"]:\n", + " # for val in [\"pt\", \"eta\", \"sin_phi\", \"cos_phi\", \"energy\"]:\n", + " # yvals[\"{}_{}\".format(typ, val)] = yvals[\"{}_{}\".format(typ, val)] * (yvals[\"{}_cls_id\".format(typ)] != 0)\n", + "\n", + " # yvals.update(compute_jet_ratio(data, yvals))\n", + " # yvals[\"gen_met\"] = data[\"genmet\"]\n", + " return data" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "58cbb4ea-3c74-4999-b7db-0a5b15a25a20", "metadata": {}, - "outputs": [], + "outputs": [ + 
{ + "name": "stdout", + "output_type": "stream", + "text": [ + "path ../../data/cld/*.parquet\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 64.12it/s]\n" + ] + } + ], + "source": [ + "data = load_eval_data(\"../../data/cld/*.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "3123f9f5-142a-472f-bd81-95a852b5c4dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['X_track',\n", + " 'X_cluster',\n", + " 'ytarget_track',\n", + " 'ytarget_cluster',\n", + " 'ycand_track',\n", + " 'ycand_cluster',\n", + " 'genmet',\n", + " 'genjet',\n", + " 'targetjet']" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "yvals, X, _ = load_eval_data(\"experiments/cld_evaluation_parquets/*.parquet\")" + "data.fields" ] }, { @@ -515,7 +559,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.10" + "version": "3.11.14" } }, "nbformat": 4, diff --git a/notebooks/collideml.ipynb b/notebooks/collideml.ipynb new file mode 100644 index 000000000..1c6b856cc --- /dev/null +++ b/notebooks/collideml.ipynb @@ -0,0 +1,423 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aea76cc2-37e6-43ea-b8fb-a294755a67ff", + "metadata": {}, + "outputs": [], + "source": [ + "import awkward\n", + "import numpy as np\n", + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import particle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "989dd250-2a1a-4e8f-a4ae-bd2b2be443c8", + "metadata": {}, + "outputs": [], + "source": [ + "from particle import Particle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4a8e0e3-cf8d-47ab-a831-8c925d7e0259", + "metadata": {}, + "outputs": [], + "source": 
[ + "Particle.from_pdgid(311).name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0dd25e6a-7716-4a09-aeae-59f8f3e49e25", + "metadata": {}, + "outputs": [], + "source": [ + "particles = awkward.from_parquet(\"../data/particles.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50d7693e-9ded-467f-bfa2-f5719df941ef", + "metadata": {}, + "outputs": [], + "source": [ + "calohits = awkward.from_parquet(\"../data/calohits.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "313b6433-4851-4fff-89c8-76f73c8be1b1", + "metadata": {}, + "outputs": [], + "source": [ + "particles[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7780e422-1e08-48b3-b534-5a52a634ef56", + "metadata": {}, + "outputs": [], + "source": [ + "calohits[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d140b45-f533-4132-8418-29deed3d5b8d", + "metadata": {}, + "outputs": [], + "source": [ + "import networkx as nx\n", + "def gen_to_graph(gen_features):\n", + " g = nx.DiGraph()\n", + " for igp in gen_features[\"particle_id\"]:\n", + " g.add_node(igp)\n", + "\n", + " for igen, parent in zip(gen_features[\"particle_id\"], gen_features[\"parent_id\"]):\n", + " if igen in g.nodes and parent in g.nodes:\n", + " g.add_edge(parent, igen)\n", + " return g\n", + "\n", + "\n", + "def color_node(st):\n", + " if st:\n", + " return \"blue\"\n", + " else:\n", + " return \"red\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7bab162-9baf-40b2-97f2-63976171b415", + "metadata": {}, + "outputs": [], + "source": [ + "iev = 2\n", + "gen_features = particles[iev]\n", + "calo_features = calohits[iev]\n", + "g = gen_to_graph(gen_features)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07c7fa72-f604-4af8-9d82-424a3b900993", + "metadata": {}, + "outputs": [], + "source": [ + "map_to_parent = {}\n", + "for k, v in 
zip(gen_features[\"particle_id\"], gen_features[\"parent_id\"]):\n", + " map_to_parent[k] = v\n", + "\n", + "map_to_primary = {}\n", + "for k, v in zip(gen_features[\"particle_id\"], gen_features[\"primary\"]):\n", + " map_to_primary[k] = v\n", + "\n", + "map_to_idx = {}\n", + "for v, k in enumerate(gen_features[\"particle_id\"]):\n", + " map_to_idx[k] = v" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59a9fd91-a68e-4678-beff-4dbccdf31f9c", + "metadata": {}, + "outputs": [], + "source": [ + "#hit idx\n", + "genparticle_to_hit_matrix_coo0 = np.repeat(\n", + " np.arange(len(calohits[iev][\"contrib_particle_ids\"])),\n", + " awkward.count(calohits[iev][\"contrib_particle_ids\"], axis=1)\n", + ")\n", + "\n", + "#gen idx\n", + "genparticle_to_hit_matrix_coo1 = awkward.flatten(calohits[iev][\"contrib_particle_ids\"])\n", + "genparticle_to_hit_matrix_w = awkward.flatten(calohits[iev][\"contrib_energies\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb3bfdea-d296-4d49-bde5-b06a3591f256", + "metadata": {}, + "outputs": [], + "source": [ + "len(g.nodes), len(gen_features[\"pdg_id\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "185c015b-4fb6-4dec-8301-638bb1465a3f", + "metadata": {}, + "outputs": [], + "source": [ + "node_color = [color_node(st) for st in gen_features[\"primary\"]]\n", + "node_size = [np.clip(10 * e, 1, 100) for e in gen_features[\"energy\"]]\n", + "alpha = [1.0 if n in genparticle_to_hit_matrix_coo1 else 0.2 for n in g.nodes]\n", + "\n", + "labels = {n: \"{}\".format(Particle.from_pdgid(pid).name) for n, pid in zip(g.nodes, gen_features[\"pdg_id\"])}\n", + "pos = nx.nx_agraph.graphviz_layout(g, prog=\"circo\")\n", + "fig = plt.figure(figsize=(20, 20))\n", + "nx.draw_networkx_nodes(g, pos, node_color=node_color, node_size=node_size, alpha=alpha)\n", + "nx.draw_networkx_edges(\n", + " g,\n", + " pos,\n", + " arrowsize=1,\n", + " width=0.5,\n", + " alpha=0.2,\n", + " 
node_size=node_size,\n", + ")\n", + "nx.draw_networkx_labels(g, pos, labels=labels, font_size=2)\n", + "plt.savefig(\"graph.pdf\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f21a9aed-8c58-487d-8905-7aed045fb39b", + "metadata": {}, + "outputs": [], + "source": [ + "def get_hit_labels(hit_idx, gen_idx, weights, max_hits=None):\n", + " # Initialize an array to store labels for each hit\n", + " if not max_hits:\n", + " max_hits = int(np.max(hit_idx)) + 1\n", + " hit_labels = np.full(max_hits, -1, dtype=int) # Default label is -1 (unassigned)\n", + " hit_label_weights = dict() # To keep track of the highest weight for each hit\n", + "\n", + " # Iterate through the sparse COO matrix data\n", + " for h_idx, g_idx, weight in zip(hit_idx, gen_idx, weights):\n", + " if hit_labels[h_idx] == -1 or weight > hit_label_weights[h_idx]:\n", + " hit_labels[h_idx] = g_idx\n", + " hit_label_weights[h_idx] = weight\n", + "\n", + " # hit_labels now contains the genparticle index for each hit\n", + "\n", + " return hit_labels\n", + "\n", + "def get_hit_labels_p(hit_idx, gen_idx, weights, map_to_parent, map_to_primary, max_hits=None):\n", + " # Initialize an array to store labels for each hit\n", + " if not max_hits:\n", + " max_hits = int(np.max(hit_idx)) + 1\n", + " hit_labels = np.full(max_hits, -1, dtype=int) # Default label is -1 (unassigned)\n", + " hit_label_weights = dict() # To keep track of the highest weight for each hit\n", + " \n", + " for h_idx, g_idx, weight in zip(hit_idx, gen_idx, weights):\n", + " if hit_labels[h_idx] == -1 or weight > hit_label_weights[h_idx]:\n", + " hit_labels[h_idx] = g_idx\n", + " hit_label_weights[h_idx] = weight\n", + "\n", + " #recurse up the gen particle tree to the first primary particle\n", + " hit_labels_p = hit_labels.copy()\n", + " for h_idx, g_idx in enumerate(hit_labels):\n", + " while not map_to_primary[g_idx]:\n", + " g_idx = map_to_parent[g_idx]\n", + " hit_labels_p[h_idx] = g_idx\n", + "\n", + " return 
hit_labels_p" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2def7e94-52fd-4ade-a367-4c610d34231e", + "metadata": {}, + "outputs": [], + "source": [ + "max_hits = len(calo_features[\"x\"])\n", + "hit_labels = get_hit_labels(genparticle_to_hit_matrix_coo0, genparticle_to_hit_matrix_coo1, genparticle_to_hit_matrix_w, max_hits=max_hits)\n", + "hit_labels_p = get_hit_labels_p(genparticle_to_hit_matrix_coo0, genparticle_to_hit_matrix_coo1, genparticle_to_hit_matrix_w, map_to_parent, map_to_primary, max_hits=max_hits)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f38c75d-d59d-4283-89b2-ccccfbfc2317", + "metadata": {}, + "outputs": [], + "source": [ + "calo_hit_positions = np.array(awkward.to_numpy(np.stack([\n", + " calo_features[\"x\"],\n", + " calo_features[\"y\"],\n", + " calo_features[\"z\"],\n", + "]))).T" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be92bcdf-f9f4-4b7c-ba31-ce7128328a8b", + "metadata": {}, + "outputs": [], + "source": [ + "len(calo_features[\"x\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3bc0c0ff-8946-457e-8368-4e0eac9d984d", + "metadata": {}, + "outputs": [], + "source": [ + "import plotly.graph_objects as go\n", + "import random\n", + "\n", + "def plot_calo_hits_colored_by_genparticle(\n", + " hit_energies, hit_labels, calo_hit_positions, title=\"Calorimeter hits colored by genparticle\"\n", + "):\n", + " # Assign unique colors to each genparticle ID\n", + " unique_ids = np.unique(hit_labels)\n", + " colors = plt.cm.tab10(np.linspace(0, 1, len(unique_ids)))\n", + " color_map = {\n", + " gen_id: f\"rgba({int(color[0]*255)}, {int(color[1]*255)}, {int(color[2]*255)}, {color[3]})\"\n", + " for gen_id, color in zip(unique_ids, colors)\n", + " }\n", + "\n", + " # random color map\n", + " def random_color():\n", + " \"\"\"Generate a random color in RGBA format.\"\"\"\n", + " return f\"rgba({random.randint(0, 255)}, {random.randint(0, 255)}, 
{random.randint(0, 255)}, 1)\"\n", + "\n", + " random_color_map = {gen_id: random_color() for gen_id in unique_ids}\n", + " random_color_map[-1] = \"rgba(0,0,0)\"\n", + "\n", + " # Create traces for each genparticle ID\n", + " traces = []\n", + " for gen_id in unique_ids:\n", + " if gen_id != -1:\n", + " mask = hit_labels == gen_id # Create a mask for hits belonging to the current genparticle ID\n", + " traces.append(\n", + " go.Scatter3d(\n", + " x=calo_hit_positions[mask, 0],\n", + " y=calo_hit_positions[mask, 1],\n", + " z=calo_hit_positions[mask, 2],\n", + " mode=\"markers\",\n", + " marker=dict(\n", + " #size=2*np.exp(0.1*np.log(hit_energies)+1)+1,\n", + " size=2,\n", + " color=random_color_map[gen_id]\n", + " ),\n", + " name=f\"gp {gen_id}\",\n", + " )\n", + " )\n", + "\n", + " # Customize the axis names\n", + " layout = go.Layout(\n", + " scene=dict(\n", + " xaxis=dict(title=\"X\"),\n", + " yaxis=dict(title=\"Y\"),\n", + " zaxis=dict(title=\"Z\"),\n", + " camera=dict(\n", + " up=dict(x=1, y=0, z=0), # Sets the orientation of the camera\n", + " center=dict(x=0, y=0, z=0), # Sets the center point of the plot\n", + " eye=dict(x=0, y=0, z=2.1), # Sets the position of the camera\n", + " ),\n", + " ),\n", + " showlegend=False,\n", + " width=700,\n", + " height=700,\n", + " title=title,\n", + " )\n", + "\n", + " # Create the figure and display the plot\n", + " fig = go.Figure(data=traces, layout=layout)\n", + " fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93c4f813-776f-4560-ac5f-def29fb3393d", + "metadata": {}, + "outputs": [], + "source": [ + "plot_calo_hits_colored_by_genparticle(calo_features[\"total_energy\"], hit_labels_p, calo_hit_positions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a05e2901-d713-4a98-8275-504578c27696", + "metadata": {}, + "outputs": [], + "source": [ + "ehits = []\n", + "egen = []\n", + "for h in np.unique(hit_labels_p):\n", + " msk = hit_labels_p == h\n", + " energy_hits 
= np.sum(calo_features[\"total_energy\"][msk])\n", + " energy_gen = gen_features[\"energy\"][map_to_idx[h]]\n", + " ehits.append(energy_hits)\n", + " egen.append(energy_gen)\n", + "ehits = np.array(ehits)\n", + "egen = np.array(egen)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53be52a8-2ed8-475f-930f-84a74d4b1d1a", + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(5,5))\n", + "plt.scatter(ehits, egen);\n", + "\n", + "plt.xscale(\"log\")\n", + "plt.yscale(\"log\")\n", + "plt.xlim(1e-3, 1e3)\n", + "plt.ylim(1e-3, 1e3)\n", + "plt.xlabel(\"$\\sum E_{hits}$\")\n", + "plt.ylabel(\"$E_{gen}^{primary}$\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa5b1eb5-acd2-4b62-a235-0aba6becca85", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/particleflow_spec.yaml b/particleflow_spec.yaml new file mode 100644 index 000000000..40a54961b --- /dev/null +++ b/particleflow_spec.yaml @@ -0,0 +1,499 @@ +# ParticleFlow Project Unified Configuration Specification +# This file serves as the single source of truth for dataset generation, +# processing pipeline, and model definitions. 
+ +project: + name: "particleflow" + description: "Machine Learning for Particle Flow Reconstruction" + container: "/home/software/singularity/pytorch.simg:2026-02-04" + bind_mounts: + - "/local" + - "/cvmfs" + - "/scratch/local" + - "/scratch/persistent" + paths: + storage_root: "/local/joosep/mlpf/" + scratch_root: "/scratch/local/joosep/" + +# ----------------------------------------------------------------------------- +# Dataset Productions +# Defines how raw data is generated (or found), processed, and registered in TFDS. +# Each production follows a unified directory structure: +# $workspace_dir/gen - Raw ROOT/EDM files from simulation +# $workspace_dir/post - Postprocessed intermediate files (parquet/pkl) +# $workspace_dir/tfds - Final TensorFlow Datasets +# ----------------------------------------------------------------------------- +productions: + # CMS Production Campaign + cms_2025_main: + type: "cms" + comment: "Main production with CMSSW_15_0_5" + workspace_dir: "${project.paths.storage_root}/cms/20260204_cmssw_15_0_5_117d32" + config_dir: "/home/joosep/particleflow/" + gen_container: "/cvmfs/singularity.opensciencegrid.org/cmssw/cms:rhel8-x86_64" + + # Common execution environment for this campaign + environment: + cmssw_release: "CMSSW_15_0_5" + scram_arch: "el8_amd64_gcc12" + + memory: + gen: 6000 + post: 4000 + tfds: 8000 + + slurm_partition: "main" + slurm_runtime: "24h" + + # Step 1: Generation + # Defines the MC samples to be generated or processed + samples: + ttbar_pu: + process_name: "TTbar_13p6TeV_TuneCUETP8M1_cfi" + gen_script: "mlpf/data/cms/genjob_pu.sh" + seed_range: [100000, 100210] + events_per_job: 50 + output_subdir: "pu55to75" + pu_type: "pu55to75" + + ztt_pu: + process_name: "ZTT_All_hadronic_13p6TeV_TuneCUETP8M1_cfi" + gen_script: "mlpf/data/cms/genjob_pu.sh" + seed_range: [200000, 200210] + events_per_job: 100 + output_subdir: "pu55to75" + pu_type: "pu55to75" + + qcd_pu: + process_name: "QCDForPF_13p6TeV_TuneCUETP8M1_cfi" + 
gen_script: "mlpf/data/cms/genjob_pu.sh" + seed_range: [300000, 300210] + events_per_job: 100 + output_subdir: "pu55to75" + pu_type: "pu55to75" + + qcd_val: + process_name: "QCDForPF_13p6TeV_TuneCUETP8M1_cfi" + gen_script: "mlpf/data/cms/genjob_pu.sh" + seed_range: [400000, 400210] + events_per_job: 100 + output_subdir: "pu55to75_val" + pu_type: "pu55to75" + copy_step2: true + + ttbar_nopu: + process_name: "TTbar_13p6TeV_TuneCUETP8M1_cfi" + gen_script: "mlpf/data/cms/genjob_pu.sh" + seed_range: [800000, 800210] + events_per_job: 100 + output_subdir: "nopu" + pu_type: "nopu" + + qcd_nopu: + process_name: "QCDForPF_13p6TeV_TuneCUETP8M1_cfi" + gen_script: "mlpf/data/cms/genjob_pu.sh" + seed_range: [1000000, 1000210] + events_per_job: 100 + output_subdir: "nopu" + pu_type: "nopu" + + ztt_nopu: + process_name: "ZTT_All_hadronic_13p6TeV_TuneCUETP8M1_cfi" + gen_script: "mlpf/data/cms/genjob_pu.sh" + seed_range: [1100000, 1100210] + events_per_job: 100 + output_subdir: "nopu" + pu_type: "nopu" + + # Step 2: Postprocessing + # Converts ROOT/EDM output to intermediate format (pkl/parquet) for ML + postprocessing: + script: "mlpf/data/cms/postprocessing2.py" + args: + save_full_graph: false + num_events: -1 # process all + + # Step 3: TFDS Conversion + # Mapping samples to TensorFlow Dataset Builders + tfds_mapping: + ttbar_pu: + builder_path: "mlpf/heptfds/cms_pf/ttbar" + version: "3.0.0" + config_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + qcd_pu: + builder_path: "mlpf/heptfds/cms_pf/qcd" + version: "3.0.0" + config_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + ztt_pu: + builder_path: "mlpf/heptfds/cms_pf/ztt" + version: "3.0.0" + config_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + ttbar_nopu: + builder_path: "mlpf/heptfds/cms_pf/ttbar_nopu" + version: "3.0.0" + config_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + qcd_nopu: + builder_path: "mlpf/heptfds/cms_pf/qcd_nopu" + version: "3.0.0" + config_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + ztt_nopu: + builder_path: "mlpf/heptfds/cms_pf/ztt_nopu" + 
version: "3.0.0" + config_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + + # CLD Production + cld_2025_edm4hep: + type: "key4hep" + comment: "CLD production with Key4Hep" + workspace_dir: "${project.paths.storage_root}/cld/v1.2.3_key4hep_2025-05-29_CLD_f1e8f9" + config_dir: "/home/joosep/particleflow/mlpf/data/key4hep/gen/cld/CLDConfig" + gen_container: "/home/software/singularity/alma9.simg" + + memory: + gen: 4000 + post: 4000 + tfds: 8000 + + slurm_partition: "main" + slurm_runtime: "120m" + + samples: + zz: + process_name: "p8_ee_ZZ_ecm365" + gen_script: "mlpf/data/key4hep/gen/cld/run_sim.sh" + seed_range: [100000, 101010] + events_per_job: 100 + zz_tautau_365: + process_name: "p8_ee_ZZ_tautau_ecm365" + gen_script: "mlpf/data/key4hep/gen/cld/run_sim.sh" + seed_range: [100000, 101010] + events_per_job: 100 + zz_tautau_240: + process_name: "p8_ee_ZZ_tautau_ecm240" + gen_script: "mlpf/data/key4hep/gen/cld/run_sim.sh" + seed_range: [100000, 101010] + events_per_job: 100 + zh_tautau_365: + process_name: "p8_ee_ZH_Htautau_ecm365" + gen_script: "mlpf/data/key4hep/gen/cld/run_sim.sh" + seed_range: [200000, 201010] + events_per_job: 100 + zh_tautau_240: + process_name: "p8_ee_ZH_Htautau_ecm240" + gen_script: "mlpf/data/key4hep/gen/cld/run_sim.sh" + seed_range: [200000, 201010] + events_per_job: 100 + ttbar: + process_name: "p8_ee_ttbar_ecm365" + gen_script: "mlpf/data/key4hep/gen/cld/run_sim.sh" + seed_range: [300000, 301010] + events_per_job: 100 + ww_fullhad: + process_name: "p8_ee_WW_ecm365" + gen_script: "mlpf/data/key4hep/gen/cld/run_sim.sh" + seed_range: [400000, 401010] + events_per_job: 100 + qq: + process_name: "p8_ee_qq_ecm365" + gen_script: "mlpf/data/key4hep/gen/cld/run_sim.sh" + seed_range: [500000, 501010] + events_per_job: 100 + z_qq: + process_name: "p8_ee_Z_qq_ecm91" + gen_script: "mlpf/data/key4hep/gen/cld/run_sim.sh" + seed_range: [600000, 601010] + events_per_job: 100 + z_tautau: + process_name: "p8_ee_Z_tautau_ecm91" + gen_script: 
"mlpf/data/key4hep/gen/cld/run_sim.sh" + seed_range: [700000, 701010] + events_per_job: 100 + + postprocessing: + script: "mlpf/data/key4hep/postprocessing.py" + args: {} + + tfds_mapping: + ttbar: + builder_path: "mlpf/heptfds/cld_pf_edm4hep/ttbar" + version: "2.6.0" + config_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + ww_fullhad: + builder_path: "mlpf/heptfds/cld_pf_edm4hep/ww" + version: "2.6.0" + config_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + zz: + builder_path: "mlpf/heptfds/cld_pf_edm4hep/zz" + version: "2.6.0" + config_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + qq: + builder_path: "mlpf/heptfds/cld_pf_edm4hep/qq" + version: "2.6.0" + config_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + + # CLIC Production + clic_2025_edm4hep: + type: "key4hep" + comment: "CLIC production with Key4Hep" + workspace_dir: "${project.paths.storage_root}/clic/v1.2.4_key4hep_2025-05-29_CLIC_819e4e" + config_dir: "/home/joosep/particleflow/mlpf/data/key4hep/gen/clic" + gen_container: "/home/software/singularity/alma9.simg" + + memory: + gen: 4000 + post: 4000 + tfds: 8000 + + slurm_partition: "main" + slurm_runtime: "240m" + + samples: + ttbar: + process_name: "p8_ee_ttbar_ecm380" + gen_script: "mlpf/data/key4hep/gen/clic/run_sim.sh" + seed_range: [300000, 301010] + events_per_job: 100 + ww_fullhad: + process_name: "p8_ee_WW_fullhad_ecm380" + gen_script: "mlpf/data/key4hep/gen/clic/run_sim.sh" + seed_range: [400000, 401010] + events_per_job: 100 + qq: + process_name: "p8_ee_qq_ecm380" + gen_script: "mlpf/data/key4hep/gen/clic/run_sim.sh" + seed_range: [500000, 501010] + events_per_job: 100 + + postprocessing: + script: "mlpf/data/key4hep/postprocessing.py" + args: {} + + tfds_mapping: + ttbar: + builder_path: "mlpf/heptfds/clic_pf_edm4hep/ttbar" + version: "3.0.0" + config_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + ww_fullhad: + builder_path: "mlpf/heptfds/clic_pf_edm4hep/ww_fullhad" + version: "3.0.0" + config_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + qq: + builder_path: "mlpf/heptfds/clic_pf_edm4hep/qq" 
+ version: "3.0.0" + config_ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +# ----------------------------------------------------------------------------- +# Model Configurations +# Defines training recipes. Can inherit from defaults or other models. +# ----------------------------------------------------------------------------- +models: + defaults: &model_defaults + backend: pytorch + gpus: 1 + gpu_type: l40 + mem_per_gpu_mb: 100000 + slurm_partition: "gpu" + slurm_runtime: "48h" + optimizer: adamw + lr_schedule: cosinedecay + dtype: bfloat16 + + # Example: CMS Model (GNN + Attention) + pyg-cms-v1: + <<: *model_defaults + dataset: cms + gpu_batch_multiplier: 4 + + # Dataset Selection + train_datasets: + physical_pu: + batch_size: 1 + samples: + - name: "cms_pf_ttbar" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "cms_pf_qcd" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "cms_pf_ztt" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + physical_nopu: + batch_size: 16 + samples: + - name: "cms_pf_ttbar_nopu" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "cms_pf_qcd_nopu" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "cms_pf_ztt_nopu" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + + validation_datasets: + physical_pu: + batch_size: 1 + samples: + - name: "cms_pf_ttbar" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "cms_pf_qcd" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "cms_pf_ztt" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + physical_nopu: + batch_size: 16 + samples: + - name: "cms_pf_ttbar_nopu" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: 
"cms_pf_qcd_nopu" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "cms_pf_ztt_nopu" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + + test_datasets: + - name: "cms_pf_ttbar" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "cms_pf_qcd" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "cms_pf_ztt" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + + # Hyperparameters + hyperparameters: + batch_size: 1 + lr: 0.0004 + + # Architecture Definition + architecture: + type: "attention" + input_encoding: "split" + attention: + num_convs: 3 + head_dim: 32 + num_heads: 32 + + # Example: CLD Model + pyg-cld-v1: + <<: *model_defaults + dataset: cld + gpu_batch_multiplier: 128 + + hyperparameters: + batch_size: 1 + lr: 0.0005 + + architecture: + type: "attention" + input_encoding: "split" + attention: + num_convs: 3 + head_dim: 32 + num_heads: 32 + + # Dataset Selection + train_datasets: + physical: + batch_size: 1 + samples: + - name: "cld_edm_ttbar_pf" + version: "2.6.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "cld_edm_ww_fullhad_pf" + version: "2.6.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "cld_edm_qq_pf" + version: "2.6.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + + validation_datasets: + physical: + batch_size: 1 + samples: + - name: "cld_edm_ttbar_pf" + version: "2.6.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "cld_edm_ww_fullhad_pf" + version: "2.6.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "cld_edm_qq_pf" + version: "2.6.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + + test_datasets: + - name: "cld_edm_ttbar_pf" + version: "2.6.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: 
"cld_edm_ww_fullhad_pf" + version: "2.6.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "cld_edm_qq_pf" + version: "2.6.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + + # CLIC Model + pyg-clic-v1: + <<: *model_defaults + dataset: clic + gpu_batch_multiplier: 128 + + hyperparameters: + batch_size: 1 + lr: 0.0005 + + architecture: + type: "attention" + input_encoding: "split" + attention: + num_convs: 3 + head_dim: 32 + num_heads: 32 + + # Dataset Selection + train_datasets: + physical: + batch_size: 1 + samples: + - name: "clic_edm_ttbar_pf" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "clic_edm_ww_fullhad_pf" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "clic_edm_qq_pf" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + + validation_datasets: + physical: + batch_size: 1 + samples: + - name: "clic_edm_ttbar_pf" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "clic_edm_ww_fullhad_pf" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "clic_edm_qq_pf" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + + test_datasets: + - name: "clic_edm_ttbar_pf" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "clic_edm_ww_fullhad_pf" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + - name: "clic_edm_qq_pf" + version: "3.0.0" + splits: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] diff --git a/scripts/local_test_torch.sh b/scripts/local_test_torch.sh index f27d13c56..4d98e20e8 100755 --- a/scripts/local_test_torch.sh +++ b/scripts/local_test_torch.sh @@ -14,9 +14,9 @@ python -m pytest tests/test_interleaved_iterator.py python -m pytest tests/test_lr_schedule.py #create data directories -rm -Rf 
local_test_data/TTbar_14TeV_TuneCUETP8M1_cfi -mkdir -p local_test_data/TTbar_14TeV_TuneCUETP8M1_cfi/root -cd local_test_data/TTbar_14TeV_TuneCUETP8M1_cfi/root +rm -Rf local_test_data/TTbar_13p6TeV_TuneCUETP8M1_cfi +mkdir -p local_test_data/TTbar_13p6TeV_TuneCUETP8M1_cfi/root +cd local_test_data/TTbar_13p6TeV_TuneCUETP8M1_cfi/root #Only CMS-internal use is permitted by CMS rules! Do not use these MC simulation files otherwise! wget -q --no-check-certificate -nc https://jpata.web.cern.ch/jpata/mlpf/cms/20240823_simcluster/pu55to75/TTbar_14TeV_TuneCUETP8M1_cfi/root/pfntuple_100000.root @@ -25,13 +25,12 @@ wget -q --no-check-certificate -nc https://jpata.web.cern.ch/jpata/mlpf/cms/2024 cd ../../.. #Create the ntuples using postprocessing2.py -rm -Rf local_test_data/TTbar_14TeV_TuneCUETP8M1_cfi/raw -mkdir -p local_test_data/TTbar_14TeV_TuneCUETP8M1_cfi/raw -for file in `\ls -1 local_test_data/TTbar_14TeV_TuneCUETP8M1_cfi/root/*.root`; do +for file in `\ls -1 local_test_data/TTbar_13p6TeV_TuneCUETP8M1_cfi/root/*.root`; do python mlpf/data/cms/postprocessing2.py \ --input $file \ - --outpath local_test_data/TTbar_14TeV_TuneCUETP8M1_cfi/raw + --outpath local_test_data/TTbar_13p6TeV_TuneCUETP8M1_cfi done +find local_test_data #create the tensorflow dataset for the last split config only tfds build mlpf/heptfds/cms_pf/ttbar --config 10 --manual_dir ./local_test_data @@ -42,7 +41,9 @@ mkdir -p experiments # Test 1: Initial training using the 'train' sub-command # -------------------------------------------------------------------------------------------- python mlpf/pipeline.py \ - --config parameters/pytorch/pyg-cms.yaml \ + --spec-file particleflow_spec.yaml \ + --model-name pyg-cms-v1 \ + --production cms_2025_main \ --data-dir ./tensorflow_datasets/ \ --prefix MLPF_test_ \ --pipeline \ @@ -66,7 +67,9 @@ export EXP_DIR=$(ls -d experiments/MLPF_test_*/) # --experiment-dir is omitted, so a new one is created. 
# -------------------------------------------------------------------------------------------- python mlpf/pipeline.py \ - --config parameters/pytorch/pyg-cms.yaml \ + --spec-file particleflow_spec.yaml \ + --model-name pyg-cms-v1 \ + --production cms_2025_main \ --data-dir ./tensorflow_datasets/ \ --prefix MLPF_test_ \ --pipeline \ diff --git a/scripts/tallinn/cmssw-el8.sh b/scripts/tallinn/cmssw-el8.sh index 7cc7e82c5..8d847d4a0 100755 --- a/scripts/tallinn/cmssw-el8.sh +++ b/scripts/tallinn/cmssw-el8.sh @@ -6,4 +6,4 @@ source /cvmfs/cms.cern.ch/cmsset_default.sh export UNPACKED_IMAGE=/cvmfs/singularity.opensciencegrid.org/cmssw/cms\:rhel8-x86_64/ -cmssw-el8 -B /root -B /cms -B /local -B /scratch/persistent -B /scratch/local --command-to-run $@ +cmssw-el8 -B /root -B /local -B /scratch/persistent -B /scratch/local --command-to-run $@
diff --git a/scripts/tallinn/container-python b/scripts/tallinn/container-python
new file mode 100755
index 000000000..93ffd6ec2
--- /dev/null
+++ b/scripts/tallinn/container-python
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Run python3.13 inside the alma9-kbfi.simg Apptainer image, forwarding all arguments.
+
+# Set the thread-count variables to SLURM_CPUS_PER_TASK, or to 1 when it is unset or empty.
+CORES=${SLURM_CPUS_PER_TASK:-1}
+export OMP_NUM_THREADS=$CORES
+export OPENBLAS_NUM_THREADS=$CORES
+export MKL_NUM_THREADS=$CORES
+export VECLIB_MAXIMUM_THREADS=$CORES
+export NUMEXPR_NUM_THREADS=$CORES
+
+# Put the snakemake_fix directory first on the module search path.
+# The previous PYTHONPATH is appended only when it is non-empty: a trailing ":"
+# would add an empty entry, which Python resolves to the current working directory.
+FIX_DIR="/home/joosep/particleflow/scripts/tallinn/snakemake_fix"
+export PYTHONPATH="$FIX_DIR${PYTHONPATH:+:$PYTHONPATH}"
+
+apptainer exec -B /home -B /local -B /scratch -B /cvmfs -B /etc/munge/ -B /etc/slurm/ -B /var/run/munge/ --nv /home/joosep/singularity/alma9-kbfi.simg python3.13 "$@"
diff --git a/scripts/tallinn/genjob.sh b/scripts/tallinn/genjob.sh
deleted file mode 100644
index cd8b2e404..000000000
--- a/scripts/tallinn/genjob.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-#SBATCH -p main
-#SBATCH --mem-per-cpu=4G
-#SBATCH --cpus-per-task=1
-#SBATCH -o logs/slurm-%x-%j-%N.out
-set -e
-set -x
-
-env
-df -h
-
-WORKDIR=/scratch/$USER/${SLURM_JOB_ID}
-SAMPLE=$1
-SEED=$2
-OUTDIR=/local/joosep/mlpf/gen/v3/
-
-mkdir -p $WORKDIR
-cd 
$WORKDIR
-
-time /home/joosep/particleflow/mlpf/data_cms/genjob.sh $SAMPLE $SEED
-
-#cp $WORKDIR/$SAMPLE/$SEED/pfntuple_*.root $OUTDIR/$SAMPLE/root/
-cp $WORKDIR/$SAMPLE/$SEED/pfntuple_*.pkl.bz2 $OUTDIR/$SAMPLE/raw/
-
-rm -Rf $WORKDIR
diff --git a/scripts/tallinn/genjob_pu.sh b/scripts/tallinn/genjob_pu.sh
deleted file mode 100644
index e1387a5c2..000000000
--- a/scripts/tallinn/genjob_pu.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/bash
-#SBATCH -p main
-#SBATCH --mem-per-cpu=6G
-#SBATCH --cpus-per-task=1
-#SBATCH -o logs/slurm-%x-%j-%N.out
-
-env
-df -h
-
-WORKDIR=/scratch/$USER/${SLURM_JOB_ID}
-SAMPLE=$1
-SEED=$2
-OUTDIR=/local/joosep/mlpf/gen/v3/
-
-mkdir -p $WORKDIR
-cd $WORKDIR
-
-/home/joosep/particleflow/mlpf/data_cms/genjob_pu.sh $SAMPLE $SEED
-
-#cp $WORKDIR/$SAMPLE/$SEED/pfntuple_*.root $OUTDIR/$SAMPLE/root/
-cp $WORKDIR/$SAMPLE/$SEED/pfntuple_*.pkl.bz2 $OUTDIR/$SAMPLE/raw/
-
-rm -Rf $WORKDIR
diff --git a/scripts/tallinn/produce_snakemake.sh b/scripts/tallinn/produce_snakemake.sh
new file mode 100755
index 000000000..74a61f294
--- /dev/null
+++ b/scripts/tallinn/produce_snakemake.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Generate the Snakefile for one production campaign, then run it with Snakemake on the slurm executor.
+set -e
+
+# Active production/model pair; the commented pairs below are the alternatives.
+WORKFLOW=cms_2025_main
+MODEL=pyg-cms-v1
+
+#WORKFLOW=cld_2025_edm4hep
+#MODEL=pyg-cld-v1
+
+#WORKFLOW=clic_2025_edm4hep
+#MODEL=pyg-clic-v1
+
+# Steps gen,post,tfds (currently disabled).
+#singularity exec -B /local --env PYTHONPATH=`pwd` /home/software/singularity/pytorch.simg\:2026-02-04 python3 mlpf/produce_snakemake.py --production $WORKFLOW --steps gen,post,tfds
+#./scripts/tallinn/container-python -m snakemake --executor slurm --profile tallinn -s snakemake_jobs/$WORKFLOW/Snakefile --jobs unlimited --use-apptainer --apptainer-args " -B /local -B /cvmfs -B /scratch/local"
+
+# Step train. The expansions are quoted so a working directory containing whitespace stays one argument.
+singularity exec -B /local --env PYTHONPATH="$(pwd)" /home/software/singularity/pytorch.simg\:2026-02-04 python3 mlpf/produce_snakemake.py --production "$WORKFLOW" --steps train --model "$MODEL"
+./scripts/tallinn/container-python -m snakemake --executor slurm --profile tallinn -s 
snakemake_jobs/$WORKFLOW/Snakefile --jobs unlimited --use-apptainer --apptainer-args " -B /local -B /cvmfs -B /scratch/local --nv"
diff --git a/scripts/tallinn/snakemake_fix/sitecustomize.py b/scripts/tallinn/snakemake_fix/sitecustomize.py
new file mode 100644
index 000000000..c1241ef43
--- /dev/null
+++ b/scripts/tallinn/snakemake_fix/sitecustomize.py
@@ -0,0 +1,21 @@
+"""Point ``sys.executable`` at the container-python wrapper script.
+
+Python's ``site`` module imports a module named ``sitecustomize`` during
+interpreter start-up when one is importable, so this runs before Snakemake
+is imported.
+"""
+import os
+import sys
+
+# Optional override of the wrapper location; an unset or empty value selects the default.
+_WRAPPER_ENV = "MLPF_CONTAINER_PYTHON"
+_DEFAULT_WRAPPER = "/home/joosep/particleflow/scripts/tallinn/container-python"
+
+# The path to the wrapper script
+wrapper = os.environ.get(_WRAPPER_ENV) or _DEFAULT_WRAPPER
+
+# Force sys.executable to look like the wrapper, but only when the wrapper is a
+# regular file that can be executed: a directory or a non-executable file would
+# break every child process started through sys.executable.
+if os.path.isfile(wrapper) and os.access(wrapper, os.X_OK):
+    sys.executable = wrapper