diff --git a/.gitignore b/.gitignore index 87178727..3caee75e 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,6 @@ AutoDict_* # Output dirs in signal and bkg Signal/outdir_* Background/outdir_* + +# Lint and formatting +*.pylintrc \ No newline at end of file diff --git a/Background/RunBackgroundScripts.py b/Background/RunBackgroundScripts.py index d9eaf71e..c876d00e 100644 --- a/Background/RunBackgroundScripts.py +++ b/Background/RunBackgroundScripts.py @@ -1,5 +1,6 @@ # Script for running background fitting jobs for flashggFinalFit -import os, sys +import os +import sys from optparse import OptionParser from collections import OrderedDict as od @@ -7,6 +8,7 @@ from tools.submissionTools import * from commonTools import * from commonObjects import * +from CollectModels import collect_models, create_empty_json def get_options(): parser = OptionParser() @@ -15,6 +17,12 @@ def get_options(): parser.add_option('--mode', dest='mode', default='std', help="Which script to run. Options: ['fTestOnly','fTestParallel','bkgPlotsOnly']") parser.add_option('--jobOpts', dest='jobOpts', default='', help="Additional options to add to job submission. For Condor separate individual options with a colon (specify all within quotes e.g. \"option_xyz = abc+option_123 = 456\")") parser.add_option('--printOnly', dest='printOnly', default=False, action="store_true", help="Dry run: print submission files only") + parser.add_option('--fitType', dest='fitType', default='mgg', help="Fit type: mgg, mjj or 2D. 
(default: mgg)") + parser.add_option('--mggLow', dest='mggLow', default=100, type='int', help="Lower mgg fit range (default: 100)") + parser.add_option('--mggHigh', dest='mggHigh', default=180, type='int', help="Upper mgg fit range (default: 180)") + parser.add_option('--mjjLow', dest='mjjLow', default=80, type='int', help="Lower mjj fit range (default: 80)") + parser.add_option('--mjjHigh', dest='mjjHigh', default=190, type='int', help="Upper mjj fit range (default: 190)") + parser.add_option('--noClean', dest='noClean', default=False, action="store_true", help="Do not clean up old ROOT files and plots. (default: False)") return parser.parse_args() (opt,args) = get_options() @@ -49,6 +57,12 @@ def leave(): options['mode'] = opt.mode options['jobOpts'] = opt.jobOpts options['printOnly'] = opt.printOnly + options['fitType'] = opt.fitType + options['mggLow'] = opt.mggLow + options['mggHigh'] = opt.mggHigh + options['mjjLow'] = opt.mjjLow + options['mjjHigh'] = opt.mjjHigh + options['noClean'] = opt.noClean # Delete copy of file os.system("rm config.py") @@ -66,6 +80,32 @@ def leave(): print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUNNING BACKGROUND SCRIPTS (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") sys.exit(1) +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Remove existing root files +if options["fitType"] == "2D" and options["mode"] == "fTestParallel": + pattern = f"{bwd__}/outdir_{options['ext']}/CMS-*.root" +elif options["fitType"] == "mgg" and options["mode"] == "fTestParallel": + pattern = f"{bwd__}/outdir_{options['ext']}/CMS-HGG*.root" +elif options["fitType"] == "mjj" and options["mode"] == "fTestParallel": + pattern = f"{bwd__}/outdir_{options['ext']}/CMS-HBB*.root" +else: + print(" --> Invalid fitType. Exiting to avoid accidental deletion.") + sys.exit(1) +root_files = glob.glob(pattern) + +if len(co.bwd__) < 5: # change this number if needed + print(" --> Directory name too short. 
Exiting to avoid accidental deletion.") + leave() +if len(options["ext"]) == 0: + print(" --> Extension name blank. Exiting to avoid accidental deletion.") + leave() + +if len(root_files) > 0: + print(" --> Removing existing root files") + for f in root_files: + print(f" --> Removing {f}") + os.remove(f) + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # If cat == auto: extract list of categories from datafile if options['cats'] == 'auto': @@ -83,6 +123,7 @@ def leave(): print(" --> Extension: %s"%options['ext']) print(" --> Category offset: %g"%options['catOffset']) print(" --> Year: %s ::: Corresponds to intLumi = %s fb^-1"%(options['year'],options['lumi'])) +print(" --> Fit type: %s"%options['fitType']) print("") print(" --> Job information:") print(" * Batch: %s"%options['batch']) @@ -97,13 +138,35 @@ def leave(): # Write submission files: style depends on batch system writeSubFiles(options) +# if options['fitType'] == "mgg" or options['fitType'] == "2D": +# writeSubFilesMgg(options) +# if options['fitType'] == "mjj" or options['fitType'] == "2D": +# writeSubFilesMjj(options) print(" --> Finished writing submission scripts") # Submit scripts to batch system if not options['printOnly']: submitFiles(options) + # if options["fitType"] == "mgg" or options['fitType'] == "2D": + # submitFilesMgg(options) + # if options["fitType"] == "mjj" or options['fitType'] == "2D": + # submitFilesMjj(options) else: print(" --> Running with printOnly option. 
Will not submit scripts") +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# 2D Model Construction +output_path_base = "%s/outdir_%s/"%(bwd__,options['ext']) +if options['fitType'] == "2D": + print(" --> Running 2D model construction") + if options['mode'] == "fTestParallel": + collect_models( + json_file=f"{output_path_base}/models.json", + output_path=f"{output_path_base}/CMS-2D_multipdf_{options['ext']}_%CAT.root", + ws_type="bkg-nonres", + no_clear=options['noClean'], + ) + print(" --> Finished collecting models") + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave() diff --git a/Background/runBackgroundScripts.sh b/Background/runBackgroundScripts.sh index b4f824b3..42313559 100755 --- a/Background/runBackgroundScripts.sh +++ b/Background/runBackgroundScripts.sh @@ -20,6 +20,10 @@ BATCH="" QUEUE="" YEAR="2016" CATOFFSET=0 +FITTYPE="mgg" # mgg or mjj +MASSLOW=100 +MASSHIGH=180 +BINWIDTH=2 usage(){ echo "The script runs background scripts:" @@ -43,6 +47,10 @@ echo "--isData) specified in fb^-{1} (default $DATA)) " echo "--unblind) specified in fb^-{1} (default $UNBLIND)) " echo "--batch) which batch system to use (None (''),HTCONDOR,IC) (default '$BATCH')) " echo "--queue) queue to submit jobs to (specific to batch))" +echo "--fitType) mgg or mjj (default $FITTYPE)) " +echo "--massLow) lower fit range (default $MASSLOW)) " +echo "--massHigh) upper fit range (default $MASSHIGH)) " +echo "--binWidth) bin width for plots (default $BINWIDTH)) " } @@ -50,7 +58,7 @@ echo "--queue) queue to submit jobs to (specific to batch))" # options may be followed by one colon to indicate they have a required argument -if ! options=$(getopt -u -o hi:p:f: -l help,inputFile:,procs:,flashggCats:,ext:,catOffset:,fTestOnly,pseudoDataOnly,bkgPlotsOnly,pseudoDataDat:,sigFile:,seed:,intLumi:,year:,unblind,isData,batch:,queue: -- "$@") +if ! 
options=$(getopt -u -o hi:p:f: -l help,inputFile:,procs:,flashggCats:,ext:,catOffset:,fTestOnly,pseudoDataOnly,bkgPlotsOnly,pseudoDataDat:,sigFile:,seed:,intLumi:,year:,unblind,isData,batch:,queue:,fitType:,massLow:,massHigh:,binWidth: -- "$@") then # something went wrong, getopt will put out an error message for us exit 1 @@ -78,6 +86,10 @@ case $1 in --unblind) UNBLIND=1;; --batch) BATCH=$2; shift;; --queue) QUEUE=$2; shift;; +--fitType) FITTYPE=$2; shift;; +--massLow) MASSLOW=$2; shift;; +--massHigh) MASSHIGH=$2; shift;; +--binWidth) BINWIDTH=$2; shift;; (--) shift; break;; (-*) usage; echo "$0: error - unrecognized option $1" 1>&2; usage >> /dev/stderr; exit 1;; @@ -134,6 +146,7 @@ echo "--> Create fake data by fitting simulations, throwing toys and adding data echo "--> generating $INTLUMI fb^{-1} of pseudodata." echo "--------------------------------------" +# TODO: Add binWidth arg? echo " ./bin/pseudodataMaker -i $PSEUDODATADAT --pseudodata 1 --plotdir $OUTDIR/pseudoData -f $CATS --seed $SEED --intLumi $INTLUMI " ./bin/pseudodataMaker -i $PSEUDODATADAT --pseudodata 1 --plotdir $OUTDIR/pseudoData -f $CATS --seed $SEED --intLumi $INTLUMI -y $OUTDIR/pseudoData/yields_pseudodata.txt FILE=$OUTDIR/pseudoData/pseudoWS.root @@ -159,8 +172,14 @@ if [ $ISDATA == 1 ]; then OPT=" --isData 1" fi -echo " ./bin/fTest -i $FILE --saveMultiPdf $OUTDIR/CMS-HGG_multipdf_$EXT_$CATS.root -D $OUTDIR/bkgfTest$DATAEXT -f $CATS $OPT --year $YEAR --catOffset $CATOFFSET" -./bin/fTest -i $FILE --saveMultiPdf $OUTDIR/CMS-HGG_multipdf_$EXT_$CATS.root -D $OUTDIR/bkgfTest$DATAEXT -f $CATS $OPT --year $YEAR --catOffset $CATOFFSET +if [ $FITTYPE == "mgg" ]; then + echo " ./bin/fTest -i $FILE --saveMultiPdf $OUTDIR/CMS-HGG_multipdf_$EXT_$CATS.root -D $OUTDIR/bkgfTest$DATAEXT -f $CATS $OPT --year $YEAR --catOffset $CATOFFSET --massLow $MASSLOW --massHigh $MASSHIGH --fitType $FITTYPE --binWidth $BINWIDTH" + ./bin/fTest -i $FILE --saveMultiPdf $OUTDIR/CMS-HGG_multipdf_$EXT_$CATS.root -D 
$OUTDIR/bkgfTest$DATAEXT -f $CATS $OPT --year $YEAR --catOffset $CATOFFSET --massLow $MASSLOW --massHigh $MASSHIGH --fitType $FITTYPE --binWidth $BINWIDTH +fi +if [ $FITTYPE == "mjj" ]; then + echo " ./bin/fTest -i $FILE --saveMultiPdf $OUTDIR/CMS-HBB_multipdf_$EXT_$CATS.root -D $OUTDIR/bkgfTest$DATAEXT -f $CATS $OPT --year $YEAR --catOffset $CATOFFSET --massLow $MASSLOW --massHigh $MASSHIGH --fitType $FITTYPE --binWidth $BINWIDTH" + ./bin/fTest -i $FILE --saveMultiPdf $OUTDIR/CMS-HBB_multipdf_$EXT_$CATS.root -D $OUTDIR/bkgfTest$DATAEXT -f $CATS $OPT --year $YEAR --catOffset $CATOFFSET --massLow $MASSLOW --massHigh $MASSHIGH --fitType $FITTYPE --binWidth $BINWIDTH +fi OPT="" fi @@ -180,8 +199,14 @@ fi if [ $UNBLIND == 1 ]; then OPT=" --unblind" fi -echo "./scripts/subBkgPlots.py -b CMS-HGG_multipdf_$EXT.root -d $OUTDIR/bkgPlots$DATAEXT -S 13 --isMultiPdf --useBinnedData --doBands --massStep 1 $SIG -L 100 -H 180 -f $CATS -l $CATS --intLumi $INTLUMI $OPT --batch $BATCH -q $QUEUE --year $YEAR" -./scripts/subBkgPlots.py -b CMS-HGG_multipdf_$EXT.root -d $OUTDIR/bkgPlots$DATAEXT -S 13 --isMultiPdf --useBinnedData --doBands --massStep 1 $SIG -L 100 -H 180 -f $CATS -l $CATS --intLumi $INTLUMI $OPT --batch $BATCH -q $QUEUE --year $YEAR +if [ $FITTYPE == "mgg" ]; then # TODO: Add binWidth arg? + echo "./scripts/subBkgPlots.py -b CMS-HGG_multipdf_$EXT.root -d $OUTDIR/bkgPlots$DATAEXT -S 13 --isMultiPdf --useBinnedData --doBands --massStep 1 $SIG -L $MASSLOW -H $MASSHIGH -f $CATS -l $CATS --intLumi $INTLUMI $OPT --batch $BATCH -q $QUEUE --year $YEAR" + ./scripts/subBkgPlots.py -b CMS-HGG_multipdf_$EXT.root -d $OUTDIR/bkgPlots$DATAEXT -S 13 --isMultiPdf --useBinnedData --doBands --massStep 1 $SIG -L $MASSLOW -H $MASSHIGH -f $CATS -l $CATS --intLumi $INTLUMI $OPT --batch $BATCH -q $QUEUE --year $YEAR +fi +if [ $FITTYPE == "mjj" ]; then # TODO: Add binWidth arg? 
+ echo "./scripts/subBkgPlots.py -b CMS-HBB_multipdf_$EXT.root -d $OUTDIR/bkgPlots$DATAEXT -S 13 --isMultiPdf --useBinnedData --doBands --massStep 1 $SIG -L $MASSLOW -H $MASSHIGH -f $CATS -l $CATS --intLumi $INTLUMI $OPT --batch $BATCH -q $QUEUE --year $YEAR" + ./scripts/subBkgPlots.py -b CMS-HBB_multipdf_$EXT.root -d $OUTDIR/bkgPlots$DATAEXT -S 13 --isMultiPdf --useBinnedData --doBands --massStep 1 $SIG -L $MASSLOW -H $MASSHIGH -f $CATS -l $CATS --intLumi $INTLUMI $OPT --batch $BATCH -q $QUEUE --year $YEAR +fi # FIX THIS FOR CONDOR: #continueLoop=1 diff --git a/Background/tools/submissionTools.py b/Background/tools/submissionTools.py index 6e2db82e..57e495d4 100644 --- a/Background/tools/submissionTools.py +++ b/Background/tools/submissionTools.py @@ -37,6 +37,187 @@ def writeCondorSub(_file,_exec,_queue,_nJobs,_jobOpts,doHoldOnFailure=True,doPer _file.write("queue %g"%_nJobs) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +def writeSubFilesMgg(_opts): + if _opts['batch'] == 'condor': + writeCondorSubFilesMgg(_opts) + if (_opts['batch'] == "IC")|(_opts['batch'] == "SGE")|(_opts['batch'] == "local" ): + writeSGESubFilesMgg(_opts) + +def writeSubFilesMjj(_opts): + if _opts['batch'] == 'condor': + writeCondorSubFilesMjj(_opts) + if (_opts['batch'] == "IC")|(_opts['batch'] == "SGE")|(_opts['batch'] == "local" ): + writeSGESubFilesMjj(_opts) + +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +def writeCondorSubFilesMgg(_opts): + # CONDOR + _jobdir = "%s/outdir_%s/%s/jobs"%(bwd__,_opts['ext'],_opts['mode']) + _executable = "condor_Mgg_%s_%s"%(_opts['mode'],_opts['ext']) + _f = open("%s/%s.sh"%(_jobdir,_executable),"w") # single .sh script split into separate jobs + writePreamble(_f) + + # For looping over categories + if( _opts['mode'] == "fTestParallel" ): + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + co = _opts['catOffset']+cidx 
+ _f.write("if [ $1 -eq %g ]; then\n"%cidx) + # _cmd = "%s/runBackgroundScripts.sh -i %s -p %s -f %s --ext %s --catOffset %g --intLumi %s --year %s --batch %s --queue %s --sigFile %s --isData --fTest"%(bwd__,_opts['dataFile'],_opts['procs'],c,_opts['ext'],co,_opts['lumi'],_opts['year'],_opts['batch'],_opts['queue'],_opts['signalFitWSFile']) + _cmd = ( + f"{bwd__}/runBackgroundScripts.sh" + f" -i {_opts['dataFile']}" + f" -p {_opts['procs']}" + f" -f {c}" + f" --ext {_opts['ext']}" + f" --catOffset {co}" + f" --intLumi {_opts['lumi']}" + f" --year {_opts['year']}" + f" --batch {_opts['batch']}" + f" --queue {_opts['queue']}" + f" --sigFile {_opts['signalFitWSFile']}" + f" --massLow {_opts['mggLow']}" + f" --massHigh {_opts['mggHigh']}" + f" --binWidth {_opts['binWidth']}" + f" --fitType mgg" + f" --isData --fTest" + ) + _f.write(" %s\n"%_cmd) + _f.write("fi\n") + + # Close .sh file + _f.close() + os.system("chmod 775 %s/%s.sh"%(_jobdir,_executable)) + + # Condor submission file + _fsub = open("%s/%s.sub"%(_jobdir,_executable),"w") + if( _opts['mode'] == "fTestParallel" ): + writeCondorSub(_fsub,_executable,_opts['queue'],_opts['nCats'],_opts['jobOpts']) + _fsub.close() + + +def writeCondorSubFilesMjj(_opts): + # CONDOR + _jobdir = "%s/outdir_%s/%s/jobs"%(bwd__,_opts['ext'],_opts['mode']) + _executable = "condor_Mjj_%s_%s"%(_opts['mode'],_opts['ext']) + _f = open("%s/%s.sh"%(_jobdir,_executable),"w") # single .sh script split into separate jobs + writePreamble(_f) + + # For looping over categories + if( _opts['mode'] == "fTestParallel" ): + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + co = _opts['catOffset']+cidx + _f.write("if [ $1 -eq %g ]; then\n"%cidx) + # _cmd = "%s/runBackgroundScripts.sh -i %s -p %s -f %s --ext %s --catOffset %g --intLumi %s --year %s --batch %s --queue %s --sigFile %s --isData 
--fTest"%(bwd__,_opts['dataFile'],_opts['procs'],c,_opts['ext'],co,_opts['lumi'],_opts['year'],_opts['batch'],_opts['queue'],_opts['signalFitWSFile']) + _cmd = ( + f"{bwd__}/runBackgroundScripts.sh" + f" -i {_opts['dataFile']}" + f" -p {_opts['procs']}" + f" -f {c}" + f" --ext {_opts['ext']}" + f" --catOffset {co}" + f" --intLumi {_opts['lumi']}" + f" --year {_opts['year']}" + f" --batch {_opts['batch']}" + f" --queue {_opts['queue']}" + f" --sigFile {_opts['signalFitWSFile']}" + f" --massLow {_opts['mjjLow']}" + f" --massHigh {_opts['mjjHigh']}" + f" --binWidth {_opts['binWidth']}" + f" --fitType mjj" + f" --isData --fTest" + ) + _f.write(" %s\n"%_cmd) + _f.write("fi\n") + + # Close .sh file + _f.close() + os.system("chmod 775 %s/%s.sh"%(_jobdir,_executable)) + + # Condor submission file + _fsub = open("%s/%s.sub"%(_jobdir,_executable),"w") + if( _opts['mode'] == "fTestParallel" ): + writeCondorSub(_fsub,_executable,_opts['queue'],_opts['nCats'],_opts['jobOpts']) + _fsub.close() + pass + + +def writeSGESubFilesMgg(_opts): + _jobdir = "%s/outdir_%s/%s/jobs"%(bwd__,_opts['ext'],_opts['mode']) + _executable = "sub_Mgg_%s_%s"%(_opts['mode'],_opts['ext']) + + # Write details depending on mode + + # For separate submission file per category + if _opts['mode'] == "fTestParallel": + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + co = _opts['catOffset']+cidx + _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") + writePreamble(_f) + # _cmd = "%s/runBackgroundScripts.sh -i %s -p %s -f %s --ext %s --catOffset %g --intLumi %s --year %s --batch %s --queue %s --sigFile %s --isData --fTest"%(bwd__,_opts['dataFile'],_opts['procs'],c,_opts['ext'],co,_opts['lumi'],_opts['year'],_opts['batch'],_opts['queue'],_opts['signalFitWSFile']) + _cmd = ( + f"{bwd__}/runBackgroundScripts.sh" + f" -i {_opts['dataFile']}" + f" -p {_opts['procs']}" + f" -f {c}" + f" --ext {_opts['ext']}" + f" --catOffset {co}" + f" --intLumi {_opts['lumi']}" + f" --year 
{_opts['year']}" + f" --batch {_opts['batch']}" + f" --queue {_opts['queue']}" + f" --sigFile {_opts['signalFitWSFile']}" + f" --massLow {_opts['mggLow']}" + f" --massHigh {_opts['mggHigh']}" + f" --binWidth {_opts['binWidth']}" + f" --fitType mgg" + f" --isData --fTest" + ) + _f.write("%s\n"%_cmd) + _f.close() + os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) + + +def writeSGESubFilesMjj(_opts): + _jobdir = "%s/outdir_%s/%s/jobs"%(bwd__,_opts['ext'],_opts['mode']) + _executable = "sub_Mjj_%s_%s"%(_opts['mode'],_opts['ext']) + + # Write details depending on mode + + # For separate submission file per category + if _opts['mode'] == "fTestParallel": + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + co = _opts['catOffset']+cidx + _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") + writePreamble(_f) + # _cmd = "%s/runBackgroundScripts.sh -i %s -p %s -f %s --ext %s --catOffset %g --intLumi %s --year %s --batch %s --queue %s --sigFile %s --isData --fTest"%(bwd__,_opts['dataFile'],_opts['procs'],c,_opts['ext'],co,_opts['lumi'],_opts['year'],_opts['batch'],_opts['queue'],_opts['signalFitWSFile']) + _cmd = ( + f"{bwd__}/runBackgroundScripts.sh" + f" -i {_opts['dataFile']}" + f" -p {_opts['procs']}" + f" -f {c}" + f" --ext {_opts['ext']}" + f" --catOffset {co}" + f" --intLumi {_opts['lumi']}" + f" --year {_opts['year']}" + f" --batch {_opts['batch']}" + f" --queue {_opts['queue']}" + f" --sigFile {_opts['signalFitWSFile']}" + f" --massLow {_opts['mjjLow']}" + f" --massHigh {_opts['mjjHigh']}" + f" --binWidth {_opts['binWidth']}" + f" --fitType mjj" + f" --isData --fTest" + ) + _f.write("%s\n"%_cmd) + _f.close() + os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) + + +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ def writeSubFiles(_opts): # Make directory to store sub files if not os.path.isdir("%s/outdir_%s"%(bwd__,_opts['ext'])): os.system("mkdir 
%s/outdir_%s"%(bwd__,_opts['ext'])) @@ -47,56 +228,62 @@ def writeSubFiles(_opts): # Remove current job files if len(glob.glob("%s/*"%_jobdir)): os.system("rm %s/*"%_jobdir) + # Mgg/Mjj logic + if _opts['fitType'] == "mgg" or _opts['fitType'] == "2D": + writeSubFilesMgg(_opts) + if _opts['fitType'] == "mjj" or _opts['fitType'] == "2D": + writeSubFilesMjj(_opts) + + +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Function for submitting files to batch system +def submitFilesMgg(_opts): + _jobdir = "%s/outdir_%s/%s/jobs"%(bwd__,_opts['ext'],_opts['mode']) # CONDOR if _opts['batch'] == "condor": - _executable = "condor_%s_%s"%(_opts['mode'],_opts['ext']) - _f = open("%s/%s.sh"%(_jobdir,_executable),"w") # single .sh script split into separate jobs - writePreamble(_f) + _executable = "condor_Mgg_%s_%s"%(_opts['mode'],_opts['ext']) + if os.environ['PWD'].startswith("/eos"): + cmdLine = "cd %s; condor_submit -spool %s.sub; cd %s"%(_jobdir,_executable,bwd__) + else: + cmdLine = "cd %s; condor_submit %s.sub; cd %s"%(_jobdir,_executable,bwd__) + run(cmdLine) + print(" --> Finished submitting files") + + # SGE + elif _opts['batch'] in ['IC','SGE']: + _executable = "sub_Mgg_%s_%s"%(_opts['mode'],_opts['ext']) + + # Extract job opts + jobOptsStr = _opts['jobOpts'] - # For looping over categories + # Separate submission per category if( _opts['mode'] == "fTestParallel" ): for cidx in range(_opts['nCats']): c = _opts['cats'].split(",")[cidx] - co = _opts['catOffset']+cidx - _f.write("if [ $1 -eq %g ]; then\n"%cidx) - _cmd = "%s/runBackgroundScripts.sh -i %s -p %s -f %s --ext %s --catOffset %g --intLumi %s --year %s --batch %s --queue %s --sigFile %s --isData --fTest"%(bwd__,_opts['dataFile'],_opts['procs'],c,_opts['ext'],co,_opts['lumi'],_opts['year'],_opts['batch'],_opts['queue'],_opts['signalFitWSFile']) - _f.write(" %s\n"%_cmd) - _f.write("fi\n") - - # Close .sh file - _f.close() - os.system("chmod 775 
%s/%s.sh"%(_jobdir,_executable)) - - # Condor submission file - _fsub = open("%s/%s.sub"%(_jobdir,_executable),"w") - if( _opts['mode'] == "fTestParallel" ): writeCondorSub(_fsub,_executable,_opts['queue'],_opts['nCats'],_opts['jobOpts']) - _fsub.close() - - # SGE... - if (_opts['batch'] == "IC")|(_opts['batch'] == "SGE")|(_opts['batch'] == "local" ): - _executable = "sub_%s_%s"%(_opts['mode'],_opts['ext']) - - # Write details depending on mode + _subfile = "%s/%s_%s"%(_jobdir,_executable,c) + cmdLine = "qsub -q hep.q %s -o %s.log -e %s.err %s.sh"%(jobOptsStr,_subfile,_subfile,_subfile) + run(cmdLine) + print(" --> Finished submitting files") + + # Running locally + elif _opts['batch'] == 'local': + _executable = "sub_Mgg_%s_%s"%(_opts['mode'],_opts['ext']) - # For separate submission file per category - if _opts['mode'] == "fTestParallel": + # Separate submission per category + if( _opts['mode'] == "fTestParallel" ): for cidx in range(_opts['nCats']): c = _opts['cats'].split(",")[cidx] - co = _opts['catOffset']+cidx - _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") - writePreamble(_f) - _cmd = "%s/runBackgroundScripts.sh -i %s -p %s -f %s --ext %s --catOffset %g --intLumi %s --year %s --batch %s --queue %s --sigFile %s --isData --fTest"%(bwd__,_opts['dataFile'],_opts['procs'],c,_opts['ext'],co,_opts['lumi'],_opts['year'],_opts['batch'],_opts['queue'],_opts['signalFitWSFile']) - _f.write("%s\n"%_cmd) - _f.close() - os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) - -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Function for submitting files to batch system -def submitFiles(_opts): + _subfile = "%s/%s_%s"%(_jobdir,_executable,c) + cmdLine = "bash %s.sh"%_subfile + run(cmdLine) + print(" --> Finished running files") + + +def submitFilesMjj(_opts): _jobdir = "%s/outdir_%s/%s/jobs"%(bwd__,_opts['ext'],_opts['mode']) # CONDOR if _opts['batch'] == "condor": - _executable = 
"condor_%s_%s"%(_opts['mode'],_opts['ext']) + _executable = "condor_Mjj_%s_%s"%(_opts['mode'],_opts['ext']) if os.environ['PWD'].startswith("/eos"): cmdLine = "cd %s; condor_submit -spool %s.sub; cd %s"%(_jobdir,_executable,bwd__) else: @@ -106,7 +293,7 @@ def submitFiles(_opts): # SGE elif _opts['batch'] in ['IC','SGE']: - _executable = "sub_%s_%s"%(_opts['mode'],_opts['ext']) + _executable = "sub_Mjj_%s_%s"%(_opts['mode'],_opts['ext']) # Extract job opts jobOptsStr = _opts['jobOpts'] @@ -122,7 +309,7 @@ def submitFiles(_opts): # Running locally elif _opts['batch'] == 'local': - _executable = "sub_%s_%s"%(_opts['mode'],_opts['ext']) + _executable = "sub_Mjj_%s_%s"%(_opts['mode'],_opts['ext']) # Separate submission per category if( _opts['mode'] == "fTestParallel" ): @@ -133,4 +320,10 @@ def submitFiles(_opts): run(cmdLine) print(" --> Finished running files") + +def submitFiles(_opts): + if _opts['fitType'] == "mgg" or _opts['fitType'] == "2D": + submitFilesMgg(_opts) + if _opts['fitType'] == "mjj" or _opts['fitType'] == "2D": + submitFilesMjj(_opts) diff --git a/Signal/RunSignalScripts.py b/Signal/RunSignalScripts.py index 3dfea735..65eb41c2 100644 --- a/Signal/RunSignalScripts.py +++ b/Signal/RunSignalScripts.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 # Script for submitting signal fitting jobs for finalfitslite -import os, sys +import os +import sys from optparse import OptionParser from collections import OrderedDict as od @@ -9,6 +10,7 @@ from commonTools import * from commonObjects import * from tools.submissionTools import * +from CollectModels import collect_models, create_empty_json def get_options(): parser = OptionParser() @@ -19,6 +21,12 @@ def get_options(): parser.add_option('--jobOpts', dest='jobOpts', default='', help="Additional options to add to job submission. For Condor separate individual options with a colon (specify all within quotes e.g. 
\"option_xyz = abc+option_123 = 456\")") parser.add_option('--groupSignalFitJobsByCat', dest='groupSignalFitJobsByCat', default=False, action="store_true", help="Option to group signalFit jobs by category") parser.add_option('--printOnly', dest='printOnly', default=False, action="store_true", help="Dry run: print submission files only") + parser.add_option('--fitType', dest='fitType', default='mgg', help="Fit type: mgg, mjj or 2D. (default: mgg)") + parser.add_option('--mggLow', dest='mggLow', default=100, type='int', help="Lower mgg fit range (default: 100)") + parser.add_option('--mggHigh', dest='mggHigh', default=180, type='int', help="Upper mgg fit range (default: 180)") + parser.add_option('--mjjLow', dest='mjjLow', default=80, type='int', help="Lower mjj fit range (default: 80)") + parser.add_option('--mjjHigh', dest='mjjHigh', default=190, type='int', help="Upper mjj fit range (default: 190)") + parser.add_option('--noClean', dest='noClean', default=False, action="store_true", help="Do not clean up old ROOT files and plots. 
(default: False)") return parser.parse_args() (opt,args) = get_options() @@ -58,6 +66,12 @@ def leave(): options['jobOpts'] = opt.jobOpts options['groupSignalFitJobsByCat'] = opt.groupSignalFitJobsByCat options['printOnly'] = opt.printOnly + options['fitType'] = opt.fitType + options['mggLow'] = opt.mggLow + options['mggHigh'] = opt.mggHigh + options['mjjLow'] = opt.mjjLow + options['mjjHigh'] = opt.mjjHigh + options['noClean'] = opt.noClean #Delete copy of file os.system("rm config.py") @@ -106,6 +120,7 @@ def leave(): print(" --> Extension: %s"%options['ext']) print(" --> Analysis: %s"%options['analysis']) print(" --> Year: %s ::: Corresponds to intLumi = %.2f fb^-1"%(options['year'],lumiMap[options['year']])) +print(" --> Fit type: %s"%options['fitType']) if options['mode'] in ['calcPhotonSyst']: print(" --> Photon shape systematics:") print(" * scales = %s"%options['scales']) @@ -132,19 +147,96 @@ def leave(): elif options['mode'] == "packageOnly": print(" --> Packaging signal fits (one file per category)...") print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Remove existing plots +if options["fitType"] == "2D": + plot_fittype_prefix = "M*" +elif options["fitType"] == "mgg": + plot_fittype_prefix = "Mgg*" +elif options["fitType"] == "mjj": + plot_fittype_prefix = "Mjj*" +else: + print(f" --> Invalid fitType {options['fitType']}. 
Exiting to avoid accidental deletion.") + sys.exit(1) + +if options["mode"] == "fTest" and not options["noClean"]: + # Signal fTest + _plotdir = f"{swd__}/outdir_{options['ext']}/fTest/Plots" + pattern = f"{_plotdir}/{plot_fittype_prefix}" + files = glob.glob(pattern) + if len(files) == 0: + print(" --> No existing plots found") + else: + print(f" --> Archiving {len(files)} existing plots") + if not os.path.isdir(f"{_plotdir}/archive"): + os.system(f"mkdir -p {_plotdir}/archive") + for f in files: + os.system(f"mv {f} {_plotdir}/archive") +elif options["mode"] == "signalFit" and not options["noClean"]: + # Signal Fit + _plotdir = f"{swd__}/outdir_{options['ext']}/signalFit/Plots" + pattern = f"{_plotdir}/{plot_fittype_prefix}" + files = glob.glob(pattern) + if len(files) == 0: + print(" --> No existing plots found") + else: + print(f" --> Archiving {len(files)} existing plots") + if not os.path.isdir(f"{_plotdir}/archive"): + os.system(f"mkdir -p {_plotdir}/archive") + for f in files: + print(f" --> Archiving {f.split('/')[-1]}") + os.system(f"mv {f} {_plotdir}/archive") +elif not options["noClean"]: + print(f" --> Invalid mode {options['mode']}. 
Exiting to avoid accidental deletion.") + sys.exit(1) + +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Remove old models.json file +_json_file = "%s/outdir_%s/signalFit/output/models.json"%(swd__,options['ext']) +if options["mode"] == "signalFit" and not options['noClean']: + if os.path.exists(_json_file): + print(" --> Removing old models.json file") + os.remove(_json_file) + else: + print(" --> No old models.json file to remove") + +if options['mode'] == "signalFit" and not os.path.exists(_json_file): + print(" --> Creating empty models.json file") + create_empty_json(_json_file) + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Make directory to store job scripts and output if not os.path.isdir("%s/outdir_%s"%(swd__,options['ext'])): os.system("mkdir %s/outdir_%s"%(swd__,options['ext'])) # Write submission files: style depends on batch system writeSubFiles(options) +# if options['fitType'] == "mgg" or options['fitType'] == "2D": +# writeSubFilesMgg(options) +# if options['fitType'] == "mjj" or options['fitType'] == "2D": +# writeSubFilesMjj(options) print(" --> Finished writing submission scripts") # Submit scripts to batch system if not options['printOnly']: submitFiles(options) + # if options["fitType"] == "mgg" or options['fitType'] == "2D": + # submitFilesMgg(options) + # if options["fitType"] == "mjj" or options['fitType'] == "2D": + # submitFilesMjj(options) else: print(" --> Running with printOnly option. 
Will not submit scripts") # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# 2D Model Construction +if options['fitType'] == "2D": + print(" --> Running 2D model construction") + output_path_base = "%s/outdir_%s/signalFit/output"%(swd__,options['ext']) + if options['mode'] == "fTestParallel": + collect_models( + json_file=f"{output_path_base}/models.json", + output_path=f"{output_path_base}/CMS-2D_sigfit_{options['ext']}_%PROC_%YEAR_%CAT.root", + ws_type="signal", + no_clear=options['noClean'], + ) + leave() diff --git a/Signal/scripts/fTest.py b/Signal/scripts/fTest.py index 06e67dfb..82380cd4 100644 --- a/Signal/scripts/fTest.py +++ b/Signal/scripts/fTest.py @@ -6,7 +6,8 @@ import pandas as pd import pickle import math -import os, sys +import os +import sys import json from optparse import OptionParser import glob @@ -39,6 +40,9 @@ def get_options(): parser.add_option('--threshold', dest='threshold', default=30, type='int', help="Threshold number of events") parser.add_option('--nGaussMax', dest='nGaussMax', default=5, type='int', help="Max number of gaussians to test") parser.add_option('--skipWV', dest='skipWV', default=False, action="store_true", help="Skip processing of WV case") + parser.add_option('--weightName', dest='weightName', default='weight', help="Weight name to apply to dataset (default: weight)") + parser.add_option('--mggLow', dest='mggLow', default=100, type='int', help="Lower mgg fit range (default: 100)") + parser.add_option('--mggHigh', dest='mggHigh', default=180, type='int', help="Upper mgg fit range (default: 180)") # Minimizer options parser.add_option('--minimizerMethod', dest='minimizerMethod', default='TNC', help="(Scipy) Minimizer method") parser.add_option('--minimizerTolerance', dest='minimizerTolerance', default=1e-8, type='float', help="(Scipy) Minimizer toleranve") @@ -101,7 +105,25 @@ def get_options(): min_reduced_chi2, nGauss_opt = 999, 1 for nGauss in range(1,opt.nGaussMax+1): k = "nGauss_%g"%nGauss - 
ssf = SimultaneousFit("fTest_RV_%g"%nGauss,proc,opt.cat,datasets_RV,xvar.Clone(),MH,MHLow,MHHigh,opt.mass,opt.nBins,0,opt.minimizerMethod,opt.minimizerTolerance,verbose=False) + ssf = SimultaneousFit( + "fTest_Mgg_RV_%g" % nGauss, + proc, + opt.cat, + datasets_RV, + xvar.Clone(), + MH, + MHLow, + MHHigh, + opt.mass, + opt.nBins, + 0, + opt.minimizerMethod, + opt.minimizerTolerance, + xhigh=opt.mggHigh, + xlow=opt.mggLow, + fitType="mgg", + verbose=False + ) ssf.buildNGaussians(nGauss) ssf.runFit() ssf.buildSplines() @@ -115,8 +137,26 @@ def get_options(): df.loc[df['proc']==proc,'nRV'] = nGauss_opt # Make plots if( opt.doPlots )&( len(ssfs.keys())!=0 ): - plotFTest(ssfs,_opt=nGauss_opt,_outdir="%s/outdir_%s/fTest/Plots"%(swd__,opt.ext),_extension="RV",_proc=proc,_cat=opt.cat,_mass=opt.mass) - plotFTestResults(ssfs,_opt=nGauss_opt,_outdir="%s/outdir_%s/fTest/Plots"%(swd__,opt.ext),_extension="RV",_proc=proc,_cat=opt.cat,_mass=opt.mass) + plotFTest( + ssfs, + _opt=nGauss_opt, + _outdir="%s/outdir_%s/fTest/Plots"%(swd__,opt.ext), + _extension="RV", + _proc=proc, + _cat=opt.cat, + _mass=opt.mass, + _fnameprefix="Mgg", + ) + plotFTestResults( + ssfs, + _opt=nGauss_opt, + _outdir="%s/outdir_%s/fTest/Plots" % (swd__, opt.ext), + _extension="RV", + _proc=proc, + _cat=opt.cat, + _mass=opt.mass, + _fnameprefix="Mgg", + ) # Run fTest: WV # If numEntries below threshold then keep as n = 1 @@ -126,7 +166,25 @@ def get_options(): min_reduced_chi2, nGauss_opt = 999, 1 for nGauss in range(1,opt.nGaussMax+1): k = "nGauss_%g"%nGauss - ssf = SimultaneousFit("fTest_WV_%g"%nGauss,proc,opt.cat,datasets_WV,xvar.Clone(),MH,MHLow,MHHigh,opt.mass,opt.nBins,0,opt.minimizerMethod,opt.minimizerTolerance,verbose=False) + ssf = SimultaneousFit( + "fTest_Mgg_WV_%g" % nGauss, + proc, + opt.cat, + datasets_WV, + xvar.Clone(), + MH, + MHLow, + MHHigh, + opt.mass, + opt.nBins, + 0, + opt.minimizerMethod, + opt.minimizerTolerance, + xhigh=opt.mggHigh, + xlow=opt.mggLow, + fitType="mgg", + 
verbose=False + ) ssf.buildNGaussians(nGauss) ssf.runFit() ssf.buildSplines() @@ -140,8 +198,26 @@ def get_options(): df.loc[df['proc']==proc,'nWV'] = nGauss_opt # Make plots if( opt.doPlots )&( len(ssfs.keys())!=0 ): - plotFTest(ssfs,_opt=nGauss_opt,_outdir="%s/outdir_%s/fTest/Plots"%(swd__,opt.ext),_extension="WV",_proc=proc,_cat=opt.cat,_mass=opt.mass) - plotFTestResults(ssfs,_opt=nGauss_opt,_outdir="%s/outdir_%s/fTest/Plots"%(swd__,opt.ext),_extension="WV",_proc=proc,_cat=opt.cat,_mass=opt.mass) + plotFTest( + ssfs, + _opt=nGauss_opt, + _outdir="%s/outdir_%s/fTest/Plots" % (swd__, opt.ext), + _extension="WV", + _proc=proc, + _cat=opt.cat, + _mass=opt.mass, + _fnameprefix="Mgg", + ) + plotFTestResults( + ssfs, + _opt=nGauss_opt, + _outdir="%s/outdir_%s/fTest/Plots" % (swd__, opt.ext), + _extension="WV", + _proc=proc, + _cat=opt.cat, + _mass=opt.mass, + _fnameprefix="Mgg", + ) # Close ROOT file inputWS.Delete() @@ -149,7 +225,7 @@ def get_options(): # Make output if not os.path.isdir("%s/outdir_%s/fTest/json"%(swd__,opt.ext)): os.system("mkdir %s/outdir_%s/fTest/json"%(swd__,opt.ext)) -ff = open("%s/outdir_%s/fTest/json/nGauss_%s.json"%(swd__,opt.ext,opt.cat),"w") +ff = open("%s/outdir_%s/fTest/json/nGauss_Mgg_%s.json"%(swd__,opt.ext,opt.cat),"w") ff.write("{\n") # Iterate over rows in dataframe: sorted by sumEntries pitr = 1 diff --git a/Signal/scripts/fTestMjj.py b/Signal/scripts/fTestMjj.py new file mode 100644 index 00000000..de2e1a09 --- /dev/null +++ b/Signal/scripts/fTestMjj.py @@ -0,0 +1,250 @@ +# Python script to perform signal modelling fTest: extract number of gaussians for final fit +# * run per category over single mass point + +print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG SIGNAL FTEST ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") +import ROOT +import pandas as pd +import pickle +import math +import os +import sys +import json +from optparse import OptionParser +import glob +import re +from collections import OrderedDict as od + +from commonTools import * 
+from commonObjects import * +from signalTools import * +from simultaneousFit import * +from plottingTools import * + +MHLow, MHHigh = '120', '130' + +def leave(): + print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG SIGNAL FTEST (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") + exit(0) + +def get_options(): + parser = OptionParser() + parser.add_option("--xvar", dest='xvar', default='CMS_hgg_mass', help="Observable to fit") + parser.add_option("--inputWSDir", dest='inputWSDir', default='', help="Input flashgg WS directory") + parser.add_option("--ext", dest='ext', default='', help="Extension") + parser.add_option("--procs", dest='procs', default='', help="Signal processes") + parser.add_option("--nProcsToFTest", dest='nProcsToFTest', default=5, type='int',help="Number of signal processes to fTest (ordered by sum entries), others are set to nRV=1,nWV=1. Set to -1 to run over all") + parser.add_option("--cat", dest='cat', default='', help="RECO category") + parser.add_option('--mass', dest='mass', default='125', help="Mass point to fit") + parser.add_option('--doPlots', dest='doPlots', default=False, action="store_true", help="Produce Signal fTest plots") + parser.add_option('--nBins', dest='nBins', default=80, type='int', help="Number of bins for fit") + parser.add_option('--threshold', dest='threshold', default=30, type='int', help="Threshold number of events") + parser.add_option('--nGaussMax', dest='nGaussMax', default=5, type='int', help="Max number of gaussians to test") + parser.add_option('--skipWV', dest='skipWV', default=False, action="store_true", help="Skip processing of WV case") + parser.add_option('--weightName', dest='weightName', default='weight', help="Weight name to apply to dataset (default: weight)") + parser.add_option('--mjjLow', dest='mjjLow', default=80, type='int', help="Lower mjj fit range (default: 80)") + parser.add_option('--mjjHigh', dest='mjjHigh', default=190, type='int', help="Upper mjj fit range (default: 190)") + # Minimizer options + 
parser.add_option('--minimizerMethod', dest='minimizerMethod', default='TNC', help="(Scipy) Minimizer method") + parser.add_option('--minimizerTolerance', dest='minimizerTolerance', default=1e-8, type='float', help="(Scipy) Minimizer tolerance") + return parser.parse_args() +(opt,args) = get_options() + +ROOT.gStyle.SetOptStat(0) +ROOT.gROOT.SetBatch(True) +if opt.doPlots: + if not os.path.isdir("%s/outdir_%s/fTest/Plots"%(swd__,opt.ext)): os.system("mkdir %s/outdir_%s/fTest/Plots"%(swd__,opt.ext)) + +# Load xvar to fit +nominalWSFileName = glob.glob("%s/output*"%(opt.inputWSDir))[0] +f0 = ROOT.TFile(nominalWSFileName,"read") +inputWS0 = f0.Get(inputWSName__) +xvar = inputWS0.var(opt.xvar) +xvarFit = xvar.Clone() +dZ = inputWS0.var("dZ") +aset = ROOT.RooArgSet(xvar,dZ) +f0.Close() + +# Create MH var +MH = ROOT.RooRealVar("MH","m_{H}", int(MHLow), int(MHHigh)) +MH.setUnit("GeV") +MH.setConstant(True) + +# Loop over processes: extract sum entries and fill dict. Default nRV,nWV = 1,1 +df = pd.DataFrame(columns=['proc','sumEntries','nRV','nWV']) +procYields = od() +for proc in opt.procs.split(","): + WSFileName = glob.glob("%s/output*M%s*%s.root"%(opt.inputWSDir,opt.mass,proc))[0] + f = ROOT.TFile(WSFileName,"read") + inputWS = f.Get(inputWSName__) + d = reduceDataset(inputWS.data("%s_%s_%s_%s"%(procToData(proc.split("_")[0]),opt.mass,sqrts__,opt.cat)),aset) + df.loc[len(df)] = [proc,d.sumEntries(),1,1] + inputWS.Delete() + f.Close() + +# Extract processes to perform fTest (i.e. 
first nProcsToFTest): +if( opt.nProcsToFTest == -1)|( opt.nProcsToFTest > len(opt.procs.split(",")) ): procsToFTest = opt.procs.split(",") +else: procsToFTest = list(df.sort_values('sumEntries',ascending=False)[0:opt.nProcsToFTest].proc.values) +for pidx, proc in enumerate(procsToFTest): + + print("\n --> Process (%g): %s"%(pidx,proc)) + + # Split dataset to RV/WV: ssf requires input as dict (with mass point as key) + datasets_RV, datasets_WV = od(), od() + WSFileName = glob.glob("%s/output*M%s*%s.root"%(opt.inputWSDir,opt.mass,proc))[0] + f = ROOT.TFile(WSFileName,"read") + inputWS = f.Get(inputWSName__) + d = reduceDataset(inputWS.data("%s_%s_%s_%s"%(procToData(proc.split("_")[0]),opt.mass,sqrts__,opt.cat)),aset) + datasets_RV[opt.mass] = splitRVWV(d,aset,mode="RV") + datasets_WV[opt.mass] = splitRVWV(d,aset,mode="WV") + + # Run fTest: RV + # If numEntries below threshold then keep as n = 1 + if datasets_RV[opt.mass].numEntries() < opt.threshold: continue + else: + ssfs = od() + min_reduced_chi2, nGauss_opt = 999, 1 + for nGauss in range(1,opt.nGaussMax+1): + k = "nGauss_%g"%nGauss + ssf = SimultaneousFit( + "fTest_Mjj_RV_%g" % nGauss, + proc, + opt.cat, + datasets_RV, + xvar.Clone(), + MH, + MHLow, + MHHigh, + opt.mass, + opt.nBins, + 0, + opt.minimizerMethod, + opt.minimizerTolerance, + xhigh=opt.mjjHigh, + xlow=opt.mjjLow, + fitType="mjj", + verbose=False + ) + ssf.buildNGaussians(nGauss) + ssf.runFit() + ssf.buildSplines() + if ssf.Ndof >= 1: + ssfs[k] = ssf + if ssfs[k].getReducedChi2() < min_reduced_chi2: + min_reduced_chi2 = ssfs[k].getReducedChi2() + nGauss_opt = nGauss + print(" * (%s,%s,RV): nGauss = %g, chi^2/n(dof) = %.4f"%(proc,opt.cat,nGauss,ssfs[k].getReducedChi2())) + funcname = "gauss" # change if adding other functions + # Set optimum + df.loc[df['proc']==proc,'nRV'] = nGauss_opt + # Make plots + if( opt.doPlots )&( len(ssfs.keys())!=0 ): + plotFTest( + ssfs, + _opt=nGauss_opt, + _outdir="%s/outdir_%s/fTest/Plots"%(swd__,opt.ext), + 
_extension="RV", + _proc=proc, + _cat=opt.cat, + _mass=opt.mass, + _fnameprefix="Mjj", + _xaxislabel="m_{jj} [GeV]", + _xrange=[opt.mjjLow,opt.mjjHigh], + ) + plotFTestResults( + ssfs, + _opt=nGauss_opt, + _outdir="%s/outdir_%s/fTest/Plots" % (swd__, opt.ext), + _extension="RV", + _proc=proc, + _cat=opt.cat, + _mass=opt.mass, + _fnameprefix="Mjj", + _funcname=funcname, + ) + + # Run fTest: WV + # If numEntries below threshold then keep as n = 1 + if( datasets_WV[opt.mass].numEntries() < opt.threshold )|( opt.skipWV ): continue + else: + ssfs = od() + min_reduced_chi2, nGauss_opt = 999, 1 + for nGauss in range(1,opt.nGaussMax+1): + k = "nGauss_%g"%nGauss + ssf = SimultaneousFit( + "fTest_Mjj_WV_%g" % nGauss, + proc, + opt.cat, + datasets_WV, + xvar.Clone(), + MH, + MHLow, + MHHigh, + opt.mass, + opt.nBins, + 0, + opt.minimizerMethod, + opt.minimizerTolerance, + xhigh=opt.mjjHigh, + xlow=opt.mjjLow, + fitType="mjj", + verbose=False + ) + ssf.buildNGaussians(nGauss) + ssf.runFit() + ssf.buildSplines() + if ssf.Ndof >= 1: + ssfs[k] = ssf + if ssfs[k].getReducedChi2() < min_reduced_chi2: + min_reduced_chi2 = ssfs[k].getReducedChi2() + nGauss_opt = nGauss + print(" * (%s,%s,WV): nGauss = %g, chi^2/n(dof) = %.4f"%(proc,opt.cat,nGauss,ssfs[k].getReducedChi2())) + funcname = "gauss" # change if adding other functions + # Set optimum + df.loc[df['proc']==proc,'nWV'] = nGauss_opt + # Make plots + if( opt.doPlots )&( len(ssfs.keys())!=0 ): + plotFTest( + ssfs, + _opt=nGauss_opt, + _outdir="%s/outdir_%s/fTest/Plots" % (swd__, opt.ext), + _extension="WV", + _proc=proc, + _cat=opt.cat, + _mass=opt.mass, + _fnameprefix="Mjj", + _xaxislabel="m_{jj} [GeV]", + _xrange=[opt.mjjLow,opt.mjjHigh], + ) + plotFTestResults( + ssfs, + _opt=nGauss_opt, + _outdir="%s/outdir_%s/fTest/Plots" % (swd__, opt.ext), + _extension="WV", + _proc=proc, + _cat=opt.cat, + _mass=opt.mass, + _fnameprefix="Mjj", + _funcname=funcname, + ) + + # Close ROOT file + inputWS.Delete() + f.Close() + 
+json_func_shortname = "nGauss" # change if adding other functions + +# Make output +if not os.path.isdir("%s/outdir_%s/fTest/json"%(swd__,opt.ext)): os.system("mkdir %s/outdir_%s/fTest/json"%(swd__,opt.ext)) +ff = open("%s/outdir_%s/fTest/json/%s_Mjj_%s.json"%(swd__,opt.ext,json_func_shortname,opt.cat),"w") +ff.write("{\n") +# Iterate over rows in dataframe: sorted by sumEntries +pitr = 1 +for ir,r in df.sort_values('sumEntries',ascending=False).iterrows(): + k = "\"%s__%s\""%(r['proc'],opt.cat) + ff.write(" %-90s : {\"nRV\":%s,\"nWV\":%s}"%(k,r['nRV'],r['nWV'])) + # Drop comma for last proc + if pitr == len(df): ff.write("\n") + else: ff.write(",\n") + pitr += 1 +ff.write("}") +ff.close() diff --git a/Signal/tools/plottingTools.py b/Signal/tools/plottingTools.py index 579845b1..ccad7e37 100644 --- a/Signal/tools/plottingTools.py +++ b/Signal/tools/plottingTools.py @@ -67,7 +67,19 @@ def getEffSigma(_h): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Ftest: plots # Plot possible nGauss fits and chi2 values -def plotFTest(ssfs,_opt=1,_outdir='./',_extension='',_proc='',_cat='',_mass='125'): +def plotFTest( + ssfs, + _opt=1, + _outdir='./', + _extension='', + _proc='', + _cat='', + _mass='125', + _fnameprefix='', # Prefix for output file name + _xaxislabel='m_{#gamma#gamma} [GeV]', # Label for x-axis + _xrange=[115,140], # Range for x-axis + _funcname="gauss", # Short name of the order-varied function to display on plots +): canv = ROOT.TCanvas() canv.SetLeftMargin(0.15) LineColorMap = {'1':ROOT.kAzure+1,'2':ROOT.kRed-4,'3':ROOT.kGreen+2,'4':ROOT.kMagenta-9,'5':ROOT.kOrange} @@ -82,11 +94,11 @@ def plotFTest(ssfs,_opt=1,_outdir='./',_extension='',_proc='',_cat='',_mass='125 else: hists[k].SetLineWidth(1) hists[k].SetLineColor(LineColorMap[k.split("_")[-1]]) hists[k].SetTitle("") - hists[k].GetXaxis().SetTitle("m_{#gamma#gamma} [GeV]") + hists[k].GetXaxis().SetTitle(_xaxislabel) hists[k].SetMinimum(0) 
if hists[k].GetMaximum()>hmax: hmax = hists[k].GetMaximum() if hists[k].GetMinimum()hmax: hmax = hists['data'].GetMaximum() if hists['data'].GetMinimum() 0 and not _fnameprefix.endswith("_"): + _fnameprefix += "_" canv.Update() - canv.SaveAs("%s/fTest_%s_%s_%s.png"%(_outdir,_cat,_proc,_extension)) - canv.SaveAs("%s/fTest_%s_%s_%s.pdf"%(_outdir,_cat,_proc,_extension)) + canv.SaveAs("%s/%sfTest_%s_%s_%s.png"%(_outdir,_fnameprefix,_cat,_proc,_extension)) + canv.SaveAs("%s/%sfTest_%s_%s_%s.pdf"%(_outdir,_fnameprefix,_cat,_proc,_extension)) # Plot reduced chi2 vs nGauss -def plotFTestResults(ssfs,_opt,_outdir="./",_extension='',_proc='',_cat='',_mass='125'): +def plotFTestResults( + ssfs, + _opt, + _outdir="./", + _extension='', + _proc='', + _cat='', + _mass='125', + _fnameprefix='', # Prefix for output file name + _funcname="gauss", # Short name of the order-varied function to display on plots + +): canv = ROOT.TCanvas() gr = ROOT.TGraph() # Loop over nGuassians @@ -151,7 +176,7 @@ def plotFTestResults(ssfs,_opt,_outdir="./",_extension='',_proc='',_cat='',_mass # Draw axes haxes = ROOT.TH1F("h_axes_%s_%s"%(_proc,_extension),"h_axes_%s_%s"%(_proc,_extension),xmax+1,0,xmax+1) haxes.SetTitle("") - haxes.GetXaxis().SetTitle("N_{gauss}") + haxes.GetXaxis().SetTitle("N_{%s}"%(_funcname)) haxes.GetXaxis().SetTitleSize(0.05) haxes.GetXaxis().SetTitleOffset(0.85) haxes.GetXaxis().SetLabelSize(0.035) @@ -175,15 +200,34 @@ def plotFTestResults(ssfs,_opt,_outdir="./",_extension='',_proc='',_cat='',_mass lat.SetNDC() lat.SetTextSize(0.03) lat.DrawLatex(0.9,0.92,"( %s , %s , %s )"%(_extension,_proc,_cat)) - lat.DrawLatex(0.6,0.75,"Optimum N_{gauss} = %s"%_opt) + lat.DrawLatex(0.6,0.75,"Optimum N_{%s} = %s"%(_funcname,_opt)) + + if len(_fnameprefix) > 0 and not _fnameprefix.endswith("_"): + _fnameprefix += "_" + + funcname_ncap = "n" + for i, c in enumerate(_funcname): + if i == 0: + funcname_ncap += c.upper() + else: + funcname_ncap += c canv.Update() - 
canv.SaveAs("%s/fTest_%s_%s_%s_chi2_vs_nGauss.png"%(_outdir,_cat,_proc,_extension)) - canv.SaveAs("%s/fTest_%s_%s_%s_chi2_vs_nGauss.pdf"%(_outdir,_cat,_proc,_extension)) + canv.SaveAs("%s/%sfTest_%s_%s_%s_chi2_vs_%s.png"%(_outdir,_fnameprefix,_cat,_proc,_extension,funcname_ncap)) + canv.SaveAs("%s/%sfTest_%s_%s_%s_chi2_vs_%s.pdf"%(_outdir,_fnameprefix,_cat,_proc,_extension,funcname_ncap)) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Signal fit plots # Plot final pdf at MH = 125 (with data) + individual Pdf components -def plotPdfComponents(ssf,_outdir='./',_extension='',_proc='',_cat=''): +def plotPdfComponents( + ssf, + _outdir='./', + _extension='', + _proc='', + _cat='', + _fnameprefix='', # Prefix for output file name + _xaxislabel='m_{#gamma#gamma} [GeV]', # Label for x-axis + _xrange=[100,150], # Range for x-axis +): canv = ROOT.TCanvas() canv.SetLeftMargin(0.15) ssf.MH.setVal(125) @@ -196,20 +240,20 @@ def plotPdfComponents(ssf,_outdir='./',_extension='',_proc='',_cat=''): hists['final'].SetLineWidth(2) hists['final'].SetLineColor(1) hists['final'].SetTitle("") - hists['final'].GetXaxis().SetTitle("m_{#gamma#gamma} [GeV]") + hists['final'].GetXaxis().SetTitle(_xaxislabel) hists['final'].SetMinimum(0) if hists['final'].GetMaximum()>hmax: hmax = hists['final'].GetMaximum() if hists['final'].GetMinimum() 0 and not _fnameprefix.endswith("_"): + _fnameprefix += "_" canv.Update() - canv.SaveAs("%s/%sshape_pdf_components_%s_%s.png"%(_outdir,_extension,_proc,_cat)) - canv.SaveAs("%s/%sshape_pdf_components_%s_%s.pdf"%(_outdir,_extension,_proc,_cat)) + canv.SaveAs("%s/%s%sshape_pdf_components_%s_%s.png"%(_outdir,_fnameprefix,_extension,_proc,_cat)) + canv.SaveAs("%s/%s%sshape_pdf_components_%s_%s.pdf"%(_outdir,_fnameprefix,_extension,_proc,_cat)) # Plot final pdf for each mass point -def plotInterpolation(_finalModel,_outdir='./',_massPoints='120,121,122,123,124,125,126,127,128,129,130'): +def 
plotInterpolation( + _finalModel, + _outdir='./', + _massPoints='120,121,122,123,124,125,126,127,128,129,130', + _fnameprefix='', # Prefix for output file name + _xaxislabel='m_{#gamma#gamma} [GeV]', # Label for x-axis + _xrange=[100,150], # Range for x-axis +): canv = ROOT.TCanvas() colors = [ROOT.kRed,ROOT.kCyan,ROOT.kBlue+1,ROOT.kOrange-3,ROOT.kMagenta-7,ROOT.kGreen+1,ROOT.kYellow-7,ROOT.kViolet+6,ROOT.kTeal+1,ROOT.kPink+1,ROOT.kAzure+1] @@ -315,11 +368,11 @@ def plotInterpolation(_finalModel,_outdir='./',_massPoints='120,121,122,123,124, # Extract first hist and clone for axes haxes = hists[list(hists.keys())[0]].Clone() - haxes.GetXaxis().SetTitle("m_{#gamma#gamma} [GeV]") + haxes.GetXaxis().SetTitle(_xaxislabel) haxes.GetYaxis().SetTitle("Events / %.2f GeV"%((_finalModel.xvar.getMax()-_finalModel.xvar.getMin())/_finalModel.xvar.getBins())) haxes.SetMinimum(0) haxes.SetMaximum(hmax*1.2) - haxes.GetXaxis().SetRangeUser(100,150) + haxes.GetXaxis().SetRangeUser(_xrange[0],_xrange[1]) haxes.Draw("AXIS") # Draw rest of histograms @@ -337,9 +390,11 @@ def plotInterpolation(_finalModel,_outdir='./',_massPoints='120,121,122,123,124, lat.DrawLatex(0.9,0.92,"%s"%(_finalModel.name)) + if len(_fnameprefix) > 0 and not _fnameprefix.endswith("_"): + _fnameprefix += "_" canv.Update() - canv.SaveAs("%s/%s_model_vs_mH.png"%(_outdir,_finalModel.name)) - canv.SaveAs("%s/%s_model_vs_mH.pdf"%(_outdir,_finalModel.name)) + canv.SaveAs("%s/%s%s_model_vs_mH.png"%(_outdir,_fnameprefix,_finalModel.name)) + canv.SaveAs("%s/%s%s_model_vs_mH.pdf"%(_outdir,_fnameprefix,_finalModel.name)) @@ -347,7 +402,14 @@ def plotInterpolation(_finalModel,_outdir='./',_massPoints='120,121,122,123,124, # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Plot splines -def plotSplines(_finalModel,_outdir="./",_nominalMass='125',splinesToPlot=['xs','br','ea','fracRV']): +def plotSplines( + _finalModel, + _outdir="./", + _nominalMass='125', + 
splinesToPlot=['xs','br','ea','fracRV'], + _fnameprefix='', # Prefix for output file name + _xaxislabel='m_{H} [GeV]', # Label for x-axis +): canv = ROOT.TCanvas() colorMap = {'xs':ROOT.kRed-4,'br':ROOT.kAzure+1,'ea':ROOT.kGreen+1,'fracRV':ROOT.kMagenta-7,'norm':ROOT.kBlack} grs = od() @@ -382,7 +444,7 @@ def plotSplines(_finalModel,_outdir="./",_nominalMass='125',splinesToPlot=['xs', # Draw axes haxes = ROOT.TH1F("h_axes_spl","h_axes_spl",int(_finalModel.MHHigh)-int(_finalModel.MHLow),int(_finalModel.MHLow),int(_finalModel.MHHigh)) haxes.SetTitle("") - haxes.GetXaxis().SetTitle("m_{H} [GeV]") + haxes.GetXaxis().SetTitle(_xaxislabel) haxes.GetXaxis().SetTitleSize(0.05) haxes.GetXaxis().SetTitleOffset(0.85) haxes.GetXaxis().SetLabelSize(0.035) @@ -417,13 +479,23 @@ def plotSplines(_finalModel,_outdir="./",_nominalMass='125',splinesToPlot=['xs', lat.SetNDC() lat.SetTextSize(0.03) lat.DrawLatex(0.9,0.92,"%s"%(_finalModel.name)) + if len(_fnameprefix) > 0 and not _fnameprefix.endswith("_"): + _fnameprefix += "_" canv.Update() - canv.SaveAs("%s/%s_splines.png"%(_outdir,_finalModel.name)) - canv.SaveAs("%s/%s_splines.pdf"%(_outdir,_finalModel.name)) + canv.SaveAs("%s/%s%s_splines.png"%(_outdir,_fnameprefix,_finalModel.name)) + canv.SaveAs("%s/%s%s_splines.pdf"%(_outdir,_fnameprefix,_finalModel.name)) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Function for plotting final signal model: neat -def plotSignalModel(_hists,_opt,_outdir=".",offset=0.02): +def plotSignalModel( + _hists, + _opt, + _outdir=".", + offset=0.02, + _fnameprefix='', # Prefix for output file name + _xrange=[105,140], + _proctext='H #rightarrow #gamma#gamma', # LaTeX of process on plot + ): colorMap = {'2016':38,'2017':30,'2018':46,'2022preEE':38,'2022postEE':30} canv = ROOT.TCanvas("c","c",650,600) canv.SetBottomMargin(0.12) @@ -434,7 +506,7 @@ def plotSignalModel(_hists,_opt,_outdir=".",offset=0.02): h_axes.Reset() 
h_axes.SetMaximum(_hists['data'].GetMaximum()*1.2) h_axes.SetMinimum(0.) - h_axes.GetXaxis().SetRangeUser(105,140) + h_axes.GetXaxis().SetRangeUser(_xrange[0],_xrange[1]) h_axes.SetTitle("") h_axes.GetXaxis().SetTitle("%s (%s)"%(_opt.xvar.split(":")[1],_opt.xvar.split(":")[2])) h_axes.GetXaxis().SetTitleSize(0.05) @@ -536,7 +608,7 @@ def plotSignalModel(_hists,_opt,_outdir=".",offset=0.02): lat0.SetTextSize(0.045) lat0.DrawLatex(0.15,0.92,"#bf{CMS} #it{%s}"%_opt.label) lat0.DrawLatex(0.77,0.92,"%s TeV"%(sqrts__.split("TeV")[0])) - lat0.DrawLatex(0.16+offset,0.83,"H #rightarrow #gamma#gamma") + lat0.DrawLatex(0.16+offset,0.83,"%s"%_proctext) # Load translations translateCats = {} if _opt.translateCats is None else LoadTranslations(_opt.translateCats) @@ -563,14 +635,16 @@ def plotSignalModel(_hists,_opt,_outdir=".",offset=0.02): lat1.DrawLatex(0.83,0.8,"%s %s"%(procStr,yearStr)) canv.Update() + if len(_fnameprefix) > 0 and not _fnameprefix.endswith("_"): + _fnameprefix += "_" # Write effSigma to file if len(_opt.years.split(",")) >1: es = {} es['combined'] = effSigma for year in _opt.years.split(","): es[year] = getEffSigma(_hists['pdf_%s'%year]) - with open("%s/effSigma_%s.json"%(_outdir,catExt),"w") as jf: json.dump(es,jf) + with open("%s/%seffSigma_%s.json"%(_outdir,_fnameprefix,catExt),"w") as jf: json.dump(es,jf) # Save canvas - canv.SaveAs("%s/smodel_%s%s%s.pdf"%(_outdir,catExt,procExt,yearExt)) - canv.SaveAs("%s/smodel_%s%s%s.png"%(_outdir,catExt,procExt,yearExt)) + canv.SaveAs("%s/%ssmodel_%s%s%s.pdf"%(_outdir,_fnameprefix,catExt,procExt,yearExt)) + canv.SaveAs("%s/%ssmodel_%s%s%s.png"%(_outdir,_fnameprefix,catExt,procExt,yearExt)) diff --git a/Signal/tools/submissionTools.py b/Signal/tools/submissionTools.py index e0b99a3a..7688344f 100644 --- a/Signal/tools/submissionTools.py +++ b/Signal/tools/submissionTools.py @@ -37,153 +37,563 @@ def writeCondorSub(_file,_exec,_queue,_nJobs,_jobOpts,doHoldOnFailure=True,doPer _file.write("queue %g"%_nJobs) # 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -def writeSubFiles(_opts): - # Make directory to store sub files - if not os.path.isdir("%s/outdir_%s"%(swd__,_opts['ext'])): os.system("mkdir %s/outdir_%s"%(swd__,_opts['ext'])) - if not os.path.isdir("%s/outdir_%s/%s"%(swd__,_opts['ext'],_opts['mode'])): os.system("mkdir %s/outdir_%s/%s"%(swd__,_opts['ext'],_opts['mode'])) - if not os.path.isdir("%s/outdir_%s/%s/jobs"%(swd__,_opts['ext'],_opts['mode'])): os.system("mkdir %s/outdir_%s/%s/jobs"%(swd__,_opts['ext'],_opts['mode'])) - +def writeCondorSubFilesMgg(_opts): _jobdir = "%s/outdir_%s/%s/jobs"%(swd__,_opts['ext'],_opts['mode']) - # Remove current job files - if len(glob.glob("%s/*"%_jobdir)): os.system("rm %s/*"%_jobdir) - - # CONDOR - if _opts['batch'] == "condor": - _executable = "condor_%s_%s"%(_opts['mode'],_opts['ext']) - _f = open("%s/%s.sh"%(_jobdir,_executable),"w") # single .sh script split into separate jobs - writePreamble(_f) + _executable = "condor_Mgg_%s_%s"%(_opts['mode'],_opts['ext']) + _f = open("%s/%s.sh"%(_jobdir,_executable),"w") # single .sh script split into separate jobs + writePreamble(_f) - # Write details depending on mode + # Write details depending on mode - # For looping over proc x cat - if( _opts['mode'] == "signalFit" )&( not _opts['groupSignalFitJobsByCat'] ): - for pidx in range(_opts['nProcs']): - for cidx in range(_opts['nCats']): - pcidx = pidx*_opts['nCats']+cidx - p,c = _opts['procs'].split(",")[pidx], _opts['cats'].split(",")[cidx] - _f.write("if [ $1 -eq %g ]; then\n"%pcidx) - _f.write(" python3 %s/scripts/signalFit.py --inputWSDir %s --ext %s --proc %s --cat %s --year %s --analysis %s --massPoints %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],p,c,_opts['year'],_opts['analysis'],_opts['massPoints'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) - 
_f.write("fi\n") - - # For looping over categories - elif( _opts['mode'] == "signalFit" )&( _opts['groupSignalFitJobsByCat'] ): + # For looping over proc x cat + if( _opts['mode'] == "signalFit" )&( not _opts['groupSignalFitJobsByCat'] ): + for pidx in range(_opts['nProcs']): for cidx in range(_opts['nCats']): - c = _opts['cats'].split(",")[cidx] - _f.write("if [ $1 -eq %g ]; then\n"%cidx) - for pidx in range(_opts['nProcs']): - p = _opts['procs'].split(",")[pidx] - _f.write(" python3 %s/scripts/signalFit.py --inputWSDir %s --ext %s --proc %s --cat %s --year %s --analysis %s --massPoints %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],p,c,_opts['year'],_opts['analysis'],_opts['massPoints'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) + pcidx = pidx*_opts['nCats']+cidx + p,c = _opts['procs'].split(",")[pidx], _opts['cats'].split(",")[cidx] + _f.write("if [ $1 -eq %g ]; then\n"%pcidx) + _f.write( + f" python3 {swd__}/scripts/signalFit.py " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--ext {_opts['ext']} " + + f"--proc {p} " + + f"--cat {c} " + + f"--year {_opts['year']} " + + f"--analysis {_opts['analysis']} " + + f"--massPoints {_opts['massPoints']} " + + f"--scales '{_opts['scales']}' " + + f"--scalesCorr '{_opts['scalesCorr']}' " + + f"--scalesGlobal '{_opts['scalesGlobal']}' " + + f"--smears '{_opts['smears']}' " + + f"--weightName {_opts['weightName']} " + + f"--mggLow {_opts['mggLow']} " + + f"--mggHigh {_opts['mggHigh']} " + + f"{_opts['modeOpts']}\n" + ) _f.write("fi\n") + + # For looping over categories + elif( _opts['mode'] == "signalFit" )&( _opts['groupSignalFitJobsByCat'] ): + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _f.write("if [ $1 -eq %g ]; then\n"%cidx) + for pidx in range(_opts['nProcs']): + p = _opts['procs'].split(",")[pidx] + _f.write( + f" python3 {swd__}/scripts/signalFit.py " + + 
f"--inputWSDir {_opts['inputWSDir']} " + + f"--ext {_opts['ext']} " + + f"--proc {p} " + + f"--cat {c} " + + f"--year {_opts['year']} " + + f"--analysis {_opts['analysis']} " + + f"--massPoints {_opts['massPoints']} " + + f"--scales '{_opts['scales']}' " + + f"--scalesCorr '{_opts['scalesCorr']}' " + + f"--scalesGlobal '{_opts['scalesGlobal']}' " + + f"--smears '{_opts['smears']}' " + + f"--weightName {_opts['weightName']} " + + f"--mggLow {_opts['mggLow']} " + + f"--mggHigh {_opts['mggHigh']} " + + f"{_opts['modeOpts']}\n") + _f.write("fi\n") - elif _opts['mode'] == "calcPhotonSyst": - for cidx in range(_opts['nCats']): - c = _opts['cats'].split(",")[cidx] - _f.write("if [ $1 -eq %g ]; then\n"%cidx) - _f.write(" python3 %s/scripts/calcPhotonSyst.py --cat %s --procs %s --ext %s --inputWSDir %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n"%(swd__,c,_opts['procs'],_opts['ext'],_opts['inputWSDir'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) - _f.write("fi\n") + elif _opts['mode'] == "calcPhotonSyst": + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _f.write("if [ $1 -eq %g ]; then\n"%cidx) + _f.write( + f" python3 {swd__}/scripts/calcPhotonSyst.py " + + f"--cat {c} " + + f"--procs {_opts['procs']} " + + f"--ext {_opts['ext']} " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--scales '{_opts['scales']}' " + + f"--scalesCorr '{_opts['scalesCorr']}' " + + f"--scalesGlobal '{_opts['scalesGlobal']}' " + + f"--smears '{_opts['smears']}' " + + f"{_opts['modeOpts']}\n") + _f.write("fi\n") - elif _opts['mode'] == "fTest": - for cidx in range(_opts['nCats']): - c = _opts['cats'].split(",")[cidx] - _f.write("if [ $1 -eq %g ]; then\n"%cidx) - _f.write(" python3 %s/scripts/fTest.py --cat %s --procs %s --ext %s --inputWSDir %s %s\n"%(swd__,c,_opts['procs'],_opts['ext'],_opts['inputWSDir'],_opts['modeOpts'])) - _f.write("fi\n") + elif _opts['mode'] == "fTest": + for cidx 
in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _f.write("if [ $1 -eq %g ]; then\n"%cidx) + _f.write( + f" python3 {swd__}/scripts/fTest.py " + + f"--cat {c} " + + f"--procs {_opts['procs']} " + + f"--ext {_opts['ext']} " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--weightName {_opts['weightName']} " + + f"--mggLow {_opts['mggLow']} " + + f"--mggHigh {_opts['mggHigh']} " + + f"{_opts['modeOpts']}\n") + _f.write("fi\n") + + elif _opts['mode'] == "packageSignal": + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _f.write("if [ $1 -eq %g ]; then\n"%cidx) + _f.write( + f" python3 {swd__}/scripts/packageSignal.py " + + f"--cat {c} " + + f"--outputExt {_opts['ext']} " + + f"--massPoints {_opts['massPoints']} " + + f"{_opts['modeOpts']}\n") + _f.write("fi\n") + + elif _opts['mode'] == 'getDiagProc': + _f.write( + f"python3 {swd__}/scripts/getDiagProc.py " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--ext {_opts['ext']} " + + f"{_opts['modeOpts']}\n" + ) + + # Close .sh file + _f.close() + os.system("chmod 775 %s/%s.sh"%(_jobdir,_executable)) + + # Condor submission file + _fsub = open("%s/%s.sub"%(_jobdir,_executable),"w") + if _opts['mode'] == "signalFit": + if( not _opts['groupSignalFitJobsByCat'] ): + writeCondorSub(_fsub,_executable,_opts['queue'],_opts['nCats']*_opts['nProcs'],_opts['jobOpts']) + else: + writeCondorSub(_fsub,_executable,_opts['queue'],_opts['nCats'],_opts['jobOpts']) + elif( _opts['mode'] == "calcPhotonSyst" )|( _opts['mode'] == "fTest" )|( _opts['mode'] == "packageSignal" ): + writeCondorSub(_fsub,_executable,_opts['queue'],_opts['nCats'],_opts['jobOpts']) + _fsub.close() - elif _opts['mode'] == "packageSignal": + +def writeCondorSubFilesMjj(_opts): + _jobdir = "%s/outdir_%s/%s/jobs"%(swd__,_opts['ext'],_opts['mode']) + _executable = "condor_Mjj_%s_%s"%(_opts['mode'],_opts['ext']) + _f = open("%s/%s.sh"%(_jobdir,_executable),"w") # single .sh script split into separate jobs + writePreamble(_f) + 
+ # Write details depending on mode + + # For looping over proc x cat + if( _opts['mode'] == "signalFit" )&( not _opts['groupSignalFitJobsByCat'] ): + for pidx in range(_opts['nProcs']): for cidx in range(_opts['nCats']): - c = _opts['cats'].split(",")[cidx] - _f.write("if [ $1 -eq %g ]; then\n"%cidx) - _f.write(" python3 %s/scripts/packageSignal.py --cat %s --outputExt %s --massPoints %s %s\n"%(swd__,c,_opts['ext'],_opts['massPoints'],_opts['modeOpts'])) + pcidx = pidx*_opts['nCats']+cidx + p,c = _opts['procs'].split(",")[pidx], _opts['cats'].split(",")[cidx] + _f.write("if [ $1 -eq %g ]; then\n"%pcidx) + _f.write( + f" python3 {swd__}/scripts/signalFitMjj.py " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--ext {_opts['ext']} " + + f"--proc {p} " + + f"--cat {c} " + + f"--year {_opts['year']} " + + f"--analysis {_opts['analysis']} " + + f"--massPoints {_opts['massPoints']} " + + f"--scales '{_opts['scales']}' " + + f"--scalesCorr '{_opts['scalesCorr']}' " + + f"--scalesGlobal '{_opts['scalesGlobal']}' " + + f"--smears '{_opts['smears']}' " + + f"--weightName {_opts['weightName']} " + + f"--mjjLow {_opts['mjjLow']} " + + f"--mjjHigh {_opts['mjjHigh']} " + + f"{_opts['modeOpts']}\n" + ) _f.write("fi\n") + + # For looping over categories + elif( _opts['mode'] == "signalFit" )&( _opts['groupSignalFitJobsByCat'] ): + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _f.write("if [ $1 -eq %g ]; then\n"%cidx) + for pidx in range(_opts['nProcs']): + p = _opts['procs'].split(",")[pidx] + _f.write( + f" python3 {swd__}/scripts/signalFitMjj.py " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--ext {_opts['ext']} " + + f"--proc {p} " + + f"--cat {c} " + + f"--year {_opts['year']} " + + f"--analysis {_opts['analysis']} " + + f"--massPoints {_opts['massPoints']} " + + f"--scales '{_opts['scales']}' " + + f"--scalesCorr '{_opts['scalesCorr']}' " + + f"--scalesGlobal '{_opts['scalesGlobal']}' " + + f"--smears '{_opts['smears']}' " + + f"--weightName 
{_opts['weightName']} " + + f"--mjjLow {_opts['mjjLow']} " + + f"--mjjHigh {_opts['mjjHigh']} " + + f"{_opts['modeOpts']}\n") + _f.write("fi\n") - elif _opts['mode'] == 'getDiagProc': - _f.write("python3 %s/scripts/getDiagProc.py --inputWSDir %s --ext %s %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],_opts['modeOpts'])) - - # Close .sh file - _f.close() - os.system("chmod 775 %s/%s.sh"%(_jobdir,_executable)) + elif _opts['mode'] == "calcPhotonSyst": # TODO + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _f.write("if [ $1 -eq %g ]; then\n"%cidx) + _f.write( + f" python3 {swd__}/scripts/calcPhotonSyst.py " + + f"--cat {c} " + + f"--procs {_opts['procs']} " + + f"--ext {_opts['ext']} " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--scales '{_opts['scales']}' " + + f"--scalesCorr '{_opts['scalesCorr']}' " + + f"--scalesGlobal '{_opts['scalesGlobal']}' " + + f"--smears '{_opts['smears']}' " + + f"{_opts['modeOpts']}\n") + _f.write("fi\n") + + elif _opts['mode'] == "fTest": + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _f.write("if [ $1 -eq %g ]; then\n"%cidx) + _f.write( + f" python3 {swd__}/scripts/fTestMjj.py " + + f"--cat {c} " + + f"--procs {_opts['procs']} " + + f"--ext {_opts['ext']} " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--weightName {_opts['weightName']} " + + f"--mjjLow {_opts['mjjLow']} " + + f"--mjjHigh {_opts['mjjHigh']} " + + f"{_opts['modeOpts']}\n") + _f.write("fi\n") + + elif _opts['mode'] == "packageSignal": # TODO + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _f.write("if [ $1 -eq %g ]; then\n"%cidx) + _f.write( + f" python3 {swd__}/scripts/packageSignal.py " + + f"--cat {c} " + + f"--outputExt {_opts['ext']} " + + f"--massPoints {_opts['massPoints']} " + + f"{_opts['modeOpts']}\n") + _f.write("fi\n") - # Condor submission file - _fsub = open("%s/%s.sub"%(_jobdir,_executable),"w") - if _opts['mode'] == "signalFit": - if( not 
_opts['groupSignalFitJobsByCat'] ): writeCondorSub(_fsub,_executable,_opts['queue'],_opts['nCats']*_opts['nProcs'],_opts['jobOpts']) - else: writeCondorSub(_fsub,_executable,_opts['queue'],_opts['nCats'],_opts['jobOpts']) - elif( _opts['mode'] == "calcPhotonSyst" )|( _opts['mode'] == "fTest" )|( _opts['mode'] == "packageSignal" ): writeCondorSub(_fsub,_executable,_opts['queue'],_opts['nCats'],_opts['jobOpts']) - _fsub.close() + elif _opts['mode'] == 'getDiagProc': # TODO + _f.write( + f"python3 {swd__}/scripts/getDiagProc.py " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--ext {_opts['ext']} " + + f"{_opts['modeOpts']}\n" + ) - # SGE... - if (_opts['batch'] == "IC")|(_opts['batch'] == "SGE")|(_opts['batch'] == "local" ): - _executable = "sub_%s_%s"%(_opts['mode'],_opts['ext']) + # Close .sh file + _f.close() + os.system("chmod 775 %s/%s.sh"%(_jobdir,_executable)) - # Write details depending on mode + # Condor submission file + _fsub = open("%s/%s.sub"%(_jobdir,_executable),"w") + if _opts['mode'] == "signalFit": + if( not _opts['groupSignalFitJobsByCat'] ): + writeCondorSub(_fsub,_executable,_opts['queue'],_opts['nCats']*_opts['nProcs'],_opts['jobOpts']) + else: + writeCondorSub(_fsub,_executable,_opts['queue'],_opts['nCats'],_opts['jobOpts']) + elif( _opts['mode'] == "calcPhotonSyst" )|( _opts['mode'] == "fTest" )|( _opts['mode'] == "packageSignal" ): + writeCondorSub(_fsub,_executable,_opts['queue'],_opts['nCats'],_opts['jobOpts']) + _fsub.close() + pass - # For separate submission file per process x category - if( _opts['mode'] == "signalFit" )&( not _opts['groupSignalFitJobsByCat'] ): - for pidx in range(_opts['nProcs']): - for cidx in range(_opts['nCats']): - pcidx = pidx*_opts['nCats']+cidx - p,c = _opts['procs'].split(",")[pidx], _opts['cats'].split(",")[cidx] - _f = open("%s/%s_%g.sh"%(_jobdir,_executable,pcidx),"w") - writePreamble(_f) - _f.write("python3 %s/scripts/signalFit.py --inputWSDir %s --ext %s --proc %s --cat %s --year %s --analysis %s 
--massPoints %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],p,c,_opts['year'],_opts['analysis'],_opts['massPoints'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) - _f.close() - os.system("chmod 775 %s/%s_%g.sh"%(_jobdir,_executable,pcidx)) - - # For separate submission file per category - elif( _opts['mode'] == "signalFit" )&( _opts['groupSignalFitJobsByCat'] ): - for cidx in range(_opts['nCats']): - c = _opts['cats'].split(",")[cidx] - _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") - writePreamble(_f) - for pidx in range(_opts['nProcs']): - p = _opts['procs'].split(",")[pidx] - _f.write("python3 %s/scripts/signalFit.py --inputWSDir %s --ext %s --proc %s --cat %s --year %s --analysis %s --massPoints %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n\n"%(swd__,_opts['inputWSDir'],_opts['ext'],p,c,_opts['year'],_opts['analysis'],_opts['massPoints'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) - _f.close() - os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) +def writeSGESubFilesMgg(_opts): + _jobdir = "%s/outdir_%s/%s/jobs"%(swd__,_opts['ext'],_opts['mode']) + _executable = "sub_Mgg_%s_%s"%(_opts['mode'],_opts['ext']) - elif _opts['mode'] == "calcPhotonSyst": - for cidx in range(_opts['nCats']): - c = _opts['cats'].split(",")[cidx] - _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") - writePreamble(_f) - _f.write("python3 %s/scripts/calcPhotonSyst.py --cat %s --procs %s --ext %s --inputWSDir %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n"%(swd__,c,_opts['procs'],_opts['ext'],_opts['inputWSDir'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) - _f.close() - os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) + # Write details depending on mode - elif _opts['mode'] 
== "fTest": + # For separate submission file per process x category + if( _opts['mode'] == "signalFit" )&( not _opts['groupSignalFitJobsByCat'] ): + for pidx in range(_opts['nProcs']): for cidx in range(_opts['nCats']): - c = _opts['cats'].split(",")[cidx] - _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") + pcidx = pidx*_opts['nCats']+cidx + p,c = _opts['procs'].split(",")[pidx], _opts['cats'].split(",")[cidx] + _f = open("%s/%s_%g.sh"%(_jobdir,_executable,pcidx),"w") writePreamble(_f) - _f.write("python3 %s/scripts/fTest.py --cat %s --procs %s --ext %s --inputWSDir %s %s\n"%(swd__,c,_opts['procs'],_opts['ext'],_opts['inputWSDir'],_opts['modeOpts'])) + _f.write( + f"python3 {swd__}/scripts/signalFit.py " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--ext {_opts['ext']} " + + f"--proc {p} " + + f"--cat {c} " + + f"--year {_opts['year']} " + + f"--analysis {_opts['analysis']} " + + f"--massPoints {_opts['massPoints']} " + + f"--scales '{_opts['scales']}' " + + f"--scalesCorr '{_opts['scalesCorr']}' " + + f"--scalesGlobal '{_opts['scalesGlobal']}' " + + f"--smears '{_opts['smears']}' " + + f"--weightName {_opts['weightName']} " + + f"--mggLow {_opts['mggLow']} " + + f"--mggHigh {_opts['mggHigh']} " + + f"{_opts['modeOpts']}\n" + ) _f.close() - os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) + os.system("chmod 775 %s/%s_%g.sh"%(_jobdir,_executable,pcidx)) + + # For separate submission file per category + elif( _opts['mode'] == "signalFit" )&( _opts['groupSignalFitJobsByCat'] ): + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") + writePreamble(_f) + for pidx in range(_opts['nProcs']): + p = _opts['procs'].split(",")[pidx] + _f.write( + f"python3 {swd__}/scripts/signalFit.py " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--ext {_opts['ext']} " + + f"--proc {p} " + + f"--cat {c} " + + f"--year {_opts['year']} " + + f"--analysis {_opts['analysis']} " + + f"--massPoints 
{_opts['massPoints']} " + + f"--scales '{_opts['scales']}' " + + f"--scalesCorr '{_opts['scalesCorr']}' " + + f"--scalesGlobal '{_opts['scalesGlobal']}' " + + f"--smears '{_opts['smears']}' " + + f"--weightName {_opts['weightName']} " + + f"--mggLow {_opts['mggLow']} " + + f"--mggHigh {_opts['mggHigh']} " + + f"{_opts['modeOpts']}\n\n" + ) + _f.close() + os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) + + elif _opts['mode'] == "calcPhotonSyst": + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") + writePreamble(_f) + _f.write( + f"python3 {swd__}/scripts/calcPhotonSyst.py " + + f"--cat {c} " + + f"--procs {_opts['procs']} " + + f"--ext {_opts['ext']} " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--scales '{_opts['scales']}' " + + f"--scalesCorr '{_opts['scalesCorr']}' " + + f"--scalesGlobal '{_opts['scalesGlobal']}' " + + f"--smears '{_opts['smears']}' " + + f"{_opts['modeOpts']}\n") + _f.close() + os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) + + elif _opts['mode'] == "fTest": + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") + writePreamble(_f) + _f.write( + f"python3 {swd__}/scripts/fTest.py " + + f"--cat {c} " + + f"--procs {_opts['procs']} " + + f"--ext {_opts['ext']} " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--weightName {_opts['weightName']} " + + f"--mggLow {_opts['mggLow']} " + + f"--mggHigh {_opts['mggHigh']} " + + f"{_opts['modeOpts']}\n" + ) + _f.close() + os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) + + elif _opts['mode'] == "packageSignal": + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") + writePreamble(_f) + _f.write( + f"python3 {swd__}/scripts/packageSignal.py " + + f"--cat {c} " + + f"--outputExt {_opts['ext']} " + + f"--massPoints {_opts['massPoints']} " + + 
f"{_opts['modeOpts']}\n" + ) + _f.close() + os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) + + # For single submission file + elif _opts['mode'] == "getDiagProc": + _f = open("%s/%s.sh"%(_jobdir,_executable),"w") + writePreamble(_f) + _f.write( + f"python3 {swd__}/scripts/getDiagProc.py " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--ext {_opts['ext']} " + + f"{_opts['modeOpts']}\n" + ) + _f.close() + os.system("chmod 775 %s/%s.sh"%(_jobdir,_executable)) - elif _opts['mode'] == "packageSignal": +def writeSGESubFilesMjj(_opts): + _jobdir = "%s/outdir_%s/%s/jobs"%(swd__,_opts['ext'],_opts['mode']) + _executable = "sub_Mjj_%s_%s"%(_opts['mode'],_opts['ext']) + + # Write details depending on mode + + # For separate submission file per process x category + if( _opts['mode'] == "signalFit" )&( not _opts['groupSignalFitJobsByCat'] ): + for pidx in range(_opts['nProcs']): for cidx in range(_opts['nCats']): - c = _opts['cats'].split(",")[cidx] - _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") + pcidx = pidx*_opts['nCats']+cidx + p,c = _opts['procs'].split(",")[pidx], _opts['cats'].split(",")[cidx] + _f = open("%s/%s_%g.sh"%(_jobdir,_executable,pcidx),"w") writePreamble(_f) - _f.write("python3 %s/scripts/packageSignal.py --cat %s --outputExt %s --massPoints %s %s\n"%(swd__,c,_opts['ext'],_opts['massPoints'],_opts['modeOpts'])) + _f.write( + f"python3 {swd__}/scripts/signalFitMjj.py " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--ext {_opts['ext']} " + + f"--proc {p} " + + f"--cat {c} " + + f"--year {_opts['year']} " + + f"--analysis {_opts['analysis']} " + + f"--massPoints {_opts['massPoints']} " + + f"--scales '{_opts['scales']}' " + + f"--scalesCorr '{_opts['scalesCorr']}' " + + f"--scalesGlobal '{_opts['scalesGlobal']}' " + + f"--smears '{_opts['smears']}' " + + f"--weightName {_opts['weightName']} " + + f"--mjjLow {_opts['mjjLow']} " + + f"--mjjHigh {_opts['mjjHigh']} " + + f"{_opts['modeOpts']}\n" + ) _f.close() - os.system("chmod 775 
%s/%s_%s.sh"%(_jobdir,_executable,c)) + os.system("chmod 775 %s/%s_%g.sh"%(_jobdir,_executable,pcidx)) + + # For separate submission file per category + elif( _opts['mode'] == "signalFit" )&( _opts['groupSignalFitJobsByCat'] ): + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") + writePreamble(_f) + for pidx in range(_opts['nProcs']): + p = _opts['procs'].split(",")[pidx] + _f.write( + f"python3 {swd__}/scripts/signalFitMjj.py " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--ext {_opts['ext']} " + + f"--proc {p} " + + f"--cat {c} " + + f"--year {_opts['year']} " + + f"--analysis {_opts['analysis']} " + + f"--massPoints {_opts['massPoints']} " + + f"--scales '{_opts['scales']}' " + + f"--scalesCorr '{_opts['scalesCorr']}' " + + f"--scalesGlobal '{_opts['scalesGlobal']}' " + + f"--smears '{_opts['smears']}' " + + f"--weightName {_opts['weightName']} " + + f"--mjjLow {_opts['mjjLow']} " + + f"--mjjHigh {_opts['mjjHigh']} " + + f"{_opts['modeOpts']}\n\n" + ) + _f.close() + os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) + + elif _opts['mode'] == "calcPhotonSyst": # TODO + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") + writePreamble(_f) + _f.write( + f"python3 {swd__}/scripts/calcPhotonSyst.py " + + f"--cat {c} " + + f"--procs {_opts['procs']} " + + f"--ext {_opts['ext']} " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--scales '{_opts['scales']}' " + + f"--scalesCorr '{_opts['scalesCorr']}' " + + f"--scalesGlobal '{_opts['scalesGlobal']}' " + + f"--smears '{_opts['smears']}' " + + f"{_opts['modeOpts']}\n") + _f.close() + os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) - # For single submission file - elif _opts['mode'] == "getDiagProc": - _f = open("%s/%s.sh"%(_jobdir,_executable),"w") + elif _opts['mode'] == "fTest": + for cidx in range(_opts['nCats']): + c = 
_opts['cats'].split(",")[cidx] + _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") writePreamble(_f) - _f.write("python3 %s/scripts/getDiagProc.py --inputWSDir %s --ext %s %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],_opts['modeOpts'])) + _f.write( + f"python3 {swd__}/scripts/fTestMjj.py " + + f"--cat {c} " + + f"--procs {_opts['procs']} " + + f"--ext {_opts['ext']} " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--weightName {_opts['weightName']} " + + f"--mjjLow {_opts['mjjLow']} " + + f"--mjjHigh {_opts['mjjHigh']} " + + f"{_opts['modeOpts']}\n" + ) _f.close() - os.system("chmod 775 %s/%s.sh"%(_jobdir,_executable)) + os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) + + elif _opts['mode'] == "packageSignal": # TODO + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") + writePreamble(_f) + _f.write( + f"python3 {swd__}/scripts/packageSignal.py " + + f"--cat {c} " + + f"--outputExt {_opts['ext']} " + + f"--massPoints {_opts['massPoints']} " + + f"{_opts['modeOpts']}\n" + ) + _f.close() + os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) + + # For single submission file + elif _opts['mode'] == "getDiagProc": # TODO + _f = open("%s/%s.sh"%(_jobdir,_executable),"w") + writePreamble(_f) + _f.write( + f"python3 {swd__}/scripts/getDiagProc.py " + + f"--inputWSDir {_opts['inputWSDir']} " + + f"--ext {_opts['ext']} " + + f"{_opts['modeOpts']}\n" + ) + _f.close() + os.system("chmod 775 %s/%s.sh"%(_jobdir,_executable)) + +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +def writeSubFilesMgg(_opts): + if _opts['batch'] == "condor": + writeCondorSubFilesMgg(_opts) + if (_opts['batch'] == "IC")|(_opts['batch'] == "SGE")|(_opts['batch'] == "local" ): + writeSGESubFilesMgg(_opts) + +def writeSubFilesMjj(_opts): + if _opts['batch'] == "condor": + writeCondorSubFilesMjj(_opts) + if (_opts['batch'] == "IC")|(_opts['batch'] == 
"SGE")|(_opts['batch'] == "local" ): + writeSGESubFilesMjj(_opts) + +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +def writeSubFiles(_opts): + # Make directory to store sub files + if not os.path.isdir("%s/outdir_%s"%(swd__,_opts['ext'])): os.system("mkdir %s/outdir_%s"%(swd__,_opts['ext'])) + if not os.path.isdir("%s/outdir_%s/%s"%(swd__,_opts['ext'],_opts['mode'])): os.system("mkdir %s/outdir_%s/%s"%(swd__,_opts['ext'],_opts['mode'])) + if not os.path.isdir("%s/outdir_%s/%s/jobs"%(swd__,_opts['ext'],_opts['mode'])): os.system("mkdir %s/outdir_%s/%s/jobs"%(swd__,_opts['ext'],_opts['mode'])) + + _jobdir = "%s/outdir_%s/%s/jobs"%(swd__,_opts['ext'],_opts['mode']) + # Remove current job files + if len(glob.glob("%s/*"%_jobdir)): os.system("rm %s/*"%_jobdir) + + # Mgg/Mjj logic + if _opts['fitType'] == "mgg" or _opts['fitType'] == "2D": + writeSubFilesMgg(_opts) + if _opts['fitType'] == "mjj" or _opts['fitType'] == "2D": + writeSubFilesMjj(_opts) - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Function for submitting files to batch system -def submitFiles(_opts): +def submitFilesMgg(_opts): _jobdir = "%s/outdir_%s/%s/jobs"%(swd__,_opts['ext'],_opts['mode']) # CONDOR if _opts['batch'] == "condor": - _executable = "condor_%s_%s"%(_opts['mode'],_opts['ext']) + _executable = "condor_Mgg_%s_%s"%(_opts['mode'],_opts['ext']) if os.environ['PWD'].startswith("/eos"): cmdLine = "cd %s; condor_submit -spool %s.sub; cd %s"%(_jobdir,_executable,swd__) else: @@ -193,7 +603,7 @@ def submitFiles(_opts): # SGE elif _opts['batch'] in ['IC','SGE']: - _executable = "sub_%s_%s"%(_opts['mode'],_opts['ext']) + _executable = "sub_Mgg_%s_%s"%(_opts['mode'],_opts['ext']) # Extract job opts jobOptsStr = _opts['jobOpts'] @@ -222,7 +632,7 @@ def submitFiles(_opts): # Running locally elif _opts['batch'] == 'local': - _executable = "sub_%s_%s"%(_opts['mode'],_opts['ext']) + 
_executable = "sub_Mgg_%s_%s"%(_opts['mode'],_opts['ext']) # For separate submission file per process x category if( _opts['mode'] == "signalFit" )&( not _opts['groupSignalFitJobsByCat'] ): for pidx in range(_opts['nProcs']): @@ -245,4 +655,79 @@ def submitFiles(_opts): run(cmdLine) print(" --> Finished running files") + +def submitFilesMjj(_opts): + _jobdir = "%s/outdir_%s/%s/jobs"%(swd__,_opts['ext'],_opts['mode']) + # CONDOR + if _opts['batch'] == "condor": + _executable = "condor_Mjj_%s_%s"%(_opts['mode'],_opts['ext']) + if os.environ['PWD'].startswith("/eos"): + cmdLine = "cd %s; condor_submit -spool %s.sub; cd %s"%(_jobdir,_executable,swd__) + else: + cmdLine = "cd %s; condor_submit %s.sub; cd %s"%(_jobdir,_executable,swd__) + run(cmdLine) + print(" --> Finished submitting files") + + # SGE + elif _opts['batch'] in ['IC','SGE']: + _executable = "sub_Mjj_%s_%s"%(_opts['mode'],_opts['ext']) + + # Extract job opts + jobOptsStr = _opts['jobOpts'] + + # For separate submission file per process x category + if( _opts['mode'] == "signalFit" )&( not _opts['groupSignalFitJobsByCat'] ): + for pidx in range(_opts['nProcs']): + for cidx in range(_opts['nCats']): + pcidx = pidx*_opts['nCats']+cidx + _subfile = "%s/%s_%g"%(_jobdir,_executable,pcidx) + cmdLine = "qsub -q hep.q %s -o %s.log -e %s.err %s.sh"%(jobOptsStr,_subfile,_subfile,_subfile) + run(cmdLine) + # Separate submission per category + elif( _opts['mode'] == "packageSignal" )|( _opts['mode'] == "fTest" )|( _opts['mode'] == "calcPhotonSyst" )|(( _opts['mode'] == "signalFit" )&( _opts['groupSignalFitJobsByCat'] )): + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _subfile = "%s/%s_%s"%(_jobdir,_executable,c) + cmdLine = "qsub -q hep.q %s -o %s.log -e %s.err %s.sh"%(jobOptsStr,_subfile,_subfile,_subfile) + run(cmdLine) + # Single submission + elif(_opts['mode'] == "getDiagProc"): + _subfile = "%s/%s"%(_jobdir,_executable) + cmdLine = "qsub -q hep.q %s -o %s.log -e %s.err 
%s.sh"%(jobOptsStr,_subfile,_subfile,_subfile) + run(cmdLine) + print(" --> Finished submitting files") + + # Running locally + elif _opts['batch'] == 'local': + _executable = "sub_Mjj_%s_%s"%(_opts['mode'],_opts['ext']) + # For separate submission file per process x category + if( _opts['mode'] == "signalFit" )&( not _opts['groupSignalFitJobsByCat'] ): + for pidx in range(_opts['nProcs']): + for cidx in range(_opts['nCats']): + pcidx = pidx*_opts['nCats']+cidx + _subfile = "%s/%s_%g"%(_jobdir,_executable,pcidx) + cmdLine = "bash %s.sh"%(_subfile) + run(cmdLine) + # Separate submission per category + elif( _opts['mode'] == "packageSignal" )|( _opts['mode'] == "fTest" )|( _opts['mode'] == "calcPhotonSyst" )|(( _opts['mode'] == "signalFit" )&( _opts['groupSignalFitJobsByCat'] )): + for cidx in range(_opts['nCats']): + c = _opts['cats'].split(",")[cidx] + _subfile = "%s/%s_%s"%(_jobdir,_executable,c) + cmdLine = "bash %s.sh"%_subfile + run(cmdLine) + # Single submission + elif(_opts['mode'] == "getDiagProc"): + _subfile = "%s/%s"%(_jobdir,_executable) + cmdLine = "bash %s.sh"%_subfile + run(cmdLine) + print(" --> Finished running files") + + +def submitFiles(_opts): + if _opts['fitType'] == "mgg" or _opts['fitType'] == "2D": + submitFilesMgg(_opts) + if _opts['fitType'] == "mjj" or _opts['fitType'] == "2D": + submitFilesMjj(_opts) + + diff --git a/tools/CollectModels.py b/tools/CollectModels.py new file mode 100644 index 00000000..1bacdccb --- /dev/null +++ b/tools/CollectModels.py @@ -0,0 +1,766 @@ +# ****************************************************************************** +# * * +# * Title: CollectModels.py * +# * Author: Benjamin Lawrence-Sanderson * +# * Northwestern University * +# * Created: 2025-04-25 * +# * Description: * +# * Collects the output mgg and mbb workspaces and merges the * +# * two 1D models into a single 2D model using RooProdPdf. 
* +# * * +# ****************************************************************************** + +# from CollectModels import collect_models +# collect_models( +# json_file="path/to/json/file.json", +# output_file="output_file.root", +# log_level="INFO" + +import glob +import os +import sys +import json +import logging +import ROOT +import re + +import commonObjects as co +import commonTools as ct + +# Set up a basic logger at module level that will be configured properly later +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +if not logger.handlers: + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter('%(levelname)s - %(message)s')) + logger.addHandler(handler) + +def _group_models_by_category(models: dict) -> dict: + grouped = {} # { + # cat0: {modelA0: pathA0, modelB0: pathB0}, + # cat1: {modelC1: pathC1, modelD1: pathD1}, + # ... + # } + for model_name, model_path in models.items(): + # Extract category from model name + cat = model_name.split("_")[-2] # Require cat name contains no underscores + if cat not in grouped: + grouped[cat] = {} + grouped[cat][model_name] = model_path + + # Check the sub-dicts are all the same length + lengths = [len(grouped[cat]) for cat in grouped] + if len(set(lengths)) != 1: + logger.warning("Not all categories have the same number of models.") + + return grouped + + + +def _parse_json(json_file: str) -> dict: + """Parse the JSON file to get the model names and paths.""" + # model_name: path/to/model.root + # { + # "mgg_GGHH_2022+2023_cat0_13TeV": "path/to/CMS-HGG_sigfit_bbgg_2022+2023_GGHH_2022+2023_cat0.root", + # "mjj_GGHH_2022+2023_cat0_13TeV": "path/to/CMS-HBB_sigfit_bbgg_2022+2023_GGHH_2022+2023_cat0.root", + # } + # obs_proc_year_cat_sqrts + + if not os.path.exists(json_file): + logger.error(f"JSON file {json_file} does not exist.") + sys.exit(1) + + with open(json_file, "r", encoding="utf-8") as f: + ungrouped_models = json.load(f) + + if len(ungrouped_models) == 0: + 
logger.error(f"JSON file {json_file} is empty.") + sys.exit(1) + + # TODO: Group by category AND YEAR (for separate year fits) + models = _group_models_by_category(ungrouped_models) # depth-2 dict + # { cat0: {modelA0: pathA0, modelB0: pathB0}, cat1: {modelC1: pathC1, modelD1: pathD1}, ... } + + return models + + +def _rooiter(x): + iter = x.iterator() + ret = iter.Next() + while ret: + yield ret + ret = iter.Next() + + +def _name_to_cat(name: str) -> str: + """Extract the category from the model name.""" + # e.g. mgg_GGHH_2022+2023_cat0_13TeV -> cat0 + return name.split("_")[-2] # Require cat name contains no underscores + + +def _name_to_year(name: str) -> str: + """Extract the year from the model name.""" + # e.g. mgg_GGHH_2022+2023_cat0_13TeV -> 2022+2023 + # n.b. We are indexing from the end since proc name may contain underscores, + # kappa samples for example + return name.split("_")[-3] # Require cat name contains no underscores + + +def _name_to_proc(name: str) -> str: + """Extract the process from the model name.""" + # e.g. mgg_GGHH_2022+2023_cat0_13TeV -> GGHH + # n.b. We are indexing from the end since proc name may contain underscores, + # kappa samples for example + split_name = name.split("_") + + return "_".join(split_name[1:-3]) + + +def _name_to_obs(name: str) -> str: + """Extract the observable from the model name.""" + # e.g. mgg_GGHH_2022+2023_cat0_13TeV -> mgg + # e.g. mjj_GGHH_2022+2023_cat0_13TeV -> mjj + return name.split("_")[0] + + + + + +def _get_PDF_name( + ws_type: str, + proc: str, + obs: str, + year: str, + cat: str, +) -> str: + """Get PDF name based on workspace type and process + + Args: + ws_type (str): Type of workspace (signal, bkg-res, bkg-nonres) + proc (str): Process name (i.e. ggh, vbf, tth, vh, gghh). + For background non-resonant (data), use gghh. 
+ obs (str): Observable (mgg or mjj) + year (str): Year + + Returns: + str: PDF name + """ + # General process: + # 1) List all PDFs in workspace + # 2) Loose match to that PDF name + # 3) Return that PDF name + # 4) If not found, throw error and exit + + if year not in co.years_to_process: + logger.error("Year not listed in 'years to process' (see commonObjects.py): %s", year) + sys.exit(1) + if "," in cat: + logger.error("_get_PDF_name requires single category, multiple given: %s", cat) + sys.exit(1) + if "," in year: + logger.error("_get_PDF_name requires single year, multiple given: %s", year) + sys.exit(1) + if obs not in ["mgg", "mjj"]: + logger.error("Unknown observable: %s", obs) + sys.exit(1) + + proc_name = ct.dataToProc(proc) # ggh -> GG2H + + pdf_name = "" + # Signal + if ws_type == "signal": + if obs == "mgg": + # e.g. hggpdfsmrel_GGHH_all2022_cat0_13TeV + pdf_name = f"{co.outputMggWSObjectTitle__}_mgg_{proc_name}_{year}_{cat}_{co.sqrts__}" + elif obs == "mjj": + # e.g. hbbpdfsmrel_GGHH_all2022_cat0_13TeV + pdf_name = f"{co.outputMbbWSObjectTitle__}_mjj_{proc_name}_{year}_{cat}_{co.sqrts__}" + else: + logger.error(f"Unknown observable: {obs}") + sys.exit(1) + + elif ws_type == "bkg-res": + if obs == "mgg": + # e.g. hggpdfsmrel_mgg_TTH_all2022_cat0_13TeV + pdf_name = f"{co.outputMggWSObjectTitle__}_mgg_{proc_name}_{year}_{cat}_{co.sqrts__}" + elif obs == "mjj": + # e.g. hbbpdfsmrel_mjj_TTH_all2022_cat0_13TeV + pdf_name = f"{co.outputMbbWSObjectTitle__}_mjj_{proc_name}_{year}_{cat}_{co.sqrts__}" + else: + logger.error(f"Unknown observable: {obs}") + sys.exit(1) + + elif ws_type == "bkg-nonres": + if obs == "mgg": + # e.g. CMS_hgg_cat1_13TeV_bkgshape + pdf_name = f"CMS_hgg_{cat}_{co.sqrts__}_bkgshape" + elif obs == "mjj": + # e.g. hbbpdfsmrel_mjj_GGHH_all2022_cat1_13TeV + # pdf_name = f"{co.outputMbbWSObjectTitle__}_mjj_{proc_name}_{year}_{cat}_{co.sqrts__}" + # e.g. 
CMS_hbb_cat0_13TeV_bkgshape + pdf_name = f"CMS_hbb_{cat}_{co.sqrts__}_bkgshape" + else: + logger.error(f"Unknown observable: {obs}") + sys.exit(1) + else: + logger.error(f"Unknown workspace type: {ws_type}") + sys.exit(1) + + return pdf_name + + +def _get_dataset_name( + ws_type: str, + obs: str, + year: str, + cat: str, +) -> str: + """Get dataset name in nonresonant background workspace + + Args: + obs (str): Observable (mgg or mjj) + year (str): Year + cat (str): Category + + Returns: + str: Dataset name + """ + if obs not in ["mgg", "mjj"]: + logger.error("Unknown observable: %s", obs) + sys.exit(1) + if year not in co.years_to_process: + logger.error("Year not listed in 'years to process' (see commonObjects.py): %s", year) + sys.exit(1) + if "," in cat: + logger.error("_get_dataset_name requires single category, multiple given: %s", cat) + sys.exit(1) + if "," in year: + logger.error("_get_dataset_name requires single year, multiple given: %s", year) + sys.exit(1) + + dataset_name = "" + if ws_type == "bkg-nonres": + if obs == "mgg": + # e.g. roohist_data_mass_cat1 + dataset_name = f"roohist_data_mass_{cat}" + elif obs == "mjj": + # e.g. roohist_data_mass_cat1 + dataset_name = f"roohist_data_mass_{cat}" + elif ws_type == "signal": + logger.warning("Signal workspace does not have relevant datasets") + elif ws_type == "bkg-res": + logger.warning("Background resonant workspace does not have relevant datasets") + else: + logger.error(f"Unknown workspace type: {ws_type}") + sys.exit(1) + + return dataset_name + + +def _build_output_name( + output_name: str, + proc: str, + year: str, + cat: str, +) -> str: + """Build the output name for the combined model. + + Args: + output_name (str): Output name template + ws_type (str): Type of workspace (signal, bkg-res, bkg-nonres) + proc (str): Process name (i.e. ggh, vbf, tth, vh, gghh). + For background non-resonant (data), use gghh. 
+ year (str): Year + cat (str): Category + + Returns: + str: Output name + """ + name = output_name.replace("%YEAR", year) + name = name.replace("%PROC", proc) + name = name.replace("%CAT", cat) + return name + + +def _get_ws_name( + obs: str, + ws_type: str, +) -> str: + """Get the workspace name based on the observable and workspace type. + + Args: + obs (str): Observable (mgg or mjj) + ws_type (str): Type of workspace (signal, bkg-res, bkg-nonres) + + Returns: + str: Workspace name + """ + if ws_type == "signal" and obs == "mgg": + return f"{co.outputWSName__}_{co.sqrts__}" + elif ws_type == "signal" and obs == "mjj": + return f"{co.outputWSName__}_{co.sqrts__}" + elif ws_type == "bkg-res" and obs == "mgg": + return co.bkgWSName__ + elif ws_type == "bkg-res" and obs == "mjj": + return co.bkgWSName__ + elif ws_type == "bkg-nonres" and obs == "mgg": + return co.bkgWSName__ + elif ws_type == "bkg-nonres" and obs == "mjj": + return co.bkgWSName__ + else: + logger.error(f"Unknown workspace type: {ws_type}") + sys.exit(1) + + +def _build_2d_pdf_name( + ws_type: str, + proc: str, + year: str, + cat: str, +) -> str: + """Build the 2D PDF name for the combined model. + + Args: + ws_type (str): Type of workspace (signal, bkg-res, bkg-nonres) + proc (str): Process name (i.e. ggh, vbf, tth, vh, gghh). + For background non-resonant (data), use gghh. 
+ year (str): Year + cat (str): Category + + Returns: + str: 2D PDF name + """ + if ws_type == "bkg-nonres": + return f"CMS-2D_{cat}_bkgshape" + elif ws_type == "bkg-res": + proc_name = ct.dataToProc(proc) + return f"{co.output2DWSObjectTitle__}_2D_{proc_name}_{year}_{cat}_{co.sqrts__}" + elif ws_type == "signal": + proc_name = ct.dataToProc(proc) + return f"{co.output2DWSObjectTitle__}_2D_{proc_name}_{year}_{cat}_{co.sqrts__}" + else: + logger.error(f"Unknown workspace type: {ws_type}") + sys.exit(1) + + +def _build_prod_pdf( + mgg_pdf: ROOT.RooAbsPdf, + mjj_pdf: ROOT.RooAbsPdf, + prod_pdf_name: str, + ws_type: str, + proc: str, + cat: str, +) -> ROOT.RooProdPdf: + # n.b. descriptions assume: + # - single signal proc + # - single nonres bkg proc + # - multiple res bkg procs + if ws_type == "bkg-nonres": + return ROOT.RooProdPdf( + prod_pdf_name, + f"2D nonresonant PDF for {cat}", + ROOT.RooArgList(mgg_pdf, mjj_pdf), + ) + elif ws_type == "bkg-res": + return ROOT.RooProdPdf( + prod_pdf_name, + f"2D resonant PDF for {proc} {cat}", + ROOT.RooArgList(mgg_pdf, mjj_pdf), + ) + elif ws_type == "signal": + return ROOT.RooProdPdf( + prod_pdf_name, + f"2D signal PDF for {cat}", + ROOT.RooArgList(mgg_pdf, mjj_pdf), + ) + else: + logger.error(f"Unknown workspace type: {ws_type}") + sys.exit(1) + + +def _clear_2d_ws_directory(ws_type: str) -> None: + """Clear the 2D workspace directory in the datacard directory. + + Args: + ws_type (str): Type of workspace (signal, bkg-res, bkg-nonres) + """ + # Clear the outdir_2D directory in the datacard directory + # e.g. /path/to/outdir_2D/bkg-res/ + outdir = co.dwd__ + "/outdir_2D/" + f"/{ws_type}/" + if os.path.exists(outdir): + os.system(f"rm -rf {outdir}") + logger.debug(f"Cleared outdir_2D/{ws_type}/ directory") + else: + logger.debug("No outdir_2D directory to clear") + + +def _move_workspace(path: str, ws_type: str) -> str: + """Move the workspace to outdir_2D in Datacard directory. 
+ Args: + path (str): Path to the workspace + ws_type (str): Type of workspace (signal, bkg-res, bkg-nonres) + + Returns: + str: Path to the moved workspace + """ + # Move the workspace to outdir_2D in Datacard directory + # e.g. /path/to/workspace.root -> /path/to/outdir_2D/bkg-res/workspace.root + outdir = co.dwd__ + "/outdir_2D" + f"/{ws_type}/" + if not os.path.exists(outdir): + os.system(f"mkdir -p {outdir}") + outdir = os.path.abspath(outdir) + + filename = os.path.basename(path) + outpath = os.path.join(outdir, filename) + if os.path.exists(outpath): + logger.warning("Overwriting %s", outpath.split("/")[-1]) + + os.system(f"mv {path} {outpath}") + return outpath + + + + +def collect_models( + json_file: str, + output_file: str, + mgg_var_name: str = "mass", + mjj_var_name: str = "dijet_mass", + log_level: str = "INFO", + ws_type: str = None, + no_clear: bool = False, + **kwargs +) -> None: + """Main function to collect models.""" + # Set up logging - reconfigure the global logger + global logger + + import os + import logManager + from customLogger import CustomFormatter, FileFormatter + + LOG_FILE_NAME = "logfile.log" + if not os.environ.get("LOGLEVEL", False): + os.environ["LOGLEVEL"] = log_level.upper() + + logManager.manage(LOG_FILE_NAME, recreate=True) + + # Remove any existing handlers to avoid duplicate logs + for handler in logger.handlers[:]: + logger.removeHandler(handler) + + # Add new handlers with proper formatting + file_handler = logging.FileHandler(LOG_FILE_NAME) + file_handler.setLevel(logging.getLevelName(log_level.upper())) + file_handler.setFormatter(FileFormatter()) + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.getLevelName(log_level.upper())) + console_handler.setFormatter(CustomFormatter()) + + logger.setLevel(logging.getLevelName(log_level.upper())) + logger.addHandler(file_handler) + logger.addHandler(console_handler) + + # Continue with the existing implementation but use function parameters instead 
of args
+    if ws_type not in ["signal", "bkg-res", "bkg-nonres", None]:
+        logger.error("Unknown workspace type: %s. Allowed workspace types: signal, bkg-res, bkg-nonres.", ws_type)
+        sys.exit(1)
+
+    if not no_clear:
+        _clear_2d_ws_directory(ws_type)
+
+    models = _parse_json(json_file)
+
+    # Loop over the models
+    for cat, model_dict in models.items():
+        obs_info = {}
+        for model_name, model_path in model_dict.items():
+            year = _name_to_year(model_name)
+            proc = _name_to_proc(model_name)
+            obs = _name_to_obs(model_name)
+
+            # PDF name
+            pdf_name = _get_PDF_name(
+                ws_type=ws_type,
+                proc=proc,
+                obs=obs,
+                year=year,
+                cat=cat,
+            )
+            logger.debug("PDF name: %s", pdf_name)
+
+            # Dataset name
+            dataset_name = _get_dataset_name(
+                ws_type=ws_type,
+                obs=obs,
+                year=year,
+                cat=cat,
+            )
+            logger.debug("Dataset name: %s", dataset_name)
+
+            obs_info[obs] = {
+                "pdf_name": pdf_name,
+                "dataset_name": dataset_name,
+                "model_path": model_path,
+                "year": year,
+                "cat": cat,
+                "proc": proc,
+            }
+
+        mgg_path = obs_info["mgg"]["model_path"]
+        mjj_path = obs_info["mjj"]["model_path"]
+
+        # Open files
+        mgg_file = ROOT.TFile.Open(mgg_path)
+        mjj_file = ROOT.TFile.Open(mjj_path)
+        if not mgg_file or not mjj_file:
+            logger.error("Could not open workspace files: %s, %s", mgg_path, mjj_path)
+            if mgg_file: mgg_file.Close()
+            if mjj_file: mjj_file.Close()
+            sys.exit(1)
+
+        # Get workspaces
+        mgg_ws = mgg_file.Get(_get_ws_name(obs="mgg", ws_type=ws_type))
+        mjj_ws = mjj_file.Get(_get_ws_name(obs="mjj", ws_type=ws_type))
+        if not mgg_ws or not mjj_ws:
+            logger.error(
+                "Failed to get workspaces from input files (mgg: %s, mjj: %s)",
+                bool(mgg_ws),
+                bool(mjj_ws),
+            )
+            mgg_file.Close()
+            mjj_file.Close()
+            sys.exit(1)
+        else:
+            logger.debug("Loaded workspaces")
+
+        # Create output workspace
+        out_ws = ROOT.RooWorkspace("multipdf", "multipdf")
+
+        # Get mgg and mjj variables
+        mgg = mgg_ws.var(mgg_var_name)
+        mjj = mjj_ws.var(mjj_var_name)
+        if not mgg or not mjj:
+            logger.error("Failed to get mass 
variables from workspaces") + mgg_file.Close() + mjj_file.Close() + continue + else: + logger.debug("Got mass variables") + + # Get mgg and mjj PDFs + mgg_pdf = mgg_ws.pdf(obs_info["mgg"]["pdf_name"]) + mjj_pdf = mjj_ws.pdf(obs_info["mjj"]["pdf_name"]) + if not mgg_pdf or not mjj_pdf: + logger.error( + "Failed to get PDFs or category indices for background nonresonant (Bad: %s)", + ", ".join( + [ + str(k) + for k, v in { + "mgg_pdf": mgg_pdf, + "mjj_pdf": mjj_pdf, + }.items() + if not bool(v) + ] + ), + ) + mgg_file.Close() + mjj_file.Close() + sys.exit(1) + + prod_pdf_name = _build_2d_pdf_name( + ws_type=ws_type, + proc=obs_info["mgg"]["proc"], + year=obs_info["mgg"]["year"], + cat=obs_info["mgg"]["cat"], + ) + logger.debug("Output PDF name: %s", prod_pdf_name) + + # Create 2D PDF + prod_pdf = _build_prod_pdf( + mgg_pdf=mgg_pdf, + mjj_pdf=mjj_pdf, + prod_pdf_name=prod_pdf_name, + ws_type=ws_type, + proc=obs_info["mgg"]["proc"], + cat=obs_info["mgg"]["cat"], + ) + prod_pdf_norm = ROOT.RooRealVar( + prod_pdf_name + "_norm", + "2D pdf normalization based on mgg pdf norm", + mgg_ws.obj(obs_info["mgg"]["pdf_name"] + "_norm").getVal(), + 0, + 3 * mgg_ws.obj(obs_info["mgg"]["pdf_name"] + "_norm").getVal(), + ) + prod_pdf_norm.setConstant(True) + + out_ws.imp = getattr(out_ws, "import") + + allVars, allFunctions, allPdfs = {}, {}, {} + for _var in _rooiter(mgg_ws.allVars()): + allVars[_var.GetName()] = _var + for _var in _rooiter(mjj_ws.allVars()): + allVars[_var.GetName()] = _var + for _func in _rooiter(mgg_ws.allFunctions()): + allFunctions[_func.GetName()] = _func + for _func in _rooiter(mjj_ws.allFunctions()): + allFunctions[_func.GetName()] = _func + for _pdf in _rooiter(mgg_ws.allPdfs()): + allPdfs[_pdf.GetName()] = _pdf + for _pdf in _rooiter(mjj_ws.allPdfs()): + allPdfs[_pdf.GetName()] = _pdf + allData_mgg = mgg_ws.allData() + allData_mjj = mjj_ws.allData() + + # Import objects into output workspace + logger.debug(f"({ws_type}) Importing variables...") + for 
_varName, _var in allVars.items(): + logger.debug(" --> Importing %s", _varName) + _var.setConstant(True) + out_ws.imp(_var, ROOT.RooFit.RecycleConflictNodes(), ROOT.RooFit.Silence()) + logger.debug(f"({ws_type}) Importing functions...") + for _funcName, _func in allFunctions.items(): + logger.debug(" --> Importing %s", _funcName) + out_ws.imp(_func, ROOT.RooFit.RecycleConflictNodes(), ROOT.RooFit.Silence()) + logger.debug(f"({ws_type}) Importing PDFs...") + for _pdfName, _pdf in allPdfs.items(): + logger.debug(" --> Importing %s", _pdfName) + out_ws.imp(_pdf, ROOT.RooFit.RecycleConflictNodes(), ROOT.RooFit.Silence()) + logger.debug(f"({ws_type}) Importing datasets...") + for _data in allData_mgg: + logger.debug(" --> Importing %s", _data.GetName()) + out_ws.imp(_data, ROOT.RooFit.RecycleConflictNodes(), ROOT.RooFit.Silence()) + for _data in allData_mjj: + logger.debug(" --> Importing %s", _data.GetName()) + out_ws.imp(_data, ROOT.RooFit.RecycleConflictNodes(), ROOT.RooFit.Silence()) + out_ws.imp(prod_pdf, ROOT.RooFit.RecycleConflictNodes(), ROOT.RooFit.Silence()) + out_ws.imp(prod_pdf_norm) + + # Output workspace name (full file path) + full_output_name = _build_output_name( + output_name=output_file, + proc=proc, + year=year, + cat=cat, + ) + logger.debug("Full output name: %s", full_output_name) + out_file = ROOT.TFile.Open(full_output_name, "RECREATE") + out_ws.Write() + out_file.Close() + + # Close input files + mgg_file.Close() + mjj_file.Close() + + logger.info("Created 2D workspace: %s", full_output_name) + + # Move the workspace to outdir_2D in Datacard directory + outpath = _move_workspace( + path=full_output_name, + ws_type=ws_type, + ) + logger.info("Moved workspace to: %s", outpath) + + +def create_empty_json(path: str) -> None: + """Create an empty JSON file at the given path + + Args: + path (str): Path to the JSON file + """ + + if os.path.exists(path): + logger.debug("JSON file already exists: %s", path) + return + + with open(path, "w", 
encoding="utf-8") as f: + json.dump({}, f, indent=4) + logger.info("Created empty JSON file: %s", path) + + +def remove_obs_from_json( + json_file: str, + obs: str, +) -> None: + """Remove the given observable from the JSON file + + Args: + json_file (str): Path to the JSON file + obs (str): Observable to remove (e.g. mgg, mjj) + """ + if not os.path.exists(json_file): + logger.error("JSON file does not exist: %s", json_file) + return + + with open(json_file, "r", encoding="utf-8") as f: + models = json.load(f) + + for cat in models: + models[cat] = {k: v for k, v in models[cat].items() if obs not in k.split("_")[0]} + + with open(json_file, "w", encoding="utf-8") as f: + json.dump(models, f, indent=4) + + +if __name__ == "__main__": + # Only parse arguments when run as a script + import argparse + + def _parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser(description="Collect models from fit outputs") + parser.add_argument( + "--json", + dest="json", + help="Input json file listing model names and paths.", + ) + parser.add_argument( + "--output", + dest="output", + help="Output file name for the combined model.", + ) + parser.add_argument( + "--mggVarName", + dest="mggVarName", + default="mass", + help="Variable name for mgg workspace (default: mass)", + ) + parser.add_argument( + "--mjjVarName", + dest="mjjVarName", + default="dijet_mass", + help="Variable name for mjj workspace (default: dijet_mass)", + ) + parser.add_argument( + "--wsType", + dest="ws_type", + default=None, + help="Workspace type (signal, bkg-res, bkg-nonres) (default: None)", + ) + parser.add_argument( + "--noClear", + dest="noClear", + default=False, + action="store_true", + help="Do not clear the outdir_2D directory before running", + ) + parser.add_argument( + "--logLevel", + dest="logLevel", + default="INFO", + help="Log level (default: INFO) (DEBUG, INFO, WARNING, ERROR, CRITICAL)", + ) + return parser.parse_args() + + args = _parse_args() + + 
collect_models( + json_file=args.json, + output_file=args.output, + mgg_var_name=args.mggVarName, + mjj_var_name=args.mjjVarName, + ws_type=args.ws_type, + no_clear=args.noClear, + log_level=args.logLevel, + ) diff --git a/tools/commonObjects.py b/tools/commonObjects.py index 0b5137b9..ecf57048 100644 --- a/tools/commonObjects.py +++ b/tools/commonObjects.py @@ -36,7 +36,9 @@ # Production modes and decay channel: for extract XS from combine productionModes = ['ggH','qqH','ttH','tHq','tHW','ggZH','WH','ZH','bbH'] -decayMode = 'hgg' +# decayMode = 'hgg' # original +# decayModes = ['hgg'] # for 1D fit +decayModes = ['hgg', 'hjj'] # for 2D fit # List of years years_to_process = ['2016','2017','2018','2022preEE','2022postEE'] @@ -46,7 +48,9 @@ inputNuisanceExtMap = {'scales':'','scalesCorr':'','smears':''} # Signal output WS objects outputWSName__ = "wsig" -outputWSObjectTitle__ = "hggpdfsmrel" +outputMggWSObjectTitle__ = "hggpdfsmrel" # previous variable name: outputWSObjectTitle__ +outputMjjWSObjectTitle__ = "hjjpdfsmrel" +output2DWSObjectTitle__ = "hggpdfsmrel2D" outputWSNuisanceTitle__ = "CMS_hgg_nuisance" #outputNuisanceExtMap = {'scales':'%sscale'%sqrts__,'scalesCorr':'%sscaleCorr'%sqrts__,'smears':'%ssmear'%sqrts__,'scalesGlobal':'%sscale'%sqrts__} outputNuisanceExtMap = {'scales':'','scalesCorr':'','smears':'','scalesGlobal':''}