diff --git a/RecoPPS/RPixEfficiencyTools/.gitignore b/RecoPPS/RPixEfficiencyTools/.gitignore index a87ca5ee0b11e..37beb76941b68 100644 --- a/RecoPPS/RPixEfficiencyTools/.gitignore +++ b/RecoPPS/RPixEfficiencyTools/.gitignore @@ -10,4 +10,6 @@ automation/crab_* *crab.log* *.root crab_mobrzut* -.__afs108D \ No newline at end of file +.__afs108D +crab_ctrl_* +crab_* diff --git a/RecoPPS/RPixEfficiencyTools/automation/.gitignore b/RecoPPS/RPixEfficiencyTools/automation/.gitignore index d97ce158aa4f9..77322882f76f7 100644 --- a/RecoPPS/RPixEfficiencyTools/automation/.gitignore +++ b/RecoPPS/RPixEfficiencyTools/automation/.gitignore @@ -2,4 +2,4 @@ crab_* *.log data_periods.txt pyvenv.cfg -*.txt \ No newline at end of file +*.txt diff --git a/RecoPPS/RPixEfficiencyTools/automation/CrabConfigTemplateForFirstModule.py b/RecoPPS/RPixEfficiencyTools/automation/CrabConfigTemplateForFirstModule.py deleted file mode 100644 index fc52edb764729..0000000000000 --- a/RecoPPS/RPixEfficiencyTools/automation/CrabConfigTemplateForFirstModule.py +++ /dev/null @@ -1,25 +0,0 @@ -from CRABClient.UserUtilities import config -config = config() -InputDataset="/Charmonium/Run2018B-12Nov2019_UL2018-v1/AOD" -GeometryFile="Geometry.VeryForwardGeometry.geometryRPFromDD_2018_cfi" - -config.General.transferOutputs = True -config.General.transferLogs = True - -config.JobType.scriptExe = 'wrapper.sh' -config.JobType.pluginName = 'Analysis' -config.JobType.psetName = '/afs/cern.ch/user/l/lkita/new_vers/CMSSW_11_3_2/src/RecoPPS/RPixEfficiencyTools/python/EfficiencyAnalysisDQMWorker_cfg.py' -config.JobType.outputFiles = ["tmp.root"] -config.JobType.pyCfgParams = ["sourceFileList=/afs/cern.ch/user/l/lkita/public/test.dat", "outputFileName=tmp.root"] -config.JobType.priority = 40 - -config.Data.outLFNDirBase = '/store/group/dpg_ctpps/comm_ctpps/pps_workflow' -config.Data.inputDataset = InputDataset -config.Data.publication = False -config.Data.inputDBS = 'global' -config.Data.splitting = 'LumiBased' 
-config.Data.unitsPerJob = 1000 -config.Data.runRange = '317080' -config.Data.lumiMask = "/afs/cern.ch/user/e/ecalgit/CMSSW_11_3_2/src/RecoPPS/RPixEfficiencyTools/InputFiles/test_mask.json" - -config.Site.storageSite = 'T2_CH_CERN' diff --git a/RecoPPS/RPixEfficiencyTools/automation/CrabConfigTemplateForSecondModule.py b/RecoPPS/RPixEfficiencyTools/automation/CrabConfigTemplateForSecondModule.py deleted file mode 100644 index a34b32c3dc9fe..0000000000000 --- a/RecoPPS/RPixEfficiencyTools/automation/CrabConfigTemplateForSecondModule.py +++ /dev/null @@ -1,26 +0,0 @@ -from CRABClient.UserUtilities import config -config = config() -InputDataset="/Charmonium/Run2018B-12Nov2019_UL2018-v1/AOD" -GeometryFile="Geometry.VeryForwardGeometry.geometryRPFromDD_2018_cfi" - -config.General.transferOutputs = True -config.General.transferLogs = True - -config.JobType.scriptExe = 'wrapper.sh' -config.JobType.pluginName = 'Analysis' -config.JobType.psetName = '/afs/cern.ch/user/l/lkita/CMSSW_11_3_2/src/RecoPPS/RPixEfficiencyTools/python/ReferenceAnalysisDQMWorker_cfg.py' -config.JobType.outputFiles = ["tmp.root"] -config.JobType.inputFiles = ["/eos/user/l/lkita/Charmonium/efficiency_reference.root"] -config.JobType.pyCfgParams = ["sourceFileList=/afs/cern.ch/user/l/lkita/CMSSW_11_3_2/src/RecoPPS/RPixEfficiencyTools/InputFiles/test.dat", "outputFileName=tmp.root", "efficiencyFileName=efficiency_reference.root"] -config.JobType.priority = 40 - -config.Data.outLFNDirBase = '/store/user/lkita' -config.Data.inputDataset = InputDataset -config.Data.publication = False -config.Data.inputDBS = 'global' -config.Data.splitting = 'LumiBased' -config.Data.unitsPerJob = 1000 -config.Data.runRange = '317080' -config.Data.lumiMask = "/afs/cern.ch/user/l/lkita/CMSSW_11_3_2/src/RecoPPS/RPixEfficiencyTools/InputFiles/test_mask.json" - -config.Site.storageSite = 'T3_CH_CERNBOX' \ No newline at end of file diff --git a/RecoPPS/RPixEfficiencyTools/automation/EfficiencyAnalysisEngine.py 
b/RecoPPS/RPixEfficiencyTools/automation/EfficiencyAnalysisEngine.py index 04a4254d9d05a..bfec95873461e 100755 --- a/RecoPPS/RPixEfficiencyTools/automation/EfficiencyAnalysisEngine.py +++ b/RecoPPS/RPixEfficiencyTools/automation/EfficiencyAnalysisEngine.py @@ -113,8 +113,8 @@ def set_status_after_first_worker_submission(task_status, operation_result): workflow= dataPeriod= """ - -storage_path = "/eos/user/l/lkita" + +storage_path = "/eos/user/m/mobrzut" def aggregate_files(path: str) -> str: if path[-1] != '/': @@ -138,6 +138,8 @@ def submit_task_to_condor(campaign, workflow, data_period): input_files_path = dir_name[0] executable = executable.replace("", aggregate_files(input_files_path) ) + # TODO: why is this hardcoded? + output_dir = "/afs/cern.ch/user/e/ecalgit/CMSSW_11_3_2/src/RecoPPS/RPixEfficiencyTools/OutputFiles/"+"/".join([campaign, workflow, data_period]) executable = executable.replace("", output_dir) executable = executable.replace("", campaign) diff --git a/RecoPPS/RPixEfficiencyTools/automation/README.md b/RecoPPS/RPixEfficiencyTools/automation/README.md new file mode 100644 index 0000000000000..55fbbc888ec84 --- /dev/null +++ b/RecoPPS/RPixEfficiencyTools/automation/README.md @@ -0,0 +1,31 @@ +# Automation module +The automation module contains code that is executed by Jenkins. +This module uses `automation_module` ([gitlab here]()) for submitting jobs to CRAB and HTCondor, checking the database status, etc. + +## Setup +1. Export the CRAB authentication key to a file with the `--out` option. +``` +ex. voms-proxy-init -voms cms -rfc -valid 192:00 --out ~/public/jenkins_proxy/jenkins.pem +``` + +## Structure +This folder contains: +- **CrabConfigs** for submitting the EA (Efficiency Analysis) and RA (Reference Analysis) Workers to CRAB. +- **Engine** The engine for . You can modify this engine to create any workflow with CRAB and HTCondor tasks. 
More about the engine can be found in the [documentation](https://indico.cern.ch/event/1075717/contributions/4523828/attachments/2312956/3951051/documentation.pdf) + +## Description of temporary setup: +- **TempSteps** contains Python scripts for manually testing single steps of EfficiencyAnalysisEngine + +**Running test setup** +``` +cmsenv +source TempSetup.sh +source /cvmfs/cms.cern.ch/common/crab-setup.sh +``` +## TODO/Noticed problems +- [ ] The `template` parameter is badly named; it should be renamed to `crab_config_file_path`. (Background: running the `submit_task_to_crab` method shows a confusing "no template" error in CrabWrapper.) + + + + + diff --git a/RecoPPS/RPixEfficiencyTools/automation/TempCrabConfigs/TempCrabConfigEAWorker.py b/RecoPPS/RPixEfficiencyTools/automation/TempCrabConfigs/TempCrabConfigEAWorker.py new file mode 100644 index 0000000000000..a114b79e1fde5 --- /dev/null +++ b/RecoPPS/RPixEfficiencyTools/automation/TempCrabConfigs/TempCrabConfigEAWorker.py @@ -0,0 +1,32 @@ +import CRABClient +from CRABClient.UserUtilities import config +config = config() + +InputDataset ="/EGamma/Run2018B-12Nov2019_UL2018-v2/AOD" +number = 26 + +config.General.transferOutputs = True +config.General.transferLogs = True + +config.General.requestName = f'mobrzut_test_EA_DQM_Worker_{number}' +config.General.workArea = '/afs/cern.ch/user/m/mobrzut/automation/environment_based_on_jenkins_script/pps_workflow_for_cmssw12/mobrzut_test_1/CMSSW_12_4_0/src/RecoPPS/RPixEfficiencyTools' # TODO: your path here + + + +config.JobType.pluginName = 'Analysis' +config.JobType.psetName = '/afs/cern.ch/user/m/mobrzut/automation/environment_based_on_jenkins_script/pps_workflow_for_cmssw12/mobrzut_test_1/CMSSW_12_4_0/src/RecoPPS/RPixEfficiencyTools/python/EfficiencyAnalysisDQMWorker_cfg.py' # TODO: your path here +config.JobType.pyCfgParams = ["sourceFileList=/afs/cern.ch/user/m/mobrzut/public/Era.dat", "outputFileName=tmp.root"] +config.Data.inputDataset = InputDataset + 
+config.Data.inputDBS = 'global' +config.Data.splitting = 'LumiBased' +# config.Data.splitting = 'Automatic' + +config.Data.unitsPerJob = 20 +config.Data.publication = False +config.Data.outLFNDirBase = '/store/group/dpg_ctpps/comm_ctpps/2018_PixelEfficiency' +config.Data.outputDatasetTag = f'CRAB3_tmobrzut_test_EA_DQM_Worker_{number}' +config.Data.runRange = '317080' + + +config.Site.storageSite = 'T2_CH_CERN' diff --git a/RecoPPS/RPixEfficiencyTools/automation/TempCrabEAWorkerEngine.py b/RecoPPS/RPixEfficiencyTools/automation/TempCrabEAWorkerEngine.py new file mode 100644 index 0000000000000..ae944d823d7a1 --- /dev/null +++ b/RecoPPS/RPixEfficiencyTools/automation/TempCrabEAWorkerEngine.py @@ -0,0 +1,111 @@ +from weakref import WeakKeyDictionary +import automation_control as ctrl +import argparse +import enum +import logging +from typing import Any, Type, Union +from os import listdir, walk, environ +from os.path import isfile, join + +logger = logging.getLogger("EfficiencyAnalysisLogger") +logger.setLevel(logging.DEBUG) + +ch = logging.FileHandler("EfficiencyAnalysisEngine.log") +ch.setLevel(logging.DEBUG) +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +ch.setFormatter(formatter) +logger.addHandler(ch) + +ch = logging.StreamHandler() +ch.setLevel(logging.INFO) +formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +ch.setFormatter(formatter) +logger.addHandler(ch) + +campaign=environ.get("CAMPAIGN") +workflow=environ.get("WORKFLOW") +dataset=environ.get("DATASET") +proxy=environ.get("PROXY") + +template_for_first_module = "TempCrabConfigs/TempCrabConfigEAWorker.py" # CrabConfigTemplateForFirstModule.py is deleted by this patch + +@ctrl.define_status_enum +class TaskStatusEnum(enum.Enum): + """ + Enum encoding the task statuses for the purpose of this automation workflow + """ + initialized = enum.auto() + duringFirstWorker = enum.auto() + waitingForFirstWorkerTransfer= enum.auto() + done = enum.auto() + + +@ctrl.decorate_with_enum(TaskStatusEnum) 
+class TaskStatus: + loop_id = 0.0 + condor_job_id = 0 + +def get_tasks_numbers_list(tasks_list_path): + with open(tasks_list_path) as tasks_list_file: + tasks_list_data = tasks_list_file.read() + # drop all whitespace (incl. the trailing newline) and skip empty entries + tasks_list = [task for task in "".join(tasks_list_data.split()).split(",") if task] + return tasks_list + + +def prepare_parser()->argparse.ArgumentParser: + parser = argparse.ArgumentParser(description= + """This is a script to run PPS Efficiency Analysis automation workflow""", formatter_class=argparse.RawTextHelpFormatter) + + parser.add_argument('-t', '--tasks_list', dest='tasks_list_path', help='path to file containing list of data periods', required=True) + return parser + + +def get_runs_range(data_period): + """MOCKED""" + return '317080' + + +def process_new_tasks(tasks_list_path, task_controller): + tasks_list = get_tasks_numbers_list(tasks_list_path) + tasks_list = set(tasks_list) + tasks_in_database = task_controller.getAllTasks().get_points() + tasks_in_database = set(map(lambda x: x['dataPeriod'], tasks_in_database)) + tasks_not_submited_yet = tasks_list-tasks_in_database + if tasks_not_submited_yet: + task_controller.submitTasks(tasks_not_submited_yet) + + +def submit_task_to_crab(campaign, workflow, data_period, dataset, template, proxy): + result = ctrl.submit_task_to_crab(campaign, workflow, data_period, get_runs_range(data_period), template, dataset, proxy) + + return result + + +def set_status_after_first_worker_submission(task_status, operation_result): + task_status.duringFirstWorker=1 + task_status.initialized=0 + task_status.loop_id+=1 + return task_status + + +storage_path = "/eos/user/m/mobrzut" + +TRANSITIONS_DICT = { + 'initialized': (submit_task_to_crab, 0, set_status_after_first_worker_submission, [dataset, template_for_first_module, proxy] ), + 'duringFirstWorker': (ctrl.check_if_crab_task_is_finished, True, TaskStatus.waitingForFirstWorkerTransfer, [proxy]), + 'waitingForFirstWorkerTransfer': 
(ctrl.is_crab_output_already_transfered, True, TaskStatus.done, [proxy]) + } + + + +if __name__ == '__main__': + parser = prepare_parser() + opts = parser.parse_args() + task_controller = ctrl.TaskCtrl.TaskControl(campaign=campaign, workflow=workflow, TaskStatusClass=TaskStatus) + process_new_tasks(opts.tasks_list_path, task_controller) + finite_state_machine = ctrl.FiniteStateMachine(TRANSITIONS_DICT) + finite_state_machine.process_tasks(task_controller, TaskStatusClass=TaskStatus) + + + \ No newline at end of file diff --git a/RecoPPS/RPixEfficiencyTools/automation/TempSetup.sh b/RecoPPS/RPixEfficiencyTools/automation/TempSetup.sh new file mode 100755 index 0000000000000..985cbefcd4eb0 --- /dev/null +++ b/RecoPPS/RPixEfficiencyTools/automation/TempSetup.sh @@ -0,0 +1,8 @@ +# This script sets up the environment variables. +# The full setup is done by the Jenkins script +# export WORKFLOW=pps_workflow_for_cmssw12 +# export CAMPAIGN=mobrzut_test_1 +# export DATASET=/EGamma/Run2018B-12Nov2019_UL2018-v2/AOD +# export PROXY=/afs/cern.ch/user/m/mobrzut/public/jenkins_proxy/jenkins.pem # should it be .x509 or .pem file +export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$(pwd)/automation_control" +source /cvmfs/cms.cern.ch/common/crab-setup.sh # TODO: why do I still need to run this manually? 
diff --git a/RecoPPS/RPixEfficiencyTools/automation/TempSteps/TempCrabIsFinishedStep.py b/RecoPPS/RPixEfficiencyTools/automation/TempSteps/TempCrabIsFinishedStep.py new file mode 100644 index 0000000000000..503077cc2be10 --- /dev/null +++ b/RecoPPS/RPixEfficiencyTools/automation/TempSteps/TempCrabIsFinishedStep.py @@ -0,0 +1,23 @@ +#################################################### + +# Boilerplate code for importing automation_control from the parent directory :SCREAM: +import sys +import os +current = os.path.dirname(os.path.realpath(__file__)) +parent = os.path.dirname(current) +sys.path.append(parent) + +###################################################### + +from urllib3 import proxy_from_url +import automation_control as ctrl + +number = 37 + +campaign = f'ctrl_camp{number}' +workflow = f'crtl_workflow{number}' +data_period = 'foo_36' # todo use correct dataperiod number +proxy = '/afs/cern.ch/user/m/mobrzut/public/jenkins_proxy/jenkins.pem' + + +ctrl.check_if_crab_task_is_finished(campaign, workflow, data_period, proxy) \ No newline at end of file diff --git a/RecoPPS/RPixEfficiencyTools/automation/TempSteps/TempCrabSubmissionStep.py b/RecoPPS/RPixEfficiencyTools/automation/TempSteps/TempCrabSubmissionStep.py new file mode 100644 index 0000000000000..6e31e50684480 --- /dev/null +++ b/RecoPPS/RPixEfficiencyTools/automation/TempSteps/TempCrabSubmissionStep.py @@ -0,0 +1,30 @@ +#################################################### + +# Boilerplate code for importing automation_control from the parent directory :SCREAM: +import sys +import os +current = os.path.dirname(os.path.realpath(__file__)) +parent = os.path.dirname(current) +sys.path.append(parent) + +###################################################### + +from urllib3 import proxy_from_url +import automation_control as ctrl + +number =41 +campaign = f'pps_workflow_for_cmssw12' +workflow = f'mobrzut_test_1' +data_period = '317081' # todo use correct dataperiod number +# data_period = 'foo1000' # todo use 
correct dataperiod number + +template = '/afs/cern.ch/user/m/mobrzut/automation/environment_based_on_jenkins_script/pps_workflow_for_cmssw12/mobrzut_test_1/CMSSW_12_4_0/src/RecoPPS/RPixEfficiencyTools/automation/TempCrabConfigs/TempCrabConfigEAWorker.py' +dataset = '/EGamma/Run2018B-12Nov2019_UL2018-v2/AOD' +proxy = '/afs/cern.ch/user/m/mobrzut/public/jenkins_proxy/jenkins.pem' + +def get_runs_range(data_period): + """MOCKED""" + return '317081' + + +ctrl.submit_task_to_crab(campaign, workflow, data_period, get_runs_range(data_period), template, dataset, proxy) \ No newline at end of file