Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion RecoPPS/RPixEfficiencyTools/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ automation/crab_*
*crab.log*
*.root
crab_mobrzut*
.__afs108D
.__afs108D
crab_ctrl_*
crab_*
2 changes: 1 addition & 1 deletion RecoPPS/RPixEfficiencyTools/automation/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ crab_*
*.log
data_periods.txt
pyvenv.cfg
*.txt
*.txt

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ def set_status_after_first_worker_submission(task_status, operation_result):
workflow=<workflow>
dataPeriod=<data_period>
"""

storage_path = "/eos/user/l/lkita"
storage_path = "/eos/user/m/mobrzut"

def aggregate_files(path: str) -> str:
if path[-1] != '/':
Expand All @@ -138,6 +138,8 @@ def submit_task_to_condor(campaign, workflow, data_period):
input_files_path = dir_name[0]

executable = executable.replace("<input_files>", aggregate_files(input_files_path) )
# why this is hardcoded??

output_dir = "/afs/cern.ch/user/e/ecalgit/CMSSW_11_3_2/src/RecoPPS/RPixEfficiencyTools/OutputFiles/"+"/".join([campaign, workflow, data_period])
executable = executable.replace("<output_dir>", output_dir)
executable = executable.replace("<campaign>", campaign)
Expand Down
31 changes: 31 additions & 0 deletions RecoPPS/RPixEfficiencyTools/automation/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Automation module
Automation module contains code which is executed by Jenkins.
This module uses `automation_module` ([gitlab here]()) for submitting jobs to CRAB, HTCondor, checking the database status etc.

## Setup
1. Export the CRAB authentication key to a file using the `--out` option.
```
ex. voms-proxy-init -voms cms -rfc -valid 192:00 --out ~/public/jenkins_proxy/jenkins.pem
```

## Structure
This folder contains:
- **CrabConfigs** for submitting the EA (Efficiency Analysis) and RA (Reference Analysis) Workers to the CRAB.
- **Engine** The engine for this automation workflow. You can modify this engine to create any workflow with CRAB and HTCondor tasks. More about the engine can be found in the [documentation](https://indico.cern.ch/event/1075717/contributions/4523828/attachments/2312956/3951051/documentation.pdf).

## Description of temporary setup:
- **TempSteps** contains python scripts for manual testing of single steps from EfficiencyAnalysisEngine

**Running test setup**
```
cmsenv
source TempSetup.sh
source /cvmfs/cms.cern.ch/common/crab-setup.sh
```
## TODO / Noticed problems
- [ ] The `template` parameter is poorly named; it should be renamed to `crab_config_file_path`. (Background: running the `submit_task_to_crab` method produces a "no template" error in CrabWrapper, which is confusing.)





Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# CRAB configuration for a test submission of the Efficiency Analysis DQM worker.
import CRABClient
from CRABClient.UserUtilities import config
# Rebinds the name `config` from the imported factory to the configuration object it returns.
config = config()

# Dataset assigned to config.Data.inputDataset below.
InputDataset ="/EGamma/Run2018B-12Nov2019_UL2018-v2/AOD"
# Suffix embedded in the request name and in the output dataset tag below.
number = 26

config.General.transferOutputs = True
config.General.transferLogs = True

config.General.requestName = f'mobrzut_test_EA_DQM_Worker_{number}'
config.General.workArea = '/afs/cern.ch/user/m/mobrzut/automation/environment_based_on_jenkins_script/pps_workflow_for_cmssw12/mobrzut_test_1/CMSSW_12_4_0/src/RecoPPS/RPixEfficiencyTools' # TODO: your path here



config.JobType.pluginName = 'Analysis'
config.JobType.psetName = '/afs/cern.ch/user/m/mobrzut/automation/environment_based_on_jenkins_script/pps_workflow_for_cmssw12/mobrzut_test_1/CMSSW_12_4_0/src/RecoPPS/RPixEfficiencyTools/python/EfficiencyAnalysisDQMWorker_cfg.py' # TODO: your path here
# Parameters handed to the pset named above, as "name=value" strings.
config.JobType.pyCfgParams = ["sourceFileList=/afs/cern.ch/user/m/mobrzut/public/Era.dat", "outputFileName=tmp.root"]
config.Data.inputDataset = InputDataset

config.Data.inputDBS = 'global'
config.Data.splitting = 'LumiBased'
# Alternative splitting mode, currently disabled:
# config.Data.splitting = 'Automatic'

config.Data.unitsPerJob = 20
config.Data.publication = False
config.Data.outLFNDirBase = '/store/group/dpg_ctpps/comm_ctpps/2018_PixelEfficiency'
# NOTE(review): the tag starts with 'CRAB3_tmobrzut' while the request name uses 'mobrzut' — confirm the extra 't' is intended.
config.Data.outputDatasetTag = f'CRAB3_tmobrzut_test_EA_DQM_Worker_{number}'
# Single hard-coded run; the engine's mocked get_runs_range returns the same value.
config.Data.runRange = '317080'


config.Site.storageSite = 'T2_CH_CERN'
111 changes: 111 additions & 0 deletions RecoPPS/RPixEfficiencyTools/automation/TempCrabEAWorkerEngine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from weakref import WeakKeyDictionary
import automation_control as ctrl
import argparse
import enum
import logging
from typing import Any, Type, Union
from os import listdir, walk, environ
from os.path import isfile, join

# Logger for this script; it accepts everything from DEBUG upwards and lets
# the two handlers below apply their own thresholds.
logger = logging.getLogger("EfficiencyAnalysisLogger")
logger.setLevel(logging.DEBUG)

# File handler: writes DEBUG and above to EfficiencyAnalysisEngine.log.
ch = logging.FileHandler("EfficiencyAnalysisEngine.log")
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)

# Stream handler: emits INFO and above, using the same message format.
# The names `ch` and `formatter` are reused, so after this point they refer
# to the stream handler and its formatter.
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)

# Run parameters are read from environment variables; each value is None
# when the corresponding variable is not set.
campaign=environ.get("CAMPAIGN")
workflow=environ.get("WORKFLOW")
dataset=environ.get("DATASET")
proxy=environ.get("PROXY")

# CRAB config template listed in the 'initialized' entry of TRANSITIONS_DICT.
template_for_first_module = "CrabConfigTemplateForFirstModule.py"

@ctrl.define_status_enum
class TaskStatusEnum(enum.Enum):
    """Statuses a task can have in this automation workflow.

    Every member gets a plain integer value from ``enum.auto()``. The
    members must not be followed by a trailing comma: ``enum.auto(),``
    is a one-element tuple, which would give that member a tuple value
    instead of an integer and make the members inconsistent.
    """
    initialized = enum.auto()
    duringFirstWorker = enum.auto()
    waitingForFirstWorkerTransfer = enum.auto()
    done = enum.auto()


# Status record for a single task. The decorator receives TaskStatusEnum;
# NOTE(review): it presumably adds one attribute per enum member (the code
# below assigns `initialized` and `duringFirstWorker`) — confirm in automation_control.
@ctrl.decorate_with_enum(TaskStatusEnum)
class TaskStatus:
    # Incremented by set_status_after_first_worker_submission.
    loop_id = 0.0
    # Not referenced anywhere else in this file.
    condor_job_id = 0

def get_tasks_numbers_list(tasks_list_path):
    """Read a comma-separated list of task identifiers from a text file.

    All whitespace (spaces, tabs, newlines) is removed from every entry,
    so a trailing newline or a list spread over several lines does not
    leak into the identifiers. Empty entries, e.g. from a trailing comma
    or an empty file, are dropped.

    Args:
        tasks_list_path: path to the file containing the list.

    Returns:
        The identifiers as a list of strings, in file order.
    """
    # Use a separate name for the file object instead of shadowing the path.
    with open(tasks_list_path) as tasks_file:
        raw_text = tasks_file.read()
    # str.split() without arguments splits on any whitespace run, so joining
    # the pieces removes every whitespace character from the entry.
    cleaned_entries = ("".join(entry.split()) for entry in raw_text.split(","))
    return [entry for entry in cleaned_entries if entry]


def prepare_parser() -> argparse.ArgumentParser:
    """Build the command-line parser of the automation workflow script.

    Returns:
        A parser with one required option, ``-t`` / ``--tasks_list``,
        whose value is stored under ``tasks_list_path``.
    """
    description = """This is a script to run PPS Efficiency Analysis automation workflow"""
    cli = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    cli.add_argument(
        '-t',
        '--tasks_list',
        required=True,
        dest='tasks_list_path',
        help='path to file containing list of data periods',
    )
    return cli


def get_runs_range(data_period):
    """Return the run range for a data period (MOCKED).

    The argument is currently ignored: the same hard-coded run is
    returned for every data period.
    """
    mocked_runs_range = '317080'
    return mocked_runs_range


def process_new_tasks(tasks_list_path, task_controller):
    """Submit the tasks from the list file that are not in the database yet.

    Args:
        tasks_list_path: path to the file read by get_tasks_numbers_list.
        task_controller: object providing ``getAllTasks()`` (whose result
            offers ``get_points()``) and ``submitTasks()``.
    """
    requested = set(get_tasks_numbers_list(tasks_list_path))
    # Each point returned by the controller carries its data period under 'dataPeriod'.
    already_known = {point['dataPeriod'] for point in task_controller.getAllTasks().get_points()}
    pending = requested - already_known
    if not pending:
        return
    task_controller.submitTasks(pending)


def submit_task_to_crab(campaign, workflow, data_period, dataset, template, proxy):
    """Submit one task to CRAB through ``ctrl.submit_task_to_crab``.

    The run range is looked up with get_runs_range. Note that this wrapper
    takes ``dataset`` before ``template``, while the underlying call
    receives ``template`` before ``dataset``.

    Returns:
        Whatever ``ctrl.submit_task_to_crab`` returns.
    """
    runs_range = get_runs_range(data_period)
    return ctrl.submit_task_to_crab(campaign, workflow, data_period, runs_range, template, dataset, proxy)


def set_status_after_first_worker_submission(task_status, operation_result):
    """Mark a task as being processed by the first worker.

    Args:
        task_status: status object updated in place; it must expose
            ``duringFirstWorker``, ``initialized`` and ``loop_id``.
        operation_result: not used by this function.

    Returns:
        The same ``task_status`` object, after modification.
    """
    # NOTE(review): keep the assignment order — these attributes may be managed
    # by ctrl.decorate_with_enum, in which case the order could matter; confirm.
    task_status.duringFirstWorker=1
    task_status.initialized=0
    task_status.loop_id+=1
    return task_status


# NOTE(review): storage_path is not referenced anywhere else in this file — confirm it is still needed.
storage_path = "/eos/user/m/mobrzut"

# Transition table handed to ctrl.FiniteStateMachine, keyed by status name.
# NOTE(review): each tuple presumably holds (action, expected result of the
# action, next status or status-updating callback, extra arguments for the
# action) — confirm against ctrl.FiniteStateMachine.
TRANSITIONS_DICT = {
    'initialized': (submit_task_to_crab, 0, set_status_after_first_worker_submission, [dataset, template_for_first_module, proxy] ),
    'duringFirstWorker': (ctrl.check_if_crab_task_is_finished, True, TaskStatus.waitingForFirstWorkerTransfer, [proxy]),
    'waitingForFirstWorkerTransfer': (ctrl.is_crab_output_already_transfered, True, TaskStatus.done, [proxy])
}



# Script entry point: parse the command line, submit tasks that are not yet
# in the database, then hand all tasks to the finite state machine.
if __name__ == '__main__':
    parser = prepare_parser()
    opts = parser.parse_args()
    # campaign and workflow come from the CAMPAIGN / WORKFLOW environment variables.
    task_controller = ctrl.TaskCtrl.TaskControl(campaign=campaign, workflow=workflow, TaskStatusClass=TaskStatus)
    process_new_tasks(opts.tasks_list_path, task_controller)
    finite_state_machine = ctrl.FiniteStateMachine(TRANSITIONS_DICT)
    finite_state_machine.process_tasks(task_controller, TaskStatusClass=TaskStatus)



8 changes: 8 additions & 0 deletions RecoPPS/RPixEfficiencyTools/automation/TempSetup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# This script sets up environment variables for manual test runs.
# The full setup is done by the Jenkins script.
# export WORKFLOW=pps_workflow_for_cmssw12
# export CAMPAIGN=mobrzut_test_1
# export DATASET=/EGamma/Run2018B-12Nov2019_UL2018-v2/AOD
# export PROXY=/afs/cern.ch/user/m/mobrzut/public/jenkins_proxy/jenkins.pem # should it be .x509 or .pem file

# Append the local automation_control directory to PYTHONPATH.
# The `$` must not be backslash-escaped here: `\$PYTHONPATH` stores the literal
# text "$PYTHONPATH" instead of the current value. The `${VAR:+...}` form also
# avoids a leading empty entry when PYTHONPATH was previously unset.
export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$(pwd)/automation_control"
source /cvmfs/cms.cern.ch/common/crab-setup.sh # TODO: why does this still need to be run manually?
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
####################################################

# Boilerplate for importing automation_control from the parent directory:
# the parent of this file's directory is appended to sys.path.
import sys
import os
current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)
sys.path.append(parent)

######################################################

# NOTE(review): proxy_from_url is not used in this script — the import looks accidental; confirm and remove.
from urllib3 import proxy_from_url
import automation_control as ctrl

# Manual test step: query ctrl.check_if_crab_task_is_finished for one
# hard-coded task.
number = 37

campaign = f'ctrl_camp{number}'
workflow = f'crtl_workflow{number}'
data_period = 'foo_36' # TODO: use the correct data period number
proxy = '/afs/cern.ch/user/m/mobrzut/public/jenkins_proxy/jenkins.pem'


ctrl.check_if_crab_task_is_finished(campaign, workflow, data_period, proxy)
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
####################################################

# Boilerplate for importing automation_control from the parent directory:
# the parent of this file's directory is appended to sys.path.
import sys
import os
current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)
sys.path.append(parent)

######################################################

# NOTE(review): proxy_from_url is not used in this script — the import looks accidental; confirm and remove.
from urllib3 import proxy_from_url
import automation_control as ctrl

# Manual test step: submit one hard-coded task with ctrl.submit_task_to_crab.
# `number` is not used below.
number =41
# NOTE(review): TempSetup.sh lists WORKFLOW=pps_workflow_for_cmssw12 and
# CAMPAIGN=mobrzut_test_1, i.e. the opposite assignment — confirm these two
# values are not swapped.
campaign = f'pps_workflow_for_cmssw12'
workflow = f'mobrzut_test_1'
data_period = '317081' # TODO: use the correct data period number
# data_period = 'foo1000' # TODO: use the correct data period number

template = '/afs/cern.ch/user/m/mobrzut/automation/environment_based_on_jenkins_script/pps_workflow_for_cmssw12/mobrzut_test_1/CMSSW_12_4_0/src/RecoPPS/RPixEfficiencyTools/automation/TempCrabConfigs/TempCrabConfigEAWorker.py'
dataset = '/EGamma/Run2018B-12Nov2019_UL2018-v2/AOD'
proxy = '/afs/cern.ch/user/m/mobrzut/public/jenkins_proxy/jenkins.pem'

def get_runs_range(data_period):
    """Return the run range for a data period (MOCKED).

    The argument is ignored; the same hard-coded run is always returned.
    """
    return '317081'


ctrl.submit_task_to_crab(campaign, workflow, data_period, get_runs_range(data_period), template, dataset, proxy)