cms-sw · cmsbuild · Sep 21, 2021 · Sep 14, 2021 · Sep 14, 2021 · Sep 14, 2021
diff --git a/Configuration/PyReleaseValidation/python/MatrixInjector.py b/Configuration/PyReleaseValidation/python/MatrixInjector.py
@@ -65,6 +65,25 @@ def __init__(self,opt,mode='init',options=''):
         if(opt.batchName):
             self.batchName = '__'+opt.batchName+'-'+self.batchTime
 
+        ####################################
+        # Checking and setting up GPU attributes
+        ####################################
+        # Mandatory
+        self.RequiresGPU = opt.RequiresGPU
+        if self.RequiresGPU not in ('forbidden','optional','required'):
+            print('RequiresGPU must be forbidden, optional, required. Now, set to forbidden.')
+            self.RequiresGPU = 'forbidden'
+        if self.RequiresGPU == 'optional':
+            print('Optional GPU is turned off for RelVals. Now, changing it to forbidden')
+            self.RequiresGPU = 'forbidden'
-        if self.RequiresGPU == 'optional':
-            print('Optional GPU is turned off for RelVals. Now, changing it to forbidden')
-            self.RequiresGPU = 'forbidden'
-        if self.RequiresGPU == 'optional':
-            print('Optional GPU is turned off for RelVals. Now, changing it to forbidden')
-            self.RequiresGPU = 'forbidden'
+        self.GPUMemoryMB = opt.GPUMemoryMB
+        self.CUDACapabilities = opt.CUDACapabilities.split(',')
+        self.CUDARuntime = opt.CUDARuntime
+        # optional
+        self.GPUName = opt.GPUName
+        self.CUDADriverVersion = opt.CUDADriverVersion
+        self.CUDARuntimeVersion = opt.CUDARuntimeVersion
+
         # WMagent url
         if not self.wmagent:
             # Overwrite with env variable
@@ -180,8 +199,24 @@ def __init__(self,opt,mode='init',options=''):
             "nowmIO": {},
             "Multicore" : opt.nThreads,                       # this is the per-taskchain Multicore; it's the default assigned to a task if it has no value specified 
             "EventStreams": self.numberOfStreams,
-            "KeepOutput" : False
+            "KeepOutput" : False,
+            "RequiresGPU" : None,
+            "GPUParams": None
+            }
+        self.defaultGPUParams={
+            "GPUMemoryMB": self.GPUMemoryMB,
+            "CUDACapabilities": self.CUDACapabilities,
+            "CUDARuntime": self.CUDARuntime
             }
+        # Optional GPU parameters: each one is merged into defaultGPUParams only when its option value is truthy (non-empty).
+        self.dictGPUName={"GPUName": self.GPUName}
+        if self.GPUName: self.defaultGPUParams.update(self.dictGPUName)
+
+        self.dictCUDADriverVersion={"CUDADriverVersion": self.CUDADriverVersion}
+        if self.CUDADriverVersion: self.defaultGPUParams.update(self.dictCUDADriverVersion)
+
+        self.dictCUDARuntimeVersion={"CUDARuntimeVersion": self.CUDARuntimeVersion}
+        if self.CUDARuntimeVersion: self.defaultGPUParams.update(self.dictCUDARuntimeVersion)
 
         self.chainDicts={}
 
@@ -408,6 +443,9 @@ def prepare(self, mReader, directories, mode='init'):
                                     if setPrimaryDs:
                                         chainDict['nowmTasklist'][-1]['PrimaryDataset']=setPrimaryDs
                                 nextHasDSInput=None
+                                if 'GPU' in step and self.RequiresGPU == 'required':
+                                    chainDict['nowmTasklist'][-1]['RequiresGPU'] = self.RequiresGPU
+                                    chainDict['nowmTasklist'][-1]['GPUParams']=json.dumps(self.defaultGPUParams)
-                                if 'GPU' in step and self.RequiresGPU == 'required':
-                                    chainDict['nowmTasklist'][-1]['RequiresGPU'] = self.RequiresGPU
-                                    chainDict['nowmTasklist'][-1]['GPUParams']=json.dumps(self.defaultGPUParams)
+                                if self.RequiresGPU != 'forbidden':
+                                    chainDict['nowmTasklist'][-1]['RequiresGPU'] = self.RequiresGPU
+                                    chainDict['nowmTasklist'][-1]['GPUParams']=json.dumps(self.defaultGPUParams)
-                                if 'GPU' in step and self.RequiresGPU == 'required':
-                                    chainDict['nowmTasklist'][-1]['RequiresGPU'] = self.RequiresGPU
-                                    chainDict['nowmTasklist'][-1]['GPUParams']=json.dumps(self.defaultGPUParams)
+                                if self.RequiresGPU != 'forbidden':
+                                    chainDict['nowmTasklist'][-1]['RequiresGPU'] = self.RequiresGPU
+                                    chainDict['nowmTasklist'][-1]['GPUParams']=json.dumps(self.defaultGPUParams)
                             else:
                                 #not first step and no inputDS
                                 chainDict['nowmTasklist'].append(copy.deepcopy(self.defaultTask))
@@ -420,6 +458,9 @@ def prepare(self, mReader, directories, mode='init'):
                                     chainDict['nowmTasklist'][-1]['LumisPerJob']=splitForThisWf
                                 if step in wmsplit:
                                     chainDict['nowmTasklist'][-1]['LumisPerJob']=wmsplit[step]
+                                if 'GPU' in step and self.RequiresGPU == 'required':
+                                    chainDict['nowmTasklist'][-1]['RequiresGPU'] = self.RequiresGPU
+                                    chainDict['nowmTasklist'][-1]['GPUParams']=json.dumps(self.defaultGPUParams)
 
                             # change LumisPerJob for Hadronizer steps. 
                             if 'Hadronizer' in step: 

diff --git a/Configuration/PyReleaseValidation/scripts/runTheMatrix.py b/Configuration/PyReleaseValidation/scripts/runTheMatrix.py
@@ -120,26 +120,31 @@ def runSelected(opt):
                       dest='memoryOffset',
                       default=3000
                      )
+
     parser.add_option('--addMemPerCore',
                       help='increase of memory per each n > 1 core:  memory(n_core) = memoryOffset + (n_core-1) * memPerCore',
                       dest='memPerCore',
                       default=1500
                      )
+
     parser.add_option('-j','--nproc',
                       help='number of processes. 0 Will use 4 processes, not execute anything but create the wfs',
                       dest='nProcs',
                       default=4
                      )
+
     parser.add_option('-t','--nThreads',
                       help='number of threads per process to use in cmsRun.',
                       dest='nThreads',
                       default=1
                      )
+
     parser.add_option('--nStreams',
                       help='number of streams to use in cmsRun.',
                       dest='nStreams',
                       default=0
                      )
+
     parser.add_option('--numberEventsInLuminosityBlock',
                       help='number of events in a luminosity block',
                       dest='numberEventsInLuminosityBlock',
@@ -152,119 +157,142 @@ def runSelected(opt):
                       default=False,
                       action='store_true'
                       )
+
     parser.add_option('-e','--extended',
                       help='Show details of workflows, used with --show',
                       dest='extended',
                       default=False,
                       action='store_true'
                       )
+
     parser.add_option('-s','--selected',
                       help='Run a pre-defined selected matrix of wf. Deprecated, please use -l limited',
                       dest='restricted',
                       default=False,
                       action='store_true'
                       )
+
     parser.add_option('-l','--list',
-                     help='Coma separated list of workflow to be shown or ran. Possible keys are also '+str(predefinedSet.keys())+'. and wild card like muon, or mc',
+                     help='Comma separated list of workflow to be shown or ran. Possible keys are also '+str(predefinedSet.keys())+'. and wild card like muon, or mc',
                      dest='testList',
                      default=None
                      )
+
     parser.add_option('-r','--raw',
                       help='Temporary dump the .txt needed for prodAgent interface. To be discontinued soon. Argument must be the name of the set (standard, pileup,...)',
                       dest='raw'
                       )
+
     parser.add_option('-i','--useInput',
-                      help='Use recyling where available. Either all, or a coma separated list of wf number.',
+                      help='Use recycling where available. Either all, or a comma separated list of wf number.',
                       dest='useInput',
                       default=None
                       )
+
     parser.add_option('-w','--what',
                       help='Specify the set to be used. Argument must be the name of a set (standard, pileup,...) or multiple sets separated by commas (--what standard,pileup )',
                       dest='what',
                       default='all'
                       )
+
     parser.add_option('--step1',
                       help='Used with --raw. Limit the production to step1',
                       dest='step1Only',
                       default=False
                       )
+
     parser.add_option('--maxSteps',
                       help='Only run maximum on maxSteps. Used when we are only interested in first n steps.',
                       dest='maxSteps',
                       default=9999,
                       type="int"
                       )
+
     parser.add_option('--fromScratch',
-                      help='Coma separated list of wf to be run without recycling. all is not supported as default.',
+                      help='Comma separated list of wf to be run without recycling. all is not supported as default.',
                       dest='fromScratch',
                       default=None
                        )
+
     parser.add_option('--refRelease',
                       help='Allow to modify the recycling dataset version',
                       dest='refRel',
                       default=None
                       )
+
     parser.add_option('--wmcontrol',
                       help='Create the workflows for injection to WMAgent. In the WORKING. -wmcontrol init will create the the workflows, -wmcontrol test will dryRun a test, -wmcontrol submit will submit to wmagent',
                       choices=['init','test','submit','force'],
                       dest='wmcontrol',
                       default=None,
                       )
+
     parser.add_option('--revertDqmio',
                       help='When submitting workflows to wmcontrol, force DQM outout to use pool and not DQMIO',
                       choices=['yes','no'],
                       dest='revertDqmio',
                       default='no',
                       )
+
     parser.add_option('--optionswm',
                       help='Specify a few things for wm injection',
                       default='',
                       dest='wmoptions')
+
     parser.add_option('--keep',
-                      help='allow to specify for which coma separated steps the output is needed',
+                      help='allow to specify for which comma separated steps the output is needed',
                       default=None)
+
     parser.add_option('--label',
                       help='allow to give a special label to the output dataset name',
                       default='')
+
     parser.add_option('--command',
                       help='provide a way to add additional command to all of the cmsDriver commands in the matrix',
                       dest='command',
                       action='append',
                       default=None
                       )
+
     parser.add_option('--apply',
-                      help='allow to use the --command only for 1 coma separeated',
+                      help='allow to use the --command only for 1 comma separated',
                       dest='apply',
                       default=None)
+
     parser.add_option('--workflow',
                       help='define a workflow to be created or altered from the matrix',
                       action='append',
                       dest='workflow',
                       default=None
                       )
+
     parser.add_option('--dryRun',
                       help='do not run the wf at all',
                       action='store_true',
                       dest='dryRun',
                       default=False
                       )
+
     parser.add_option('--testbed',
                       help='workflow injection to cmswebtest (you need dedicated rqmgr account)',
                       dest='testbed',
                       default=False,
                       action='store_true'
                       )
+
     parser.add_option('--noCafVeto',
                       help='Run from any source, ignoring the CAF label',
                       dest='cafVeto',
                       default=True,
                       action='store_false'
                       )
+
     parser.add_option('--overWrite',
                       help='Change the content of a step for another. List of pairs.',
                       dest='overWrite',
                       default=None
                       )
+
     parser.add_option('--noRun',
                       help='Remove all run list selection from wfs',
                       dest='noRun',
@@ -294,6 +322,7 @@ def runSelected(opt):
                       dest='dasSites',
                       default='T2_CH_CERN',
                       action='store')
+
     parser.add_option('--interactive',
                       help="Open the Matrix interactive shell",
                       action='store_true',
@@ -305,6 +334,47 @@ def runSelected(opt):
                       default=None,
                       action='store')
 
+    parser.add_option('--gpu',
+                      help='Use GPU workflow setup if available',
+                      dest='gpuEnable',
+                      default=False,
+                      action='store_true')
+
+    parser.add_option('--RequiresGPU',
+                      help='if GPU is required or not: forbidden (default, CPU-only), optional, required. For relvals, the GPU option will be turned off for optional.',
+                      dest='RequiresGPU',
+                      default='forbidden')
+
+    parser.add_option('--GPUMemoryMB',
+                      help='to specify GPU memory. Default = 8000 MB (for RequiresGPU = required).',
+                      dest='GPUMemoryMB',
+                      default=8000)
+
+    parser.add_option('--CUDACapabilities',
+                      help='to specify CUDA capabilities. Default = 6.0,6.1,6.2,7.0,7.2,7.5 (for RequiresGPU = required). Use comma to identify various CUDACapabilities',
+                      dest='CUDACapabilities',
+                      default='6.0,6.1,6.2,7.0,7.2,7.5')
+
+    parser.add_option('--CUDARuntime',
+                      help='to specify major and minor CUDA runtime used to build the application. Default = 11.2 (for RequiresGPU = required). FIX ME TO MATCH WITH CMSSW.',
+                      dest='CUDARuntime',
+                      default='11.2')
+
+    parser.add_option('--GPUName',
+                      help='to specify GPU class. This is an optional parameter.',
+                      dest='GPUName',
+                      default='')
+
+    parser.add_option('--CUDADriverVersion',
+                      help='to specify CUDA driver version. This is an optional parameter.',
+                      dest='CUDADriverVersion',
+                      default='')
+
+    parser.add_option('--CUDARuntimeVersion',
+                      help='to specify CUDA runtime version. This is an optional parameter.',
+                      dest='CUDARuntimeVersion',
+                      default='')
+
     opt,args = parser.parse_args()
     if opt.command: opt.command = ' '.join(opt.command)
     os.environ["CMSSW_DAS_QUERY_SITES"]=opt.dasSites
@@ -346,8 +416,6 @@ def stepOrIndex(s):
     if opt.keep:
         opt.keep=map(stepOrIndex,opt.keep.split(','))
 
-
-
     if opt.testList:
         testList=[]
         for entry in opt.testList.split(','):
@@ -372,9 +440,11 @@ def stepOrIndex(s):
     if opt.nProcs: opt.nProcs=int(opt.nProcs)
     if opt.nThreads: opt.nThreads=int(opt.nThreads)
     if opt.nStreams: opt.nStreams=int(opt.nStreams)
-    if (opt.numberEventsInLuminosityBlock): opt.numberEventsInLuminosityBlock=int(opt.numberEventsInLuminosityBlock)
-    if (opt.memoryOffset): opt.memoryOffset=int(opt.memoryOffset)
-    if (opt.memPerCore): opt.memPerCore=int(opt.memPerCore)
+    if opt.numberEventsInLuminosityBlock: opt.numberEventsInLuminosityBlock=int(opt.numberEventsInLuminosityBlock)
+    if opt.memoryOffset: opt.memoryOffset=int(opt.memoryOffset)
+    if opt.memPerCore: opt.memPerCore=int(opt.memPerCore)
+    if opt.GPUMemoryMB: opt.GPUMemoryMB=int(opt.GPUMemoryMB)
+    if opt.gpuEnable: opt.RequiresGPU="required"
 
     if opt.wmcontrol:
         performInjectionOptionTest(opt)