Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions Configuration/Applications/python/ConfigBuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ def _datasetname_and_maxfiles(entry):
self.process.source.fileNames.append(self._options.dirin+entry)
if self._options.secondfilein:
if not hasattr(self.process.source,"secondaryFileNames"):
raise Exception("--secondfilein not compatible with "+self._options.filetype+"input type")
raise Exception("--secondfilein not compatible with "+self._options.filetype+" input type")
for entry in self._options.secondfilein.split(','):
print("entry",entry)
if entry.startswith("filelist:"):
Expand All @@ -459,8 +459,8 @@ def _datasetname_and_maxfiles(entry):
filesFromOption(self)
if self._options.filetype == "EDM_RNTUPLE":
self.process.source=cms.Source("RNTupleTempSource",
fileNames = cms.untracked.vstring())#, 2ndary not supported yet
#secondaryFileNames= cms.untracked.vstring())
fileNames = cms.untracked.vstring(),
secondaryFileNames= cms.untracked.vstring())
filesFromOption(self)
elif self._options.filetype == "DAT":
self.process.source=cms.Source("NewEventStreamFileReader",fileNames = cms.untracked.vstring())
Expand Down Expand Up @@ -817,6 +817,8 @@ def addStandardSequences(self):
mixingDict['F']=(filesFromList(self._options.pileup_input[9:]))[0]
else:
mixingDict['F']=self._options.pileup_input.split(',')

self.customizeMixingModuleForRNTuple(mixingDict.get('F', []), 'mix')
specialization=defineMixing(mixingDict)
for command in specialization:
self.executeAndRemember(command)
Expand Down Expand Up @@ -872,6 +874,20 @@ def addStandardSequences(self):
else:
self._options.inputCommands='keep *_randomEngineStateProducer_*_*,'

def customizeMixingModuleForRNTuple(self, files, mixingModuleLabel):
    """Switch a mixing module's secondary source to the RNTuple reader if needed.

    The decision is taken from the first entry of ``files`` only: when its
    name ends with ".rntpl" (case-insensitive), the ``input`` SecSource of
    ``self.process.<mixingModuleLabel>`` is replaced by an RNTuple SecSource
    that carries over all parameters of the existing ``input``. The
    equivalent statements are also appended to ``self.additionalCommands``
    (presumably replayed when the configuration is dumped -- confirm against
    the rest of the class).

    Parameters:
        files: sequence of pileup input file names; may be empty, in which
            case nothing is changed.
        mixingModuleLabel: attribute name of the mixing module on
            ``self.process`` (the visible callers pass 'mix' and 'mixData').

    Returns:
        None. The method only has side effects on ``self.process`` and
        ``self.additionalCommands``.
    """
    # Do we want a command-line option as well to switch the input type?
    # Naively the 'filetype' looks attractive, but it would
    # couple the primary Source and the SecSource to the same
    # file format, which is not strictly necessary
    useRNTuple = len(files) > 0 and files[0].lower().endswith(".rntpl")
    if not useRNTuple:
        return

    # Single definition of the SecSource type so that the in-memory process
    # and the recorded commands cannot diverge. Previously the process got
    # "EmbeddedRNTupleRootSource" while the recorded commands created
    # "EmbeddedRNTupleTempSource". The "Temp" name follows the naming of the
    # RNTupleTempSource primary source.
    # NOTE(review): confirm this is the registered SecSource plugin name.
    secSourceType = "EmbeddedRNTupleTempSource"

    rntupleSrc = cms.SecSource(secSourceType)
    mixingModule = getattr(self.process, mixingModuleLabel)
    # Carry over every parameter of the existing secondary source.
    rntupleSrc.update_(mixingModule.input.parameters_())
    mixingModule.input = rntupleSrc

    # Record the same three steps as configuration statements.
    self.additionalCommands.append(f'rntupleSrc = cms.SecSource("{secSourceType}")')
    self.additionalCommands.append(f'rntupleSrc.update_(process.{mixingModuleLabel}.input.parameters_())')
    self.additionalCommands.append(f'process.{mixingModuleLabel}.input = rntupleSrc')

def completeInputCommand(self):
if self._options.inputEventContent:
Expand Down Expand Up @@ -1591,6 +1607,8 @@ def prepare_DATAMIX(self, stepSpec = None):
theFiles= (filesFromList(self._options.pileup_input[9:]))[0]
else:
theFiles=self._options.pileup_input.split(',')

self.customizeMixingModuleForRNTuple(theFiles, 'mixData')
#print theFiles
self.executeAndRemember( "process.mixData.input.fileNames = cms.untracked.vstring(%s)"%( theFiles ) )

Expand Down
56 changes: 43 additions & 13 deletions Configuration/PyReleaseValidation/python/WorkFlowRunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,24 +60,35 @@ def doCmd(self, cmd):
return ret

@staticmethod
def replace_filein_extensions(command_line, extension):
def replace_filein_extensions(command_line, outputExtensionForStep, defaultExtension, fileOption='--filein'):
# Pattern to match the file option (--filein by default) followed by file:file.ext entries (comma-separated)
filein_pattern = re.compile(
r'(--filein\s+)((?:file:[a-zA-Z0-9_]+\.[a-z]+(?:,\s*)?)*)'
r'('+fileOption+r'\s+)((?:file:[a-zA-Z0-9_]+\.[a-z]+(?:,\s*)?)*)'
)

# Inner pattern to match individual file entries
file_pattern = re.compile(r'file:([a-zA-Z0-9_]+)\.[a-z]+')
# Inner patterns to match individual file entries
# For stepN naming need to know the N
file_pattern_step = re.compile('file:step([1-9]+)(_[a-zA-Z]+)?\.[a-z]+')
# Some ALCA steps use special file names without stepN, those
# are assumed to use the default extension
file_pattern_gen = re.compile(r'file:([a-zA-Z0-9_]+)\.[a-z]+')

def replace_filein_match(filein_match):
filein_prefix = filein_match.group(1)
file_list_str = filein_match.group(2)

# Replace extensions in the file list
new_file_list = file_pattern.sub(
lambda m: 'file:{0}{1}'.format(m.group(1), extension),
file_list_str
)
m = file_pattern_step.search(file_list_str)
if m:
new_file_list = file_pattern_step.sub(
lambda m: 'file:step{0}{1}{2}'.format(m.group(1), m.group(2) or "", outputExtensionForStep[int(m.group(1))]),
file_list_str
)
else:
new_file_list = file_pattern_gen.sub(
lambda m: 'file:{0}{1}'.format(m.group(1), defaultExtension),
file_list_str
)

return filein_prefix + new_file_list

Expand Down Expand Up @@ -117,6 +128,21 @@ def run(self):
def closeCmd(i,ID):
return ' > %s 2>&1; ' % ('step%d_'%(i,)+ID+'.log ',)

# For --secondfilein the primary and secondary files must have
# the same format (TTree or RNTuple). For now find the last
# step that uses --secondfilein, and use TTree for all steps
# up to that step. Theoretically we could identify the exact
# steps that need TTree output in this case, but given the way
# --secondfilein is being used now, and the deployment plan
# for RNTuple for HL-LHC, that complexity does not seem worth it.
lastStepWithSecondFileIn = None
if self.useRNTuple:
for (istepmone,com) in enumerate(self.wf.cmds):
# I don't know what to do in case com is something else
if isinstance(com, str):
if "--secondfilein" in com:
lastStepWithSecondFileIn = istepmone+1

inFile=None
lumiRangeFile=None
aborted=False
Expand Down Expand Up @@ -192,7 +218,8 @@ def closeCmd(i,ID):

cmd += com

if self.useRNTuple:
if self.useRNTuple and not \
(lastStepWithSecondFileIn is not None and istep < lastStepWithSecondFileIn):
cmd+=' --rntuple_out'
if self.startFrom:
steps = cmd.split("-s ")[1].split(" ")[0]
Expand Down Expand Up @@ -227,16 +254,19 @@ def closeCmd(i,ID):
if istep!=1 and not '--filein' in cmd and not 'premix_stage1' in cmd and not ("--fast" in cmd and "premix_stage2" in cmd):
steps = cmd.split("-s ")[1].split(" ")[0] ## relying on the syntax: cmsDriver -s STEPS --otherFlags
if "ALCA" not in steps:
cmd+=' --filein file:step%s%s '%(istep-1,extension)
cmd+=' --filein file:step%s%s '%(istep-1,outputExtensionForStep[istep-1])
elif "ALCA" in steps and "RECO" in steps:
cmd+=' --filein file:step%s%s '%(istep-1,extension)
cmd+=' --filein file:step%s%s '%(istep-1,outputExtensionForStep[istep-1])
elif self.recoOutput:
cmd+=' --filein %s'%(self.recoOutput)
else:
cmd+=' --filein file:step%s%s '%(istep-1,extension)
cmd+=' --filein file:step%s%s '%(istep-1,outputExtensionForStep[istep-1])
elif istep!=1 and '--filein' in cmd and '--filetype' not in cmd:
# make sure correct extension is being used
cmd = self.replace_filein_extensions(cmd, extension)
cmd = self.replace_filein_extensions(cmd, outputExtensionForStep, extension)
if '--pileup_input' in cmd and '--filetype' not in cmd:
# make sure correct extension is being used
cmd = self.replace_filein_extensions(cmd, outputExtensionForStep, extension, fileOption='--pileup_input')
if not '--fileout' in com:
cmd+=' --fileout file:step%s%s '%(istep,extension)
if "RECO" in cmd:
Expand Down
4 changes: 2 additions & 2 deletions FWIO/RNTupleTempInput/src/RootRNTuple.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ namespace edm::rntuple_temp {
}
if (not reader_) {
throw cms::Exception("WrongFileFormat")
<< "The ROOT file does not contain a TTree named " << productTreeName
<< "\n This is either not an edm ROOT file or is one that has been corrupted.";
<< "The ROOT file does not contain a RNTuple named " << productTreeName
<< "\n This is either not an edm RNTuple ROOT file or is one that has been corrupted.";
}
entries_ = reader_->GetNEntries();
}
Expand Down