diff --git a/Configuration/Applications/python/ConfigBuilder.py b/Configuration/Applications/python/ConfigBuilder.py
index ef849484150b8..83dc0655c033e 100644
--- a/Configuration/Applications/python/ConfigBuilder.py
+++ b/Configuration/Applications/python/ConfigBuilder.py
@@ -440,7 +440,7 @@ def _datasetname_and_maxfiles(entry):
                     self.process.source.fileNames.append(self._options.dirin+entry)
             if self._options.secondfilein:
                 if not hasattr(self.process.source,"secondaryFileNames"):
-                    raise Exception("--secondfilein not compatible with "+self._options.filetype+"input type")
+                    raise Exception("--secondfilein not compatible with "+self._options.filetype+" input type")
                 for entry in self._options.secondfilein.split(','):
                     print("entry",entry)
                     if entry.startswith("filelist:"):
@@ -459,8 +459,8 @@ def _datasetname_and_maxfiles(entry):
                 filesFromOption(self)
             if self._options.filetype == "EDM_RNTUPLE":
                 self.process.source=cms.Source("RNTupleTempSource",
-                                               fileNames = cms.untracked.vstring())#, 2ndary not supported yet
-                                               #secondaryFileNames= cms.untracked.vstring())
+                                               fileNames = cms.untracked.vstring(),
+                                               secondaryFileNames= cms.untracked.vstring())
                 filesFromOption(self)
             elif self._options.filetype == "DAT":
                 self.process.source=cms.Source("NewEventStreamFileReader",fileNames = cms.untracked.vstring())
@@ -817,6 +817,8 @@ def addStandardSequences(self):
                     mixingDict['F']=(filesFromList(self._options.pileup_input[9:]))[0]
                 else:
                     mixingDict['F']=self._options.pileup_input.split(',')
+
+            self.customizeMixingModuleForRNTuple(mixingDict.get('F', []), 'mix')
             specialization=defineMixing(mixingDict)
             for command in specialization:
                 self.executeAndRemember(command)
@@ -872,6 +874,21 @@ def addStandardSequences(self):
             else:
                 self._options.inputCommands='keep *_randomEngineStateProducer_*_*,'
 
+    def customizeMixingModuleForRNTuple(self, files, mixingModuleLabel):
+        """Swap the input of process.<mixingModuleLabel> to an RNTuple SecSource if the first of 'files' ends in .rntpl"""
+        # Do we want a command-line option as well to switch the input type?
+        # Naively the 'filetype' looks attractive, but it would
+        # couple the primary Source and the SecSource to the same
+        # file format, which is not strictly necessary
+        useRNTuple= len(files) > 0 and files[0].lower().endswith(".rntpl")
+        if useRNTuple:
+            rntupleSrc = cms.SecSource("EmbeddedRNTupleTempSource")
+            mixingModule = getattr(self.process, mixingModuleLabel)
+            rntupleSrc.update_(mixingModule.input.parameters_())
+            mixingModule.input = rntupleSrc
+            self.additionalCommands.append('rntupleSrc = cms.SecSource("EmbeddedRNTupleTempSource")')
+            self.additionalCommands.append(f'rntupleSrc.update_(process.{mixingModuleLabel}.input.parameters_())')
+            self.additionalCommands.append(f'process.{mixingModuleLabel}.input = rntupleSrc')
 
     def completeInputCommand(self):
         if self._options.inputEventContent:
@@ -1591,6 +1608,8 @@ def prepare_DATAMIX(self, stepSpec = None):
                 theFiles= (filesFromList(self._options.pileup_input[9:]))[0]
             else:
                 theFiles=self._options.pileup_input.split(',')
+
+            self.customizeMixingModuleForRNTuple(theFiles, 'mixData')
             #print theFiles
             self.executeAndRemember( "process.mixData.input.fileNames = cms.untracked.vstring(%s)"%( theFiles ) )
 
diff --git a/Configuration/PyReleaseValidation/python/WorkFlowRunner.py b/Configuration/PyReleaseValidation/python/WorkFlowRunner.py
index cd9e9d0e54c90..2925e6507afed 100644
--- a/Configuration/PyReleaseValidation/python/WorkFlowRunner.py
+++ b/Configuration/PyReleaseValidation/python/WorkFlowRunner.py
@@ -60,24 +60,35 @@ def doCmd(self, cmd):
         return ret
 
     @staticmethod
-    def replace_filein_extensions(command_line, extension):
+    def replace_filein_extensions(command_line, outputExtensionForStep, defaultExtension, fileOption='--filein'):
         # Pattern to match --filein followed by file:file.ext entries (comma-separated)
         filein_pattern = re.compile(
-            r'(--filein\s+)((?:file:[a-zA-Z0-9_]+\.[a-z]+(?:,\s*)?)*)'
+            r'('+fileOption+r'\s+)((?:file:[a-zA-Z0-9_]+\.[a-z]+(?:,\s*)?)*)'
         )
 
-        # Inner pattern to match individual file entries
-        file_pattern = re.compile(r'file:([a-zA-Z0-9_]+)\.[a-z]+')
+        # Inner patterns to match individual file entries
+        # For stepN naming need to know the N
+        file_pattern_step = re.compile(r'file:step([0-9]+)(_[a-zA-Z]+)?\.[a-z]+')
+        # Some ALCA steps use special file names without stepN, those
+        # are assumed to use the default extension
+        file_pattern_gen = re.compile(r'file:([a-zA-Z0-9_]+)\.[a-z]+')
 
         def replace_filein_match(filein_match):
             filein_prefix = filein_match.group(1)
             file_list_str = filein_match.group(2)
 
             # Replace extensions in the file list
-            new_file_list = file_pattern.sub(
-                lambda m: 'file:{0}{1}'.format(m.group(1), extension),
-                file_list_str
-            )
+            m = file_pattern_step.search(file_list_str)
+            if m:
+                new_file_list = file_pattern_step.sub(
+                    lambda m: 'file:step{0}{1}{2}'.format(m.group(1), m.group(2) or "", outputExtensionForStep[int(m.group(1))]),
+                    file_list_str
+                )
+            else:
+                new_file_list = file_pattern_gen.sub(
+                    lambda m: 'file:{0}{1}'.format(m.group(1), defaultExtension),
+                    file_list_str
+                )
 
             return filein_prefix + new_file_list
 
@@ -117,6 +128,21 @@ def run(self):
         def closeCmd(i,ID):
             return ' > %s 2>&1; ' % ('step%d_'%(i,)+ID+'.log ',)
 
+        # For --secondfilein the primary and secondary files must have
+        # the same format (TTree or RNTuple). For now find the last
+        # step that uses --secondfilein, and use TTree for all steps
+        # up to that step. Theoretically we could identify the exact
+        # steps that need TTree output in this case, but given the way
+        # --secondfilein is being used now, and the deployment plan
+        # for RNTuple for HL-LHC, that complexity does not seem worth it.
+        lastStepWithSecondFileIn = None
+        if self.useRNTuple:
+            for (istepmone,com) in enumerate(self.wf.cmds):
+                # I don't know what to do in case com is something else
+                if isinstance(com, str):
+                    if "--secondfilein" in com:
+                        lastStepWithSecondFileIn = istepmone+1
+
         inFile=None
         lumiRangeFile=None
         aborted=False
@@ -192,7 +218,8 @@ def closeCmd(i,ID):
 
                 cmd += com
 
-                if self.useRNTuple:
+                if self.useRNTuple and not \
+                   (lastStepWithSecondFileIn is not None and istep < lastStepWithSecondFileIn):
                     cmd+=' --rntuple_out'
                 if self.startFrom:
                     steps = cmd.split("-s ")[1].split(" ")[0]
@@ -227,16 +254,19 @@ def closeCmd(i,ID):
                     if istep!=1 and not '--filein' in cmd and not 'premix_stage1' in cmd and not ("--fast" in cmd and "premix_stage2" in cmd):
                         steps = cmd.split("-s ")[1].split(" ")[0] ## relying on the syntax: cmsDriver -s STEPS --otherFlags
                         if "ALCA" not in steps:
-                            cmd+=' --filein file:step%s%s '%(istep-1,extension)
+                            cmd+=' --filein file:step%s%s '%(istep-1,outputExtensionForStep[istep-1])
                         elif "ALCA" in steps and "RECO" in steps:
-                            cmd+=' --filein file:step%s%s '%(istep-1,extension)
+                            cmd+=' --filein file:step%s%s '%(istep-1,outputExtensionForStep[istep-1])
                         elif self.recoOutput:
                             cmd+=' --filein %s'%(self.recoOutput)
                         else:
-                            cmd+=' --filein file:step%s%s '%(istep-1,extension)
+                            cmd+=' --filein file:step%s%s '%(istep-1,outputExtensionForStep[istep-1])
                     elif istep!=1 and '--filein' in cmd and '--filetype' not in cmd:
                         # make sure correct extension is being used
-                        cmd = self.replace_filein_extensions(cmd, extension)
+                        cmd = self.replace_filein_extensions(cmd, outputExtensionForStep, extension)
+                    if '--pileup_input' in cmd and '--filetype' not in cmd:
+                        # make sure correct extension is being used
+                        cmd = self.replace_filein_extensions(cmd, outputExtensionForStep, extension, fileOption='--pileup_input')
                     if not '--fileout' in com:
                         cmd+=' --fileout file:step%s%s '%(istep,extension)
                         if "RECO" in cmd:
diff --git a/FWIO/RNTupleTempInput/src/RootRNTuple.cc b/FWIO/RNTupleTempInput/src/RootRNTuple.cc
index be2e496bbb9b7..6291969518b99 100644
--- a/FWIO/RNTupleTempInput/src/RootRNTuple.cc
+++ b/FWIO/RNTupleTempInput/src/RootRNTuple.cc
@@ -78,8 +78,8 @@ namespace edm::rntuple_temp {
     }
     if (not reader_) {
       throw cms::Exception("WrongFileFormat")
-          << "The ROOT file does not contain a TTree named " << productTreeName
-          << "\n This is either not an edm ROOT file or is one that has been corrupted.";
+          << "The ROOT file does not contain a RNTuple named " << productTreeName
+          << "\n This is either not an edm RNTuple ROOT file or is one that has been corrupted.";
     }
     entries_ = reader_->GetNEntries();
   }