diff --git a/IOPool/Input/src/EmbeddedRootSource.cc b/IOPool/Input/src/EmbeddedRootSource.cc index 8ae14b707c22a..f71854d29ad2f 100644 --- a/IOPool/Input/src/EmbeddedRootSource.cc +++ b/IOPool/Input/src/EmbeddedRootSource.cc @@ -13,62 +13,59 @@ namespace edm { class EventID; class EventPrincipal; - EmbeddedRootSource::EmbeddedRootSource(ParameterSet const& pset, VectorInputSourceDescription const& desc) : - VectorInputSource(pset, desc), - rootServiceChecker_(), - nStreams_(desc.allocations_->numberOfStreams()), - // The default value provided as the second argument to the getUntrackedParameter function call - // is not used when the ParameterSet has been validated and the parameters are not optional - // in the description. This is currently true when PoolSource is the primary input source. - // The modules that use PoolSource as a SecSource have not defined their fillDescriptions function - // yet, so the ParameterSet does not get validated yet. As soon as all the modules with a SecSource - // have defined descriptions, the defaults in the getUntrackedParameterSet function calls can - // and should be deleted from the code. - // - skipBadFiles_(pset.getUntrackedParameter("skipBadFiles", false)), - bypassVersionCheck_(pset.getUntrackedParameter("bypassVersionCheck", false)), - treeMaxVirtualSize_(pset.getUntrackedParameter("treeMaxVirtualSize", -1)), - productSelectorRules_(pset, "inputCommands", "InputSource"), - runHelper_(new DefaultRunHelper()), - catalog_(pset.getUntrackedParameter >("fileNames"), - pset.getUntrackedParameter("overrideCatalog", std::string())), - // Note: fileSequence_ needs to be initialized last, because it uses data members - // initialized previously in its own initialization. - fileSequence_(new RootEmbeddedFileSequence(pset, *this, catalog_)) { - } + EmbeddedRootSource::EmbeddedRootSource(ParameterSet const& pset, VectorInputSourceDescription const& desc) + : VectorInputSource(pset, desc), + rootServiceChecker_(), + nStreams_(desc.allocations_->numberOfStreams()), + // The default value provided as the second argument to the getUntrackedParameter function call + // is not used when the ParameterSet has been validated and the parameters are not optional + // in the description. This is currently true when PoolSource is the primary input source. + // The modules that use PoolSource as a SecSource have not defined their fillDescriptions function + // yet, so the ParameterSet does not get validated yet. As soon as all the modules with a SecSource + // have defined descriptions, the defaults in the getUntrackedParameterSet function calls can + // and should be deleted from the code. + // + skipBadFiles_(pset.getUntrackedParameter("skipBadFiles", false)), + bypassVersionCheck_(pset.getUntrackedParameter("bypassVersionCheck", false)), + treeMaxVirtualSize_(pset.getUntrackedParameter("treeMaxVirtualSize", -1)), + productSelectorRules_(pset, "inputCommands", "InputSource"), + runHelper_(new DefaultRunHelper()), + catalog_(pset.getUntrackedParameter >("fileNames"), + pset.getUntrackedParameter("overrideCatalog", std::string())), + // Note: fileSequence_ needs to be initialized last, because it uses data members + // initialized previously in its own initialization. 
+ fileSequence_(new RootEmbeddedFileSequence(pset, *this, catalog_)) {} EmbeddedRootSource::~EmbeddedRootSource() {} - void - EmbeddedRootSource::beginJob() { - } + void EmbeddedRootSource::beginJob() {} - void - EmbeddedRootSource::endJob() { + void EmbeddedRootSource::endJob() { fileSequence_->endJob(); InputFile::reportReadBranches(); } - void EmbeddedRootSource::closeFile_() { - fileSequence_->closeFile_(); - } + void EmbeddedRootSource::closeFile_() { fileSequence_->closeFile(); } - bool - EmbeddedRootSource::readOneEvent(EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine* engine, EventID const* id, bool recycleFiles) { + bool EmbeddedRootSource::readOneEvent(EventPrincipal& cache, + size_t& fileNameHash, + CLHEP::HepRandomEngine* engine, + EventID const* id, + bool recycleFiles) { return fileSequence_->readOneEvent(cache, fileNameHash, engine, id, recycleFiles); } - void - EmbeddedRootSource::readOneSpecified(EventPrincipal& cache, size_t& fileNameHash, SecondaryEventIDAndFileInfo const& id) { + void EmbeddedRootSource::readOneSpecified(EventPrincipal& cache, + size_t& fileNameHash, + SecondaryEventIDAndFileInfo const& id) { fileSequence_->readOneSpecified(cache, fileNameHash, id); } - void - EmbeddedRootSource::dropUnwantedBranches_(std::vector const& wantedBranches) { + void EmbeddedRootSource::dropUnwantedBranches_(std::vector const& wantedBranches) { std::vector rules; rules.reserve(wantedBranches.size() + 1); rules.emplace_back("drop *"); - for(std::string const& branch : wantedBranches) { + for (std::string const& branch : wantedBranches) { rules.push_back("keep " + branch + "_*"); } ParameterSet pset; @@ -76,22 +73,21 @@ namespace edm { productSelectorRules_ = ProductSelectorRules(pset, "inputCommands", "InputSource"); } - void - EmbeddedRootSource::fillDescriptions(ConfigurationDescriptions& descriptions) { - + void EmbeddedRootSource::fillDescriptions(ConfigurationDescriptions& descriptions) { ParameterSetDescription desc; std::vector defaultStrings; desc.setComment("Reads EDM/Root files for mixing."); - desc.addUntracked >("fileNames") - ->setComment("Names of files to be processed."); + desc.addUntracked >("fileNames")->setComment("Names of files to be processed."); desc.addUntracked("overrideCatalog", std::string()); desc.addUntracked("skipBadFiles", false) - ->setComment("True: Ignore any missing or unopenable input file.\n" - "False: Throw exception if missing or unopenable input file."); + ->setComment( + "True: Ignore any missing or unopenable input file.\n" + "False: Throw exception if missing or unopenable input file."); desc.addUntracked("bypassVersionCheck", false) - ->setComment("True: Bypass release version check.\n" - "False: Throw exception if reading file in a release prior to the release in which the file was written."); + ->setComment( + "True: Bypass release version check.\n" + "False: Throw exception if reading file in a release prior to the release in which the file was written."); desc.addUntracked("treeMaxVirtualSize", -1) ->setComment("Size of ROOT TTree TBasket cache. 
Affects performance."); @@ -100,4 +96,4 @@ namespace edm { descriptions.add("source", desc); } -} +} // namespace edm diff --git a/IOPool/Input/src/PoolSource.cc b/IOPool/Input/src/PoolSource.cc index 79062225d0f04..255ae051932bf 100644 --- a/IOPool/Input/src/PoolSource.cc +++ b/IOPool/Input/src/PoolSource.cc @@ -37,57 +37,57 @@ namespace edm { void checkHistoryConsistency(Principal const& primary, Principal const& secondary) { ProcessHistory const& ph1 = primary.processHistory(); ProcessHistory const& ph2 = secondary.processHistory(); - if(ph1 != ph2 && !isAncestor(ph2, ph1)) { - throw Exception(errors::MismatchedInputFiles, "PoolSource::checkConsistency") << - "The secondary file is not an ancestor of the primary file\n"; + if (ph1 != ph2 && !isAncestor(ph2, ph1)) { + throw Exception(errors::MismatchedInputFiles, "PoolSource::checkConsistency") + << "The secondary file is not an ancestor of the primary file\n"; } } void checkConsistency(EventPrincipal const& primary, EventPrincipal const& secondary) { - if(!isSameEvent(primary, secondary)) { - throw Exception(errors::MismatchedInputFiles, "PoolSource::checkConsistency") << - primary.id() << " has inconsistent EventAuxiliary data in the primary and secondary file\n"; + if (!isSameEvent(primary, secondary)) { + throw Exception(errors::MismatchedInputFiles, "PoolSource::checkConsistency") + << primary.id() << " has inconsistent EventAuxiliary data in the primary and secondary file\n"; } } void checkConsistency(LuminosityBlockAuxiliary const& primary, LuminosityBlockAuxiliary const& secondary) { - if(primary.id() != secondary.id()) { - throw Exception(errors::MismatchedInputFiles, "PoolSource::checkConsistency") << - primary.id() << " has inconsistent LuminosityBlockAuxiliary data in the primary and secondary file\n"; + if (primary.id() != secondary.id()) { + throw Exception(errors::MismatchedInputFiles, "PoolSource::checkConsistency") + << primary.id() << " has inconsistent LuminosityBlockAuxiliary data in the primary and secondary file\n"; } } void checkConsistency(RunAuxiliary const& primary, RunAuxiliary const& secondary) { - if(primary.id() != secondary.id()) { - throw Exception(errors::MismatchedInputFiles, "PoolSource::checkConsistency") << - primary.id() << " has inconsistent RunAuxiliary data in the primary and secondary file\n"; + if (primary.id() != secondary.id()) { + throw Exception(errors::MismatchedInputFiles, "PoolSource::checkConsistency") + << primary.id() << " has inconsistent RunAuxiliary data in the primary and secondary file\n"; } } - } + } // namespace - PoolSource::PoolSource(ParameterSet const& pset, InputSourceDescription const& desc) : - InputSource(pset, desc), - rootServiceChecker_(), - catalog_(pset.getUntrackedParameter >("fileNames"), - pset.getUntrackedParameter("overrideCatalog", std::string())), - secondaryCatalog_(pset.getUntrackedParameter >("secondaryFileNames", std::vector()), - pset.getUntrackedParameter("overrideCatalog", std::string())), - secondaryRunPrincipal_(), - secondaryLumiPrincipal_(), - secondaryEventPrincipals_(), - branchIDsToReplace_(), - nStreams_(desc.allocations_->numberOfStreams()), - skipBadFiles_(pset.getUntrackedParameter("skipBadFiles")), - bypassVersionCheck_(pset.getUntrackedParameter("bypassVersionCheck")), - treeMaxVirtualSize_(pset.getUntrackedParameter("treeMaxVirtualSize")), - productSelectorRules_(pset, "inputCommands", "InputSource"), - dropDescendants_(pset.getUntrackedParameter("dropDescendantsOfDroppedBranches")), - 
labelRawDataLikeMC_(pset.getUntrackedParameter("labelRawDataLikeMC")), - runHelper_(makeRunHelper(pset)), - resourceSharedWithDelayedReaderPtr_(), - // Note: primaryFileSequence_ and secondaryFileSequence_ need to be initialized last, because they use data members - // initialized previously in their own initialization. - primaryFileSequence_(new RootPrimaryFileSequence(pset, *this, catalog_)), - secondaryFileSequence_(secondaryCatalog_.empty() ? nullptr : - new RootSecondaryFileSequence(pset, *this, secondaryCatalog_)) - { + PoolSource::PoolSource(ParameterSet const& pset, InputSourceDescription const& desc) + : InputSource(pset, desc), + rootServiceChecker_(), + catalog_(pset.getUntrackedParameter >("fileNames"), + pset.getUntrackedParameter("overrideCatalog", std::string())), + secondaryCatalog_( + pset.getUntrackedParameter >("secondaryFileNames", std::vector()), + pset.getUntrackedParameter("overrideCatalog", std::string())), + secondaryRunPrincipal_(), + secondaryLumiPrincipal_(), + secondaryEventPrincipals_(), + branchIDsToReplace_(), + nStreams_(desc.allocations_->numberOfStreams()), + skipBadFiles_(pset.getUntrackedParameter("skipBadFiles")), + bypassVersionCheck_(pset.getUntrackedParameter("bypassVersionCheck")), + treeMaxVirtualSize_(pset.getUntrackedParameter("treeMaxVirtualSize")), + productSelectorRules_(pset, "inputCommands", "InputSource"), + dropDescendants_(pset.getUntrackedParameter("dropDescendantsOfDroppedBranches")), + labelRawDataLikeMC_(pset.getUntrackedParameter("labelRawDataLikeMC")), + runHelper_(makeRunHelper(pset)), + resourceSharedWithDelayedReaderPtr_(), + // Note: primaryFileSequence_ and secondaryFileSequence_ need to be initialized last, because they use data members + // initialized previously in their own initialization. + primaryFileSequence_(new RootPrimaryFileSequence(pset, *this, catalog_)), + secondaryFileSequence_( + secondaryCatalog_.empty() ? 
nullptr : new RootSecondaryFileSequence(pset, *this, secondaryCatalog_)) { auto resources = SharedResourcesRegistry::instance()->createAcquirerForSourceDelayedReader(); resourceSharedWithDelayedReaderPtr_ = std::make_unique(std::move(resources.first)); mutexSharedWithDelayedReader_ = resources.second; @@ -95,9 +95,9 @@ namespace edm { if (secondaryCatalog_.empty() && pset.getUntrackedParameter("needSecondaryFileNames", false)) { throw Exception(errors::Configuration, "PoolSource") << "'secondaryFileNames' must be specified\n"; } - if(secondaryFileSequence_) { + if (secondaryFileSequence_) { secondaryEventPrincipals_.reserve(nStreams_); - for(unsigned int index = 0; index < nStreams_; ++index) { + for (unsigned int index = 0; index < nStreams_; ++index) { secondaryEventPrincipals_.emplace_back(new EventPrincipal(secondaryFileSequence_->fileProductRegistry(), secondaryFileSequence_->fileBranchIDListHelper(), std::make_shared(), @@ -111,32 +111,31 @@ namespace edm { std::set associationsFromSecondary; //this is the registry used by the 'outside' world and only has the primary file information in it at present ProductRegistry::ProductList& fullList = productRegistryUpdate().productListUpdator(); - for(auto const& item : secondary) { - if(item.second.present()) { + for (auto const& item : secondary) { + if (item.second.present()) { idsToReplace[item.second.branchType()].insert(item.second.branchID()); - if(item.second.branchType() == InEvent && - item.second.unwrappedType() == typeid(ThinnedAssociation)) { + if (item.second.branchType() == InEvent && item.second.unwrappedType() == typeid(ThinnedAssociation)) { associationsFromSecondary.insert(item.second.branchID()); } //now make sure this is marked as not dropped else the product will not be 'get'table from the Event auto itFound = fullList.find(item.first); - if(itFound != fullList.end()) { + if (itFound != fullList.end()) { itFound->second.setDropped(false); } } } - for(auto const& item : primary) { - if(item.second.present()) { + for (auto const& item : primary) { + if (item.second.present()) { idsToReplace[item.second.branchType()].erase(item.second.branchID()); associationsFromSecondary.erase(item.second.branchID()); } } - if(idsToReplace[InEvent].empty() && idsToReplace[InLumi].empty() && idsToReplace[InRun].empty()) { - secondaryFileSequence_ = nullptr; // propagate_const has no reset() function + if (idsToReplace[InEvent].empty() && idsToReplace[InLumi].empty() && idsToReplace[InRun].empty()) { + secondaryFileSequence_ = nullptr; // propagate_const has no reset() function } else { - for(int i = InEvent; i < NumBranchTypes; ++i) { + for (int i = InEvent; i < NumBranchTypes; ++i) { branchIDsToReplace_[i].reserve(idsToReplace[i].size()); - for(auto const& id : idsToReplace[i]) { + for (auto const& id : idsToReplace[i]) { branchIDsToReplace_[i].push_back(id); } } @@ -147,93 +146,78 @@ namespace edm { PoolSource::~PoolSource() {} - void - PoolSource::endJob() { - if(secondaryFileSequence_) secondaryFileSequence_->endJob(); + void PoolSource::endJob() { + if (secondaryFileSequence_) + secondaryFileSequence_->endJob(); primaryFileSequence_->endJob(); InputFile::reportReadBranches(); } - std::unique_ptr - PoolSource::readFile_() { + std::unique_ptr PoolSource::readFile_() { std::unique_ptr fb = primaryFileSequence_->readFile_(); - if(secondaryFileSequence_) { + if (secondaryFileSequence_) { fb->setNotFastClonable(FileBlock::HasSecondaryFileSequence); } return fb; } - void PoolSource::closeFile_() { - primaryFileSequence_->closeFile_(); - } + 
void PoolSource::closeFile_() { primaryFileSequence_->closeFile(); } - std::shared_ptr - PoolSource::readRunAuxiliary_() { - return primaryFileSequence_->readRunAuxiliary_(); - } + std::shared_ptr PoolSource::readRunAuxiliary_() { return primaryFileSequence_->readRunAuxiliary_(); } - std::shared_ptr - PoolSource::readLuminosityBlockAuxiliary_() { + std::shared_ptr PoolSource::readLuminosityBlockAuxiliary_() { return primaryFileSequence_->readLuminosityBlockAuxiliary_(); } - void - PoolSource::readRun_(RunPrincipal& runPrincipal) { + void PoolSource::readRun_(RunPrincipal& runPrincipal) { primaryFileSequence_->readRun_(runPrincipal); - if(secondaryFileSequence_ && !branchIDsToReplace_[InRun].empty()) { + if (secondaryFileSequence_ && !branchIDsToReplace_[InRun].empty()) { bool found = secondaryFileSequence_->skipToItem(runPrincipal.run(), 0U, 0U); - if(found) { + if (found) { std::shared_ptr secondaryAuxiliary = secondaryFileSequence_->readRunAuxiliary_(); checkConsistency(runPrincipal.aux(), *secondaryAuxiliary); secondaryRunPrincipal_ = std::make_shared(secondaryAuxiliary, - secondaryFileSequence_->fileProductRegistry(), - processConfiguration(), - nullptr, - runPrincipal.index()); + secondaryFileSequence_->fileProductRegistry(), + processConfiguration(), + nullptr, + runPrincipal.index()); secondaryFileSequence_->readRun_(*secondaryRunPrincipal_); checkHistoryConsistency(runPrincipal, *secondaryRunPrincipal_); runPrincipal.recombine(*secondaryRunPrincipal_, branchIDsToReplace_[InRun]); } else { throw Exception(errors::MismatchedInputFiles, "PoolSource::readRun_") - << " Run " << runPrincipal.run() - << " is not found in the secondary input files\n"; + << " Run " << runPrincipal.run() << " is not found in the secondary input files\n"; } } } - void - PoolSource::readLuminosityBlock_(LuminosityBlockPrincipal& lumiPrincipal) { + void PoolSource::readLuminosityBlock_(LuminosityBlockPrincipal& lumiPrincipal) { primaryFileSequence_->readLuminosityBlock_(lumiPrincipal); - if(secondaryFileSequence_ && !branchIDsToReplace_[InLumi].empty()) { + if (secondaryFileSequence_ && !branchIDsToReplace_[InLumi].empty()) { bool found = secondaryFileSequence_->skipToItem(lumiPrincipal.run(), lumiPrincipal.luminosityBlock(), 0U); - if(found) { - std::shared_ptr secondaryAuxiliary = secondaryFileSequence_->readLuminosityBlockAuxiliary_(); + if (found) { + std::shared_ptr secondaryAuxiliary = + secondaryFileSequence_->readLuminosityBlockAuxiliary_(); checkConsistency(lumiPrincipal.aux(), *secondaryAuxiliary); secondaryLumiPrincipal_ = std::make_shared(secondaryAuxiliary, - secondaryFileSequence_->fileProductRegistry(), - processConfiguration(), - nullptr, - lumiPrincipal.index()); + secondaryFileSequence_->fileProductRegistry(), processConfiguration(), nullptr, lumiPrincipal.index()); secondaryFileSequence_->readLuminosityBlock_(*secondaryLumiPrincipal_); checkHistoryConsistency(lumiPrincipal, *secondaryLumiPrincipal_); lumiPrincipal.recombine(*secondaryLumiPrincipal_, branchIDsToReplace_[InLumi]); } else { throw Exception(errors::MismatchedInputFiles, "PoolSource::readLuminosityBlock_") - << " Run " << lumiPrincipal.run() - << " LuminosityBlock " << lumiPrincipal.luminosityBlock() - << " is not found in the secondary input files\n"; + << " Run " << lumiPrincipal.run() << " LuminosityBlock " << lumiPrincipal.luminosityBlock() + << " is not found in the secondary input files\n"; } } } - void - PoolSource::readEvent_(EventPrincipal& eventPrincipal) { + void PoolSource::readEvent_(EventPrincipal& eventPrincipal) { 
primaryFileSequence_->readEvent(eventPrincipal); - if(secondaryFileSequence_ && !branchIDsToReplace_[InEvent].empty()) { - bool found = secondaryFileSequence_->skipToItem(eventPrincipal.run(), - eventPrincipal.luminosityBlock(), - eventPrincipal.id().event()); - if(found) { + if (secondaryFileSequence_ && !branchIDsToReplace_[InEvent].empty()) { + bool found = secondaryFileSequence_->skipToItem( + eventPrincipal.run(), eventPrincipal.luminosityBlock(), eventPrincipal.id().event()); + if (found) { EventPrincipal& secondaryEventPrincipal = *secondaryEventPrincipals_[eventPrincipal.streamID().value()]; secondaryFileSequence_->readEvent(secondaryEventPrincipal); checkConsistency(eventPrincipal, secondaryEventPrincipal); @@ -242,30 +226,29 @@ namespace edm { eventPrincipal.mergeProvenanceRetrievers(secondaryEventPrincipal); secondaryEventPrincipal.clearPrincipal(); } else { - throw Exception(errors::MismatchedInputFiles, "PoolSource::readEvent_") << - eventPrincipal.id() << " is not found in the secondary input files\n"; + throw Exception(errors::MismatchedInputFiles, "PoolSource::readEvent_") + << eventPrincipal.id() << " is not found in the secondary input files\n"; } } } - bool - PoolSource::readIt(EventID const& id, EventPrincipal& eventPrincipal, StreamContext& streamContext) { + bool PoolSource::readIt(EventID const& id, EventPrincipal& eventPrincipal, StreamContext& streamContext) { bool found = primaryFileSequence_->skipToItem(id.run(), id.luminosityBlock(), id.event()); - if(!found) return false; + if (!found) + return false; EventSourceSentry sentry(*this, streamContext); readEvent_(eventPrincipal); return true; } - InputSource::ItemType - PoolSource::getNextItemType() { + InputSource::ItemType PoolSource::getNextItemType() { RunNumber_t run = IndexIntoFile::invalidRun; LuminosityBlockNumber_t lumi = IndexIntoFile::invalidLumi; EventNumber_t event = IndexIntoFile::invalidEvent; InputSource::ItemType itemType = primaryFileSequence_->getNextItemType(run, lumi, event); - if(secondaryFileSequence_ && (IsSynchronize != state())) { - if(itemType == IsRun || itemType == IsLumi || itemType == IsEvent) { - if(!secondaryFileSequence_->containedInCurrentFile(run, lumi, event)) { + if (secondaryFileSequence_ && (IsSynchronize != state())) { + if (itemType == IsRun || itemType == IsLumi || itemType == IsEvent) { + if (!secondaryFileSequence_->containedInCurrentFile(run, lumi, event)) { return IsSynchronize; } } @@ -273,48 +256,37 @@ namespace edm { return runHelper_->nextItemType(state(), itemType); } - std::pair - PoolSource::resourceSharedWithDelayedReader_() { + std::pair PoolSource::resourceSharedWithDelayedReader_() { return std::make_pair(resourceSharedWithDelayedReaderPtr_.get(), mutexSharedWithDelayedReader_.get()); } // Rewind to before the first event that was read. - void - PoolSource::rewind_() { - primaryFileSequence_->rewind_(); - } + void PoolSource::rewind_() { primaryFileSequence_->rewind_(); } // Advance "offset" events. Offset can be positive or negative (or zero). 
- void - PoolSource::skip(int offset) { - primaryFileSequence_->skipEvents(offset); - } + void PoolSource::skip(int offset) { primaryFileSequence_->skipEvents(offset); } - bool - PoolSource::goToEvent_(EventID const& eventID) { - return primaryFileSequence_->goToEvent(eventID); - } - - void - PoolSource::fillDescriptions(ConfigurationDescriptions & descriptions) { + bool PoolSource::goToEvent_(EventID const& eventID) { return primaryFileSequence_->goToEvent(eventID); } + void PoolSource::fillDescriptions(ConfigurationDescriptions& descriptions) { ParameterSetDescription desc; std::vector defaultStrings; desc.setComment("Reads EDM/Root files."); - desc.addUntracked >("fileNames") - ->setComment("Names of files to be processed."); + desc.addUntracked >("fileNames")->setComment("Names of files to be processed."); desc.addUntracked >("secondaryFileNames", defaultStrings) ->setComment("Names of secondary files to be processed."); desc.addUntracked("needSecondaryFileNames", false) ->setComment("If True, 'secondaryFileNames' must be specified and be non-empty."); desc.addUntracked("overrideCatalog", std::string()); desc.addUntracked("skipBadFiles", false) - ->setComment("True: Ignore any missing or unopenable input file.\n" - "False: Throw exception if missing or unopenable input file."); + ->setComment( + "True: Ignore any missing or unopenable input file.\n" + "False: Throw exception if missing or unopenable input file."); desc.addUntracked("bypassVersionCheck", false) - ->setComment("True: Bypass release version check.\n" - "False: Throw exception if reading file in a release prior to the release in which the file was written."); + ->setComment( + "True: Bypass release version check.\n" + "False: Throw exception if reading file in a release prior to the release in which the file was written."); desc.addUntracked("treeMaxVirtualSize", -1) ->setComment("Size of ROOT TTree TBasket cache. 
Affects performance."); desc.addUntracked("dropDescendantsOfDroppedBranches", true) @@ -329,18 +301,9 @@ namespace edm { descriptions.add("source", desc); } - bool - PoolSource::randomAccess_() const { - return true; - } + bool PoolSource::randomAccess_() const { return true; } - ProcessingController::ForwardState - PoolSource::forwardState_() const { - return primaryFileSequence_->forwardState(); - } + ProcessingController::ForwardState PoolSource::forwardState_() const { return primaryFileSequence_->forwardState(); } - ProcessingController::ReverseState - PoolSource::reverseState_() const { - return primaryFileSequence_->reverseState(); - } -} + ProcessingController::ReverseState PoolSource::reverseState_() const { return primaryFileSequence_->reverseState(); } +} // namespace edm diff --git a/IOPool/Input/src/RootEmbeddedFileSequence.cc b/IOPool/Input/src/RootEmbeddedFileSequence.cc index 520846243ccda..37dacb29cb2b7 100644 --- a/IOPool/Input/src/RootEmbeddedFileSequence.cc +++ b/IOPool/Input/src/RootEmbeddedFileSequence.cc @@ -23,51 +23,49 @@ namespace edm { class EventPrincipal; - RootEmbeddedFileSequence::RootEmbeddedFileSequence( - ParameterSet const& pset, - EmbeddedRootSource& input, - InputFileCatalog const& catalog) : - RootInputFileSequence(pset, catalog), - input_(input), - orderedProcessHistoryIDs_(), - sequential_(pset.getUntrackedParameter("sequential", false)), - sameLumiBlock_(pset.getUntrackedParameter("sameLumiBlock", false)), - fptr_(nullptr), - eventsRemainingInFile_(0), - // The default value provided as the second argument to the getUntrackedParameter function call - // is not used when the ParameterSet has been validated and the parameters are not optional - // in the description. This is currently true when PoolSource is the primary input source. - // The modules that use PoolSource as a SecSource have not defined their fillDescriptions function - // yet, so the ParameterSet does not get validated yet. As soon as all the modules with a SecSource - // have defined descriptions, the defaults in the getUntrackedParameterSet function calls can - // and should be deleted from the code. - initialNumberOfEventsToSkip_(pset.getUntrackedParameter("skipEvents", 0U)), - treeCacheSize_(pset.getUntrackedParameter("cacheSize", roottree::defaultCacheSize)), - enablePrefetching_(false), - enforceGUIDInFileName_(pset.getUntrackedParameter("enforceGUIDInFileName", false)) { - - if(noFiles()) { + RootEmbeddedFileSequence::RootEmbeddedFileSequence(ParameterSet const& pset, + EmbeddedRootSource& input, + InputFileCatalog const& catalog) + : RootInputFileSequence(pset, catalog), + input_(input), + orderedProcessHistoryIDs_(), + sequential_(pset.getUntrackedParameter("sequential", false)), + sameLumiBlock_(pset.getUntrackedParameter("sameLumiBlock", false)), + fptr_(nullptr), + eventsRemainingInFile_(0), + // The default value provided as the second argument to the getUntrackedParameter function call + // is not used when the ParameterSet has been validated and the parameters are not optional + // in the description. This is currently true when PoolSource is the primary input source. + // The modules that use PoolSource as a SecSource have not defined their fillDescriptions function + // yet, so the ParameterSet does not get validated yet. As soon as all the modules with a SecSource + // have defined descriptions, the defaults in the getUntrackedParameterSet function calls can + // and should be deleted from the code. 
+ initialNumberOfEventsToSkip_(pset.getUntrackedParameter("skipEvents", 0U)), + treeCacheSize_(pset.getUntrackedParameter("cacheSize", roottree::defaultCacheSize)), + enablePrefetching_(false), + enforceGUIDInFileName_(pset.getUntrackedParameter("enforceGUIDInFileName", false)) { + if (noFiles()) { throw Exception(errors::Configuration) << "RootEmbeddedFileSequence no input files specified for secondary input source.\n"; } // // The SiteLocalConfig controls the TTreeCache size and the prefetching settings. Service pSLC; - if(pSLC.isAvailable()) { - if(treeCacheSize_ != 0U && pSLC->sourceTTreeCacheSize()) { + if (pSLC.isAvailable()) { + if (treeCacheSize_ != 0U && pSLC->sourceTTreeCacheSize()) { treeCacheSize_ = *(pSLC->sourceTTreeCacheSize()); } enablePrefetching_ = pSLC->enablePrefetching(); } // Set the pointer to the function that reads an event. - if(sameLumiBlock_) { - if(sequential_) { + if (sameLumiBlock_) { + if (sequential_) { fptr_ = &RootEmbeddedFileSequence::readOneSequentialWithID; } else { fptr_ = &RootEmbeddedFileSequence::readOneRandomWithID; } } else { - if(sequential_) { + if (sequential_) { fptr_ = &RootEmbeddedFileSequence::readOneSequential; } else { fptr_ = &RootEmbeddedFileSequence::readOneRandom; @@ -75,15 +73,15 @@ namespace edm { } // For the secondary input source we do not stage in. - if(sequential_) { + if (sequential_) { // We open the first file - if(!atFirstFile()) { + if (!atFirstFile()) { setAtFirstFile(); initFile(false); } assert(rootFile()); rootFile()->setAtEventEntry(IndexIntoFile::invalidEntry); - if(!sameLumiBlock_) { + if (!sameLumiBlock_) { skipEntries(initialNumberOfEventsToSkip_); } } else { @@ -91,33 +89,29 @@ namespace edm { // We cannot use the random number service yet. std::ifstream f("/dev/urandom"); unsigned int seed; - f.read(reinterpret_cast(&seed), sizeof(seed)); + f.read(reinterpret_cast(&seed), sizeof(seed)); std::default_random_engine dre(seed); size_t count = numberOfFiles(); std::uniform_int_distribution distribution(0, count - 1); - while(!rootFile() && count != 0) { + while (!rootFile() && count != 0) { --count; int offset = distribution(dre); setAtFileSequenceNumber(offset); initFile(input_.skipBadFiles()); } } - if(rootFile()) { + if (rootFile()) { input_.productRegistryUpdate().updateFromInput(rootFile()->productRegistry()->productList()); } } - RootEmbeddedFileSequence::~RootEmbeddedFileSequence() { - } + RootEmbeddedFileSequence::~RootEmbeddedFileSequence() {} - void - RootEmbeddedFileSequence::endJob() { - closeFile_(); - } + void RootEmbeddedFileSequence::endJob() { closeFile(); } void RootEmbeddedFileSequence::closeFile_() { // delete the RootFile object. 
- if(rootFile()) { + if (rootFile()) { rootFile().reset(); } } @@ -126,36 +120,34 @@ namespace edm { initTheFile(skipBadFiles, false, nullptr, "mixingFiles", InputType::SecondarySource); } - RootEmbeddedFileSequence::RootFileSharedPtr - RootEmbeddedFileSequence::makeRootFile(std::shared_ptr filePtr) { + RootEmbeddedFileSequence::RootFileSharedPtr RootEmbeddedFileSequence::makeRootFile( + std::shared_ptr filePtr) { size_t currentIndexIntoFile = sequenceNumberOfFile(); - return std::make_shared( - fileName(), - ProcessConfiguration(), - logicalFileName(), - filePtr, - input_.nStreams(), - treeCacheSize_, - input_.treeMaxVirtualSize(), - input_.runHelper(), - input_.productSelectorRules(), - InputType::SecondarySource, - input_.processHistoryRegistryForUpdate(), - indexesIntoFiles(), - currentIndexIntoFile, - orderedProcessHistoryIDs_, - input_.bypassVersionCheck(), - enablePrefetching_, - enforceGUIDInFileName_); + return std::make_shared(fileName(), + ProcessConfiguration(), + logicalFileName(), + filePtr, + input_.nStreams(), + treeCacheSize_, + input_.treeMaxVirtualSize(), + input_.runHelper(), + input_.productSelectorRules(), + InputType::SecondarySource, + input_.processHistoryRegistryForUpdate(), + indexesIntoFiles(), + currentIndexIntoFile, + orderedProcessHistoryIDs_, + input_.bypassVersionCheck(), + enablePrefetching_, + enforceGUIDInFileName_); } - void - RootEmbeddedFileSequence::skipEntries(unsigned int offset) { + void RootEmbeddedFileSequence::skipEntries(unsigned int offset) { // offset is decremented by the number of events actually skipped. bool completed = rootFile()->skipEntries(offset); - while(!completed) { + while (!completed) { setAtNextFile(); - if(noMoreFiles()) { + if (noMoreFiles()) { setAtFirstFile(); } initFile(false); @@ -165,14 +157,14 @@ namespace edm { } } - bool - RootEmbeddedFileSequence::readOneSequential(EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine*, EventID const*, bool recycleFiles) { + bool RootEmbeddedFileSequence::readOneSequential( + EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine*, EventID const*, bool recycleFiles) { assert(rootFile()); rootFile()->nextEventEntry(); bool found = rootFile()->readCurrentEvent(cache); - if(!found) { + if (!found) { setAtNextFile(); - if(noMoreFiles()) { + if (noMoreFiles()) { if (recycleFiles) { setAtFirstFile(); } else { @@ -188,39 +180,38 @@ namespace edm { return true; } - bool - RootEmbeddedFileSequence::readOneSequentialWithID(EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine*, EventID const* idp, bool recycleFiles) { + bool RootEmbeddedFileSequence::readOneSequentialWithID( + EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine*, EventID const* idp, bool recycleFiles) { assert(idp); EventID const& id = *idp; int offset = initialNumberOfEventsToSkip_; initialNumberOfEventsToSkip_ = 0; - if(offset > 0) { + if (offset > 0) { assert(rootFile()); - while(offset > 0) { + while (offset > 0) { bool found = readOneSequentialWithID(cache, fileNameHash, nullptr, idp, recycleFiles); - if(!found) { + if (!found) { return false; } --offset; } } assert(rootFile()); - if(noMoreFiles() || - rootFile()->indexIntoFileIter().run() != id.run() || + if (noMoreFiles() || rootFile()->indexIntoFileIter().run() != id.run() || rootFile()->indexIntoFileIter().lumi() != id.luminosityBlock()) { bool found = skipToItem(id.run(), id.luminosityBlock(), 0, 0, false); - if(!found) { + if (!found) { return false; } } assert(rootFile()); bool found = 
rootFile()->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock()); - if(found) { + if (found) { found = rootFile()->readCurrentEvent(cache); } - if(!found) { + if (!found) { found = skipToItemInNewFile(id.run(), id.luminosityBlock(), 0); - if(!found) { + if (!found) { return false; } return readOneSequentialWithID(cache, fileNameHash, nullptr, idp, recycleFiles); @@ -229,48 +220,48 @@ namespace edm { return true; } - void - RootEmbeddedFileSequence::readOneSpecified(EventPrincipal& cache, size_t& fileNameHash, SecondaryEventIDAndFileInfo const& idx) { + void RootEmbeddedFileSequence::readOneSpecified(EventPrincipal& cache, + size_t& fileNameHash, + SecondaryEventIDAndFileInfo const& idx) { EventID const& id = idx.eventID(); bool found = skipToItem(id.run(), id.luminosityBlock(), id.event(), idx.fileNameHash()); - if(!found) { - throw Exception(errors::NotFound) << - "RootEmbeddedFileSequence::readOneSpecified(): Secondary Input files" << - " do not contain specified event:\n" << id << "\n"; + if (!found) { + throw Exception(errors::NotFound) << "RootEmbeddedFileSequence::readOneSpecified(): Secondary Input files" + << " do not contain specified event:\n" + << id << "\n"; } assert(rootFile()); found = rootFile()->readCurrentEvent(cache); assert(found); fileNameHash = idx.fileNameHash(); - if(fileNameHash == 0U) { + if (fileNameHash == 0U) { fileNameHash = lfnHash(); } } - bool - RootEmbeddedFileSequence::readOneRandom(EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine* engine, EventID const*, bool) { + bool RootEmbeddedFileSequence::readOneRandom( + EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine* engine, EventID const*, bool) { assert(rootFile()); assert(engine); unsigned int currentSeqNumber = sequenceNumberOfFile(); - while(eventsRemainingInFile_ == 0) { - + while (eventsRemainingInFile_ == 0) { unsigned int newSeqNumber = CLHEP::RandFlat::shootInt(engine, fileCatalogItems().size()); setAtFileSequenceNumber(newSeqNumber); - if(newSeqNumber != currentSeqNumber) { + if (newSeqNumber != currentSeqNumber) { initFile(false); currentSeqNumber = newSeqNumber; } eventsRemainingInFile_ = rootFile()->eventTree().entries(); - if(eventsRemainingInFile_ == 0) { - throw Exception(errors::NotFound) << - "RootEmbeddedFileSequence::readOneRandom(): Secondary Input file " << fileName() << " contains no events.\n"; + if (eventsRemainingInFile_ == 0) { + throw Exception(errors::NotFound) << "RootEmbeddedFileSequence::readOneRandom(): Secondary Input file " + << fileName() << " contains no events.\n"; } rootFile()->setAtEventEntry(CLHEP::RandFlat::shootInt(engine, eventsRemainingInFile_) - 1); } rootFile()->nextEventEntry(); bool found = rootFile()->readCurrentEvent(cache); - if(!found) { + if (!found) { rootFile()->setAtEventEntry(0); bool found = rootFile()->readCurrentEvent(cache); assert(found); @@ -280,37 +271,40 @@ namespace edm { return true; } - bool - RootEmbeddedFileSequence::readOneRandomWithID(EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine* engine, EventID const* idp, bool recycleFiles) { + bool RootEmbeddedFileSequence::readOneRandomWithID(EventPrincipal& cache, + size_t& fileNameHash, + CLHEP::HepRandomEngine* engine, + EventID const* idp, + bool recycleFiles) { assert(engine); assert(idp); EventID const& id = *idp; - if(noMoreFiles() || !rootFile() || - rootFile()->indexIntoFileIter().run() != id.run() || + if (noMoreFiles() || !rootFile() || rootFile()->indexIntoFileIter().run() != id.run() || rootFile()->indexIntoFileIter().lumi() 
!= id.luminosityBlock()) { bool found = skipToItem(id.run(), id.luminosityBlock(), 0); - if(!found) { + if (!found) { return false; } int eventsInLumi = 0; assert(rootFile()); - while(rootFile()->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock())) ++eventsInLumi; + while (rootFile()->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock())) + ++eventsInLumi; found = skipToItem(id.run(), id.luminosityBlock(), 0); assert(found); int eventInLumi = CLHEP::RandFlat::shootInt(engine, eventsInLumi); - for(int i = 0; i < eventInLumi; ++i) { + for (int i = 0; i < eventInLumi; ++i) { bool found = rootFile()->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock()); assert(found); } } assert(rootFile()); bool found = rootFile()->setEntryAtNextEventInLumi(id.run(), id.luminosityBlock()); - if(found) { + if (found) { found = rootFile()->readCurrentEvent(cache); } - if(!found) { + if (!found) { bool found = rootFile()->setEntryAtItem(id.run(), id.luminosityBlock(), 0); - if(!found) { + if (!found) { return false; } return readOneRandomWithID(cache, fileNameHash, engine, idp, recycleFiles); @@ -319,23 +313,28 @@ namespace edm { return true; } - bool - RootEmbeddedFileSequence::readOneEvent(EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine* engine, EventID const* id, bool recycleFiles) { + bool RootEmbeddedFileSequence::readOneEvent(EventPrincipal& cache, + size_t& fileNameHash, + CLHEP::HepRandomEngine* engine, + EventID const* id, + bool recycleFiles) { assert(!sameLumiBlock_ || id != nullptr); assert(sequential_ || engine != nullptr); return (this->*fptr_)(cache, fileNameHash, engine, id, recycleFiles); } - void - RootEmbeddedFileSequence::fillDescription(ParameterSetDescription & desc) { + void RootEmbeddedFileSequence::fillDescription(ParameterSetDescription& desc) { desc.addUntracked("sequential", false) - ->setComment("True: loopEvents() reads events sequentially from beginning of first file.\n" - "False: loopEvents() first reads events beginning at random event. New files also chosen randomly"); + ->setComment( + "True: loopEvents() reads events sequentially from beginning of first file.\n" + "False: loopEvents() first reads events beginning at random event. New files also chosen randomly"); desc.addUntracked("sameLumiBlock", false) - ->setComment("True: loopEvents() reads events only in same lumi as the specified event.\n" - "False: loopEvents() reads events regardless of lumi."); + ->setComment( + "True: loopEvents() reads events only in same lumi as the specified event.\n" + "False: loopEvents() reads events regardless of lumi."); desc.addUntracked("skipEvents", 0U) - ->setComment("Skip the first 'skipEvents' events. Used only if 'sequential' is True and 'sameLumiBlock' is False"); + ->setComment( + "Skip the first 'skipEvents' events. Used only if 'sequential' is True and 'sameLumiBlock' is False"); desc.addUntracked("cacheSize", roottree::defaultCacheSize) ->setComment("Size of ROOT TTree prefetch cache. 
Affects performance."); desc.addUntracked("enforceGUIDInFileName", false) @@ -343,4 +342,4 @@ namespace edm { "True: file name part is required to be equal to the GUID of the file\n" "False: file name can be anything"); } -} +} // namespace edm diff --git a/IOPool/Input/src/RootEmbeddedFileSequence.h b/IOPool/Input/src/RootEmbeddedFileSequence.h index 62b6bb0e0b8df..07ffc17859e7d 100644 --- a/IOPool/Input/src/RootEmbeddedFileSequence.h +++ b/IOPool/Input/src/RootEmbeddedFileSequence.h @@ -39,7 +39,6 @@ namespace edm { RootEmbeddedFileSequence(RootEmbeddedFileSequence const&) = delete; // Disallow copying and moving RootEmbeddedFileSequence& operator=(RootEmbeddedFileSequence const&) = delete; // Disallow copying and moving - void closeFile_() override; void endJob(); void skipEntries(unsigned int offset); bool readOneEvent(EventPrincipal& cache, size_t& fileNameHash, CLHEP::HepRandomEngine*, EventID const* id, bool recycleFiles); @@ -51,6 +50,7 @@ namespace edm { static void fillDescription(ParameterSetDescription & desc); private: + void closeFile_() override; void initFile_(bool skipBadFiles) override; RootFileSharedPtr makeRootFile(std::shared_ptr filePtr) override; diff --git a/IOPool/Input/src/RootInputFileSequence.cc b/IOPool/Input/src/RootInputFileSequence.cc index 469a6d717d6cb..6bba8eb2a2fd9 100644 --- a/IOPool/Input/src/RootInputFileSequence.cc +++ b/IOPool/Input/src/RootInputFileSequence.cc @@ -10,6 +10,8 @@ #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "Utilities/StorageFactory/interface/StorageFactory.h" +#include "Utilities/StorageFactory/interface/StatisticsSenderService.h" +#include "FWCore/ServiceRegistry/interface/Service.h" #include "TSystem.h" @@ -19,62 +21,51 @@ namespace edm { class LuminosityBlockPrincipal; class RunPrincipal; - RootInputFileSequence::RootInputFileSequence( - ParameterSet const& pset, - InputFileCatalog const& catalog) : - catalog_(catalog), - lfn_("unknown"), - lfnHash_(0U), - usedFallback_(false), - findFileForSpecifiedID_(nullptr), - fileIterBegin_(fileCatalogItems().begin()), - fileIterEnd_(fileCatalogItems().end()), - fileIter_(fileIterEnd_), - fileIterLastOpened_(fileIterEnd_), - rootFile_(), - indexesIntoFiles_(fileCatalogItems().size()) { - } + RootInputFileSequence::RootInputFileSequence(ParameterSet const& pset, InputFileCatalog const& catalog) + : catalog_(catalog), + lfn_("unknown"), + lfnHash_(0U), + usedFallback_(false), + findFileForSpecifiedID_(nullptr), + fileIterBegin_(fileCatalogItems().begin()), + fileIterEnd_(fileCatalogItems().end()), + fileIter_(fileIterEnd_), + fileIterLastOpened_(fileIterEnd_), + rootFile_(), + indexesIntoFiles_(fileCatalogItems().size()) {} - std::vector const& - RootInputFileSequence::fileCatalogItems() const { + std::vector const& RootInputFileSequence::fileCatalogItems() const { return catalog_.fileCatalogItems(); } - std::shared_ptr - RootInputFileSequence::fileProductRegistry() const { + std::shared_ptr RootInputFileSequence::fileProductRegistry() const { assert(rootFile()); return rootFile()->productRegistry(); } - std::shared_ptr - RootInputFileSequence::fileBranchIDListHelper() const { + std::shared_ptr RootInputFileSequence::fileBranchIDListHelper() const { assert(rootFile()); return rootFile()->branchIDListHelper(); } - RootInputFileSequence::~RootInputFileSequence() { - } + RootInputFileSequence::~RootInputFileSequence() {} - std::shared_ptr - RootInputFileSequence::readRunAuxiliary_() { + std::shared_ptr 
RootInputFileSequence::readRunAuxiliary_() { assert(rootFile()); return rootFile()->readRunAuxiliary_(); } - std::shared_ptr - RootInputFileSequence::readLuminosityBlockAuxiliary_() { + std::shared_ptr RootInputFileSequence::readLuminosityBlockAuxiliary_() { assert(rootFile()); return rootFile()->readLuminosityBlockAuxiliary_(); } - void - RootInputFileSequence::readRun_(RunPrincipal& runPrincipal) { + void RootInputFileSequence::readRun_(RunPrincipal& runPrincipal) { assert(rootFile()); rootFile()->readRun_(runPrincipal); } - void - RootInputFileSequence::readLuminosityBlock_(LuminosityBlockPrincipal& lumiPrincipal) { + void RootInputFileSequence::readLuminosityBlock_(LuminosityBlockPrincipal& lumiPrincipal) { assert(rootFile()); rootFile()->readLuminosityBlock_(lumiPrincipal); } @@ -92,42 +83,46 @@ namespace edm { // when it is asked to do so. // - void - RootInputFileSequence::readEvent(EventPrincipal& eventPrincipal) { + void RootInputFileSequence::readEvent(EventPrincipal& eventPrincipal) { assert(rootFile()); rootFile()->readEvent(eventPrincipal); } - bool - RootInputFileSequence::containedInCurrentFile(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event) const { - if(!rootFile()) return false; + bool RootInputFileSequence::containedInCurrentFile(RunNumber_t run, + LuminosityBlockNumber_t lumi, + EventNumber_t event) const { + if (!rootFile()) + return false; return rootFile()->containsItem(run, lumi, event); } - bool - RootInputFileSequence::skipToItemInNewFile(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, size_t fileNameHash) { + bool RootInputFileSequence::skipToItemInNewFile(RunNumber_t run, + LuminosityBlockNumber_t lumi, + EventNumber_t event, + size_t fileNameHash) { // Look for item in files not yet opened. We have a hash of the logical file name assert(fileNameHash != 0U); - // If the lookup table is not yet filled in, fill it. - if(!findFileForSpecifiedID_) { + // If the lookup table is not yet filled in, fill it. + if (!findFileForSpecifiedID_) { // We use a multimap because there may be hash collisions (Two different LFNs could have the same hash). // We map the hash of the LFN to the index into the list of files. - findFileForSpecifiedID_ = std::make_unique>(); // propagate_const has no reset() function + findFileForSpecifiedID_ = + std::make_unique>(); // propagate_const has no reset() function auto hasher = std::hash(); - for(auto fileIter = fileIterBegin_; fileIter != fileIterEnd_; ++fileIter) { + for (auto fileIter = fileIterBegin_; fileIter != fileIterEnd_; ++fileIter) { findFileForSpecifiedID_->insert(std::make_pair(hasher(fileIter->logicalFileName()), fileIter - fileIterBegin_)); } } // Look up the logical file name in the table auto range = findFileForSpecifiedID_->equal_range(fileNameHash); - for(auto iter = range.first; iter != range.second; ++iter) { + for (auto iter = range.first; iter != range.second; ++iter) { // Don't look in files previously opened, because those have already been searched. 
- if(!indexesIntoFiles_[iter->second]) { + if (!indexesIntoFiles_[iter->second]) { setAtFileSequenceNumber(iter->second); initFile_(false); assert(rootFile()); bool found = rootFile()->setEntryAtItem(run, lumi, event); - if(found) { + if (found) { return true; } } @@ -136,17 +131,16 @@ namespace edm { return false; } - bool - RootInputFileSequence::skipToItemInNewFile(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event) { + bool RootInputFileSequence::skipToItemInNewFile(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event) { // Look for item in files not yet opened. We do not have a valid hash of the logical file name. - for(auto it = indexesIntoFiles_.begin(), itEnd = indexesIntoFiles_.end(); it != itEnd; ++it) { - if(!*it) { + for (auto it = indexesIntoFiles_.begin(), itEnd = indexesIntoFiles_.end(); it != itEnd; ++it) { + if (!*it) { // File not yet opened. setAtFileSequenceNumber(it - indexesIntoFiles_.begin()); initFile_(false); assert(rootFile()); bool found = rootFile()->setEntryAtItem(run, lumi, event); - if(found) { + if (found) { return true; } } @@ -155,22 +149,22 @@ namespace edm { return false; } - bool - RootInputFileSequence::skipToItem(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, size_t fileNameHash, bool currentFileFirst) { + bool RootInputFileSequence::skipToItem( + RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, size_t fileNameHash, bool currentFileFirst) { // Attempt to find item in currently open input file. bool found = currentFileFirst && rootFile() && rootFile()->setEntryAtItem(run, lumi, event); - if(!found) { + if (!found) { // If only one input file, give up now, to save time. - if(currentFileFirst && rootFile() && indexesIntoFiles_.size() == 1) { + if (currentFileFirst && rootFile() && indexesIntoFiles_.size() == 1) { return false; } // Look for item (run/lumi/event) in files previously opened without reopening unnecessary files. - for(auto it = indexesIntoFiles_.begin(), itEnd = indexesIntoFiles_.end(); it != itEnd; ++it) { - if(*it && (*it)->containsItem(run, lumi, event)) { + for (auto it = indexesIntoFiles_.begin(), itEnd = indexesIntoFiles_.end(); it != itEnd; ++it) { + if (*it && (*it)->containsItem(run, lumi, event)) { // We found it. Close the currently open file, and open the correct one. std::vector::const_iterator currentIter = fileIter_; setAtFileSequenceNumber(it - indexesIntoFiles_.begin()); - if(fileIter_ != currentIter) { + if (fileIter_ != currentIter) { initFile(false); } // Now get the item from the correct file. @@ -180,46 +174,45 @@ namespace edm { return true; } } - return (fileNameHash != 0U && skipToItemInNewFile(run, lumi, event, fileNameHash)) || skipToItemInNewFile(run, lumi, event); + return (fileNameHash != 0U && skipToItemInNewFile(run, lumi, event, fileNameHash)) || + skipToItemInNewFile(run, lumi, event); } return true; } - void - RootInputFileSequence::initTheFile(bool skipBadFiles, - bool deleteIndexIntoFile, - InputSource* input, - char const* inputTypeName, - InputType inputType) { + void RootInputFileSequence::initTheFile( + bool skipBadFiles, bool deleteIndexIntoFile, InputSource* input, char const* inputTypeName, InputType inputType) { // We are really going to close the open file. 
- if(fileIterLastOpened_ != fileIterEnd_) { + if (fileIterLastOpened_ != fileIterEnd_) { size_t currentIndexIntoFile = fileIterLastOpened_ - fileIterBegin_; - if(deleteIndexIntoFile) { + if (deleteIndexIntoFile) { indexesIntoFiles_[currentIndexIntoFile].reset(); } else { - if(indexesIntoFiles_[currentIndexIntoFile]) indexesIntoFiles_[currentIndexIntoFile]->inputFileClosed(); + if (indexesIntoFiles_[currentIndexIntoFile]) + indexesIntoFiles_[currentIndexIntoFile]->inputFileClosed(); } fileIterLastOpened_ = fileIterEnd_; } - closeFile_(); + closeFile(); - if(noMoreFiles()) { + if (noMoreFiles()) { // No files specified return; } // Check if the logical file name was found. - if(fileName().empty()) { + if (fileName().empty()) { // LFN not found in catalog. InputFile::reportSkippedFile(fileName(), logicalFileName()); - if(!skipBadFiles) { + if (!skipBadFiles) { throw cms::Exception("LogicalFileNameNotFound", "RootFileSequenceBase::initTheFile()\n") - << "Logical file name '" << logicalFileName() << "' was not found in the file catalog.\n" - << "If you wanted a local file, you forgot the 'file:' prefix\n" - << "before the file name in your configuration file.\n"; + << "Logical file name '" << logicalFileName() << "' was not found in the file catalog.\n" + << "If you wanted a local file, you forgot the 'file:' prefix\n" + << "before the file name in your configuration file.\n"; } - LogWarning("") << "Input logical file: " << logicalFileName() << " was not found in the catalog, and will be skipped.\n"; + LogWarning("") << "Input logical file: " << logicalFileName() + << " was not found in the catalog, and will be skipped.\n"; return; } @@ -233,64 +226,68 @@ namespace edm { std::shared_ptr filePtr; std::list originalInfo; - try { - std::unique_ptr sentry(input ? std::make_unique(*input, lfn_, usedFallback_) : nullptr); - std::unique_ptr name(gSystem->ExpandPathName(fileName().c_str()));; - filePtr = std::make_shared(name.get(), " Initiating request to open file ", inputType); - } - catch (cms::Exception const& e) { - if(!skipBadFiles) { - if(hasFallbackUrl) { - std::ostringstream out; - out << e.explainSelf(); - - std::unique_ptr name(gSystem->ExpandPathName(fallbackFileName().c_str())); - std::string pfn(name.get()); - InputFile::reportFallbackAttempt(pfn, logicalFileName(), out.str()); - originalInfo = e.additionalInfo(); - } else { - InputFile::reportSkippedFile(fileName(), logicalFileName()); - Exception ex(errors::FileOpenError, "", e); - ex.addContext("Calling RootFileSequenceBase::initTheFile()"); - std::ostringstream out; - out << "Input file " << fileName() << " could not be opened."; - ex.addAdditionalInfo(out.str()); - throw ex; - } + { + std::unique_ptr sentry( + input ? std::make_unique(*input, lfn_, false) : nullptr); + edm::Service service; + if (service.isAvailable()) { + service->openingFile(lfn(), inputType, -1); } - } - if(!filePtr && (hasFallbackUrl)) { try { - usedFallback_ = true; - std::unique_ptr sentry(input ? 
std::make_unique(*input, lfn_, usedFallback_) : nullptr); - std::unique_ptr fallbackFullName(gSystem->ExpandPathName(fallbackFileName().c_str())); - filePtr.reset(new InputFile(fallbackFullName.get(), " Fallback request to file ", inputType)); - } - catch (cms::Exception const& e) { - if(!skipBadFiles) { - InputFile::reportSkippedFile(fileName(), logicalFileName()); - Exception ex(errors::FallbackFileOpenError, "", e); - ex.addContext("Calling RootFileSequenceBase::initTheFile()"); - std::ostringstream out; - out << "Input file " << fileName() << " could not be opened.\n"; - out << "Fallback Input file " << fallbackFileName() << " also could not be opened."; - if (!originalInfo.empty()) { - out << std::endl << "Original exception info is above; fallback exception info is below."; - ex.addAdditionalInfo(out.str()); - for (auto const & s : originalInfo) { - ex.addAdditionalInfo(s); - } + std::unique_ptr name(gSystem->ExpandPathName(fileName().c_str())); + filePtr = std::make_shared(name.get(), " Initiating request to open file ", inputType); + } catch (cms::Exception const& e) { + if (!skipBadFiles) { + if (hasFallbackUrl) { + std::ostringstream out; + out << e.explainSelf(); + + std::unique_ptr name(gSystem->ExpandPathName(fallbackFileName().c_str())); + std::string pfn(name.get()); + InputFile::reportFallbackAttempt(pfn, logicalFileName(), out.str()); + originalInfo = e.additionalInfo(); } else { + InputFile::reportSkippedFile(fileName(), logicalFileName()); + Exception ex(errors::FileOpenError, "", e); + ex.addContext("Calling RootFileSequenceBase::initTheFile()"); + std::ostringstream out; + out << "Input file " << fileName() << " could not be opened."; ex.addAdditionalInfo(out.str()); + throw ex; + } + } + } + if (!filePtr && (hasFallbackUrl)) { + try { + usedFallback_ = true; + std::unique_ptr fallbackFullName(gSystem->ExpandPathName(fallbackFileName().c_str())); + filePtr.reset(new InputFile(fallbackFullName.get(), " Fallback request to file ", inputType)); + } catch (cms::Exception const& e) { + if (!skipBadFiles) { + InputFile::reportSkippedFile(fileName(), logicalFileName()); + Exception ex(errors::FallbackFileOpenError, "", e); + ex.addContext("Calling RootFileSequenceBase::initTheFile()"); + std::ostringstream out; + out << "Input file " << fileName() << " could not be opened.\n"; + out << "Fallback Input file " << fallbackFileName() << " also could not be opened."; + if (!originalInfo.empty()) { + out << std::endl << "Original exception info is above; fallback exception info is below."; + ex.addAdditionalInfo(out.str()); + for (auto const& s : originalInfo) { + ex.addAdditionalInfo(s); + } + } else { + ex.addAdditionalInfo(out.str()); + } + throw ex; } - throw ex; } } } - if(filePtr) { + if (filePtr) { size_t currentIndexIntoFile = fileIter_ - fileIterBegin_; rootFile_ = makeRootFile(filePtr); - if(input) { + if (input) { rootFile_->setSignals(&(input->preEventReadFromSourceSignal_), &(input->postEventReadFromSourceSignal_)); } assert(rootFile_); @@ -299,17 +296,24 @@ namespace edm { rootFile_->reportOpened(inputTypeName); } else { InputFile::reportSkippedFile(fileName(), logicalFileName()); - if(!skipBadFiles) { - throw Exception(errors::FileOpenError) << - "RootFileSequenceBase::initTheFile(): Input file " << fileName() << " was not found or could not be opened.\n"; + if (!skipBadFiles) { + throw Exception(errors::FileOpenError) << "RootFileSequenceBase::initTheFile(): Input file " << fileName() + << " was not found or could not be opened.\n"; } LogWarning("") << "Input file: 
" << fileName() << " was not found or could not be opened, and will be skipped.\n"; } } - void - RootInputFileSequence::setIndexIntoFile(size_t index) { - indexesIntoFiles_[index] = rootFile()->indexIntoFileSharedPtr(); + void RootInputFileSequence::closeFile() { + edm::Service service; + if (rootFile() and service.isAvailable()) { + service->closedFile(lfn(), usedFallback()); + } + closeFile_(); + } + + void RootInputFileSequence::setIndexIntoFile(size_t index) { + indexesIntoFiles_[index] = rootFile()->indexIntoFileSharedPtr(); } -} +} // namespace edm diff --git a/IOPool/Input/src/RootInputFileSequence.h b/IOPool/Input/src/RootInputFileSequence.h index 5d82de0310260..ae132f6971eea 100644 --- a/IOPool/Input/src/RootInputFileSequence.h +++ b/IOPool/Input/src/RootInputFileSequence.h @@ -28,12 +28,11 @@ namespace edm { class RootInputFileSequence { public: - explicit RootInputFileSequence(ParameterSet const& pset, - InputFileCatalog const& catalog); + explicit RootInputFileSequence(ParameterSet const& pset, InputFileCatalog const& catalog); virtual ~RootInputFileSequence(); - RootInputFileSequence(RootInputFileSequence const&) = delete; // Disallow copying and moving - RootInputFileSequence& operator=(RootInputFileSequence const&) = delete; // Disallow copying and moving + RootInputFileSequence(RootInputFileSequence const&) = delete; // Disallow copying and moving + RootInputFileSequence& operator=(RootInputFileSequence const&) = delete; // Disallow copying and moving bool containedInCurrentFile(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event) const; void readEvent(EventPrincipal& cache); @@ -41,42 +40,54 @@ namespace edm { void readLuminosityBlock_(LuminosityBlockPrincipal& lumiPrincipal); std::shared_ptr readRunAuxiliary_(); void readRun_(RunPrincipal& runPrincipal); - bool skipToItem(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, size_t fileNameHash = 0U, bool currentFileFirst = true); + bool skipToItem(RunNumber_t run, + LuminosityBlockNumber_t lumi, + EventNumber_t event, + size_t fileNameHash = 0U, + bool currentFileFirst = true); std::shared_ptr fileProductRegistry() const; std::shared_ptr fileBranchIDListHelper() const; + + void closeFile(); + protected: typedef std::shared_ptr RootFileSharedPtr; - void initFile(bool skipBadFiles) {initFile_(skipBadFiles);} - void initTheFile(bool skipBadFiles, bool deleteIndexIntoFile, InputSource* input, char const* inputTypeName, InputType inputType); + void initFile(bool skipBadFiles) { initFile_(skipBadFiles); } + void initTheFile(bool skipBadFiles, + bool deleteIndexIntoFile, + InputSource* input, + char const* inputTypeName, + InputType inputType); bool skipToItemInNewFile(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event); bool skipToItemInNewFile(RunNumber_t run, LuminosityBlockNumber_t lumi, EventNumber_t event, size_t fileNameHash); - bool atFirstFile() const {return fileIter_ == fileIterBegin_;} - bool atLastFile() const {return fileIter_ + 1 == fileIterEnd_;} - bool noMoreFiles() const {return fileIter_ == fileIterEnd_;} - bool noFiles() const {return fileIterBegin_ == fileIterEnd_;} - size_t sequenceNumberOfFile() const {return fileIter_ - fileIterBegin_;} - size_t numberOfFiles() const {return fileIterEnd_ - fileIterBegin_;} - - void setAtFirstFile() {fileIter_ = fileIterBegin_;} - void setAtFileSequenceNumber(size_t offset) {fileIter_ = fileIterBegin_ + offset;} - void setNoMoreFiles() {fileIter_ = fileIterEnd_;} - void setAtNextFile() {++fileIter_;} - void setAtPreviousFile() 
{--fileIter_;} - - std::string const& fileName() const {return fileIter_->fileName();} - std::string const& logicalFileName() const {return fileIter_->logicalFileName();} - std::string const& fallbackFileName() const {return fileIter_->fallbackFileName();} - std::string const& lfn() const {return lfn_;} + bool atFirstFile() const { return fileIter_ == fileIterBegin_; } + bool atLastFile() const { return fileIter_ + 1 == fileIterEnd_; } + bool noMoreFiles() const { return fileIter_ == fileIterEnd_; } + bool noFiles() const { return fileIterBegin_ == fileIterEnd_; } + size_t sequenceNumberOfFile() const { return fileIter_ - fileIterBegin_; } + size_t numberOfFiles() const { return fileIterEnd_ - fileIterBegin_; } + + void setAtFirstFile() { fileIter_ = fileIterBegin_; } + void setAtFileSequenceNumber(size_t offset) { fileIter_ = fileIterBegin_ + offset; } + void setNoMoreFiles() { fileIter_ = fileIterEnd_; } + void setAtNextFile() { ++fileIter_; } + void setAtPreviousFile() { --fileIter_; } + + std::string const& fileName() const { return fileIter_->fileName(); } + std::string const& logicalFileName() const { return fileIter_->logicalFileName(); } + std::string const& fallbackFileName() const { return fileIter_->fallbackFileName(); } + std::string const& lfn() const { return lfn_; } std::vector const& fileCatalogItems() const; - std::vector > const& indexesIntoFiles() const {return indexesIntoFiles_;} + std::vector> const& indexesIntoFiles() const { return indexesIntoFiles_; } void setIndexIntoFile(size_t index); - size_t lfnHash() const {return lfnHash_;} - bool usedFallback() const {return usedFallback_;} + size_t lfnHash() const { return lfnHash_; } + bool usedFallback() const { return usedFallback_; } + + std::shared_ptr rootFile() const { return get_underlying_safe(rootFile_); } + std::shared_ptr& rootFile() { return get_underlying_safe(rootFile_); } - std::shared_ptr rootFile() const {return get_underlying_safe(rootFile_);} - std::shared_ptr& rootFile() {return get_underlying_safe(rootFile_);} private: InputFileCatalog const& catalog_; std::string lfn_; @@ -88,13 +99,13 @@ namespace edm { std::vector::const_iterator fileIter_; std::vector::const_iterator fileIterLastOpened_; edm::propagate_const rootFile_; - std::vector > indexesIntoFiles_; + std::vector> indexesIntoFiles_; private: - virtual RootFileSharedPtr makeRootFile(std::shared_ptr filePtr) = 0; + virtual RootFileSharedPtr makeRootFile(std::shared_ptr filePtr) = 0; virtual void initFile_(bool skipBadFiles) = 0; virtual void closeFile_() = 0; - }; // class RootInputFileSequence -} + }; // class RootInputFileSequence +} // namespace edm #endif diff --git a/IOPool/Input/src/RootPrimaryFileSequence.cc b/IOPool/Input/src/RootPrimaryFileSequence.cc index 7e3a8d1ba46ee..7492316373ee6 100644 --- a/IOPool/Input/src/RootPrimaryFileSequence.cc +++ b/IOPool/Input/src/RootPrimaryFileSequence.cc @@ -18,35 +18,35 @@ #include "Utilities/StorageFactory/interface/StorageFactory.h" namespace edm { - RootPrimaryFileSequence::RootPrimaryFileSequence( - ParameterSet const& pset, - PoolSource& input, - InputFileCatalog const& catalog) : - RootInputFileSequence(pset, catalog), - input_(input), - firstFile_(true), - branchesMustMatch_(BranchDescription::Permissive), - orderedProcessHistoryIDs_(), - eventSkipperByID_(EventSkipperByID::create(pset).release()), - initialNumberOfEventsToSkip_(pset.getUntrackedParameter("skipEvents")), - noEventSort_(pset.getUntrackedParameter("noEventSort")), - treeCacheSize_(noEventSort_ ? 
pset.getUntrackedParameter("cacheSize") : 0U), - duplicateChecker_(new DuplicateChecker(pset)), - usingGoToEvent_(false), - enablePrefetching_(false), - enforceGUIDInFileName_(pset.getUntrackedParameter("enforceGUIDInFileName")) { - + RootPrimaryFileSequence::RootPrimaryFileSequence(ParameterSet const& pset, + PoolSource& input, + InputFileCatalog const& catalog) + : RootInputFileSequence(pset, catalog), + input_(input), + firstFile_(true), + branchesMustMatch_(BranchDescription::Permissive), + orderedProcessHistoryIDs_(), + eventSkipperByID_(EventSkipperByID::create(pset).release()), + initialNumberOfEventsToSkip_(pset.getUntrackedParameter("skipEvents")), + noEventSort_(pset.getUntrackedParameter("noEventSort")), + treeCacheSize_(noEventSort_ ? pset.getUntrackedParameter("cacheSize") : 0U), + duplicateChecker_(new DuplicateChecker(pset)), + usingGoToEvent_(false), + enablePrefetching_(false), + enforceGUIDInFileName_(pset.getUntrackedParameter("enforceGUIDInFileName")) { // The SiteLocalConfig controls the TTreeCache size and the prefetching settings. Service pSLC; - if(pSLC.isAvailable()) { - if(treeCacheSize_ != 0U && pSLC->sourceTTreeCacheSize()) { + if (pSLC.isAvailable()) { + if (treeCacheSize_ != 0U && pSLC->sourceTTreeCacheSize()) { treeCacheSize_ = *(pSLC->sourceTTreeCacheSize()); } enablePrefetching_ = pSLC->enablePrefetching(); } - std::string branchesMustMatch = pset.getUntrackedParameter("branchesMustMatch", std::string("permissive")); - if(branchesMustMatch == std::string("strict")) branchesMustMatch_ = BranchDescription::Strict; + std::string branchesMustMatch = + pset.getUntrackedParameter("branchesMustMatch", std::string("permissive")); + if (branchesMustMatch == std::string("strict")) + branchesMustMatch_ = BranchDescription::Strict; // Prestage the files for (setAtFirstFile(); !noMoreFiles(); setAtNextFile()) { @@ -55,200 +55,193 @@ namespace edm { // Open the first file. for (setAtFirstFile(); !noMoreFiles(); setAtNextFile()) { initFile(input_.skipBadFiles()); - if(rootFile()) break; + if (rootFile()) + break; } - if(rootFile()) { + if (rootFile()) { input_.productRegistryUpdate().updateFromInput(rootFile()->productRegistry()->productList()); - if(initialNumberOfEventsToSkip_ != 0) { + if (initialNumberOfEventsToSkip_ != 0) { skipEvents(initialNumberOfEventsToSkip_); } } } - RootPrimaryFileSequence::~RootPrimaryFileSequence() { - } + RootPrimaryFileSequence::~RootPrimaryFileSequence() {} - void - RootPrimaryFileSequence::endJob() { - closeFile_(); - } + void RootPrimaryFileSequence::endJob() { closeFile(); } - std::unique_ptr - RootPrimaryFileSequence::readFile_() { - if(firstFile_) { + std::unique_ptr RootPrimaryFileSequence::readFile_() { + if (firstFile_) { // The first input file has already been opened. firstFile_ = false; - if(!rootFile()) { + if (!rootFile()) { initFile(input_.skipBadFiles()); } } else { - if(!nextFile()) { + if (!nextFile()) { assert(0); } } - if(!rootFile()) { + if (!rootFile()) { return std::make_unique(); } return rootFile()->createFileBlock(); } - void - RootPrimaryFileSequence::closeFile_() { + void RootPrimaryFileSequence::closeFile_() { // close the currently open file, if any, and delete the RootFile object. 
- if(rootFile()) { + if (rootFile()) { auto sentry = std::make_unique(input_, lfn(), usedFallback()); rootFile()->close(); - if(duplicateChecker_) duplicateChecker_->inputFileClosed(); + if (duplicateChecker_) + duplicateChecker_->inputFileClosed(); rootFile().reset(); } } - void - RootPrimaryFileSequence::initFile_(bool skipBadFiles) { + void RootPrimaryFileSequence::initFile_(bool skipBadFiles) { // If we are not duplicate checking across files and we are not using random access to find events, // then we can delete the IndexIntoFile for the file we are closing. // If we can't delete all of it, then we can delete the parts we do not need. - bool deleteIndexIntoFile = !usingGoToEvent_ && !(duplicateChecker_ && duplicateChecker_->checkingAllFiles() && !duplicateChecker_->checkDisabled()); + bool deleteIndexIntoFile = !usingGoToEvent_ && !(duplicateChecker_ && duplicateChecker_->checkingAllFiles() && + !duplicateChecker_->checkDisabled()); initTheFile(skipBadFiles, deleteIndexIntoFile, &input_, "primaryFiles", InputType::Primary); } - RootPrimaryFileSequence::RootFileSharedPtr - RootPrimaryFileSequence::makeRootFile(std::shared_ptr filePtr) { - size_t currentIndexIntoFile = sequenceNumberOfFile(); - return std::make_shared( - fileName(), - input_.processConfiguration(), - logicalFileName(), - filePtr, - eventSkipperByID(), - initialNumberOfEventsToSkip_ != 0, - remainingEvents(), - remainingLuminosityBlocks(), - input_.nStreams(), - treeCacheSize_, - input_.treeMaxVirtualSize(), - input_.processingMode(), - input_.runHelper(), - noEventSort_, - input_.productSelectorRules(), - InputType::Primary, - input_.branchIDListHelper(), - input_.thinnedAssociationsHelper(), - nullptr, // associationsFromSecondary - duplicateChecker(), - input_.dropDescendants(), - input_.processHistoryRegistryForUpdate(), - indexesIntoFiles(), - currentIndexIntoFile, - orderedProcessHistoryIDs_, - input_.bypassVersionCheck(), - input_.labelRawDataLikeMC(), - usingGoToEvent_, - enablePrefetching_, - enforceGUIDInFileName_); + RootPrimaryFileSequence::RootFileSharedPtr RootPrimaryFileSequence::makeRootFile(std::shared_ptr filePtr) { + size_t currentIndexIntoFile = sequenceNumberOfFile(); + return std::make_shared(fileName(), + input_.processConfiguration(), + logicalFileName(), + filePtr, + eventSkipperByID(), + initialNumberOfEventsToSkip_ != 0, + remainingEvents(), + remainingLuminosityBlocks(), + input_.nStreams(), + treeCacheSize_, + input_.treeMaxVirtualSize(), + input_.processingMode(), + input_.runHelper(), + noEventSort_, + input_.productSelectorRules(), + InputType::Primary, + input_.branchIDListHelper(), + input_.thinnedAssociationsHelper(), + nullptr, // associationsFromSecondary + duplicateChecker(), + input_.dropDescendants(), + input_.processHistoryRegistryForUpdate(), + indexesIntoFiles(), + currentIndexIntoFile, + orderedProcessHistoryIDs_, + input_.bypassVersionCheck(), + input_.labelRawDataLikeMC(), + usingGoToEvent_, + enablePrefetching_, + enforceGUIDInFileName_); } bool RootPrimaryFileSequence::nextFile() { - if(!noMoreFiles()) setAtNextFile(); - if(noMoreFiles()) { + if (!noMoreFiles()) + setAtNextFile(); + if (noMoreFiles()) { return false; } initFile(input_.skipBadFiles()); - if(rootFile()) { + if (rootFile()) { // make sure the new product registry is compatible with the main one - std::string mergeInfo = input_.productRegistryUpdate().merge(*rootFile()->productRegistry(), - fileName(), - branchesMustMatch_); - if(!mergeInfo.empty()) { - throw 
Exception(errors::MismatchedInputFiles,"RootPrimaryFileSequence::nextFile()") << mergeInfo; + std::string mergeInfo = + input_.productRegistryUpdate().merge(*rootFile()->productRegistry(), fileName(), branchesMustMatch_); + if (!mergeInfo.empty()) { + throw Exception(errors::MismatchedInputFiles, "RootPrimaryFileSequence::nextFile()") << mergeInfo; } } return true; } bool RootPrimaryFileSequence::previousFile() { - if(atFirstFile()) { + if (atFirstFile()) { return false; } setAtPreviousFile(); initFile(false); - if(rootFile()) { + if (rootFile()) { // make sure the new product registry is compatible to the main one - std::string mergeInfo = input_.productRegistryUpdate().merge(*rootFile()->productRegistry(), - fileName(), - branchesMustMatch_); - if(!mergeInfo.empty()) { - throw Exception(errors::MismatchedInputFiles,"RootPrimaryFileSequence::previousEvent()") << mergeInfo; + std::string mergeInfo = + input_.productRegistryUpdate().merge(*rootFile()->productRegistry(), fileName(), branchesMustMatch_); + if (!mergeInfo.empty()) { + throw Exception(errors::MismatchedInputFiles, "RootPrimaryFileSequence::previousEvent()") << mergeInfo; } } - if(rootFile()) rootFile()->setToLastEntry(); + if (rootFile()) + rootFile()->setToLastEntry(); return true; } - InputSource::ItemType - RootPrimaryFileSequence::getNextItemType(RunNumber_t& run, LuminosityBlockNumber_t& lumi, EventNumber_t& event) { - if(noMoreFiles()) { + InputSource::ItemType RootPrimaryFileSequence::getNextItemType(RunNumber_t& run, + LuminosityBlockNumber_t& lumi, + EventNumber_t& event) { + if (noMoreFiles()) { return InputSource::IsStop; } - if(firstFile_) { + if (firstFile_) { return InputSource::IsFile; } - if(rootFile()) { + if (rootFile()) { IndexIntoFile::EntryType entryType = rootFile()->getNextItemType(run, lumi, event); - if(entryType == IndexIntoFile::kEvent) { + if (entryType == IndexIntoFile::kEvent) { return InputSource::IsEvent; - } else if(entryType == IndexIntoFile::kLumi) { + } else if (entryType == IndexIntoFile::kLumi) { return InputSource::IsLumi; - } else if(entryType == IndexIntoFile::kRun) { + } else if (entryType == IndexIntoFile::kRun) { return InputSource::IsRun; } assert(entryType == IndexIntoFile::kEnd); } - if(atLastFile()) { + if (atLastFile()) { return InputSource::IsStop; } return InputSource::IsFile; } // Rewind to before the first event that was read. - void - RootPrimaryFileSequence::rewind_() { - if(!atFirstFile()) { - closeFile_(); + void RootPrimaryFileSequence::rewind_() { + if (!atFirstFile()) { + closeFile(); setAtFirstFile(); } - if(!rootFile()) { + if (!rootFile()) { initFile(false); } rewindFile(); firstFile_ = true; - if(rootFile()) { - if(initialNumberOfEventsToSkip_ != 0) { + if (rootFile()) { + if (initialNumberOfEventsToSkip_ != 0) { skipEvents(initialNumberOfEventsToSkip_); } } } // Rewind to the beginning of the current file - void - RootPrimaryFileSequence::rewindFile() { - if(rootFile()) rootFile()->rewind(); + void RootPrimaryFileSequence::rewindFile() { + if (rootFile()) + rootFile()->rewind(); } // Advance "offset" events. Offset can be positive or negative (or zero). 
- bool - RootPrimaryFileSequence::skipEvents(int offset) { + bool RootPrimaryFileSequence::skipEvents(int offset) { assert(rootFile()); - while(offset != 0) { + while (offset != 0) { bool atEnd = rootFile()->skipEvents(offset); - if((offset > 0 || atEnd) && !nextFile()) { + if ((offset > 0 || atEnd) && !nextFile()) { return false; } - if(offset < 0 && !previousFile()) { + if (offset < 0 && !previousFile()) { setNoMoreFiles(); return false; } @@ -256,15 +249,14 @@ namespace edm { return true; } - bool - RootPrimaryFileSequence::goToEvent(EventID const& eventID) { + bool RootPrimaryFileSequence::goToEvent(EventID const& eventID) { usingGoToEvent_ = true; - if(rootFile()) { - if(rootFile()->goToEvent(eventID)) { + if (rootFile()) { + if (rootFile()->goToEvent(eventID)) { return true; } // If only one input file, give up now, to save time. - if(rootFile() && indexesIntoFiles().size() == 1) { + if (rootFile() && indexesIntoFiles().size() == 1) { return false; } // Save the current file and position so that we can restore them @@ -274,8 +266,8 @@ namespace edm { IndexIntoFile::IndexIntoFileItr originalPosition = rootFile()->indexIntoFileIter(); // Look for item (run/lumi/event) in files previously opened without reopening unnecessary files. - for(auto it = indexesIntoFiles().begin(), itEnd = indexesIntoFiles().end(); it != itEnd; ++it) { - if(*it && (*it)->containsItem(eventID.run(), eventID.luminosityBlock(), eventID.event())) { + for (auto it = indexesIntoFiles().begin(), itEnd = indexesIntoFiles().end(); it != itEnd; ++it) { + if (*it && (*it)->containsItem(eventID.run(), eventID.luminosityBlock(), eventID.event())) { // We found it. Close the currently open file, and open the correct one. setAtFileSequenceNumber(it - indexesIntoFiles().begin()); initFile(false); @@ -287,20 +279,20 @@ namespace edm { } } // Look for item in files not yet opened. 
- for(auto it = indexesIntoFiles().begin(), itEnd = indexesIntoFiles().end(); it != itEnd; ++it) { - if(!*it) { + for (auto it = indexesIntoFiles().begin(), itEnd = indexesIntoFiles().end(); it != itEnd; ++it) { + if (!*it) { setAtFileSequenceNumber(it - indexesIntoFiles().begin()); initFile(false); closedOriginalFile = true; - if((*it)->containsItem(eventID.run(), eventID.luminosityBlock(), eventID.event())) { + if ((*it)->containsItem(eventID.run(), eventID.luminosityBlock(), eventID.event())) { assert(rootFile()); - if(rootFile()->goToEvent(eventID)) { + if (rootFile()->goToEvent(eventID)) { return true; } } } } - if(closedOriginalFile) { + if (closedOriginalFile) { setAtFileSequenceNumber(originalFileSequenceNumber); initFile(false); assert(rootFile()); @@ -310,32 +302,28 @@ namespace edm { return false; } - int - RootPrimaryFileSequence::remainingEvents() const { - return input_.remainingEvents(); - } + int RootPrimaryFileSequence::remainingEvents() const { return input_.remainingEvents(); } - int - RootPrimaryFileSequence::remainingLuminosityBlocks() const { - return input_.remainingLuminosityBlocks(); - } + int RootPrimaryFileSequence::remainingLuminosityBlocks() const { return input_.remainingLuminosityBlocks(); } - void - RootPrimaryFileSequence::fillDescription(ParameterSetDescription & desc) { + void RootPrimaryFileSequence::fillDescription(ParameterSetDescription& desc) { desc.addUntracked("skipEvents", 0U) ->setComment("Skip the first 'skipEvents' events that otherwise would have been processed."); desc.addUntracked("noEventSort", true) - ->setComment("True: Process runs, lumis and events in the order they appear in the file (but see notes 1 and 2).\n" - "False: Process runs, lumis and events in each file in numerical order (run#, lumi#, event#) (but see note 3).\n" - "Note 1: Events within the same lumi will always be processed contiguously.\n" - "Note 2: Lumis within the same run will always be processed contiguously.\n" - "Note 3: Any sorting occurs independently in each input file (no sorting across input files)."); + ->setComment( + "True: Process runs, lumis and events in the order they appear in the file (but see notes 1 and 2).\n" + "False: Process runs, lumis and events in each file in numerical order (run#, lumi#, event#) (but see note " + "3).\n" + "Note 1: Events within the same lumi will always be processed contiguously.\n" + "Note 2: Lumis within the same run will always be processed contiguously.\n" + "Note 3: Any sorting occurs independently in each input file (no sorting across input files)."); desc.addUntracked("cacheSize", roottree::defaultCacheSize) ->setComment("Size of ROOT TTree prefetch cache. 
Affects performance."); std::string defaultString("permissive"); desc.addUntracked("branchesMustMatch", defaultString) - ->setComment("'strict': Branches in each input file must match those in the first file.\n" - "'permissive': Branches in each input file may be any subset of those in the first file."); + ->setComment( + "'strict': Branches in each input file must match those in the first file.\n" + "'permissive': Branches in each input file may be any subset of those in the first file."); desc.addUntracked("enforceGUIDInFileName", false) ->setComment( "True: file name part is required to be equal to the GUID of the file\n" @@ -345,13 +333,12 @@ namespace edm { DuplicateChecker::fillDescription(desc); } - ProcessingController::ForwardState - RootPrimaryFileSequence::forwardState() const { - if(rootFile()) { - if(!rootFile()->wasLastEventJustRead()) { + ProcessingController::ForwardState RootPrimaryFileSequence::forwardState() const { + if (rootFile()) { + if (!rootFile()->wasLastEventJustRead()) { return ProcessingController::kEventsAheadInFile; } - if(noMoreFiles() || atLastFile()) { + if (noMoreFiles() || atLastFile()) { return ProcessingController::kAtLastEvent; } else { return ProcessingController::kNextFileExists; @@ -360,13 +347,12 @@ namespace edm { return ProcessingController::kUnknownForward; } - ProcessingController::ReverseState - RootPrimaryFileSequence::reverseState() const { - if(rootFile()) { - if(!rootFile()->wasFirstEventJustRead()) { + ProcessingController::ReverseState RootPrimaryFileSequence::reverseState() const { + if (rootFile()) { + if (!rootFile()->wasFirstEventJustRead()) { return ProcessingController::kEventsBackwardsInFile; } - if(!atFirstFile()) { + if (!atFirstFile()) { return ProcessingController::kPreviousFileExists; } return ProcessingController::kAtFirstEvent; @@ -374,4 +360,4 @@ namespace edm { return ProcessingController::kUnknownReverse; } -} +} // namespace edm diff --git a/IOPool/Input/src/RootPrimaryFileSequence.h b/IOPool/Input/src/RootPrimaryFileSequence.h index d919159690dd2..7444680c83c2a 100644 --- a/IOPool/Input/src/RootPrimaryFileSequence.h +++ b/IOPool/Input/src/RootPrimaryFileSequence.h @@ -42,7 +42,6 @@ namespace edm { RootPrimaryFileSequence& operator=(RootPrimaryFileSequence const&) = delete; // Disallow copying and moving std::unique_ptr readFile_(); - void closeFile_() override; void endJob(); InputSource::ItemType getNextItemType(RunNumber_t& run, LuminosityBlockNumber_t& lumi, EventNumber_t& event); bool skipEvents(int offset); @@ -57,6 +56,7 @@ namespace edm { bool nextFile(); bool previousFile(); void rewindFile(); + void closeFile_() override; int remainingEvents() const; int remainingLuminosityBlocks() const; diff --git a/IOPool/Input/src/RootSecondaryFileSequence.cc b/IOPool/Input/src/RootSecondaryFileSequence.cc index 6ad41390185ca..9d70a6c787199 100644 --- a/IOPool/Input/src/RootSecondaryFileSequence.cc +++ b/IOPool/Input/src/RootSecondaryFileSequence.cc @@ -18,19 +18,17 @@ #include "Utilities/StorageFactory/interface/StorageFactory.h" namespace edm { - RootSecondaryFileSequence::RootSecondaryFileSequence( - ParameterSet const& pset, - PoolSource& input, - InputFileCatalog const& catalog) : - RootInputFileSequence(pset, catalog), - input_(input), - orderedProcessHistoryIDs_(), - enablePrefetching_(false), - enforceGUIDInFileName_(pset.getUntrackedParameter("enforceGUIDInFileName")) { - + RootSecondaryFileSequence::RootSecondaryFileSequence(ParameterSet const& pset, + PoolSource& input, + InputFileCatalog const& catalog) + : 
RootInputFileSequence(pset, catalog), + input_(input), + orderedProcessHistoryIDs_(), + enablePrefetching_(false), + enforceGUIDInFileName_(pset.getUntrackedParameter("enforceGUIDInFileName")) { // The SiteLocalConfig controls the TTreeCache size and the prefetching settings. Service pSLC; - if(pSLC.isAvailable()) { + if (pSLC.isAvailable()) { enablePrefetching_ = pSLC->enablePrefetching(); } @@ -42,27 +40,23 @@ namespace edm { StorageFactory::get()->stagein(fileName()); // Open the first file. - for(setAtFirstFile(); !noMoreFiles(); setAtNextFile()) { + for (setAtFirstFile(); !noMoreFiles(); setAtNextFile()) { initFile(input_.skipBadFiles()); - if(rootFile()) break; + if (rootFile()) + break; } - if(rootFile()) { + if (rootFile()) { input_.productRegistryUpdate().updateFromInput(rootFile()->productRegistry()->productList()); } } - RootSecondaryFileSequence::~RootSecondaryFileSequence() { - } + RootSecondaryFileSequence::~RootSecondaryFileSequence() {} - void - RootSecondaryFileSequence::endJob() { - closeFile_(); - } + void RootSecondaryFileSequence::endJob() { closeFile(); } - void - RootSecondaryFileSequence::closeFile_() { + void RootSecondaryFileSequence::closeFile_() { // close the currently open file, if any, and delete the RootFile object. - if(rootFile()) { + if (rootFile()) { rootFile()->close(); rootFile().reset(); } @@ -72,39 +66,37 @@ namespace edm { initTheFile(skipBadFiles, false, nullptr, "secondaryFiles", InputType::SecondaryFile); } - RootSecondaryFileSequence::RootFileSharedPtr - RootSecondaryFileSequence::makeRootFile(std::shared_ptr filePtr) { + RootSecondaryFileSequence::RootFileSharedPtr RootSecondaryFileSequence::makeRootFile( + std::shared_ptr filePtr) { size_t currentIndexIntoFile = sequenceNumberOfFile(); - return std::make_shared( - fileName(), - input_.processConfiguration(), - logicalFileName(), - filePtr, - input_.nStreams(), - input_.treeMaxVirtualSize(), - input_.processingMode(), - input_.runHelper(), - input_.productSelectorRules(), - InputType::SecondaryFile, - input_.branchIDListHelper(), - input_.thinnedAssociationsHelper(), - &associationsFromSecondary_, - input_.dropDescendants(), - input_.processHistoryRegistryForUpdate(), - indexesIntoFiles(), - currentIndexIntoFile, - orderedProcessHistoryIDs_, - input_.bypassVersionCheck(), - input_.labelRawDataLikeMC(), - enablePrefetching_, - enforceGUIDInFileName_); + return std::make_shared(fileName(), + input_.processConfiguration(), + logicalFileName(), + filePtr, + input_.nStreams(), + input_.treeMaxVirtualSize(), + input_.processingMode(), + input_.runHelper(), + input_.productSelectorRules(), + InputType::SecondaryFile, + input_.branchIDListHelper(), + input_.thinnedAssociationsHelper(), + &associationsFromSecondary_, + input_.dropDescendants(), + input_.processHistoryRegistryForUpdate(), + indexesIntoFiles(), + currentIndexIntoFile, + orderedProcessHistoryIDs_, + input_.bypassVersionCheck(), + input_.labelRawDataLikeMC(), + enablePrefetching_, + enforceGUIDInFileName_); } - void - RootSecondaryFileSequence::initAssociationsFromSecondary(std::set const& associationsFromSecondary) { - for(auto const& branchID : associationsFromSecondary) { + void RootSecondaryFileSequence::initAssociationsFromSecondary(std::set const& associationsFromSecondary) { + for (auto const& branchID : associationsFromSecondary) { associationsFromSecondary_.push_back(branchID); } rootFile()->initAssociationsFromSecondary(associationsFromSecondary_); } -} +} // namespace edm diff --git a/IOPool/Input/src/RootSecondaryFileSequence.h 
b/IOPool/Input/src/RootSecondaryFileSequence.h index af91b30f0d5b5..e99df65e93e69 100644 --- a/IOPool/Input/src/RootSecondaryFileSequence.h +++ b/IOPool/Input/src/RootSecondaryFileSequence.h @@ -35,10 +35,10 @@ namespace edm { RootSecondaryFileSequence(RootSecondaryFileSequence const&) = delete; // Disallow copying and moving RootSecondaryFileSequence& operator=(RootSecondaryFileSequence const&) = delete; // Disallow copying and moving - void closeFile_() override; void endJob(); void initAssociationsFromSecondary(std::set const&); private: + void closeFile_() override; void initFile_(bool skipBadFiles) override; RootFileSharedPtr makeRootFile(std::shared_ptr filePtr) override; diff --git a/IOPool/SecondaryInput/test/SecondaryProducer.h b/IOPool/SecondaryInput/test/SecondaryProducer.h index 6ebb5c16d53fa..8436f66b1f8d2 100644 --- a/IOPool/SecondaryInput/test/SecondaryProducer.h +++ b/IOPool/SecondaryInput/test/SecondaryProducer.h @@ -9,7 +9,7 @@ ************************************************************/ #include "DataFormats/Provenance/interface/EventID.h" -#include "FWCore/Framework/interface/EDProducer.h" +#include "FWCore/Framework/interface/one/EDProducer.h" #include "FWCore/Utilities/interface/get_underlying_safe.h" #include @@ -19,9 +19,8 @@ namespace edm { class ProcessConfiguration; class VectorInputSource; - class SecondaryProducer: public EDProducer { + class SecondaryProducer : public one::EDProducer<> { public: - /** standard constructor*/ explicit SecondaryProducer(ParameterSet const& pset); @@ -34,13 +33,13 @@ namespace edm { void processOneEvent(EventPrincipal const& eventPrincipal, Event& e); private: - virtual void put(Event &) {} + virtual void put(Event&) {} virtual void beginJob(); virtual void endJob(); std::shared_ptr makeSecInput(ParameterSet const& ps); - std::shared_ptr productRegistry() const {return get_underlying_safe(productRegistry_);} - std::shared_ptr& productRegistry() {return get_underlying_safe(productRegistry_);} + std::shared_ptr productRegistry() const { return get_underlying_safe(productRegistry_); } + std::shared_ptr& productRegistry() { return get_underlying_safe(productRegistry_); } edm::propagate_const> productRegistry_; edm::propagate_const const> secInput_; @@ -53,6 +52,6 @@ namespace edm { bool firstLoop_; EventNumber_t expectedEventNumber_; }; -}//edm +} // namespace edm #endif diff --git a/IOPool/TFileAdaptor/src/TStorageFactoryFile.cc b/IOPool/TFileAdaptor/src/TStorageFactoryFile.cc index 017426df7b3c7..408111747b61b 100644 --- a/IOPool/TFileAdaptor/src/TStorageFactoryFile.cc +++ b/IOPool/TFileAdaptor/src/TStorageFactoryFile.cc @@ -205,7 +205,7 @@ TStorageFactoryFile::Initialize(const char *path, try { edm::Service statsService; if (statsService.isAvailable()) { - statsService->setSize(storage_->size()); + statsService->setSize(path, storage_->size()); } } catch (edm::Exception e) { if (e.categoryCode() != edm::errors::NotFound) { diff --git a/Utilities/StorageFactory/interface/StatisticsSenderService.h b/Utilities/StorageFactory/interface/StatisticsSenderService.h index fd316899a43a5..25d715e035abb 100644 --- a/Utilities/StorageFactory/interface/StatisticsSenderService.h +++ b/Utilities/StorageFactory/interface/StatisticsSenderService.h @@ -6,59 +6,87 @@ #include #include #include +#include +#include "FWCore/Utilities/interface/InputType.h" namespace edm { - class ParameterSet; + class ParameterSet; class ActivityRegistry; namespace storage { class StatisticsSenderService { + public: + StatisticsSenderService(edm::ParameterSet const& 
pset, edm::ActivityRegistry& ar); + + void setSize(const std::string& urlOrLfn, size_t size); + void setCurrentServer(const std::string& urlOrLfn, const std::string& servername); + static const char* getJobID(); + static bool getX509Subject(std::string&); + + void openingFile(std::string const& lfn, edm::InputType type, size_t size = -1); + void closedFile(std::string const& lfn, bool usedFallback); + + private: + void filePostCloseEvent(std::string const& lfn, bool usedFallback); + + std::string const* matchedLfn(std::string const& iURL); //updates its internal cache + class FileStatistics { public: - StatisticsSenderService(edm::ParameterSet const& pset, edm::ActivityRegistry& ar); + FileStatistics(); + void fillUDP(std::ostringstream& os) const; + void update(); - void setSize(size_t size); - void setCurrentServer(const std::string &servername); - void filePreCloseEvent(std::string const& lfn, bool usedFallback); - static const char * getJobID(); - static bool getX509Subject(std::string &); private: + ssize_t m_read_single_operations; + ssize_t m_read_single_bytes; + ssize_t m_read_single_square; + ssize_t m_read_vector_operations; + ssize_t m_read_vector_bytes; + ssize_t m_read_vector_square; + ssize_t m_read_vector_count_sum; + ssize_t m_read_vector_count_square; + time_t m_start_time; + }; - class FileStatistics { - public: - FileStatistics(); - void fillUDP(std::ostringstream &os); - private: - ssize_t m_read_single_operations; - ssize_t m_read_single_bytes; - ssize_t m_read_single_square; - ssize_t m_read_vector_operations; - ssize_t m_read_vector_bytes; - ssize_t m_read_vector_square; - ssize_t m_read_vector_count_sum; - ssize_t m_read_vector_count_square; - time_t m_start_time; - }; - - void determineHostnames(void); - void fillUDP(const std::string&, bool, std::string &); - std::string m_clienthost; - std::string m_clientdomain; - std::string m_serverhost; - std::string m_serverdomain; - std::string m_filelfn; - FileStatistics m_filestats; - std::string m_guid; - size_t m_counter; + struct FileInfo { + explicit FileInfo(std::string const& iLFN, edm::InputType); + + FileInfo(FileInfo&& iInfo) + : m_filelfn(std::move(iInfo.m_filelfn)), + m_serverhost(std::move(iInfo.m_serverhost)), + m_serverdomain(std::move(iInfo.m_serverdomain)), + m_type(iInfo.m_type), + m_size(iInfo.m_size.load()), + m_id(iInfo.m_id), + m_openCount(iInfo.m_openCount.load()) {} + std::string m_filelfn; + std::string m_serverhost; + std::string m_serverdomain; + edm::InputType m_type; std::atomic m_size; - std::string m_userdn; - std::mutex m_servermutex; + size_t m_id; //from m_counter + std::atomic m_openCount; + }; + + void determineHostnames(); + void fillUDP(const std::string& site, const FileInfo& fileinfo, bool, std::string&) const; + void cleanupOldFiles(); + + std::string m_clienthost; + std::string m_clientdomain; + tbb::concurrent_unordered_map m_lfnToFileInfo; + tbb::concurrent_unordered_map m_urlToLfn; + FileStatistics m_filestats; + std::string m_guid; + size_t m_counter; + std::string m_userdn; + std::mutex m_servermutex; + const bool m_debug; }; - - } -} + } // namespace storage +} // namespace edm #endif - diff --git a/Utilities/StorageFactory/src/StatisticsSenderService.cc b/Utilities/StorageFactory/src/StatisticsSenderService.cc index fe53f9f2f1576..ad0ff9d5d3bf9 100644 --- a/Utilities/StorageFactory/src/StatisticsSenderService.cc +++ b/Utilities/StorageFactory/src/StatisticsSenderService.cc @@ -4,6 +4,7 @@ #include "FWCore/ServiceRegistry/interface/ActivityRegistry.h" #include 
"FWCore/Catalog/interface/SiteLocalConfig.h" #include "FWCore/ServiceRegistry/interface/Service.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" #include "FWCore/Utilities/src/Guid.h" #include @@ -15,12 +16,7 @@ #include #include -#define UPDATE_STATISTIC(x) \ - m_ ## x = x; - -#define UPDATE_AND_OUTPUT_STATISTIC(x) \ - os << "\"" #x "\":" << (x-m_ ## x) << ", "; \ - UPDATE_STATISTIC(x) +#define OUTPUT_STATISTIC(x) os << "\"" #x "\":" << (x - m_##x) << ", "; // Simple hack to define HOST_NAME_MAX on Mac. // Allows arrays to be statically allocated @@ -28,25 +24,23 @@ #define HOST_NAME_MAX 128 #endif -#define JOB_UNIQUE_ID_ENV "CRAB_UNIQUE_JOB_ID" -#define JOB_UNIQUE_ID_ENV_V2 "DashboardJobId" +static constexpr char const *const JOB_UNIQUE_ID_ENV = "CRAB_UNIQUE_JOB_ID"; +static constexpr char const *const JOB_UNIQUE_ID_ENV_V2 = "DashboardJobId"; using namespace edm::storage; -StatisticsSenderService::FileStatistics::FileStatistics() : - m_read_single_operations(0), - m_read_single_bytes(0), - m_read_single_square(0), - m_read_vector_operations(0), - m_read_vector_bytes(0), - m_read_vector_square(0), - m_read_vector_count_sum(0), - m_read_vector_count_square(0), - m_start_time(time(NULL)) -{} - -void -StatisticsSenderService::FileStatistics::fillUDP(std::ostringstream &os) { +StatisticsSenderService::FileStatistics::FileStatistics() + : m_read_single_operations(0), + m_read_single_bytes(0), + m_read_single_square(0), + m_read_vector_operations(0), + m_read_vector_bytes(0), + m_read_vector_square(0), + m_read_vector_count_sum(0), + m_read_vector_count_square(0), + m_start_time(time(nullptr)) {} + +void StatisticsSenderService::FileStatistics::fillUDP(std::ostringstream &os) const { const StorageAccount::StorageStats &stats = StorageAccount::summary(); ssize_t read_single_operations = 0; ssize_t read_single_bytes = 0; @@ -57,7 +51,7 @@ StatisticsSenderService::FileStatistics::fillUDP(std::ostringstream &os) { ssize_t read_vector_count_sum = 0; ssize_t read_vector_count_square = 0; auto token = StorageAccount::tokenForStorageClassName("tstoragefile"); - for (StorageAccount::StorageStats::const_iterator i = stats.begin (); i != stats.end(); ++i) { + for (StorageAccount::StorageStats::const_iterator i = stats.begin(); i != stats.end(); ++i) { if (i->first == token.value()) { continue; } @@ -77,69 +71,151 @@ StatisticsSenderService::FileStatistics::fillUDP(std::ostringstream &os) { } int64_t single_op_count = read_single_operations - m_read_single_operations; if (single_op_count > 0) { - double single_sum = read_single_bytes-m_read_single_bytes; - double single_average = single_sum/static_cast(single_op_count); - os << "\"read_single_sigma\":" << sqrt((static_cast(read_single_square-m_read_single_square) - single_average*single_average*single_op_count)/static_cast(single_op_count)) << ", "; + double single_sum = read_single_bytes - m_read_single_bytes; + double single_average = single_sum / static_cast(single_op_count); + os << "\"read_single_sigma\":" + << sqrt(std::abs((static_cast(read_single_square - m_read_single_square) - + single_average * single_average * single_op_count) / + static_cast(single_op_count))) + << ", "; os << "\"read_single_average\":" << single_average << ", "; } - m_read_single_square = read_single_square; int64_t vector_op_count = read_vector_operations - m_read_vector_operations; if (vector_op_count > 0) { - double vector_average = static_cast(read_vector_bytes-m_read_vector_bytes)/static_cast(vector_op_count); + double vector_average = + 
static_cast(read_vector_bytes - m_read_vector_bytes) / static_cast(vector_op_count); os << "\"read_vector_average\":" << vector_average << ", "; - os << "\"read_vector_sigma\":" << sqrt((static_cast(read_vector_square-m_read_vector_square) - vector_average*vector_average*vector_op_count)/static_cast(vector_op_count)) << ", "; - double vector_count_average = static_cast(read_vector_count_sum-m_read_vector_count_sum)/static_cast(vector_op_count); + os << "\"read_vector_sigma\":" + << sqrt(std::abs((static_cast(read_vector_square - m_read_vector_square) - + vector_average * vector_average * vector_op_count) / + static_cast(vector_op_count))) + << ", "; + double vector_count_average = + static_cast(read_vector_count_sum - m_read_vector_count_sum) / static_cast(vector_op_count); os << "\"read_vector_count_average\":" << vector_count_average << ", "; - os << "\"read_vector_count_sigma\":" << sqrt((static_cast(read_vector_count_square-m_read_vector_count_square) - vector_count_average*vector_count_average*vector_op_count)/static_cast(vector_op_count)) << ", "; + os << "\"read_vector_count_sigma\":" + << sqrt(std::abs((static_cast(read_vector_count_square - m_read_vector_count_square) - + vector_count_average * vector_count_average * vector_op_count) / + static_cast(vector_op_count))) + << ", "; } - m_read_vector_square = read_vector_square; - m_read_vector_count_square = read_vector_count_square; - m_read_vector_count_sum = read_vector_count_sum; - os << "\"read_bytes\":" << (read_vector_bytes + read_single_bytes - m_read_vector_bytes - m_read_single_bytes) << ", "; - os << "\"read_bytes_at_close\":" << (read_vector_bytes + read_single_bytes - m_read_vector_bytes - m_read_single_bytes) << ", "; + os << "\"read_bytes\":" << (read_vector_bytes + read_single_bytes - m_read_vector_bytes - m_read_single_bytes) + << ", "; + os << "\"read_bytes_at_close\":" + << (read_vector_bytes + read_single_bytes - m_read_vector_bytes - m_read_single_bytes) << ", "; // See top of file for macros; not complex, just avoiding copy/paste - UPDATE_AND_OUTPUT_STATISTIC(read_single_operations) - UPDATE_AND_OUTPUT_STATISTIC(read_single_bytes) - UPDATE_AND_OUTPUT_STATISTIC(read_vector_operations) - UPDATE_AND_OUTPUT_STATISTIC(read_vector_bytes) + OUTPUT_STATISTIC(read_single_operations) + OUTPUT_STATISTIC(read_single_bytes) + OUTPUT_STATISTIC(read_vector_operations) + OUTPUT_STATISTIC(read_vector_bytes) os << "\"start_time\":" << m_start_time << ", "; - m_start_time = time(NULL); // NOTE: last entry doesn't have the trailing comma. 
- os << "\"end_time\":" << m_start_time; + os << "\"end_time\":" << time(nullptr); } -StatisticsSenderService::StatisticsSenderService(edm::ParameterSet const& /*pset*/, edm::ActivityRegistry& ar) : - m_clienthost("unknown"), - m_clientdomain("unknown"), - m_serverhost("unknown"), - m_serverdomain("unknown"), - m_filelfn("unknown"), - m_filestats(), - m_guid(Guid().toString()), - m_counter(0), - m_size(-1), - m_userdn("unknown") -{ +void StatisticsSenderService::FileStatistics::update() { + const StorageAccount::StorageStats &stats = StorageAccount::summary(); + ssize_t read_single_operations = 0; + ssize_t read_single_bytes = 0; + ssize_t read_single_square = 0; + ssize_t read_vector_operations = 0; + ssize_t read_vector_bytes = 0; + ssize_t read_vector_square = 0; + ssize_t read_vector_count_sum = 0; + ssize_t read_vector_count_square = 0; + auto token = StorageAccount::tokenForStorageClassName("tstoragefile"); + for (StorageAccount::StorageStats::const_iterator i = stats.begin(); i != stats.end(); ++i) { + if (i->first == token.value()) { + continue; + } + for (StorageAccount::OperationStats::const_iterator j = i->second.begin(); j != i->second.end(); ++j) { + if (j->first == static_cast(StorageAccount::Operation::readv)) { + read_vector_operations += j->second.attempts; + read_vector_bytes += j->second.amount; + read_vector_count_square += j->second.vector_square; + read_vector_square += j->second.amount_square; + read_vector_count_sum += j->second.vector_count; + } else if (j->first == static_cast(StorageAccount::Operation::read)) { + read_single_operations += j->second.attempts; + read_single_bytes += j->second.amount; + read_single_square += j->second.amount_square; + } + } + } + + m_read_single_square = read_single_square; + m_read_vector_square = read_vector_square; + m_read_vector_count_square = read_vector_count_square; + m_read_vector_count_sum = read_vector_count_sum; + m_read_single_operations = read_single_operations; + m_read_single_bytes = read_single_bytes; + m_read_vector_operations = read_vector_operations; + m_read_vector_bytes = read_vector_bytes; + m_start_time = time(nullptr); +} +StatisticsSenderService::FileInfo::FileInfo(std::string const &iLFN, edm::InputType iType) + : m_filelfn(iLFN), + m_serverhost("unknown"), + m_serverdomain("unknown"), + m_type(iType), + m_size(-1), + m_id(0), + m_openCount(1) {} + +StatisticsSenderService::StatisticsSenderService(edm::ParameterSet const &iPSet, edm::ActivityRegistry &ar) + : m_clienthost("unknown"), + m_clientdomain("unknown"), + m_filestats(), + m_guid(Guid().toString()), + m_counter(0), + m_userdn("unknown"), + m_debug(iPSet.getUntrackedParameter("debug", false)) { determineHostnames(); - ar.watchPreCloseFile(this, &StatisticsSenderService::filePreCloseEvent); + ar.watchPostCloseFile(this, &StatisticsSenderService::filePostCloseEvent); if (!getX509Subject(m_userdn)) { m_userdn = "unknown"; } } -const char * -StatisticsSenderService::getJobID() { - const char * id = getenv(JOB_UNIQUE_ID_ENV); +const char *StatisticsSenderService::getJobID() { + const char *id = getenv(JOB_UNIQUE_ID_ENV); // Dashboard developers requested that we migrate to this environment variable. return id ? 
id : getenv(JOB_UNIQUE_ID_ENV_V2); } -void -StatisticsSenderService::setCurrentServer(const std::string &servername) { - size_t dot_pos = servername.find("."); +std::string const *StatisticsSenderService::matchedLfn(std::string const &iURL) { + auto found = m_urlToLfn.find(iURL); + if (found != m_urlToLfn.end()) { + return &found->second; + } + for (auto const &v : m_lfnToFileInfo) { + if (v.first.size() < iURL.size()) { + if (v.first == iURL.substr(iURL.size() - v.first.size())) { + m_urlToLfn.emplace(iURL, v.first); + return &m_urlToLfn.find(iURL)->second; + } + } + } + //does the lfn have a protocol and the iURL not? + if (std::string::npos == iURL.find(':')) { + for (auto const &v : m_lfnToFileInfo) { + if ((std::string::npos != v.first.find(':')) and (v.first.size() > iURL.size())) { + if (iURL == v.first.substr(v.first.size() - iURL.size())) { + m_urlToLfn.emplace(iURL, v.first); + return &m_urlToLfn.find(iURL)->second; + } + } + } + } + + return nullptr; +} + +void StatisticsSenderService::setCurrentServer(const std::string &url, const std::string &servername) { + size_t dot_pos = servername.find('.'); std::string serverhost; std::string serverdomain; if (dot_pos == std::string::npos) { @@ -147,66 +223,139 @@ StatisticsSenderService::setCurrentServer(const std::string &servername) { serverdomain = "unknown"; } else { serverhost = servername.substr(0, dot_pos); - serverdomain = servername.substr(dot_pos+1, servername.find(":")-dot_pos-1); + serverdomain = servername.substr(dot_pos + 1, servername.find(":") - dot_pos - 1); if (serverdomain.empty()) { serverdomain = "unknown"; } } { + auto lfn = matchedLfn(url); std::lock_guard sentry(m_servermutex); - m_serverhost = std::move(serverhost); - m_serverdomain = std::move(serverdomain); + if (nullptr != lfn) { + auto found = m_lfnToFileInfo.find(*lfn); + if (found != m_lfnToFileInfo.end()) { + found->second.m_serverhost = std::move(serverhost); + found->second.m_serverdomain = std::move(serverdomain); + } + } else if (m_debug) { + edm::LogWarning("StatisticsSenderService") << "setCurrentServer: unknown url name " << url << "\n"; + } } } -void -StatisticsSenderService::setSize(size_t size) { - m_size = size; +void StatisticsSenderService::openingFile(std::string const &lfn, edm::InputType type, size_t size) { + m_urlToLfn.emplace(lfn, lfn); + auto attempt = m_lfnToFileInfo.emplace(lfn, FileInfo{lfn, type}); + if (attempt.second) { + attempt.first->second.m_size = size; + attempt.first->second.m_id = m_counter++; + edm::LogInfo("StatisticsSenderService") << "openingFile: opening " << lfn << "\n"; + } else { + ++(attempt.first->second.m_openCount); + edm::LogInfo("StatisticsSenderService") << "openingFile: re-opening" << lfn << "\n"; + } } -void -StatisticsSenderService::filePreCloseEvent(std::string const& lfn, bool usedFallback) { - m_filelfn = lfn; - +void StatisticsSenderService::closedFile(std::string const &url, bool usedFallback) { edm::Service pSLC; if (!pSLC.isAvailable()) { return; } - const struct addrinfo * addresses = pSLC->statisticsDestination(); - if (!addresses) { + const struct addrinfo *addresses = pSLC->statisticsDestination(); + if (!addresses and !m_debug) { return; } - std::set const * info = pSLC->statisticsInfo(); - if (info && info->size() && (m_userdn != "unknown") && ( - (info->find("dn") == info->end()) || - (info->find("nodn") != info->end())) - ) - { + std::set const *info = pSLC->statisticsInfo(); + if (info && !info->empty() && (m_userdn != "unknown") && + ((info->find("dn") == info->end()) || 
(info->find("nodn") != info->end()))) { m_userdn = "not reported"; } - std::string results; - fillUDP(pSLC->siteName(), usedFallback, results); + auto lfn = matchedLfn(url); + if (nullptr != lfn) { + auto found = m_lfnToFileInfo.find(*lfn); + assert(found != m_lfnToFileInfo.end()); - for (const struct addrinfo *address = addresses; address != nullptr; address = address->ai_next) { - int sock = socket(address->ai_family, address->ai_socktype, address->ai_protocol); - if (sock < 0) { - continue; + std::string results; + fillUDP(pSLC->siteName(), found->second, usedFallback, results); + if (m_debug) { + edm::LogSystem("StatisticSenderService") << "\n" << results << "\n"; } - auto close_del = [](int* iSocket) { close(*iSocket); }; - std::unique_ptr guard(&sock, close_del); - if (sendto(sock, results.c_str(), results.size(), 0, address->ai_addr, address->ai_addrlen) >= 0) { - break; + + for (const struct addrinfo *address = addresses; address != nullptr; address = address->ai_next) { + int sock = socket(address->ai_family, address->ai_socktype, address->ai_protocol); + if (sock < 0) { + continue; + } + auto close_del = [](int *iSocket) { close(*iSocket); }; + std::unique_ptr guard(&sock, close_del); + if (sendto(sock, results.c_str(), results.size(), 0, address->ai_addr, address->ai_addrlen) >= 0) { + break; + } } + + auto c = --found->second.m_openCount; + if (m_debug) { + if (c == 0) { + edm::LogWarning("StatisticsSenderService") << "fully closed: " << *lfn << "\n"; + } else { + edm::LogWarning("StatisticsSenderService") << "partially closed: " << *lfn << "\n"; + } + } + } else if (m_debug) { + edm::LogWarning("StatisticsSenderService") << "closed: unknown url name " << url << "\n"; } +} - m_counter++; +void StatisticsSenderService::cleanupOldFiles() { + //remove entries with openCount of 0 + bool moreToTest = false; + do { + moreToTest = false; + for (auto it = m_lfnToFileInfo.begin(); it != m_lfnToFileInfo.end(); ++it) { + if (it->second.m_openCount == 0) { + auto lfn = it->first; + bool moreToTest2 = false; + do { + moreToTest2 = false; + for (auto it2 = m_urlToLfn.begin(); it2 != m_urlToLfn.end(); ++it2) { + if (it2->second == lfn) { + m_urlToLfn.unsafe_erase(it2); + moreToTest2 = true; + break; + } + } + } while (moreToTest2); + + m_lfnToFileInfo.unsafe_erase(it); + moreToTest = true; + break; + } + } + } while (moreToTest); } -void -StatisticsSenderService::determineHostnames(void) { +void StatisticsSenderService::setSize(const std::string &url, size_t size) { + auto lfn = matchedLfn(url); + if (nullptr != lfn) { + auto itFound = m_lfnToFileInfo.find(*lfn); + if (itFound != m_lfnToFileInfo.end()) { + itFound->second.m_size = size; + } + } else if (m_debug) { + edm::LogWarning("StatisticsSenderService") << "setSize: unknown url name " << url << "\n"; + } +} + +void StatisticsSenderService::filePostCloseEvent(std::string const &lfn, bool usedFallback) { + //we are at a sync point in the framwework so no new files are being opened + cleanupOldFiles(); + m_filestats.update(); +} + +void StatisticsSenderService::determineHostnames(void) { char tmpName[HOST_NAME_MAX]; if (gethostname(tmpName, HOST_NAME_MAX) != 0) { // Sigh, no way to log errors from here. 
@@ -218,13 +367,15 @@ StatisticsSenderService::determineHostnames(void) { if (dot_pos == std::string::npos) { m_clientdomain = "unknown"; } else { - m_clientdomain = m_clienthost.substr(dot_pos+1, m_clienthost.size()-dot_pos-1); + m_clientdomain = m_clienthost.substr(dot_pos + 1, m_clienthost.size() - dot_pos - 1); m_clienthost = m_clienthost.substr(0, dot_pos); } } -void -StatisticsSenderService::fillUDP(const std::string& siteName, bool usedFallback, std::string &udpinfo) { +void StatisticsSenderService::fillUDP(const std::string &siteName, + const FileInfo &fileinfo, + bool usedFallback, + std::string &udpinfo) const { std::ostringstream os; // Header - same for all IO accesses @@ -234,31 +385,43 @@ StatisticsSenderService::fillUDP(const std::string& siteName, bool usedFallback, } if (usedFallback) { os << "\"fallback\": true, "; + } else { + os << "\"fallback\": false, "; } - std::string serverhost; - std::string serverdomain; - { - std::lock_guard sentry(m_servermutex); - serverhost = m_serverhost; - serverdomain = m_serverdomain; + os << "\"type\": "; + switch (fileinfo.m_type) { + case edm::InputType::Primary: { + os << "\"primary\", "; + break; + } + case edm::InputType::SecondaryFile: { + os << "\"secondary\", "; + break; + } + case edm::InputType::SecondarySource: { + os << "\"embedded\", "; + break; + } } - + auto serverhost = fileinfo.m_serverhost; + auto serverdomain = fileinfo.m_serverdomain; + os << "\"user_dn\":\"" << m_userdn << "\", "; os << "\"client_host\":\"" << m_clienthost << "\", "; os << "\"client_domain\":\"" << m_clientdomain << "\", "; os << "\"server_host\":\"" << serverhost << "\", "; os << "\"server_domain\":\"" << serverdomain << "\", "; - os << "\"unique_id\":\"" << m_guid << "-" << m_counter << "\", "; - os << "\"file_lfn\":\"" << m_filelfn << "\", "; + os << "\"unique_id\":\"" << m_guid << "-" << fileinfo.m_id << "\", "; + os << "\"file_lfn\":\"" << fileinfo.m_filelfn << "\", "; // Dashboard devs requested that we send out no app_info if a job ID // is not present in the environment. - const char * jobId = getJobID(); + const char *jobId = getJobID(); if (jobId) { os << "\"app_info\":\"" << jobId << "\", "; } - if (m_size >= 0) { - os << "\"file_size\":" << m_size << ", "; + if (fileinfo.m_size >= 0) { + os << "\"file_size\":" << fileinfo.m_size << ", "; } m_filestats.fillUDP(os); @@ -283,17 +446,17 @@ StatisticsSenderService::fillUDP(const std::string& siteName, bool usedFallback, * THIS DOES NOT VERIFY THE RESULTS, and is a best-effort GUESS. * Again, DO NOT REUSE THIS CODE THINKING IT VERIFIES THE CHAIN! 
*/ -static X509 * findEEC(STACK_OF(X509) * certstack) { +static X509 *findEEC(STACK_OF(X509) * certstack) { int depth = sk_X509_num(certstack); if (depth == 0) { return nullptr; } - int idx = depth-1; + int idx = depth - 1; char *priorsubject = nullptr; char *subject = nullptr; X509 *x509cert = sk_X509_value(certstack, idx); - for (; x509cert && idx>0; idx--) { - subject = X509_NAME_oneline(X509_get_subject_name(x509cert),0,0); + for (; x509cert && idx > 0; idx--) { + subject = X509_NAME_oneline(X509_get_subject_name(x509cert), nullptr, 0); if (subject && priorsubject && (strncmp(subject, priorsubject, strlen(subject)) != 0)) { break; } @@ -310,8 +473,7 @@ static X509 * findEEC(STACK_OF(X509) * certstack) { return x509cert; } -static bool -getX509SubjectFromFile(const std::string &filename, std::string &result) { +static bool getX509SubjectFromFile(const std::string &filename, std::string &result) { BIO *biof = nullptr; STACK_OF(X509) *certs = nullptr; char *subject = nullptr; @@ -320,34 +482,42 @@ getX509SubjectFromFile(const std::string &filename, std::string &result) { char *name = nullptr; long len = 0U; - if((biof = BIO_new_file(filename.c_str(), "r"))) { - + if ((biof = BIO_new_file(filename.c_str(), "r"))) { certs = sk_X509_new_null(); bool encountered_error = false; while ((!encountered_error) && (!BIO_eof(biof)) && PEM_read_bio(biof, &name, &header, &data, &len)) { if (strcmp(name, PEM_STRING_X509) == 0 || strcmp(name, PEM_STRING_X509_OLD) == 0) { - X509 * tmp_cert = nullptr; + X509 *tmp_cert = nullptr; // See WARNINGS section in http://www.openssl.org/docs/crypto/d2i_X509.html // Without this cmsRun crashes on a mac with a valid grid proxy. const unsigned char *p; - p=data; + p = data; tmp_cert = d2i_X509(&tmp_cert, &p, len); if (tmp_cert) { sk_X509_push(certs, tmp_cert); } else { encountered_error = true; } - } // Note we ignore any proxy key in the file. - if (data) { OPENSSL_free(data); data = nullptr;} - if (header) { OPENSSL_free(header); header = nullptr;} - if (name) { OPENSSL_free(name); name = nullptr;} + } // Note we ignore any proxy key in the file. + if (data) { + OPENSSL_free(data); + data = nullptr; + } + if (header) { + OPENSSL_free(header); + header = nullptr; + } + if (name) { + OPENSSL_free(name); + name = nullptr; + } } X509 *x509cert = nullptr; if (!encountered_error && sk_X509_num(certs)) { x509cert = findEEC(certs); } if (x509cert) { - subject = X509_NAME_oneline(X509_get_subject_name(x509cert),0,0); + subject = X509_NAME_oneline(X509_get_subject_name(x509cert), nullptr, 0); } // Note we do not free x509cert directly, as it's still owned by the certs stack. if (certs) { @@ -358,14 +528,13 @@ getX509SubjectFromFile(const std::string &filename, std::string &result) { if (subject) { result = subject; OPENSSL_free(subject); - return true; + return true; } } return false; } -bool -StatisticsSenderService::getX509Subject(std::string &result) { +bool StatisticsSenderService::getX509Subject(std::string &result) { char *filename = getenv("X509_USER_PROXY"); if (filename && getX509SubjectFromFile(filename, result)) { return true; diff --git a/Utilities/StorageFactory/test/BuildFile.xml b/Utilities/StorageFactory/test/BuildFile.xml index 27329acd2a6d8..2ce22f617536c 100644 --- a/Utilities/StorageFactory/test/BuildFile.xml +++ b/Utilities/StorageFactory/test/BuildFile.xml @@ -37,3 +37,4 @@ # to wait until the framework decides on a threading model to implement a fix. 
# file="threadsafe.cpp" name="test_StorageFactory_threadsafe" + diff --git a/Utilities/StorageFactory/test/make_2nd_file_cfg.py b/Utilities/StorageFactory/test/make_2nd_file_cfg.py new file mode 100644 index 0000000000000..65da742632ca9 --- /dev/null +++ b/Utilities/StorageFactory/test/make_2nd_file_cfg.py @@ -0,0 +1,10 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process("SECOND") + +process.source = cms.Source("PoolSource", fileNames = cms.untracked.vstring("file:stat_sender_first.root")) + +process.o = cms.OutputModule("PoolOutputModule", fileName = cms.untracked.string("stat_sender_second.root"), outputCommands = cms.untracked.vstring("drop *")) + +process.ep = cms.EndPath(process.o) + diff --git a/Utilities/StorageFactory/test/make_test_files_cfg.py b/Utilities/StorageFactory/test/make_test_files_cfg.py new file mode 100644 index 0000000000000..4cece1abd6a99 --- /dev/null +++ b/Utilities/StorageFactory/test/make_test_files_cfg.py @@ -0,0 +1,20 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process("FIRST") + +process.source = cms.Source("EmptySource") + +process.first = cms.OutputModule("PoolOutputModule", fileName = cms.untracked.string("stat_sender_first.root")) +process.b = cms.OutputModule("PoolOutputModule", fileName = cms.untracked.string("stat_sender_b.root")) +process.c = cms.OutputModule("PoolOutputModule", fileName = cms.untracked.string("stat_sender_c.root")) +process.d = cms.OutputModule("PoolOutputModule", fileName = cms.untracked.string("stat_sender_d.root")) +process.e = cms.OutputModule("PoolOutputModule", fileName = cms.untracked.string("stat_sender_e.root")) + +process.Thing = cms.EDProducer("ThingProducer") +process.OtherThing = cms.EDProducer("OtherThingProducer") +process.EventNumber = cms.EDProducer("EventNumberIntProducer") + + +process.o = cms.EndPath(process.first+process.b+process.c+process.d+process.e, cms.Task(process.Thing, process.OtherThing, process.EventNumber)) + +process.maxEvents = cms.untracked.PSet(input = cms.untracked.int32(10)) diff --git a/Utilities/StorageFactory/test/test_file_statistics_sender.sh b/Utilities/StorageFactory/test/test_file_statistics_sender.sh new file mode 100755 index 0000000000000..523da45696e34 --- /dev/null +++ b/Utilities/StorageFactory/test/test_file_statistics_sender.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# Pass in name and status +function die { echo $1: status $2 ; exit $2; } + +LOCAL_TEST_DIR=${CMSSW_BASE}/src/Utilities/StorageFactory/test +LOCAL_TMP_DIR=${CMSSW_BASE}/tmp/${SCRAM_ARCH} + +pushd ${LOCAL_TMP_DIR} + +#setup files used in tests +cmsRun ${LOCAL_TEST_DIR}/make_test_files_cfg.py &> make_test_files.log || die "cmsRun make_test_files_cfg.py" $? +rm make_test_files.log +cmsRun ${LOCAL_TEST_DIR}/make_2nd_file_cfg.py &> make_2nd_file.log || die "cmsRun make_2nd_file_cfg.py" $? +rm make_2nd_file.log + +cmsRun ${LOCAL_TEST_DIR}/test_single_file_statistics_sender_cfg.py &> test_single_file_statistics_sender.log || die "cmsRun test_single_file_statistics_sender_cfg.py" $? +grep -q '"file_lfn":"file:stat_sender_first.root"' test_single_file_statistics_sender.log || die "no StatisticsSenderService output for single file" 1 +rm test_single_file_statistics_sender.log + +cmsRun ${LOCAL_TEST_DIR}/test_multiple_files_file_statistics_sender_cfg.py &> test_multiple_files_file_statistics_sender.log || die "cmsRun test_multiple_files_file_statistics_sender_cfg.py" $? 
+grep -q '"file_lfn":"file:stat_sender_b.root"' test_multiple_files_file_statistics_sender.log || die "no StatisticsSenderService output for file b in multiple files" 1 +grep -q '"file_lfn":"file:stat_sender_c.root"' test_multiple_files_file_statistics_sender.log || die "no StatisticsSenderService output for file c in multiple files" 1 +grep -q '"file_lfn":"file:stat_sender_d.root"' test_multiple_files_file_statistics_sender.log || die "no StatisticsSenderService output for file d in multiple files" 1 +grep -q '"file_lfn":"file:stat_sender_e.root"' test_multiple_files_file_statistics_sender.log || die "no StatisticsSenderService output for file e in multiple files" 1 +rm test_multiple_files_file_statistics_sender.log + +cmsRun ${LOCAL_TEST_DIR}/test_multi_file_statistics_sender_cfg.py &> test_multi_file_statistics_sender.log || die "cmsRun test_multi_file_statistics_sender_cfg.py" $? +grep -q '"file_lfn":"file:stat_sender_first.root"' test_multi_file_statistics_sender.log || die "no StatisticsSenderService output for file first in multi file" 1 +grep -q '"file_lfn":"file:stat_sender_second.root"' test_multi_file_statistics_sender.log || die "no StatisticsSenderService output for file second in multi file" 1 +rm test_multi_file_statistics_sender.log + +cmsRun ${LOCAL_TEST_DIR}/test_secondary_file_statistics_sender_cfg.py &> test_secondary_file_statistics_sender.log || die "cmsRun test_secondary_file_statistics_sender_cfg.py" $? +grep -q '"file_lfn":"file:stat_sender_first.root"' test_secondary_file_statistics_sender.log || die "no StatisticsSenderService output for file 'first' in secondary files" 1 +grep -q '"file_lfn":"file:stat_sender_b.root"' test_secondary_file_statistics_sender.log || die "no StatisticsSenderService output for file 'b' in secondary files" 1 +grep -q '"file_lfn":"file:stat_sender_c.root"' test_secondary_file_statistics_sender.log || die "no StatisticsSenderService output for file 'c' in secondary files" 1 +grep -q '"file_lfn":"file:stat_sender_d.root"' test_secondary_file_statistics_sender.log || die "no StatisticsSenderService output for file 'd' in secondary files" 1 +grep -q '"file_lfn":"file:stat_sender_e.root"' test_secondary_file_statistics_sender.log || die "no StatisticsSenderService output for file 'e' in secondary files" 1 +rm test_secondary_file_statistics_sender.log + +popd \ No newline at end of file diff --git a/Utilities/StorageFactory/test/test_multi_file_statistics_sender_cfg.py b/Utilities/StorageFactory/test/test_multi_file_statistics_sender_cfg.py new file mode 100644 index 0000000000000..19815adc72c35 --- /dev/null +++ b/Utilities/StorageFactory/test/test_multi_file_statistics_sender_cfg.py @@ -0,0 +1,10 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process("TEST") + +process.source = cms.Source("PoolSource", + fileNames = cms.untracked.vstring("file:stat_sender_second.root"), + secondaryFileNames = cms.untracked.vstring("file:stat_sender_first.root") +) + +process.add_(cms.Service("StatisticsSenderService", debug = cms.untracked.bool(True))) \ No newline at end of file diff --git a/Utilities/StorageFactory/test/test_multiple_files_file_statistics_sender_cfg.py b/Utilities/StorageFactory/test/test_multiple_files_file_statistics_sender_cfg.py new file mode 100644 index 0000000000000..a734285bd2487 --- /dev/null +++ b/Utilities/StorageFactory/test/test_multiple_files_file_statistics_sender_cfg.py @@ -0,0 +1,10 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process("TEST") + +process.source = cms.Source("PoolSource", 
fileNames = cms.untracked.vstring("file:stat_sender_b.root", "file:stat_sender_c.root", "file:stat_sender_d.root", "file:stat_sender_e.root"), +duplicateCheckMode = cms.untracked.string('noDuplicateCheck'), +inputCommands = cms.untracked.vstring('drop *_*_beginRun_*', 'drop *_*_endRun_*', 'drop *_*_beginLumi_*', 'drop *_*_endLumi_*') +) + +process.add_(cms.Service("StatisticsSenderService", debug = cms.untracked.bool(True))) \ No newline at end of file diff --git a/Utilities/StorageFactory/test/test_secondary_file_statistics_sender_cfg.py b/Utilities/StorageFactory/test/test_secondary_file_statistics_sender_cfg.py new file mode 100644 index 0000000000000..250d575c7f39e --- /dev/null +++ b/Utilities/StorageFactory/test/test_secondary_file_statistics_sender_cfg.py @@ -0,0 +1,44 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process("TEST") + +process.source = cms.Source("PoolSource", fileNames = cms.untracked.vstring("file:stat_sender_first.root")) + +process.b = cms.EDProducer("SecondaryProducer", + seq = cms.untracked.bool(True), + input = cms.SecSource("EmbeddedRootSource", + sequential = cms.untracked.bool(True), + fileNames = cms.untracked.vstring('file:stat_sender_b.root') + ) +) + +process.c = cms.EDProducer("SecondaryProducer", + seq = cms.untracked.bool(True), + input = cms.SecSource("EmbeddedRootSource", + sequential = cms.untracked.bool(True), + fileNames = cms.untracked.vstring('file:stat_sender_c.root') + ) +) + +process.d = cms.EDProducer("SecondaryProducer", + seq = cms.untracked.bool(True), + input = cms.SecSource("EmbeddedRootSource", + sequential = cms.untracked.bool(True), + fileNames = cms.untracked.vstring('file:stat_sender_d.root') + ) +) + +process.e = cms.EDProducer("SecondaryProducer", + seq = cms.untracked.bool(True), + input = cms.SecSource("EmbeddedRootSource", + sequential = cms.untracked.bool(True), + fileNames = cms.untracked.vstring('file:stat_sender_e.root') + ) +) + +process.pB = cms.Path(process.b) +process.pC = cms.Path(process.c) +process.pD = cms.Path(process.d) +process.pE = cms.Path(process.e) + +process.add_(cms.Service("StatisticsSenderService", debug = cms.untracked.bool(True))) \ No newline at end of file diff --git a/Utilities/StorageFactory/test/test_single_file_statistics_sender_cfg.py b/Utilities/StorageFactory/test/test_single_file_statistics_sender_cfg.py new file mode 100644 index 0000000000000..0576542a5e934 --- /dev/null +++ b/Utilities/StorageFactory/test/test_single_file_statistics_sender_cfg.py @@ -0,0 +1,11 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process("TEST") + +process.source = cms.Source("PoolSource", fileNames = cms.untracked.vstring("file:stat_sender_first.root")) + +process.add_(cms.Service("StatisticsSenderService", debug = cms.untracked.bool(True))) + +process.load("FWCore.MessageService.MessageLogger_cfi") + +process.MessageLogger.cerr.INFO.limit = 1000 \ No newline at end of file diff --git a/Utilities/XrdAdaptor/src/XrdRequestManager.cc b/Utilities/XrdAdaptor/src/XrdRequestManager.cc index 24bcddc33d582..91477a41e4c47 100644 --- a/Utilities/XrdAdaptor/src/XrdRequestManager.cc +++ b/Utilities/XrdAdaptor/src/XrdRequestManager.cc @@ -18,7 +18,7 @@ #include "Utilities/XrdAdaptor/src/XrdRequestManager.h" #include "Utilities/XrdAdaptor/src/XrdHostHandler.hh" -#define XRD_CL_MAX_CHUNK 512*1024 +#define XRD_CL_MAX_CHUNK 512 * 1024 #define XRD_ADAPTOR_SHORT_OPEN_DELAY 5 @@ -29,35 +29,32 @@ #define XRD_ADAPTOR_SOURCE_QUALITY_FUDGE 0 #else #define XRD_ADAPTOR_OPEN_PROBE_PERCENT 10 -#define 
XRD_ADAPTOR_LONG_OPEN_DELAY 2*60 +#define XRD_ADAPTOR_LONG_OPEN_DELAY 2 * 60 #define XRD_ADAPTOR_SOURCE_QUALITY_FUDGE 100 #endif #define XRD_ADAPTOR_CHUNK_THRESHOLD 1000 - #ifdef __MACH__ #include #include -#define GET_CLOCK_MONOTONIC(ts) \ -{ \ - clock_serv_t cclock; \ - mach_timespec_t mts; \ - host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &cclock); \ - clock_get_time(cclock, &mts); \ - mach_port_deallocate(mach_task_self(), cclock); \ - ts.tv_sec = mts.tv_sec; \ - ts.tv_nsec = mts.tv_nsec; \ -} +#define GET_CLOCK_MONOTONIC(ts) \ + { \ + clock_serv_t cclock; \ + mach_timespec_t mts; \ + host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &cclock); \ + clock_get_time(cclock, &mts); \ + mach_port_deallocate(mach_task_self(), cclock); \ + ts.tv_sec = mts.tv_sec; \ + ts.tv_nsec = mts.tv_nsec; \ + } #else -#define GET_CLOCK_MONOTONIC(ts) \ - clock_gettime(CLOCK_MONOTONIC, &ts); +#define GET_CLOCK_MONOTONIC(ts) clock_gettime(CLOCK_MONOTONIC, &ts); #endif using namespace XrdAdaptor; -long long timeDiffMS(const timespec &a, const timespec &b) -{ +long long timeDiffMS(const timespec &a, const timespec &b) { long long diff = (a.tv_sec - b.tv_sec) * 1000; diff += (a.tv_nsec - b.tv_nsec) / 1e6; return diff; @@ -67,51 +64,45 @@ long long timeDiffMS(const timespec &a, const timespec &b) * We do not care about the response of sending the monitoring information; * this handler class simply frees any returned buffer to prevent memory leaks. */ -class SendMonitoringInfoHandler : boost::noncopyable, public XrdCl::ResponseHandler -{ - void HandleResponse(XrdCl::XRootDStatus *status, XrdCl::AnyObject *response) override - { - if (response) - { - XrdCl::Buffer *buffer = nullptr; - response->Get(buffer); - response->Set(static_cast(nullptr)); - delete buffer; - } - // Send Info has a response object; we must delete it. - delete response; - delete status; +class SendMonitoringInfoHandler : boost::noncopyable, public XrdCl::ResponseHandler { + void HandleResponse(XrdCl::XRootDStatus *status, XrdCl::AnyObject *response) override { + if (response) { + XrdCl::Buffer *buffer = nullptr; + response->Get(buffer); + response->Set(static_cast(nullptr)); + delete buffer; } + // Send Info has a response object; we must delete it. + delete response; + delete status; + } }; [[cms::thread_safe]] SendMonitoringInfoHandler nullHandler; +static void SendMonitoringInfo(XrdCl::File &file) { + // Do not send this to a dCache data server as they return an error. + // In some versions of dCache, sending the monitoring information causes + // the server to close the connection - resulting in failures. + if (Source::isDCachePool(file)) { + return; + } -static void -SendMonitoringInfo(XrdCl::File &file) -{ - // Do not send this to a dCache data server as they return an error. - // In some versions of dCache, sending the monitoring information causes - // the server to close the connection - resulting in failures. - if (Source::isDCachePool(file)) {return;} - - // Send the monitoring info, if available. 
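timeDiffMS above converts the difference of two timespec values to milliseconds (seconds scaled by 1000 plus nanoseconds divided by 1e6), and GET_CLOCK_MONOTONIC hides the macOS/Linux difference in taking a monotonic timestamp. A minimal usage sketch on Linux, calling clock_gettime directly rather than through the macro (illustrative only, not part of the patch):

    #include <time.h>
    #include <chrono>
    #include <iostream>
    #include <thread>

    // Same arithmetic as timeDiffMS in this file: milliseconds of (a - b).
    static long long timeDiffMS(const timespec &a, const timespec &b) {
      long long diff = (a.tv_sec - b.tv_sec) * 1000;
      diff += (a.tv_nsec - b.tv_nsec) / 1e6;
      return diff;
    }

    int main() {
      timespec start, stop;
      clock_gettime(CLOCK_MONOTONIC, &start);  // what GET_CLOCK_MONOTONIC expands to on Linux
      std::this_thread::sleep_for(std::chrono::milliseconds(250));
      clock_gettime(CLOCK_MONOTONIC, &stop);
      std::cout << "elapsed ~" << timeDiffMS(stop, start) << " ms\n";  // roughly 250
      return 0;
    }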
- const char * jobId = edm::storage::StatisticsSenderService::getJobID(); - std::string lastUrl; - file.GetProperty("LastURL", lastUrl); - if (jobId && !lastUrl.empty()) - { - XrdCl::URL url(lastUrl); - XrdCl::FileSystem fs(url); - if (!(fs.SendInfo(jobId, &nullHandler, 30).IsOK())) - { - edm::LogWarning("XrdAdaptorInternal") << "Failed to send the monitoring information, monitoring ID is " << jobId << "."; - } - edm::LogInfo("XrdAdaptorInternal") << "Set monitoring ID to " << jobId << "."; + // Send the monitoring info, if available. + const char *jobId = edm::storage::StatisticsSenderService::getJobID(); + std::string lastUrl; + file.GetProperty("LastURL", lastUrl); + if (jobId && !lastUrl.empty()) { + XrdCl::URL url(lastUrl); + XrdCl::FileSystem fs(url); + if (!(fs.SendInfo(jobId, &nullHandler, 30).IsOK())) { + edm::LogWarning("XrdAdaptorInternal") + << "Failed to send the monitoring information, monitoring ID is " << jobId << "."; } + edm::LogInfo("XrdAdaptorInternal") << "Set monitoring ID to " << jobId << "."; + } } - RequestManager::RequestManager(const std::string &filename, XrdCl::OpenFlags::Flags flags, XrdCl::Access::Mode perms) : m_serverToAdvertise(nullptr), m_timeout(XRD_DEFAULT_TIMEOUT), @@ -119,26 +110,21 @@ RequestManager::RequestManager(const std::string &filename, XrdCl::OpenFlags::Fl m_name(filename), m_flags(flags), m_perms(perms), - m_distribution(0,100), - m_excluded_active_count(0) -{ -} - + m_distribution(0, 100), + m_excluded_active_count(0) {} -void -RequestManager::initialize(std::weak_ptr self) -{ +void RequestManager::initialize(std::weak_ptr self) { m_open_handler = OpenHandler::getInstance(self); XrdCl::Env *env = XrdCl::DefaultEnv::GetEnv(); - if (env) {env->GetInt("StreamErrorWindow", m_timeout);} + if (env) { + env->GetInt("StreamErrorWindow", m_timeout); + } std::string orig_site; - if (!Source::getXrootdSiteFromURL(m_name, orig_site) && (orig_site.find(".") == std::string::npos)) - { + if (!Source::getXrootdSiteFromURL(m_name, orig_site) && (orig_site.find(".") == std::string::npos)) { std::string hostname; - if (Source::getHostname(orig_site, hostname)) - { + if (Source::getHostname(orig_site, hostname)) { Source::getDomain(hostname, orig_site); } } @@ -148,26 +134,24 @@ RequestManager::initialize(std::weak_ptr self) bool validFile = false; const int retries = 5; std::string excludeString; - for (int idx=0; idxOpen(new_filename, m_flags, m_perms, &handler); - if (!openStatus.IsOK()) - { // In this case, we failed immediately - this indicates we have previously tried to talk to this + if (!openStatus + .IsOK()) { // In this case, we failed immediately - this indicates we have previously tried to talk to this // server and it was marked bad - xrootd couldn't even queue up the request internally! // In practice, we obsere this happening when the call to getXrootdSiteFromURL fails due to the // redirector being down or authentication failures. 
ex.clearMessage(); ex.clearContext(); ex.clearAdditionalInfo(); - ex << "XrdCl::File::Open(name='" << m_name - << "', flags=0x" << std::hex << m_flags - << ", permissions=0" << std::oct << m_perms << std::dec - << ") => error '" << openStatus.ToStr() - << "' (errno=" << openStatus.errNo << ", code=" << openStatus.code << ")"; + ex << "XrdCl::File::Open(name='" << m_name << "', flags=0x" << std::hex << m_flags << ", permissions=0" + << std::oct << m_perms << std::dec << ") => error '" << openStatus.ToStr() << "' (errno=" << openStatus.errNo + << ", code=" << openStatus.code << ")"; ex.addContext("Calling XrdFile::open()"); ex.addAdditionalInfo("Remote server already encountered a fatal error; no redirections were performed."); throw ex; @@ -177,56 +161,46 @@ RequestManager::initialize(std::weak_ptr self) std::unique_ptr hostList = handler.GetHosts(); Source::determineHostExcludeString(*file, hostList.get(), excludeString); assert(status); - if (status->IsOK()) - { + if (status->IsOK()) { validFile = true; break; - } - else - { + } else { ex.clearMessage(); ex.clearContext(); ex.clearAdditionalInfo(); - ex << "XrdCl::File::Open(name='" << m_name - << "', flags=0x" << std::hex << m_flags - << ", permissions=0" << std::oct << m_perms << std::dec - << ") => error '" << status->ToStr() - << "' (errno=" << status->errNo << ", code=" << status->code << ")"; + ex << "XrdCl::File::Open(name='" << m_name << "', flags=0x" << std::hex << m_flags << ", permissions=0" + << std::oct << m_perms << std::dec << ") => error '" << status->ToStr() << "' (errno=" << status->errNo + << ", code=" << status->code << ")"; ex.addContext("Calling XrdFile::open()"); addConnections(ex); std::string dataServer, lastUrl; file->GetProperty("DataServer", dataServer); file->GetProperty("LastURL", lastUrl); - if (!dataServer.empty()) - { + if (!dataServer.empty()) { ex.addAdditionalInfo("Problematic data server: " + dataServer); } - if (!lastUrl.empty()) - { + if (!lastUrl.empty()) { ex.addAdditionalInfo("Last URL tried: " + lastUrl); edm::LogWarning("XrdAdaptorInternal") << "Failed to open file at URL " << lastUrl << "."; } - if (std::find(m_disabledSourceStrings.begin(), m_disabledSourceStrings.end(), dataServer) != m_disabledSourceStrings.end()) - { + if (std::find(m_disabledSourceStrings.begin(), m_disabledSourceStrings.end(), dataServer) != + m_disabledSourceStrings.end()) { ex << ". No additional data servers were found."; throw ex; } - if (!dataServer.empty()) - { + if (!dataServer.empty()) { m_disabledSourceStrings.insert(dataServer); m_disabledExcludeStrings.insert(excludeString); } // In this case, we didn't go anywhere - we stayed at the redirector and it gave us a file-not-found. - if (lastUrl == new_filename) - { + if (lastUrl == new_filename) { edm::LogWarning("XrdAdaptorInternal") << lastUrl << ", " << new_filename; throw ex; } } } - if (!validFile) - { - throw ex; + if (!validFile) { + throw ex; } SendMonitoringInfo(*file); @@ -255,221 +229,223 @@ RequestManager::initialize(std::weak_ptr self) * from an edm-managed thread. It CANNOT be called from an Xrootd-managed * thread. */ -void -RequestManager::updateCurrentServer() -{ - // NOTE: we use memory_order_relaxed here, meaning that we may actually miss - // a pending update. *However*, since we call this for every read, we'll get it - // eventually. 
- if (likely(!m_serverToAdvertise.load(std::memory_order_relaxed))) {return;} - std::string *hostname_ptr; - if ((hostname_ptr = m_serverToAdvertise.exchange(nullptr))) - { - std::unique_ptr hostname(hostname_ptr); - edm::Service statsService; - if (statsService.isAvailable()) { - statsService->setCurrentServer(*hostname_ptr); - } +void RequestManager::updateCurrentServer() { + // NOTE: we use memory_order_relaxed here, meaning that we may actually miss + // a pending update. *However*, since we call this for every read, we'll get it + // eventually. + if (likely(!m_serverToAdvertise.load(std::memory_order_relaxed))) { + return; + } + std::string *hostname_ptr; + if ((hostname_ptr = m_serverToAdvertise.exchange(nullptr))) { + std::unique_ptr hostname(hostname_ptr); + edm::Service statsService; + if (statsService.isAvailable()) { + statsService->setCurrentServer(m_name, *hostname_ptr); } + } } - -void -RequestManager::queueUpdateCurrentServer(const std::string &id) -{ - auto hostname = std::make_unique(id); - if (Source::getHostname(id, *hostname)) - { - std::string *null_hostname = nullptr; - if (m_serverToAdvertise.compare_exchange_strong(null_hostname, hostname.get())) - { - hostname.release(); - } +void RequestManager::queueUpdateCurrentServer(const std::string &id) { + auto hostname = std::make_unique(id); + if (Source::getHostname(id, *hostname)) { + std::string *null_hostname = nullptr; + if (m_serverToAdvertise.compare_exchange_strong(null_hostname, hostname.get())) { + hostname.release(); } + } } -namespace { - std::string formatSites(std::vector > const& iSources) { +namespace { + std::string formatSites(std::vector> const &iSources) { std::string siteA, siteB; - if (!iSources.empty()) {siteA = iSources[0]->Site();} - if (iSources.size() == 2) {siteB = iSources[1]->Site();} + if (!iSources.empty()) { + siteA = iSources[0]->Site(); + } + if (iSources.size() == 2) { + siteB = iSources[1]->Site(); + } std::string siteList = siteA; - if (!siteB.empty() && (siteB != siteA)) {siteList = siteA + ", " + siteB;} + if (!siteB.empty() && (siteB != siteA)) { + siteList = siteA + ", " + siteB; + } return siteList; } -} - -void -RequestManager::reportSiteChange(std::vector > const& iOld, - std::vector > const& iNew, - std::string orig_site) const -{ +} // namespace + +void RequestManager::reportSiteChange(std::vector> const &iOld, + std::vector> const &iNew, + std::string orig_site) const { auto siteList = formatSites(iNew); - if (!orig_site.empty() && (orig_site != siteList)) - { + if (!orig_site.empty() && (orig_site != siteList)) { edm::LogWarning("XrdAdaptor") << "Data is served from " << siteList << " instead of original site " << orig_site; - } - else { + } else { auto oldSites = formatSites(iOld); - if (orig_site.empty() && (siteList != oldSites)) - { - if (!oldSites.empty() ) + if (orig_site.empty() && (siteList != oldSites)) { + if (!oldSites.empty()) edm::LogWarning("XrdAdaptor") << "Data is now served from " << siteList << " instead of previous " << oldSites; } } } - -void -RequestManager::checkSources(timespec &now, IOSize requestSize, - std::vector>& activeSources, - std::vector>& inactiveSources) -{ - edm::LogVerbatim("XrdAdaptorInternal") << "Time since last check " - << timeDiffMS(now, m_lastSourceCheck) << "; last check " - << m_lastSourceCheck.tv_sec << "; now " < 1000) - { - { // Be more aggressive about getting rid of very bad sources. 
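updateCurrentServer and queueUpdateCurrentServer above form a single-slot handoff: the Xrootd-side callback allocates a hostname string and publishes it into an atomic pointer only if the slot is empty (compare_exchange_strong), while the framework thread checks the slot with a cheap relaxed load on every read and, when it sees a pending value, takes ownership back with exchange(nullptr). A standalone sketch of that pattern follows; the names are illustrative stand-ins, not the class's actual members:

    #include <atomic>
    #include <iostream>
    #include <memory>
    #include <string>

    std::atomic<std::string *> slot{nullptr};

    // Producer side (like queueUpdateCurrentServer): publish only if the slot is empty.
    void publish(const std::string &host) {
      auto candidate = std::make_unique<std::string>(host);
      std::string *expected = nullptr;
      if (slot.compare_exchange_strong(expected, candidate.get())) {
        candidate.release();  // the slot now owns the string
      }                       // otherwise an earlier update is still pending; drop ours
    }

    // Consumer side (like updateCurrentServer): cheap check, then claim ownership.
    void consume() {
      if (!slot.load(std::memory_order_relaxed)) {
        return;  // may miss a pending update this time; a later call will see it
      }
      if (std::string *raw = slot.exchange(nullptr)) {
        std::unique_ptr<std::string> owned(raw);
        std::cout << "current server: " << *owned << "\n";
      }
    }

    int main() {
      publish("xrootd.example.org");
      consume();
    }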
+void RequestManager::checkSources(timespec &now, + IOSize requestSize, + std::vector> &activeSources, + std::vector> &inactiveSources) { + edm::LogVerbatim("XrdAdaptorInternal") << "Time since last check " << timeDiffMS(now, m_lastSourceCheck) + << "; last check " << m_lastSourceCheck.tv_sec << "; now " << now.tv_sec + << "; next check " << m_nextActiveSourceCheck.tv_sec << std::endl; + if (timeDiffMS(now, m_lastSourceCheck) > 1000) { + { // Be more aggressive about getting rid of very bad sources. compareSources(now, 0, 1, activeSources, inactiveSources); compareSources(now, 1, 0, activeSources, inactiveSources); } - if (timeDiffMS(now, m_nextActiveSourceCheck) > 0) - { + if (timeDiffMS(now, m_nextActiveSourceCheck) > 0) { checkSourcesImpl(now, requestSize, activeSources, inactiveSources); } } } - -bool -RequestManager::compareSources(const timespec &now, unsigned a, unsigned b, - std::vector>& activeSources, - std::vector>& inactiveSources) const -{ - if (activeSources.size() < std::max(a, b)+1) {return false;} +bool RequestManager::compareSources(const timespec &now, + unsigned a, + unsigned b, + std::vector> &activeSources, + std::vector> &inactiveSources) const { + if (activeSources.size() < std::max(a, b) + 1) { + return false; + } bool findNewSource = false; if ((activeSources[a]->getQuality() > 5130) || - ((activeSources[a]->getQuality() > 260) && (activeSources[b]->getQuality()*4 < activeSources[a]->getQuality()))) - { - edm::LogVerbatim("XrdAdaptorInternal") << "Removing " - << activeSources[a]->PrettyID() << " from active sources due to poor quality (" - << activeSources[a]->getQuality() << " vs " << activeSources[b]->getQuality() << ")" << std::endl; - if (activeSources[a]->getLastDowngrade().tv_sec != 0) {findNewSource = true;} + ((activeSources[a]->getQuality() > 260) && + (activeSources[b]->getQuality() * 4 < activeSources[a]->getQuality()))) { + edm::LogVerbatim("XrdAdaptorInternal") + << "Removing " << activeSources[a]->PrettyID() << " from active sources due to poor quality (" + << activeSources[a]->getQuality() << " vs " << activeSources[b]->getQuality() << ")" << std::endl; + if (activeSources[a]->getLastDowngrade().tv_sec != 0) { + findNewSource = true; + } activeSources[a]->setLastDowngrade(now); inactiveSources.emplace_back(activeSources[a]); auto oldSources = activeSources; - activeSources.erase(activeSources.begin()+a); - reportSiteChange(oldSources,activeSources); + activeSources.erase(activeSources.begin() + a); + reportSiteChange(oldSources, activeSources); } return findNewSource; } -void -RequestManager::checkSourcesImpl(timespec &now, - IOSize requestSize, - std::vector>& activeSources, - std::vector>& inactiveSources) -{ - +void RequestManager::checkSourcesImpl(timespec &now, + IOSize requestSize, + std::vector> &activeSources, + std::vector> &inactiveSources) { bool findNewSource = false; - if (activeSources.size() <= 1) - { + if (activeSources.size() <= 1) { findNewSource = true; - } - else if (activeSources.size() > 1) - { - edm::LogVerbatim("XrdAdaptorInternal") << "Source 0 quality " << activeSources[0]->getQuality() << ", source 1 quality " << activeSources[1]->getQuality() << std::endl; - findNewSource |= compareSources(now, 0, 1, activeSources,inactiveSources); - findNewSource |= compareSources(now, 1, 0,activeSources, inactiveSources); + } else if (activeSources.size() > 1) { + edm::LogVerbatim("XrdAdaptorInternal") << "Source 0 quality " << activeSources[0]->getQuality() + << ", source 1 quality " << activeSources[1]->getQuality() << std::endl; + 
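compareSources above demotes active source a either when its quality number is catastrophic (above 5130) or when it is both mediocre (above 260) and more than four times worse than its partner b; lower quality values are better here. A small worked check of that rule with hypothetical quality numbers (illustrative sketch, not part of the patch):

    #include <iostream>

    // The demotion test from compareSources; qa/qb stand for getQuality() of sources a and b.
    bool shouldDemote(unsigned qa, unsigned qb) {
      return (qa > 5130) || ((qa > 260) && (qb * 4 < qa));
    }

    int main() {
      std::cout << std::boolalpha
                << shouldDemote(6000, 100) << "\n"   // true: absolute threshold exceeded
                << shouldDemote(1200, 350) << "\n"   // false: worse than b, but not 4x worse
                << shouldDemote(1200, 200) << "\n"   // true: above 260 and more than 4x worse than b
                << shouldDemote(200, 10) << "\n";    // false: below the 260 floor
      return 0;
    }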
findNewSource |= compareSources(now, 0, 1, activeSources, inactiveSources); + findNewSource |= compareSources(now, 1, 0, activeSources, inactiveSources); // NOTE: We could probably replace the copy with a better sort function. // However, there are typically very few sources and the correctness is more obvious right now. - std::vector > eligibleInactiveSources; eligibleInactiveSources.reserve(inactiveSources.size()); - for (const auto & source : inactiveSources) - { - if (timeDiffMS(now, source->getLastDowngrade()) > (XRD_ADAPTOR_SHORT_OPEN_DELAY-1)*1000) {eligibleInactiveSources.push_back(source);} - } - auto bestInactiveSource = std::min_element(eligibleInactiveSources.begin(), eligibleInactiveSources.end(), - [](const std::shared_ptr &s1, const std::shared_ptr &s2) {return s1->getQuality() < s2->getQuality();}); - auto worstActiveSource = std::max_element(activeSources.cbegin(), activeSources.cend(), - [](const std::shared_ptr &s1, const std::shared_ptr &s2) {return s1->getQuality() < s2->getQuality();}); - if (bestInactiveSource != eligibleInactiveSources.end() && bestInactiveSource->get()) - { - edm::LogVerbatim("XrdAdaptorInternal") << "Best inactive source: " <<(*bestInactiveSource)->PrettyID() - << ", quality " << (*bestInactiveSource)->getQuality(); + std::vector> eligibleInactiveSources; + eligibleInactiveSources.reserve(inactiveSources.size()); + for (const auto &source : inactiveSources) { + if (timeDiffMS(now, source->getLastDowngrade()) > (XRD_ADAPTOR_SHORT_OPEN_DELAY - 1) * 1000) { + eligibleInactiveSources.push_back(source); + } } - edm::LogVerbatim("XrdAdaptorInternal") << "Worst active source: " <<(*worstActiveSource)->PrettyID() - << ", quality " << (*worstActiveSource)->getQuality(); - // Only upgrade the source if we only have one source and the best inactive one isn't too horrible. - // Regardless, we will want to re-evaluate the new source quickly (within 5s). 
- if ((bestInactiveSource != eligibleInactiveSources.end()) && activeSources.size() == 1 && ((*bestInactiveSource)->getQuality() < 4*activeSources[0]->getQuality())) - { - auto oldSources = activeSources; - activeSources.push_back(*bestInactiveSource); - reportSiteChange(oldSources, activeSources); - for (auto it = inactiveSources.begin(); it != inactiveSources.end(); it++) if (it->get() == bestInactiveSource->get()) {inactiveSources.erase(it); break;} + auto bestInactiveSource = + std::min_element(eligibleInactiveSources.begin(), + eligibleInactiveSources.end(), + [](const std::shared_ptr &s1, const std::shared_ptr &s2) { + return s1->getQuality() < s2->getQuality(); + }); + auto worstActiveSource = std::max_element(activeSources.cbegin(), + activeSources.cend(), + [](const std::shared_ptr &s1, const std::shared_ptr &s2) { + return s1->getQuality() < s2->getQuality(); + }); + if (bestInactiveSource != eligibleInactiveSources.end() && bestInactiveSource->get()) { + edm::LogVerbatim("XrdAdaptorInternal") << "Best inactive source: " << (*bestInactiveSource)->PrettyID() + << ", quality " << (*bestInactiveSource)->getQuality(); } - else while ((bestInactiveSource != eligibleInactiveSources.end()) && (*worstActiveSource)->getQuality() > (*bestInactiveSource)->getQuality()+XRD_ADAPTOR_SOURCE_QUALITY_FUDGE) - { - edm::LogVerbatim("XrdAdaptorInternal") << "Removing " << (*worstActiveSource)->PrettyID() - << " from active sources due to quality (" << (*worstActiveSource)->getQuality() - << ") and promoting " << (*bestInactiveSource)->PrettyID() << " (quality: " - << (*bestInactiveSource)->getQuality() << ")" << std::endl; + edm::LogVerbatim("XrdAdaptorInternal") << "Worst active source: " << (*worstActiveSource)->PrettyID() + << ", quality " << (*worstActiveSource)->getQuality(); + // Only upgrade the source if we only have one source and the best inactive one isn't too horrible. + // Regardless, we will want to re-evaluate the new source quickly (within 5s). 
+ if ((bestInactiveSource != eligibleInactiveSources.end()) && activeSources.size() == 1 && + ((*bestInactiveSource)->getQuality() < 4 * activeSources[0]->getQuality())) { + auto oldSources = activeSources; + activeSources.push_back(*bestInactiveSource); + reportSiteChange(oldSources, activeSources); + for (auto it = inactiveSources.begin(); it != inactiveSources.end(); it++) + if (it->get() == bestInactiveSource->get()) { + inactiveSources.erase(it); + break; + } + } else + while ((bestInactiveSource != eligibleInactiveSources.end()) && + (*worstActiveSource)->getQuality() > + (*bestInactiveSource)->getQuality() + XRD_ADAPTOR_SOURCE_QUALITY_FUDGE) { + edm::LogVerbatim("XrdAdaptorInternal") + << "Removing " << (*worstActiveSource)->PrettyID() << " from active sources due to quality (" + << (*worstActiveSource)->getQuality() << ") and promoting " << (*bestInactiveSource)->PrettyID() + << " (quality: " << (*bestInactiveSource)->getQuality() << ")" << std::endl; (*worstActiveSource)->setLastDowngrade(now); - for (auto it = inactiveSources.begin(); it != inactiveSources.end(); it++) if (it->get() == bestInactiveSource->get()) {inactiveSources.erase(it); break;} + for (auto it = inactiveSources.begin(); it != inactiveSources.end(); it++) + if (it->get() == bestInactiveSource->get()) { + inactiveSources.erase(it); + break; + } inactiveSources.emplace_back(std::move(*worstActiveSource)); auto oldSources = activeSources; activeSources.erase(worstActiveSource); activeSources.emplace_back(std::move(*bestInactiveSource)); reportSiteChange(oldSources, activeSources); eligibleInactiveSources.clear(); - for (const auto & source : inactiveSources) if (timeDiffMS(now, source->getLastDowngrade()) > (XRD_ADAPTOR_LONG_OPEN_DELAY-1)*1000) eligibleInactiveSources.push_back(source); - bestInactiveSource = std::min_element(eligibleInactiveSources.begin(), eligibleInactiveSources.end(), - [](const std::shared_ptr &s1, const std::shared_ptr &s2) {return s1->getQuality() < s2->getQuality();}); - worstActiveSource = std::max_element(activeSources.begin(), activeSources.end(), - [](const std::shared_ptr &s1, const std::shared_ptr &s2) {return s1->getQuality() < s2->getQuality();}); - } - if (!findNewSource && (timeDiffMS(now, m_lastSourceCheck) > 1000*XRD_ADAPTOR_LONG_OPEN_DELAY)) - { - float r = m_distribution(m_generator); - if (r < XRD_ADAPTOR_OPEN_PROBE_PERCENT) - { - findNewSource = true; - } + for (const auto &source : inactiveSources) + if (timeDiffMS(now, source->getLastDowngrade()) > (XRD_ADAPTOR_LONG_OPEN_DELAY - 1) * 1000) + eligibleInactiveSources.push_back(source); + bestInactiveSource = std::min_element(eligibleInactiveSources.begin(), + eligibleInactiveSources.end(), + [](const std::shared_ptr &s1, const std::shared_ptr &s2) { + return s1->getQuality() < s2->getQuality(); + }); + worstActiveSource = std::max_element(activeSources.begin(), + activeSources.end(), + [](const std::shared_ptr &s1, const std::shared_ptr &s2) { + return s1->getQuality() < s2->getQuality(); + }); + } + if (!findNewSource && (timeDiffMS(now, m_lastSourceCheck) > 1000 * XRD_ADAPTOR_LONG_OPEN_DELAY)) { + float r = m_distribution(m_generator); + if (r < XRD_ADAPTOR_OPEN_PROBE_PERCENT) { + findNewSource = true; + } } } - if (findNewSource) - { + if (findNewSource) { m_open_handler->open(); m_lastSourceCheck = now; } // Only aggressively look for new sources if we don't have two. 
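The last branch of checkSourcesImpl above adds an occasional exploratory open: once more than XRD_ADAPTOR_LONG_OPEN_DELAY seconds have passed since the previous check, a draw from m_distribution (constructed over 0..100) triggers a new-source probe roughly XRD_ADAPTOR_OPEN_PROBE_PERCENT (10) percent of the time. A minimal sketch of that probabilistic trigger, assuming a uniform real distribution since the member types are not shown in this hunk:

    #include <iostream>
    #include <random>

    int main() {
      std::mt19937 generator{std::random_device{}()};
      std::uniform_real_distribution<float> distribution(0, 100);  // analogous to m_distribution(0, 100)
      const float kOpenProbePercent = 10;                          // XRD_ADAPTOR_OPEN_PROBE_PERCENT

      int probes = 0;
      const int trials = 10000;
      for (int i = 0; i < trials; ++i) {
        if (distribution(generator) < kOpenProbePercent) {
          ++probes;  // the adaptor would call m_open_handler->open() here
        }
      }
      std::cout << "probed on " << probes << " of " << trials << " checks (~10% expected)\n";
      return 0;
    }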
- if (activeSources.size() == 2) - { + if (activeSources.size() == 2) { now.tv_sec += XRD_ADAPTOR_LONG_OPEN_DELAY - XRD_ADAPTOR_SHORT_OPEN_DELAY; - } - else - { + } else { now.tv_sec += XRD_ADAPTOR_SHORT_OPEN_DELAY; } m_nextActiveSourceCheck = now; } -std::shared_ptr -RequestManager::getActiveFile() const -{ +std::shared_ptr RequestManager::getActiveFile() const { std::lock_guard sentry(m_source_mutex); - if (m_activeSources.empty()) - { + if (m_activeSources.empty()) { edm::Exception ex(edm::errors::FileReadError); - ex << "XrdAdaptor::RequestManager::getActiveFile(name='" << m_name - << "', flags=0x" << std::hex << m_flags - << ", permissions=0" << std::oct << m_perms << std::dec - << ") => Source used after fatal exception."; + ex << "XrdAdaptor::RequestManager::getActiveFile(name='" << m_name << "', flags=0x" << std::hex << m_flags + << ", permissions=0" << std::oct << m_perms << std::dec << ") => Source used after fatal exception."; ex.addContext("In XrdAdaptor::RequestManager::handle()"); addConnections(ex); throw ex; @@ -477,93 +453,69 @@ RequestManager::getActiveFile() const return m_activeSources[0]->getFileHandle(); } -void -RequestManager::getActiveSourceNames(std::vector & sources) const -{ +void RequestManager::getActiveSourceNames(std::vector &sources) const { std::lock_guard sentry(m_source_mutex); sources.reserve(m_activeSources.size()); - for (auto const& source : m_activeSources) { + for (auto const &source : m_activeSources) { sources.push_back(source->ID()); } } -void -RequestManager::getPrettyActiveSourceNames(std::vector & sources) const -{ +void RequestManager::getPrettyActiveSourceNames(std::vector &sources) const { std::lock_guard sentry(m_source_mutex); sources.reserve(m_activeSources.size()); - for (auto const& source : m_activeSources) { + for (auto const &source : m_activeSources) { sources.push_back(source->PrettyID()); } } -void -RequestManager::getDisabledSourceNames(std::vector & sources) const -{ +void RequestManager::getDisabledSourceNames(std::vector &sources) const { sources.reserve(m_disabledSourceStrings.size()); - for (auto const& source : m_disabledSourceStrings) { + for (auto const &source : m_disabledSourceStrings) { sources.push_back(source); } } -void -RequestManager::addConnections(cms::Exception &ex) const -{ +void RequestManager::addConnections(cms::Exception &ex) const { std::vector sources; getPrettyActiveSourceNames(sources); - for (auto const& source : sources) - { + for (auto const &source : sources) { ex.addAdditionalInfo("Active source: " + source); } sources.clear(); getDisabledSourceNames(sources); - for (auto const& source : sources) - { + for (auto const &source : sources) { ex.addAdditionalInfo("Disabled source: " + source); } } -std::shared_ptr -RequestManager::pickSingleSource() -{ +std::shared_ptr RequestManager::pickSingleSource() { std::shared_ptr source = nullptr; { std::lock_guard sentry(m_source_mutex); - if (m_activeSources.size() == 2) - { - if (m_nextInitialSourceToggle) - { - source = m_activeSources[0]; - m_nextInitialSourceToggle = false; - } - else - { - source = m_activeSources[1]; - m_nextInitialSourceToggle = true; - } - } - else if (m_activeSources.empty()) - { - edm::Exception ex(edm::errors::FileReadError); - ex << "XrdAdaptor::RequestManager::handle read(name='" << m_name - << "', flags=0x" << std::hex << m_flags - << ", permissions=0" << std::oct << m_perms << std::dec - << ") => Source used after fatal exception."; - ex.addContext("In XrdAdaptor::RequestManager::handle()"); - addConnections(ex); - throw ex; 
- } - else - { + if (m_activeSources.size() == 2) { + if (m_nextInitialSourceToggle) { source = m_activeSources[0]; + m_nextInitialSourceToggle = false; + } else { + source = m_activeSources[1]; + m_nextInitialSourceToggle = true; + } + } else if (m_activeSources.empty()) { + edm::Exception ex(edm::errors::FileReadError); + ex << "XrdAdaptor::RequestManager::handle read(name='" << m_name << "', flags=0x" << std::hex << m_flags + << ", permissions=0" << std::oct << m_perms << std::dec << ") => Source used after fatal exception."; + ex.addContext("In XrdAdaptor::RequestManager::handle()"); + addConnections(ex); + throw ex; + } else { + source = m_activeSources[0]; } } return source; } -std::future -RequestManager::handle(std::shared_ptr c_ptr) -{ +std::future RequestManager::handle(std::shared_ptr c_ptr) { assert(c_ptr.get()); timespec now; GET_CLOCK_MONOTONIC(now); @@ -576,7 +528,7 @@ RequestManager::handle(std::shared_ptr c_ptr) } { //make sure we update values before calling pickSingelSource - std::shared_ptr guard(nullptr, [this, &activeSources, &inactiveSources](void *) { + std::shared_ptr guard(nullptr, [this, &activeSources, &inactiveSources](void *) { std::lock_guard sentry(m_source_mutex); m_activeSources = std::move(activeSources); m_inactiveSources = std::move(inactiveSources); @@ -584,495 +536,440 @@ RequestManager::handle(std::shared_ptr c_ptr) checkSources(now, c_ptr->getSize(), activeSources, inactiveSources); } - + std::shared_ptr source = pickSingleSource(); source->handle(c_ptr); return c_ptr->get_future(); } -std::string -RequestManager::prepareOpaqueString() const -{ - std::stringstream ss; - ss << "tried="; - size_t count = 0; - { - std::lock_guard sentry(m_source_mutex); - - for ( const auto & it : m_activeSources ) - { - count++; - ss << it->ExcludeID().substr(0, it->ExcludeID().find(":")) << ","; - } - for ( const auto & it : m_inactiveSources ) - { - count++; - ss << it->ExcludeID().substr(0, it->ExcludeID().find(":")) << ","; - } - } - for ( const auto & it : m_disabledExcludeStrings ) - { - count++; - ss << it.substr(0, it.find(":")) << ","; +std::string RequestManager::prepareOpaqueString() const { + std::stringstream ss; + ss << "tried="; + size_t count = 0; + { + std::lock_guard sentry(m_source_mutex); + + for (const auto &it : m_activeSources) { + count++; + ss << it->ExcludeID().substr(0, it->ExcludeID().find(":")) << ","; } - if (count) - { - std::string tmp_str = ss.str(); - return tmp_str.substr(0, tmp_str.size()-1); + for (const auto &it : m_inactiveSources) { + count++; + ss << it->ExcludeID().substr(0, it->ExcludeID().find(":")) << ","; } - return ""; + } + for (const auto &it : m_disabledExcludeStrings) { + count++; + ss << it.substr(0, it.find(":")) << ","; + } + if (count) { + std::string tmp_str = ss.str(); + return tmp_str.substr(0, tmp_str.size() - 1); + } + return ""; } -void -XrdAdaptor::RequestManager::handleOpen(XrdCl::XRootDStatus &status, std::shared_ptr source) -{ - std::lock_guard sentry(m_source_mutex); - if (status.IsOK()) - { - edm::LogVerbatim("XrdAdaptorInternal") << "Successfully opened new source: " << source->PrettyID() << std::endl; - for (const auto & s : m_activeSources) - { - if (source->ID() == s->ID()) - { - edm::LogVerbatim("XrdAdaptorInternal") << "Xrootd server returned excluded source " << source->PrettyID() - << "; ignoring" << std::endl; - unsigned returned_count = ++m_excluded_active_count; - m_nextActiveSourceCheck.tv_sec += XRD_ADAPTOR_SHORT_OPEN_DELAY; - if (returned_count >= 3) {m_nextActiveSourceCheck.tv_sec += 
XRD_ADAPTOR_LONG_OPEN_DELAY - 2*XRD_ADAPTOR_SHORT_OPEN_DELAY;} - return; - } - } - for (const auto & s : m_inactiveSources) - { - if (source->ID() == s->ID()) - { - edm::LogVerbatim("XrdAdaptorInternal") << "Xrootd server returned excluded inactive source " << source->PrettyID() - << "; ignoring" << std::endl; - m_nextActiveSourceCheck.tv_sec += XRD_ADAPTOR_LONG_OPEN_DELAY - XRD_ADAPTOR_SHORT_OPEN_DELAY; - return; - } - } - if (m_activeSources.size() < 2) - { - auto oldSources = m_activeSources; - m_activeSources.push_back(source); - reportSiteChange(oldSources, m_activeSources); - queueUpdateCurrentServer(source->ID()); - } - else - { - m_inactiveSources.push_back(source); +void XrdAdaptor::RequestManager::handleOpen(XrdCl::XRootDStatus &status, std::shared_ptr source) { + std::lock_guard sentry(m_source_mutex); + if (status.IsOK()) { + edm::LogVerbatim("XrdAdaptorInternal") << "Successfully opened new source: " << source->PrettyID() << std::endl; + for (const auto &s : m_activeSources) { + if (source->ID() == s->ID()) { + edm::LogVerbatim("XrdAdaptorInternal") + << "Xrootd server returned excluded source " << source->PrettyID() << "; ignoring" << std::endl; + unsigned returned_count = ++m_excluded_active_count; + m_nextActiveSourceCheck.tv_sec += XRD_ADAPTOR_SHORT_OPEN_DELAY; + if (returned_count >= 3) { + m_nextActiveSourceCheck.tv_sec += XRD_ADAPTOR_LONG_OPEN_DELAY - 2 * XRD_ADAPTOR_SHORT_OPEN_DELAY; } + return; + } } - else - { // File-open failure - wait at least 120s before next attempt. - edm::LogVerbatim("XrdAdaptorInternal") << "Got failure when trying to open a new source" << std::endl; + for (const auto &s : m_inactiveSources) { + if (source->ID() == s->ID()) { + edm::LogVerbatim("XrdAdaptorInternal") + << "Xrootd server returned excluded inactive source " << source->PrettyID() << "; ignoring" << std::endl; m_nextActiveSourceCheck.tv_sec += XRD_ADAPTOR_LONG_OPEN_DELAY - XRD_ADAPTOR_SHORT_OPEN_DELAY; + return; + } + } + if (m_activeSources.size() < 2) { + auto oldSources = m_activeSources; + m_activeSources.push_back(source); + reportSiteChange(oldSources, m_activeSources); + queueUpdateCurrentServer(source->ID()); + } else { + m_inactiveSources.push_back(source); } + } else { // File-open failure - wait at least 120s before next attempt. + edm::LogVerbatim("XrdAdaptorInternal") << "Got failure when trying to open a new source" << std::endl; + m_nextActiveSourceCheck.tv_sec += XRD_ADAPTOR_LONG_OPEN_DELAY - XRD_ADAPTOR_SHORT_OPEN_DELAY; + } } -std::future -XrdAdaptor::RequestManager::handle(std::shared_ptr > iolist) -{ - //Use a copy of m_activeSources and m_inactiveSources throughout this function - // in order to avoid holding the lock a long time and causing a deadlock. - // When the function is over we will update the values of the containers - std::vector> activeSources, inactiveSources; - { - std::lock_guard sentry(m_source_mutex); - activeSources = m_activeSources; - inactiveSources = m_inactiveSources; - } - //Make sure we update changes when we leave the function - std::shared_ptr guard(nullptr, [this, &activeSources, &inactiveSources](void *) { - std::lock_guard sentry(m_source_mutex); - m_activeSources = std::move(activeSources); - m_inactiveSources = std::move(inactiveSources); - }); - - updateCurrentServer(); +std::future XrdAdaptor::RequestManager::handle(std::shared_ptr> iolist) { + //Use a copy of m_activeSources and m_inactiveSources throughout this function + // in order to avoid holding the lock a long time and causing a deadlock. 
+ // When the function is over we will update the values of the containers + std::vector> activeSources, inactiveSources; + { + std::lock_guard sentry(m_source_mutex); + activeSources = m_activeSources; + inactiveSources = m_inactiveSources; + } + //Make sure we update changes when we leave the function + std::shared_ptr guard(nullptr, [this, &activeSources, &inactiveSources](void *) { + std::lock_guard sentry(m_source_mutex); + m_activeSources = std::move(activeSources); + m_inactiveSources = std::move(inactiveSources); + }); - timespec now; - GET_CLOCK_MONOTONIC(now); + updateCurrentServer(); - edm::CPUTimer timer; - timer.start(); + timespec now; + GET_CLOCK_MONOTONIC(now); - if (activeSources.size() == 1) - { - auto c_ptr = std::make_shared(*this, iolist); - checkSources(now, c_ptr->getSize(), activeSources,inactiveSources); - activeSources[0]->handle(c_ptr); - return c_ptr->get_future(); - } - // Make sure active - else if (activeSources.empty()) - { - edm::Exception ex(edm::errors::FileReadError); - ex << "XrdAdaptor::RequestManager::handle readv(name='" << m_name - << "', flags=0x" << std::hex << m_flags - << ", permissions=0" << std::oct << m_perms << std::dec - << ") => Source used after fatal exception."; - ex.addContext("In XrdAdaptor::RequestManager::handle()"); - addConnections(ex); - throw ex; - } + edm::CPUTimer timer; + timer.start(); - assert(iolist.get()); - auto req1 = std::make_shared>(); - auto req2 = std::make_shared>(); - splitClientRequest(*iolist, *req1, *req2, activeSources); - - checkSources(now, req1->size() + req2->size(), activeSources, inactiveSources); - // CheckSources may have removed a source - if (activeSources.size() == 1) - { - auto c_ptr = std::make_shared(*this, iolist); - activeSources[0]->handle(c_ptr); - return c_ptr->get_future(); - } + if (activeSources.size() == 1) { + auto c_ptr = std::make_shared(*this, iolist); + checkSources(now, c_ptr->getSize(), activeSources, inactiveSources); + activeSources[0]->handle(c_ptr); + return c_ptr->get_future(); + } + // Make sure active + else if (activeSources.empty()) { + edm::Exception ex(edm::errors::FileReadError); + ex << "XrdAdaptor::RequestManager::handle readv(name='" << m_name << "', flags=0x" << std::hex << m_flags + << ", permissions=0" << std::oct << m_perms << std::dec << ") => Source used after fatal exception."; + ex.addContext("In XrdAdaptor::RequestManager::handle()"); + addConnections(ex); + throw ex; + } - std::shared_ptr c_ptr1, c_ptr2; - std::future future1, future2; - if (!req1->empty()) - { - c_ptr1.reset(new XrdAdaptor::ClientRequest(*this, req1)); - activeSources[0]->handle(c_ptr1); - future1 = c_ptr1->get_future(); - } - if (!req2->empty()) - { - c_ptr2.reset(new XrdAdaptor::ClientRequest(*this, req2)); - activeSources[1]->handle(c_ptr2); - future2 = c_ptr2->get_future(); - } - if (!req1->empty() && !req2->empty()) - { - std::future task = std::async(std::launch::deferred, - [](std::future a, std::future b){ - // Wait until *both* results are available. This is essential - // as the callback may try referencing the RequestManager. If one - // throws an exception (causing the RequestManager to be destroyed by - // XrdFile) and the other has a failure, then the recovery code will - // reference the destroyed RequestManager. - // - // Unlike other places where we use shared/weak ptrs to maintain object - // lifetime and destruction asynchronously, we *cannot* destroy the request - // asynchronously as it is associated with a ROOT buffer. 
We must wait until we - // are guaranteed that XrdCl will not write into the ROOT buffer before we - // can return. - b.wait(); a.wait(); - return b.get() + a.get(); - }, - std::move(future1), - std::move(future2)); - timer.stop(); - //edm::LogVerbatim("XrdAdaptorInternal") << "Total time to create requests " << static_cast(1000*timer.realTime()) << std::endl; - return task; - } - else if (!req1->empty()) { return future1; } - else if (!req2->empty()) { return future2; } - else - { // Degenerate case - no bytes to read. - std::promise p; p.set_value(0); - return p.get_future(); - } + assert(iolist.get()); + auto req1 = std::make_shared>(); + auto req2 = std::make_shared>(); + splitClientRequest(*iolist, *req1, *req2, activeSources); + + checkSources(now, req1->size() + req2->size(), activeSources, inactiveSources); + // CheckSources may have removed a source + if (activeSources.size() == 1) { + auto c_ptr = std::make_shared(*this, iolist); + activeSources[0]->handle(c_ptr); + return c_ptr->get_future(); + } + + std::shared_ptr c_ptr1, c_ptr2; + std::future future1, future2; + if (!req1->empty()) { + c_ptr1.reset(new XrdAdaptor::ClientRequest(*this, req1)); + activeSources[0]->handle(c_ptr1); + future1 = c_ptr1->get_future(); + } + if (!req2->empty()) { + c_ptr2.reset(new XrdAdaptor::ClientRequest(*this, req2)); + activeSources[1]->handle(c_ptr2); + future2 = c_ptr2->get_future(); + } + if (!req1->empty() && !req2->empty()) { + std::future task = + std::async(std::launch::deferred, + [](std::future a, std::future b) { + // Wait until *both* results are available. This is essential + // as the callback may try referencing the RequestManager. If one + // throws an exception (causing the RequestManager to be destroyed by + // XrdFile) and the other has a failure, then the recovery code will + // reference the destroyed RequestManager. + // + // Unlike other places where we use shared/weak ptrs to maintain object + // lifetime and destruction asynchronously, we *cannot* destroy the request + // asynchronously as it is associated with a ROOT buffer. We must wait until we + // are guaranteed that XrdCl will not write into the ROOT buffer before we + // can return. + b.wait(); + a.wait(); + return b.get() + a.get(); + }, + std::move(future1), + std::move(future2)); + timer.stop(); + //edm::LogVerbatim("XrdAdaptorInternal") << "Total time to create requests " << static_cast(1000*timer.realTime()) << std::endl; + return task; + } else if (!req1->empty()) { + return future1; + } else if (!req2->empty()) { + return future2; + } else { // Degenerate case - no bytes to read. + std::promise p; + p.set_value(0); + return p.get_future(); + } } -void -RequestManager::requestFailure(std::shared_ptr c_ptr, XrdCl::Status &c_status) -{ - std::shared_ptr source_ptr = c_ptr->getCurrentSource(); - - // Fail early for invalid responses - XrdFile has a separate path for handling this. 
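The split-readv path in handle() above returns a single future that, via std::async with std::launch::deferred, waits for both per-source futures and sums their byte counts; the deferred launch means the waiting happens on whichever framework thread eventually calls get(), and the result is not released until both transfers have finished writing into the ROOT buffer. A self-contained sketch of that combination step, with IOSize stood in by size_t and the two reads simulated by plain promises (illustrative, not part of the patch):

    #include <cstddef>
    #include <future>
    #include <iostream>

    int main() {
      using IOSize = std::size_t;  // stand-in for the edm IOSize typedef

      std::promise<IOSize> p1, p2;
      std::future<IOSize> f1 = p1.get_future();
      std::future<IOSize> f2 = p2.get_future();

      // Combine the two reads into one result, as RequestManager::handle() does.
      std::future<IOSize> combined = std::async(
          std::launch::deferred,
          [](std::future<IOSize> a, std::future<IOSize> b) {
            b.wait();  // wait for *both* before returning, so neither
            a.wait();  // buffer can still be written to afterwards
            return a.get() + b.get();
          },
          std::move(f1),
          std::move(f2));

      p1.set_value(4096);  // pretend source 0 delivered 4096 bytes
      p2.set_value(1024);  // pretend source 1 delivered 1024 bytes
      std::cout << "total bytes read: " << combined.get() << "\n";  // 5120
      return 0;
    }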
- if (c_status.code == XrdCl::errInvalidResponse) - { - edm::LogWarning("XrdAdaptorInternal") << "Invalid response when reading from " << source_ptr->PrettyID(); - XrootdException ex(c_status, edm::errors::FileReadError); - ex << "XrdAdaptor::RequestManager::requestFailure readv(name='" << m_name - << "', flags=0x" << std::hex << m_flags - << ", permissions=0" << std::oct << m_perms << std::dec - << ", old source=" << source_ptr->PrettyID() - << ") => Invalid ReadV response from server"; - ex.addContext("In XrdAdaptor::RequestManager::requestFailure()"); - addConnections(ex); - throw ex; - } - edm::LogWarning("XrdAdaptorInternal") << "Request failure when reading from " << source_ptr->PrettyID(); - - // Note that we do not delete the Source itself. That is because this - // function may be called from within XrdCl::ResponseHandler::HandleResponseWithHosts - // In such a case, if you close a file in the handler, it will deadlock - m_disabledSourceStrings.insert(source_ptr->ID()); - m_disabledExcludeStrings.insert(source_ptr->ExcludeID()); - m_disabledSources.insert(source_ptr); - - std::unique_lock sentry(m_source_mutex); - if ((!m_activeSources.empty()) && (m_activeSources[0].get() == source_ptr.get())) - { - auto oldSources = m_activeSources; - m_activeSources.erase(m_activeSources.begin()); - reportSiteChange(oldSources, m_activeSources); - } - else if ((m_activeSources.size() > 1) && (m_activeSources[1].get() == source_ptr.get())) - { - auto oldSources = m_activeSources; - m_activeSources.erase(m_activeSources.begin()+1); - reportSiteChange(oldSources, m_activeSources); +void RequestManager::requestFailure(std::shared_ptr c_ptr, XrdCl::Status &c_status) { + std::shared_ptr source_ptr = c_ptr->getCurrentSource(); + + // Fail early for invalid responses - XrdFile has a separate path for handling this. + if (c_status.code == XrdCl::errInvalidResponse) { + edm::LogWarning("XrdAdaptorInternal") << "Invalid response when reading from " << source_ptr->PrettyID(); + XrootdException ex(c_status, edm::errors::FileReadError); + ex << "XrdAdaptor::RequestManager::requestFailure readv(name='" << m_name << "', flags=0x" << std::hex << m_flags + << ", permissions=0" << std::oct << m_perms << std::dec << ", old source=" << source_ptr->PrettyID() + << ") => Invalid ReadV response from server"; + ex.addContext("In XrdAdaptor::RequestManager::requestFailure()"); + addConnections(ex); + throw ex; + } + edm::LogWarning("XrdAdaptorInternal") << "Request failure when reading from " << source_ptr->PrettyID(); + + // Note that we do not delete the Source itself. 
That is because this + // function may be called from within XrdCl::ResponseHandler::HandleResponseWithHosts + // In such a case, if you close a file in the handler, it will deadlock + m_disabledSourceStrings.insert(source_ptr->ID()); + m_disabledExcludeStrings.insert(source_ptr->ExcludeID()); + m_disabledSources.insert(source_ptr); + + std::unique_lock sentry(m_source_mutex); + if ((!m_activeSources.empty()) && (m_activeSources[0].get() == source_ptr.get())) { + auto oldSources = m_activeSources; + m_activeSources.erase(m_activeSources.begin()); + reportSiteChange(oldSources, m_activeSources); + } else if ((m_activeSources.size() > 1) && (m_activeSources[1].get() == source_ptr.get())) { + auto oldSources = m_activeSources; + m_activeSources.erase(m_activeSources.begin() + 1); + reportSiteChange(oldSources, m_activeSources); + } + std::shared_ptr new_source; + if (m_activeSources.empty()) { + std::shared_future> future = m_open_handler->open(); + timespec now; + GET_CLOCK_MONOTONIC(now); + m_lastSourceCheck = now; + // Note we only wait for 180 seconds here. This is because we've already failed + // once and the likelihood the program has some inconsistent state is decent. + // We'd much rather fail hard than deadlock! + sentry.unlock(); + std::future_status status = future.wait_for(std::chrono::seconds(m_timeout + 10)); + if (status == std::future_status::timeout) { + XrootdException ex(c_status, edm::errors::FileOpenError); + ex << "XrdAdaptor::RequestManager::requestFailure Open(name='" << m_name << "', flags=0x" << std::hex << m_flags + << ", permissions=0" << std::oct << m_perms << std::dec << ", old source=" << source_ptr->PrettyID() + << ") => timeout when waiting for file open"; + ex.addContext("In XrdAdaptor::RequestManager::requestFailure()"); + addConnections(ex); + throw ex; + } else { + try { + new_source = future.get(); + } catch (edm::Exception &ex) { + ex.addContext("Handling XrdAdaptor::RequestManager::requestFailure()"); + ex.addAdditionalInfo("Original failed source is " + source_ptr->PrettyID()); + throw; + } } - std::shared_ptr new_source; - if (m_activeSources.empty()) - { - std::shared_future > future = m_open_handler->open(); - timespec now; - GET_CLOCK_MONOTONIC(now); - m_lastSourceCheck = now; - // Note we only wait for 180 seconds here. This is because we've already failed - // once and the likelihood the program has some inconsistent state is decent. - // We'd much rather fail hard than deadlock! - sentry.unlock(); - std::future_status status = future.wait_for(std::chrono::seconds(m_timeout+10)); - if (status == std::future_status::timeout) - { - XrootdException ex(c_status, edm::errors::FileOpenError); - ex << "XrdAdaptor::RequestManager::requestFailure Open(name='" << m_name - << "', flags=0x" << std::hex << m_flags - << ", permissions=0" << std::oct << m_perms << std::dec - << ", old source=" << source_ptr->PrettyID() - << ") => timeout when waiting for file open"; - ex.addContext("In XrdAdaptor::RequestManager::requestFailure()"); - addConnections(ex); - throw ex; - } - else - { - try - { - new_source = future.get(); - } - catch (edm::Exception &ex) - { - ex.addContext("Handling XrdAdaptor::RequestManager::requestFailure()"); - ex.addAdditionalInfo("Original failed source is " + source_ptr->PrettyID()); - throw; - } - } - - if (std::find(m_disabledSourceStrings.begin(), m_disabledSourceStrings.end(), new_source->ID()) != m_disabledSourceStrings.end()) - { - // The server gave us back a data node we requested excluded. Fatal! 
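When the failing source was the last active one, requestFailure above opens a replacement and waits for it with a hard deadline (the stream-error window plus 10 seconds), preferring a clean failure over a deadlock; a timed-out wait becomes a FileOpenError, and an exception from the open is re-thrown with extra context. A minimal sketch of that bounded wait on a std::shared_future, with the asynchronous open simulated by std::async (illustrative, not part of the patch):

    #include <chrono>
    #include <future>
    #include <iostream>
    #include <string>
    #include <thread>

    int main() {
      // Simulate an open that completes after a short delay.
      std::shared_future<std::string> pending = std::async(std::launch::async, [] {
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
        return std::string("new-source.example.org");
      }).share();

      const int timeoutSeconds = 2;  // analogous to m_timeout + 10 in requestFailure
      if (pending.wait_for(std::chrono::seconds(timeoutSeconds)) == std::future_status::timeout) {
        std::cerr << "timeout when waiting for file open\n";  // the adaptor throws here
        return 1;
      }
      try {
        std::cout << "opened replacement source: " << pending.get() << "\n";
      } catch (const std::exception &ex) {
        std::cerr << "open failed: " << ex.what() << "\n";  // the adaptor adds context and rethrows
        return 1;
      }
      return 0;
    }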
- XrootdException ex(c_status, edm::errors::FileOpenError); - ex << "XrdAdaptor::RequestManager::requestFailure Open(name='" << m_name - << "', flags=0x" << std::hex << m_flags - << ", permissions=0" << std::oct << m_perms << std::dec - << ", old source=" << source_ptr->PrettyID() - << ", new source=" << new_source->PrettyID() << ") => Xrootd server returned an excluded source"; - ex.addContext("In XrdAdaptor::RequestManager::requestFailure()"); - addConnections(ex); - throw ex; - } - sentry.lock(); - auto oldSources = m_activeSources; - m_activeSources.push_back(new_source); - reportSiteChange(oldSources,m_activeSources); - } - else - { - new_source = m_activeSources[0]; + if (std::find(m_disabledSourceStrings.begin(), m_disabledSourceStrings.end(), new_source->ID()) != + m_disabledSourceStrings.end()) { + // The server gave us back a data node we requested excluded. Fatal! + XrootdException ex(c_status, edm::errors::FileOpenError); + ex << "XrdAdaptor::RequestManager::requestFailure Open(name='" << m_name << "', flags=0x" << std::hex << m_flags + << ", permissions=0" << std::oct << m_perms << std::dec << ", old source=" << source_ptr->PrettyID() + << ", new source=" << new_source->PrettyID() << ") => Xrootd server returned an excluded source"; + ex.addContext("In XrdAdaptor::RequestManager::requestFailure()"); + addConnections(ex); + throw ex; } - new_source->handle(c_ptr); + sentry.lock(); + + auto oldSources = m_activeSources; + m_activeSources.push_back(new_source); + reportSiteChange(oldSources, m_activeSources); + } else { + new_source = m_activeSources[0]; + } + new_source->handle(c_ptr); } -static void -consumeChunkFront(size_t &front, std::vector &input, std::vector &output, IOSize chunksize) -{ - while ((chunksize > 0) && (front < input.size()) && (output.size() <= XRD_ADAPTOR_CHUNK_THRESHOLD)) - { - IOPosBuffer &io = input[front]; - IOPosBuffer &outio = output.back(); - if (io.size() > chunksize) - { - IOSize consumed; - if (!output.empty() && (outio.size() < XRD_CL_MAX_CHUNK) && (outio.offset() + static_cast(outio.size()) == io.offset())) - { - if (outio.size() + chunksize > XRD_CL_MAX_CHUNK) - { - consumed = (XRD_CL_MAX_CHUNK - outio.size()); - outio.set_size(XRD_CL_MAX_CHUNK); - } - else - { - consumed = chunksize; - outio.set_size(outio.size() + consumed); - } - } - else - { - consumed = chunksize; - output.emplace_back(IOPosBuffer(io.offset(), io.data(), chunksize)); - } - chunksize -= consumed; - IOSize newsize = io.size() - consumed; - IOOffset newoffset = io.offset() + consumed; - void* newdata = static_cast(io.data()) + consumed; - io.set_offset(newoffset); - io.set_data(newdata); - io.set_size(newsize); - } - else if (io.size() == 0) - { - front++; - } - else - { - output.push_back(io); - chunksize -= io.size(); - front++; +static void consumeChunkFront(size_t &front, + std::vector &input, + std::vector &output, + IOSize chunksize) { + while ((chunksize > 0) && (front < input.size()) && (output.size() <= XRD_ADAPTOR_CHUNK_THRESHOLD)) { + IOPosBuffer &io = input[front]; + IOPosBuffer &outio = output.back(); + if (io.size() > chunksize) { + IOSize consumed; + if (!output.empty() && (outio.size() < XRD_CL_MAX_CHUNK) && + (outio.offset() + static_cast(outio.size()) == io.offset())) { + if (outio.size() + chunksize > XRD_CL_MAX_CHUNK) { + consumed = (XRD_CL_MAX_CHUNK - outio.size()); + outio.set_size(XRD_CL_MAX_CHUNK); + } else { + consumed = chunksize; + outio.set_size(outio.size() + consumed); } + } else { + consumed = chunksize; + 
output.emplace_back(IOPosBuffer(io.offset(), io.data(), chunksize)); + } + chunksize -= consumed; + IOSize newsize = io.size() - consumed; + IOOffset newoffset = io.offset() + consumed; + void *newdata = static_cast(io.data()) + consumed; + io.set_offset(newoffset); + io.set_data(newdata); + io.set_size(newsize); + } else if (io.size() == 0) { + front++; + } else { + output.push_back(io); + chunksize -= io.size(); + front++; } + } } -static void -consumeChunkBack(size_t front, std::vector &input, std::vector &output, IOSize chunksize) -{ - while ((chunksize > 0) && (front < input.size()) && (output.size() <= XRD_ADAPTOR_CHUNK_THRESHOLD)) - { - IOPosBuffer &io = input.back(); - IOPosBuffer &outio = output.back(); - if (io.size() > chunksize) - { - IOSize consumed; - if (!output.empty() && (outio.size() < XRD_CL_MAX_CHUNK) && (outio.offset() + static_cast(outio.size()) == io.offset())) - { - if (outio.size() + chunksize > XRD_CL_MAX_CHUNK) - { - consumed = (XRD_CL_MAX_CHUNK - outio.size()); - outio.set_size(XRD_CL_MAX_CHUNK); - } - else - { - consumed = chunksize; - outio.set_size(outio.size() + consumed); - } - } - else - { - consumed = chunksize; - output.emplace_back(IOPosBuffer(io.offset(), io.data(), chunksize)); - } - chunksize -= consumed; - IOSize newsize = io.size() - consumed; - IOOffset newoffset = io.offset() + consumed; - void* newdata = static_cast(io.data()) + consumed; - io.set_offset(newoffset); - io.set_data(newdata); - io.set_size(newsize); - } - else if (io.size() == 0) - { - input.pop_back(); - } - else - { - output.push_back(io); - chunksize -= io.size(); - input.pop_back(); +static void consumeChunkBack(size_t front, + std::vector &input, + std::vector &output, + IOSize chunksize) { + while ((chunksize > 0) && (front < input.size()) && (output.size() <= XRD_ADAPTOR_CHUNK_THRESHOLD)) { + IOPosBuffer &io = input.back(); + IOPosBuffer &outio = output.back(); + if (io.size() > chunksize) { + IOSize consumed; + if (!output.empty() && (outio.size() < XRD_CL_MAX_CHUNK) && + (outio.offset() + static_cast(outio.size()) == io.offset())) { + if (outio.size() + chunksize > XRD_CL_MAX_CHUNK) { + consumed = (XRD_CL_MAX_CHUNK - outio.size()); + outio.set_size(XRD_CL_MAX_CHUNK); + } else { + consumed = chunksize; + outio.set_size(outio.size() + consumed); } + } else { + consumed = chunksize; + output.emplace_back(IOPosBuffer(io.offset(), io.data(), chunksize)); + } + chunksize -= consumed; + IOSize newsize = io.size() - consumed; + IOOffset newoffset = io.offset() + consumed; + void *newdata = static_cast(io.data()) + consumed; + io.set_offset(newoffset); + io.set_data(newdata); + io.set_size(newsize); + } else if (io.size() == 0) { + input.pop_back(); + } else { + output.push_back(io); + chunksize -= io.size(); + input.pop_back(); } + } } -static IOSize validateList(const std::vector req) -{ - IOSize total = 0; - off_t last_offset = -1; - for (const auto & it : req) - { - total += it.size(); - assert(it.offset() > last_offset); - last_offset = it.offset(); - assert(it.size() <= XRD_CL_MAX_CHUNK); - assert(it.offset() < 0x1ffffffffff); - } - assert(req.size() <= 1024); - return total; +static IOSize validateList(const std::vector req) { + IOSize total = 0; + off_t last_offset = -1; + for (const auto &it : req) { + total += it.size(); + assert(it.offset() > last_offset); + last_offset = it.offset(); + assert(it.size() <= XRD_CL_MAX_CHUNK); + assert(it.offset() < 0x1ffffffffff); + } + assert(req.size() <= 1024); + return total; } -void 
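For reference, validateList() above asserts the constraints XrdCl places on a vector read before it is handed off: strictly increasing offsets, each chunk at most XRD_CL_MAX_CHUNK, offsets below 2^41, and no more than 1024 entries, returning the total byte count. The same invariants restated on a simplified type (illustrative only, not the production code):

#include <cassert>
#include <cstddef>
#include <vector>

struct Chunk { long long offset; std::size_t size; };  // stand-in for IOPosBuffer

std::size_t totalBytes(const std::vector<Chunk>& req, std::size_t maxChunk) {
  std::size_t total = 0;
  long long last = -1;
  for (const auto& c : req) {
    assert(c.offset > last);     // strictly increasing offsets
    assert(c.size <= maxChunk);  // per-chunk cap (XRD_CL_MAX_CHUNK in the real code)
    last = c.offset;
    total += c.size;
  }
  assert(req.size() <= 1024);    // XrdCl vector-read entry limit
  return total;
}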
-XrdAdaptor::RequestManager::splitClientRequest(const std::vector &iolist, std::vector &req1, std::vector &req2, std::vector> const& activeSources) const -{ - if (iolist.empty()) return; - std::vector tmp_iolist(iolist.begin(), iolist.end()); - req1.reserve(iolist.size()/2+1); - req2.reserve(iolist.size()/2+1); - size_t front=0; - - // The quality of both is increased by 5 to prevent strange effects if quality is 0 for one source. - float q1 = static_cast(activeSources[0]->getQuality())+5; - float q2 = static_cast(activeSources[1]->getQuality())+5; - IOSize chunk1, chunk2; - // Make sure the chunk size is at least 1024; little point to reads less than that size. - chunk1 = std::max(static_cast(static_cast(XRD_CL_MAX_CHUNK)*(q2*q2/(q1*q1+q2*q2))), static_cast(1024)); - chunk2 = std::max(static_cast(static_cast(XRD_CL_MAX_CHUNK)*(q1*q1/(q1*q1+q2*q2))), static_cast(1024)); - - IOSize size_orig = 0; - for (const auto & it : iolist) size_orig += it.size(); - - while (tmp_iolist.size()-front > 0) - { - if ((req1.size() >= XRD_ADAPTOR_CHUNK_THRESHOLD) && (req2.size() >= XRD_ADAPTOR_CHUNK_THRESHOLD)) - { // The XrdFile::readv implementation should guarantee that no more than approximately 1024 chunks - // are passed to the request manager. However, because we have a max chunk size, we increase - // the total number slightly. Theoretically, it's possible an individual readv of total size >2GB where - // each individual chunk is >1MB could result in this firing. However, within the context of CMSSW, - // this cannot happen (ROOT uses readv for TTreeCache; TTreeCache size is 20MB). - edm::Exception ex(edm::errors::FileReadError); - ex << "XrdAdaptor::RequestManager::splitClientRequest(name='" << m_name - << "', flags=0x" << std::hex << m_flags - << ", permissions=0" << std::oct << m_perms << std::dec - << ") => Unable to split request between active servers. This is an unexpected internal error and should be reported to CMSSW developers."; - ex.addContext("In XrdAdaptor::RequestManager::requestFailure()"); - addConnections(ex); - std::stringstream ss; ss << "Original request size " << iolist.size() << "(" << size_orig << " bytes)"; - ex.addAdditionalInfo(ss.str()); - std::stringstream ss2; ss2 << "Quality source 1 " << q1-5 << ", quality source 2: " << q2-5; - ex.addAdditionalInfo(ss2.str()); - throw ex; - } - if (req1.size() < XRD_ADAPTOR_CHUNK_THRESHOLD) {consumeChunkFront(front, tmp_iolist, req1, chunk1);} - if (req2.size() < XRD_ADAPTOR_CHUNK_THRESHOLD) {consumeChunkBack(front, tmp_iolist, req2, chunk2);} +void XrdAdaptor::RequestManager::splitClientRequest(const std::vector &iolist, + std::vector &req1, + std::vector &req2, + std::vector> const &activeSources) const { + if (iolist.empty()) + return; + std::vector tmp_iolist(iolist.begin(), iolist.end()); + req1.reserve(iolist.size() / 2 + 1); + req2.reserve(iolist.size() / 2 + 1); + size_t front = 0; + + // The quality of both is increased by 5 to prevent strange effects if quality is 0 for one source. + float q1 = static_cast(activeSources[0]->getQuality()) + 5; + float q2 = static_cast(activeSources[1]->getQuality()) + 5; + IOSize chunk1, chunk2; + // Make sure the chunk size is at least 1024; little point to reads less than that size. 
+ chunk1 = std::max(static_cast(static_cast(XRD_CL_MAX_CHUNK) * (q2 * q2 / (q1 * q1 + q2 * q2))), + static_cast(1024)); + chunk2 = std::max(static_cast(static_cast(XRD_CL_MAX_CHUNK) * (q1 * q1 / (q1 * q1 + q2 * q2))), + static_cast(1024)); + + IOSize size_orig = 0; + for (const auto &it : iolist) + size_orig += it.size(); + + while (tmp_iolist.size() - front > 0) { + if ((req1.size() >= XRD_ADAPTOR_CHUNK_THRESHOLD) && + (req2.size() >= + XRD_ADAPTOR_CHUNK_THRESHOLD)) { // The XrdFile::readv implementation should guarantee that no more than approximately 1024 chunks + // are passed to the request manager. However, because we have a max chunk size, we increase + // the total number slightly. Theoretically, it's possible an individual readv of total size >2GB where + // each individual chunk is >1MB could result in this firing. However, within the context of CMSSW, + // this cannot happen (ROOT uses readv for TTreeCache; TTreeCache size is 20MB). + edm::Exception ex(edm::errors::FileReadError); + ex << "XrdAdaptor::RequestManager::splitClientRequest(name='" << m_name << "', flags=0x" << std::hex << m_flags + << ", permissions=0" << std::oct << m_perms << std::dec + << ") => Unable to split request between active servers. This is an unexpected internal error and should be " + "reported to CMSSW developers."; + ex.addContext("In XrdAdaptor::RequestManager::requestFailure()"); + addConnections(ex); + std::stringstream ss; + ss << "Original request size " << iolist.size() << "(" << size_orig << " bytes)"; + ex.addAdditionalInfo(ss.str()); + std::stringstream ss2; + ss2 << "Quality source 1 " << q1 - 5 << ", quality source 2: " << q2 - 5; + ex.addAdditionalInfo(ss2.str()); + throw ex; } - std::sort(req1.begin(), req1.end(), [](const IOPosBuffer & left, const IOPosBuffer & right){return left.offset() < right.offset();}); - std::sort(req2.begin(), req2.end(), [](const IOPosBuffer & left, const IOPosBuffer & right){return left.offset() < right.offset();}); - - IOSize size1 = validateList(req1); - IOSize size2 = validateList(req2); + if (req1.size() < XRD_ADAPTOR_CHUNK_THRESHOLD) { + consumeChunkFront(front, tmp_iolist, req1, chunk1); + } + if (req2.size() < XRD_ADAPTOR_CHUNK_THRESHOLD) { + consumeChunkBack(front, tmp_iolist, req2, chunk2); + } + } + std::sort(req1.begin(), req1.end(), [](const IOPosBuffer &left, const IOPosBuffer &right) { + return left.offset() < right.offset(); + }); + std::sort(req2.begin(), req2.end(), [](const IOPosBuffer &left, const IOPosBuffer &right) { + return left.offset() < right.offset(); + }); - assert(size_orig == size1 + size2); + IOSize size1 = validateList(req1); + IOSize size2 = validateList(req2); - edm::LogVerbatim("XrdAdaptorInternal") << "Original request size " << iolist.size() << " (" << size_orig << " bytes) split into requests size " << req1.size() << " (" << size1 << " bytes) and " << req2.size() << " (" << size2 << " bytes)" << std::endl; -} + assert(size_orig == size1 + size2); -XrdAdaptor::RequestManager::OpenHandler::OpenHandler(std::weak_ptr manager) - : m_manager(manager) -{ + edm::LogVerbatim("XrdAdaptorInternal") << "Original request size " << iolist.size() << " (" << size_orig + << " bytes) split into requests size " << req1.size() << " (" << size1 + << " bytes) and " << req2.size() << " (" << size2 << " bytes)" << std::endl; } +XrdAdaptor::RequestManager::OpenHandler::OpenHandler(std::weak_ptr manager) : m_manager(manager) {} - // Cannot use ~OpenHandler=default as XrdCl::File is not fully - // defined in the header. 
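The chunk sizing in splitClientRequest() above weights each source's share by the square of the other source's quality (a lower quality value means a better source), with a 1024-byte floor so tiny reads are not split further. A standalone sketch of that arithmetic, using a placeholder maxChunk in place of XRD_CL_MAX_CHUNK:

#include <algorithm>
#include <cstddef>
#include <utility>

// Quality-weighted chunk sizes: each source's share is proportional to the *other*
// source's squared quality, so the better source (smaller quality value) reads more.
std::pair<std::size_t, std::size_t> chunkSizes(unsigned quality1, unsigned quality2, std::size_t maxChunk) {
  float q1 = static_cast<float>(quality1) + 5;  // +5 avoids degenerate behaviour at quality 0
  float q2 = static_cast<float>(quality2) + 5;
  std::size_t c1 = std::max<std::size_t>(static_cast<std::size_t>(maxChunk * (q2 * q2 / (q1 * q1 + q2 * q2))), 1024);
  std::size_t c2 = std::max<std::size_t>(static_cast<std::size_t>(maxChunk * (q1 * q1 / (q1 * q1 + q2 * q2))), 1024);
  return {c1, c2};
}

// Example: qualities 5 and 15 (10 and 20 after the +5 offset) give the better source
// 400/500 = 80% of each pass and the slower source the remaining 20%.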
-XrdAdaptor::RequestManager::OpenHandler::~OpenHandler() -{ -} - +// Cannot use ~OpenHandler=default as XrdCl::File is not fully +// defined in the header. +XrdAdaptor::RequestManager::OpenHandler::~OpenHandler() {} -void -XrdAdaptor::RequestManager::OpenHandler::HandleResponseWithHosts(XrdCl::XRootDStatus *status_ptr, XrdCl::AnyObject *, XrdCl::HostList *hostList_ptr) -{ +void XrdAdaptor::RequestManager::OpenHandler::HandleResponseWithHosts(XrdCl::XRootDStatus *status_ptr, + XrdCl::AnyObject *, + XrdCl::HostList *hostList_ptr) { // NOTE: as in XrdCl::File (synchronous), we ignore the response object. // Make sure that we set m_outstanding_open to false on exit from this function. // NOTE: we need to pass non-nullptr to unique_ptr in order for the guard to run - std::unique_ptr> outstanding_guard(this, [&](OpenHandler*){m_outstanding_open=false;}); + std::unique_ptr> outstanding_guard( + this, [&](OpenHandler *) { m_outstanding_open = false; }); std::shared_ptr source; std::unique_ptr status(status_ptr); @@ -1083,10 +980,9 @@ XrdAdaptor::RequestManager::OpenHandler::HandleResponseWithHosts(XrdCl::XRootDSt m_self.reset(); auto manager = m_manager.lock(); - // Manager object has already been deleted. Cleanup the - // response objects, remove our self-reference, and ignore the response. - if (!manager) - { + // Manager object has already been deleted. Cleanup the + // response objects, remove our self-reference, and ignore the response. + if (!manager) { return; } //if we need to delete the File object we must do it outside @@ -1095,119 +991,107 @@ XrdAdaptor::RequestManager::OpenHandler::HandleResponseWithHosts(XrdCl::XRootDSt { std::lock_guard sentry(m_mutex); - if (status->IsOK()) - { - SendMonitoringInfo(*m_file); - timespec now; - GET_CLOCK_MONOTONIC(now); + if (status->IsOK()) { + SendMonitoringInfo(*m_file); + timespec now; + GET_CLOCK_MONOTONIC(now); - std::string excludeString; - Source::determineHostExcludeString(*m_file, hostList.get(), excludeString); + std::string excludeString; + Source::determineHostExcludeString(*m_file, hostList.get(), excludeString); - source.reset(new Source(now, std::move(m_file), excludeString)); - m_promise.set_value(source); - } - else - { - releaseFile = std::move(m_file); - edm::Exception ex(edm::errors::FileOpenError); - ex << "XrdCl::File::Open(name='" << manager->m_name - << "', flags=0x" << std::hex << manager->m_flags - << ", permissions=0" << std::oct << manager->m_perms << std::dec - << ") => error '" << status->ToStr() - << "' (errno=" << status->errNo << ", code=" << status->code << ")"; - ex.addContext("In XrdAdaptor::RequestManager::OpenHandler::HandleResponseWithHosts()"); - manager->addConnections(ex); - - m_promise.set_exception(std::make_exception_ptr(ex)); + source.reset(new Source(now, std::move(m_file), excludeString)); + m_promise.set_value(source); + } else { + releaseFile = std::move(m_file); + edm::Exception ex(edm::errors::FileOpenError); + ex << "XrdCl::File::Open(name='" << manager->m_name << "', flags=0x" << std::hex << manager->m_flags + << ", permissions=0" << std::oct << manager->m_perms << std::dec << ") => error '" << status->ToStr() + << "' (errno=" << status->errNo << ", code=" << status->code << ")"; + ex.addContext("In XrdAdaptor::RequestManager::OpenHandler::HandleResponseWithHosts()"); + manager->addConnections(ex); + + m_promise.set_exception(std::make_exception_ptr(ex)); } } manager->handleOpen(*status, source); } -std::string -XrdAdaptor::RequestManager::OpenHandler::current_source() -{ - std::lock_guard 
sentry(m_mutex); +std::string XrdAdaptor::RequestManager::OpenHandler::current_source() { + std::lock_guard sentry(m_mutex); - if (!m_file.get()) - { - return "(no open in progress)"; - } - std::string dataServer; - m_file->GetProperty("DataServer", dataServer); - if (dataServer.empty()) { return "(unknown source)"; } - return dataServer; + if (!m_file.get()) { + return "(no open in progress)"; + } + std::string dataServer; + m_file->GetProperty("DataServer", dataServer); + if (dataServer.empty()) { + return "(unknown source)"; + } + return dataServer; } -std::shared_future > -XrdAdaptor::RequestManager::OpenHandler::open() -{ - auto manager_ptr = m_manager.lock(); - if (!manager_ptr) - { - edm::Exception ex(edm::errors::LogicError); - ex << "XrdCl::File::Open() =>" - << " error: OpenHandler called within an invalid RequestManager context." - << " This is a logic error and should be reported to the CMSSW developers."; - ex.addContext("Calling XrdAdaptor::RequestManager::OpenHandler::open()"); - throw ex; - } - RequestManager &manager = *manager_ptr; - auto self_ptr = m_self_weak.lock(); - if (!self_ptr) - { - edm::Exception ex(edm::errors::LogicError); - ex << "XrdCl::File::Open() => error: " - << "OpenHandler called after it was deleted. This is a logic error " - << "and should be reported to the CMSSW developers."; - ex.addContext("Calling XrdAdapter::RequestManager::OpenHandler::open()"); - throw ex; - } +std::shared_future> XrdAdaptor::RequestManager::OpenHandler::open() { + auto manager_ptr = m_manager.lock(); + if (!manager_ptr) { + edm::Exception ex(edm::errors::LogicError); + ex << "XrdCl::File::Open() =>" + << " error: OpenHandler called within an invalid RequestManager context." + << " This is a logic error and should be reported to the CMSSW developers."; + ex.addContext("Calling XrdAdaptor::RequestManager::OpenHandler::open()"); + throw ex; + } + RequestManager &manager = *manager_ptr; + auto self_ptr = m_self_weak.lock(); + if (!self_ptr) { + edm::Exception ex(edm::errors::LogicError); + ex << "XrdCl::File::Open() => error: " + << "OpenHandler called after it was deleted. This is a logic error " + << "and should be reported to the CMSSW developers."; + ex.addContext("Calling XrdAdapter::RequestManager::OpenHandler::open()"); + throw ex; + } - // NOTE NOTE: we look at this variable *without* the lock. This means the method - // is not thread-safe; the caller is responsible to verify it is not called from - // multiple threads simultaneously. - // - // This is done because ::open may be called from a Xrootd callback; if we - // tried to hold m_mutex here, this object's callback may also be active, hold m_mutex, - // and make a call into xrootd (when it invokes m_file.reset()). Hence, our callback - // holds our mutex and attempts to grab an Xrootd mutex; RequestManager::requestFailure holds - // an Xrootd mutex and tries to hold m_mutex. This is a classic deadlock. - if (m_outstanding_open) - { - return m_shared_future; - } - std::lock_guard sentry(m_mutex); - std::promise > new_promise; - m_promise.swap(new_promise); - m_shared_future = m_promise.get_future().share(); - - auto opaque = manager.prepareOpaqueString(); - std::string new_name = manager.m_name + ((manager.m_name.find("?") == manager.m_name.npos) ? "?" : "&") + opaque; - edm::LogVerbatim("XrdAdaptorInternal") << "Trying to open URL: " << new_name; - m_file.reset(new XrdCl::File()); - m_outstanding_open = true; - - // Always make sure we release m_file and set m_outstanding_open to false on error. 
- std::unique_ptr> exit_guard(this, [&](OpenHandler*){m_outstanding_open = false; m_file.reset();}); - - XrdCl::XRootDStatus status; - if (!(status = m_file->Open(new_name, manager.m_flags, manager.m_perms, this)).IsOK()) - { - edm::Exception ex(edm::errors::FileOpenError); - ex << "XrdCl::File::Open(name='" << new_name - << "', flags=0x" << std::hex << manager.m_flags - << ", permissions=0" << std::oct << manager.m_perms << std::dec - << ") => error '" << status.ToStr() - << "' (errno=" << status.errNo << ", code=" << status.code << ")"; - ex.addContext("Calling XrdAdaptor::RequestManager::OpenHandler::open()"); - manager.addConnections(ex); - throw ex; - } - exit_guard.release(); - // Have a strong self-reference for as long as the callback is in-progress. - m_self = self_ptr; + // NOTE NOTE: we look at this variable *without* the lock. This means the method + // is not thread-safe; the caller is responsible to verify it is not called from + // multiple threads simultaneously. + // + // This is done because ::open may be called from a Xrootd callback; if we + // tried to hold m_mutex here, this object's callback may also be active, hold m_mutex, + // and make a call into xrootd (when it invokes m_file.reset()). Hence, our callback + // holds our mutex and attempts to grab an Xrootd mutex; RequestManager::requestFailure holds + // an Xrootd mutex and tries to hold m_mutex. This is a classic deadlock. + if (m_outstanding_open) { return m_shared_future; + } + std::lock_guard sentry(m_mutex); + std::promise> new_promise; + m_promise.swap(new_promise); + m_shared_future = m_promise.get_future().share(); + + auto opaque = manager.prepareOpaqueString(); + std::string new_name = manager.m_name + ((manager.m_name.find("?") == manager.m_name.npos) ? "?" : "&") + opaque; + edm::LogVerbatim("XrdAdaptorInternal") << "Trying to open URL: " << new_name; + m_file.reset(new XrdCl::File()); + m_outstanding_open = true; + + // Always make sure we release m_file and set m_outstanding_open to false on error. + std::unique_ptr> exit_guard(this, [&](OpenHandler *) { + m_outstanding_open = false; + m_file.reset(); + }); + + XrdCl::XRootDStatus status; + if (!(status = m_file->Open(new_name, manager.m_flags, manager.m_perms, this)).IsOK()) { + edm::Exception ex(edm::errors::FileOpenError); + ex << "XrdCl::File::Open(name='" << new_name << "', flags=0x" << std::hex << manager.m_flags << ", permissions=0" + << std::oct << manager.m_perms << std::dec << ") => error '" << status.ToStr() << "' (errno=" << status.errNo + << ", code=" << status.code << ")"; + ex.addContext("Calling XrdAdaptor::RequestManager::OpenHandler::open()"); + manager.addConnections(ex); + throw ex; + } + exit_guard.release(); + // Have a strong self-reference for as long as the callback is in-progress. + m_self = self_ptr; + return m_shared_future; } -
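Both outstanding_guard in HandleResponseWithHosts() and exit_guard in open() rely on the same idiom: a std::unique_ptr with a custom deleter acting as an ad-hoc scope guard, where the "deleter" is the cleanup that must run on every early exit (including exceptions) and release() disarms it once the success path is reached. As the code notes, the pointee just has to be non-null for the deleter to fire. A minimal sketch of the idiom with illustrative names:

#include <functional>
#include <memory>

void example(bool& inProgress) {
  inProgress = true;
  // Scope guard: the deleter resets the flag on any exit path unless released.
  std::unique_ptr<bool, std::function<void(bool*)>> guard(&inProgress, [](bool* f) { *f = false; });

  // ... work that may throw or return early; `inProgress` is cleared on those paths ...

  guard.release();  // success: keep the flag set and skip the cleanup
}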