Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions FWCore/Services/src/SiteLocalConfigService.cc
Original file line number Diff line number Diff line change
Expand Up @@ -576,10 +576,12 @@ namespace edm {
"Specify the file containing the site local config. Empty string will load from default directory.");
desc.addOptionalUntracked<std::string>("overrideSourceCacheTempDir");
desc.addOptionalUntracked<double>("overrideSourceCacheMinFree");
desc.addOptionalUntracked<std::string>("overrideSourceCacheHintDir");
desc.addOptionalUntracked<std::string>("overrideSourceCacheHintDir")
->setComment("Set cache hint. See AdaptorConfig plugin for valid values.");
desc.addOptionalUntracked<std::string>("overrideSourceCloneCacheHintDir")
->setComment("Provide an alternate cache hint for fast cloning.");
desc.addOptionalUntracked<std::string>("overrideSourceReadHint");
desc.addOptionalUntracked<std::string>("overrideSourceReadHint")
->setComment("Set read hint. See AdaptorConfig plugin for valid values.");
desc.addOptionalUntracked<std::vector<std::string> >("overrideSourceNativeProtocols");
desc.addOptionalUntracked<unsigned int>("overrideSourceTTreeCacheSize");
desc.addOptionalUntracked<unsigned int>("overrideSourceTimeout");
Expand Down
94 changes: 67 additions & 27 deletions IOPool/TFileAdaptor/src/TFileAdaptor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ParameterSet/interface/PluginDescription.h"
#include "FWCore/Reflection/interface/SetClassParsing.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "FWCore/Utilities/interface/EDMException.h"
#include "Utilities/StorageFactory/interface/StorageAccount.h"
#include "Utilities/StorageFactory/interface/StorageFactory.h"
#include "Utilities/StorageFactory/interface/StorageProxyMaker.h"
#include "Utilities/StorageFactory/interface/StorageProxyMakerFactory.h"

#include <TROOT.h>
#include <TFile.h>
Expand Down Expand Up @@ -68,32 +71,26 @@ bool TFileAdaptor::native(char const* proto) const {
}

TFileAdaptor::TFileAdaptor(edm::ParameterSet const& pset, edm::ActivityRegistry& ar)
: enabled_(true),
doStats_(true),
: enabled_(pset.getUntrackedParameter<bool>("enable")),
doStats_(pset.getUntrackedParameter<bool>("stats")),
enablePrefetching_(false),
cacheHint_("auto-detect"),
readHint_("auto-detect"),
tempDir_(),
minFree_(0),
// values set in the site local config or in SiteLocalConfigService override
// any values set here for this service.
// These parameters here are needed only for backward compatibility
// for WMDM tools until we switch to only using the site local config for this info.
cacheHint_(pset.getUntrackedParameter<std::string>("cacheHint")),
readHint_(pset.getUntrackedParameter<std::string>("readHint")),
tempDir_(pset.getUntrackedParameter<std::string>("tempDir")),
minFree_(pset.getUntrackedParameter<double>("tempMinFree")),
native_(pset.getUntrackedParameter<std::vector<std::string>>("native")),
// end of section of values overridden by SiteLocalConfigService
timeout_(0U),
debugLevel_(0U),
native_() {
if (!(enabled_ = pset.getUntrackedParameter<bool>("enable", enabled_)))
debugLevel_(0U) {
if (not enabled_)
return;

using namespace edm::storage;
StorageFactory* f = StorageFactory::getToModify();
doStats_ = pset.getUntrackedParameter<bool>("stats", doStats_);

// values set in the site local config or in SiteLocalConfigService override
// any values set here for this service.
// These parameters here are needed only for backward compatibility
// for WMDM tools until we switch to only using the site local config for this info.
cacheHint_ = pset.getUntrackedParameter<std::string>("cacheHint", cacheHint_);
readHint_ = pset.getUntrackedParameter<std::string>("readHint", readHint_);
tempDir_ = pset.getUntrackedParameter<std::string>("tempDir", f->tempPath());
minFree_ = pset.getUntrackedParameter<double>("tempMinFree", f->tempMinFree());
native_ = pset.getUntrackedParameter<std::vector<std::string> >("native", native_);

ar.watchPostEndJob(this, &TFileAdaptor::termination);

Expand Down Expand Up @@ -161,6 +158,15 @@ TFileAdaptor::TFileAdaptor(edm::ParameterSet const& pset, edm::ActivityRegistry&
// tell where to save files.
f->setTempDir(tempDir_, minFree_);

// forward generic storage proxy makers
{
std::vector<std::unique_ptr<StorageProxyMaker>> makers;
for (auto const& pset : pset.getUntrackedParameter<std::vector<edm::ParameterSet>>("storageProxies")) {
makers.push_back(StorageProxyMakerFactory::get()->create(pset.getUntrackedParameter<std::string>("type"), pset));
}
f->setStorageProxyMakers(std::move(makers));
}

// set our own root plugins
TPluginManager* mgr = gROOT->GetPluginManager();

Expand Down Expand Up @@ -203,15 +209,49 @@ TFileAdaptor::TFileAdaptor(edm::ParameterSet const& pset, edm::ActivityRegistry&
}

void TFileAdaptor::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
using namespace edm::storage;
edm::ParameterSetDescription desc;
desc.addOptionalUntracked<bool>("enable");
desc.addOptionalUntracked<bool>("stats");
desc.addOptionalUntracked<std::string>("cacheHint");
desc.addOptionalUntracked<std::string>("readHint");
desc.addOptionalUntracked<std::string>("tempDir");
desc.addOptionalUntracked<double>("tempMinFree");
desc.addOptionalUntracked<std::vector<std::string> >("native");
desc.addUntracked<bool>("enable", true)->setComment("Enable or disable TFileAdaptor behavior");
desc.addUntracked<bool>("stats", true);
desc.addUntracked<std::string>("cacheHint", "auto-detect")
->setComment(
"Hint for read caching. Possible values: 'application-only', 'storage-only', 'lazy-download', 'auto-detect'. "
"The value from the SiteLocalConfigService overrides the value set here. In addition, if the "
"SiteLocalConfigService has prefetching enabled, the default hint is 'application-only'.");
desc.addUntracked<std::string>("readHint", "auto-detect")
->setComment(
"Hint for reading itself. Possible values: 'direct-unbuffered', 'read-ahead-buffered', 'auto-detect'. The "
"value from SiteLocalConfigService overrides the value set here.");
desc.addUntracked<std::string>("tempDir", StorageFactory::defaultTempDir())
->setComment(
"Colon-separated list of directories that storage implementations downloading the full file could place the "
"file. The value from SiteLocalConfigService overrides the value set here.");
desc.addUntracked<double>("tempMinFree", StorageFactory::defaultMinTempFree())
->setComment(
"Minimum amount of space in GB required for a temporary data directory specified in tempDir. The value from "
"SiteLocalConfigService overrides the value set here.");
desc.addUntracked<std::vector<std::string>>("native", {})
->setComment(
"Set of protocols for which to use a native ROOT storage implementation instead of CMSSW's StorageFactory. "
"Valid "
"values are 'file', 'http', 'ftp', 'dcache', 'dcap', 'gsidcap', 'root', or 'all' to prefer ROOT for all "
"protocols. The value from SiteLocalConfigService overrides the value set here.");

edm::ParameterSetDescription proxyMakerDesc;
proxyMakerDesc.addNode(edm::PluginDescription<edm::storage::StorageProxyMakerFactory>("type", false));
std::vector<edm::ParameterSet> proxyMakerDefaults;
desc.addVPSetUntracked("storageProxies", proxyMakerDesc, proxyMakerDefaults)
->setComment(
"Ordered list of Storage proxies the real Storage object is wrapped into. The real Storage is wrapped into "
"the first element of the list, then that proxy is wrapped into the second element of the list and so on. "
"Only after this wrapping are the LocalCacheFile (lazy-download) and statistics accounting ('stats' "
"parameter) proxies applied.");

descriptions.add("AdaptorConfig", desc);
descriptions.setComment(
"AdaptorConfig Service is used to configure the TFileAdaptor. If enabled, the TFileAdaptor registers "
"TStorageFactoryFile as a handler for various protocols. The StorageFactory facility provides custom storage "
"access implementations for these protocols, as well as statistics accounting.");
}

// Write current Storage statistics on a ostream
Expand Down
2 changes: 1 addition & 1 deletion IOPool/TFileAdaptor/src/TFileAdaptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ class TFileAdaptor {
std::string readHint_;
std::string tempDir_;
double minFree_;
std::vector<std::string> native_;
unsigned int timeout_;
unsigned int debugLevel_;
std::vector<std::string> native_;
};

namespace edm {
Expand Down
5 changes: 3 additions & 2 deletions Utilities/DCacheAdaptor/plugins/DCacheStorageMaker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@ namespace edm::storage {
else
mode |= IOFlags::OpenUnbuffered;

auto file = std::make_unique<DCacheFile>(normalise(proto, path), mode);
return f->wrapNonLocalFile(std::move(file), proto, std::string(), mode);
return std::make_unique<DCacheFile>(normalise(proto, path), mode);
}

void stagein(const std::string &proto, const std::string &path, const AuxSettings &aux) const override {
Expand Down Expand Up @@ -77,6 +76,8 @@ namespace edm::storage {
return true;
}

// Always reports UseLocalFile::kNo for this storage maker.
// NOTE(review): presumably this tells StorageFactory that dCache-backed storages are never local files — confirm against the StorageMaker base class.
UseLocalFile usesLocalFile() const override { return UseLocalFile::kNo; }

private:
void setTimeout(unsigned int timeout) const {
if (timeout != 0)
Expand Down
6 changes: 3 additions & 3 deletions Utilities/DavixAdaptor/plugins/DavixStorageMaker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@ namespace edm::storage {
const std::string &path,
int mode,
AuxSettings const &aux) const override {
const StorageFactory *f = StorageFactory::get();
std::string newurl((proto == "web" ? "http" : proto) + ":" + path);
auto file = std::make_unique<DavixFile>(newurl, mode);
return f->wrapNonLocalFile(std::move(file), proto, std::string(), mode);
return std::make_unique<DavixFile>(newurl, mode);
}

bool check(const std::string &proto,
Expand All @@ -43,6 +41,8 @@ namespace edm::storage {
}
return true;
}

// Always reports UseLocalFile::kNo for this storage maker.
// NOTE(review): presumably this tells StorageFactory that Davix (http/web) storages are never local files — confirm against the StorageMaker base class.
UseLocalFile usesLocalFile() const override { return UseLocalFile::kNo; }
};
} // namespace edm::storage

Expand Down
1 change: 1 addition & 0 deletions Utilities/StorageFactory/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
<use name="FWCore/ParameterSet"/>
<use name="FWCore/PluginManager"/>
<use name="FWCore/MessageLogger"/>
<use name="FWCore/Utilities"/>
Expand Down
60 changes: 55 additions & 5 deletions Utilities/StorageFactory/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Factory interface for constructing `edm::storage::Storage` instances. Also provi
`StorageFactory` provides two implementations of `edm::storage::Storage` classes which can be used to wrap around any other `Storage` object.

### `edm::storage::LocalCacheFile`
Does memory mapped caching of the wrapped `Storage` object. This is only applied if `CACHE_HINT_LAZY_DOWNLOAD` is set for `cacheHint` or the protocol handling code explicit passes `IOFlags::OpenWrap` to `StorageFactory::wrapNonLocalFile`. The wrapping does not happen if the Storage is open for writing nor if the Storage is associated with a file on the local file system.
Does memory mapped caching of the wrapped `Storage` object. This is only applied if `CACHE_HINT_LAZY_DOWNLOAD` is set for `cacheHint` or the protocol handling code explicitly passes `IOFlags::OpenWrap` to `StorageFactory::wrapNonLocalFile`. The wrapping does not happen if the Storage is open for writing nor if the Storage is associated with a file on the local file system. Note that files using the `file:` protocol _can_ end up using `LocalCacheFile` if the path is determined to be on a non-local file system.

### `edm::storage::StorageAccountProxy`
This wraps the `Storage` object and provides per protocol accounting information (e.g. number of bytes read) to `edm::storage::StorageAccount`. This is only used if `StorageFactory::accounting()` returns `true`.
Expand All @@ -27,16 +27,66 @@ A singleton used to aggragate statistics about all storage calls for each protoc
### `edm::storage::StorageAccount::StorageClassToken`
Each protocol is associated to a token for quick lookup.


## Generic storage proxies

This facility resembles the `edm::storage::LocalCacheFile` and `edm::storage::StorageAccountProxy` in the way that `edm::storage::Storage` objects constructed by the concrete `edm::storage::StorageMaker` are wrapped into other `edm::storage::Storage` objects.

The proxies are configured via `TFileAdaptor`'s `storageProxies` `VPSet` configuration parameter. The proxies are wrapped in the order they are specified in the `VPSet`, i.e. the first element wraps the concrete `edm::storage::Storage`, the second element wraps the first element, etc. The `edm::storage::StorageAccountProxy` and `edm::storage::LocalCacheFile` wrap the last storage proxy according to their usual behavior.

Each concrete proxy comes with two classes, the proxy class itself (inheriting from the `edm::storage::StorageProxyBase`) and a maker class (inheriting from the `edm::storage::StorageProxyMaker`). This "factory of factories" pattern is used because a maker is created once per job (in `TFileAdaptor`), and the maker object is used to create a proxy object for each file.

### Concrete proxy classes

The convention is to use the proxy class name as the plugin name for the maker, as the proxy is really what the user would care for. The headings of the subsections correspond to the plugin names.

#### `StorageTracerProxy`

The `edm::storage::StorageTracerProxy` (and the corresponding `edm::storage::StorageTracerProxyMaker`) produces a text file with a trace of all IO operations at the `StorageFactory` level. The behavior of each concrete `Storage` object (such as further splitting of read requests in `XrdAdaptor`) is not captured in these tracers. The structure of the trace file is described in a preamble in the trace file.

The plugin has a configuration parameter for a pattern for the trace files. The pattern must contain at least one `%I`. The maker has an atomic counter for the files, and all occurrences of `%I` are replaced with the value of that counter for the given file.

There is an `edmStorageTracer.py` script for doing some analyses of the traces.

The `StorageTracerProxy` also provides a way to correlate the trace entries with the rest of the framework via [MessageLogger](../../FWCore/MessageService/Readme.md) messages. These messages are issued with the DEBUG severity and `IOTrace` category. There are additional, higher-level messages as part of the `PoolSource`. To see these messages, compile the `Utilities/Storage` and `IOPool/Input` packages with `USER_CXXFLAGS="-DEDM_ML_DEBUG"`, and customize the MessageLogger configuration along these lines:
```py
process.MessageLogger.cerr.threshold = "DEBUG"
process.MessageLogger.debugModules = ["*"]
process.MessageLogger.IOTrace = dict()
```

#### `StorageAddLatencyProxy`

The `edm::storage::StorageAddLatencyProxy` (and the corresponding `edm::storage::StorageAddLatencyProxyMaker`) can be used to add artificial latency to the IO operations. The plugin has configuration parameters for latencies of singular reads, vector reads, singular writes, and vector writes.

If used together with `StorageTracerProxy`, e.g. to simulate the behavior of high-latency storage systems with local files, the `storageProxies` `VPSet` should have `StorageAddLatencyProxy` first, followed by `StorageTracerProxy`.

### Other components

#### `edm::storage::StorageProxyBase`

Inherits from `edm::storage::Storage` and is the base class for the proxy classes.

#### `edm::storage::StorageProxyMaker`

Base class for the proxy makers.


## Related classes in other packages

### TStorageFactoryFile
Inherits from `TFile` but uses `edm::storage::Storage` instances when doing the actual read/write operations. The class explicitly uses `"tstoragefile"` when communicating with `edm::storage::StorageAccount`.

### TFileAdaptor
TFileAdaptor is a cmsRun Service. It explicitly registers the use of `TStorageFactoryFile` with ROOT's `TFile::Open` system. The parameters passed to `TFileAdaptor` are relayed to `edm::storage::StorageFactory` to setup the defaults for the job.
### `TFileAdaptor`

### CondorStatusService
`TFileAdaptor` is a cmsRun Service (with a plugin name of `AdaptorConfig`, see [IOPool/TFileAdaptor/README.md](../../IOPool/TFileAdaptor/README.md)). It explicitly registers the use of `TStorageFactoryFile` with ROOT's `TFile::Open` system. The parameters passed to `TFileAdaptor` are relayed to `edm::storage::StorageFactory` to set up the defaults for the job.

### `CondorStatusService`
Sends condor _Chirp_ messages periodically from cmsRun. These include the most recent aggregated `edm::storage::StorageAccount` information for all protocols being used except for the `"tstoragefile"` protocol.

### StatisticsSenderService
### `StatisticsSenderService`
A cmsRun Service which sends out UDP packets about the state of the system. The information is sent when a primary file closes and includes the recent aggregated `edm::storage::StorageAccount` information for all protocols being used except for the `"tstoragefile"` protocol.

### `XrdAdaptor`

An `edm::storage::Storage` implementation for xrootd (see [Utilities/XrdAdaptor/README.md](../../Utilities/XrdAdaptor/README.md)).
28 changes: 21 additions & 7 deletions Utilities/StorageFactory/interface/StorageFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,16 @@
#include "Utilities/StorageFactory/interface/LocalFileSystem.h"
#include "Utilities/StorageFactory/interface/IOTypes.h"
#include "Utilities/StorageFactory/interface/IOFlags.h"
#include <string>

#include <memory>
#include <string>
#include <tuple>

#include "oneapi/tbb/concurrent_unordered_map.h"

namespace edm::storage {
class Storage;
class StorageProxyMaker;
class StorageFactory {
public:
enum CacheHint { CACHE_HINT_APPLICATION, CACHE_HINT_STORAGE, CACHE_HINT_LAZY_DOWNLOAD, CACHE_HINT_AUTO_DETECT };
Expand All @@ -20,6 +24,10 @@ namespace edm::storage {
static const StorageFactory *get(void);
static StorageFactory *getToModify(void);

// Default minimum free space, in GB, required for a temporary directory.
static double defaultMinTempFree() { return 4.; }
static std::string defaultTempDir();

~StorageFactory(void);

// implicit copy constructor
Expand All @@ -45,22 +53,27 @@ namespace edm::storage {
std::string tempPath(void) const;
double tempMinFree(void) const;

void setStorageProxyMakers(std::vector<std::unique_ptr<StorageProxyMaker>> makers);

void stagein(const std::string &url) const;
std::unique_ptr<Storage> open(const std::string &url, int mode = IOFlags::OpenRead) const;
std::unique_ptr<Storage> open(const std::string &url, const int mode = IOFlags::OpenRead) const;
bool check(const std::string &url, IOOffset *size = nullptr) const;

std::unique_ptr<Storage> wrapNonLocalFile(std::unique_ptr<Storage> s,
const std::string &proto,
const std::string &path,
int mode) const;

private:
typedef oneapi::tbb::concurrent_unordered_map<std::string, std::shared_ptr<StorageMaker>> MakerTable;

StorageFactory(void);
StorageMaker *getMaker(const std::string &proto) const;
StorageMaker *getMaker(const std::string &url, std::string &protocol, std::string &rest) const;

// Returns
// - Storage 's' possibly wrapped in LocalCacheFile
// - bool telling if LocalCacheFile is used
std::tuple<std::unique_ptr<Storage>, bool> wrapNonLocalFile(std::unique_ptr<Storage> s,
const std::string &proto,
const std::string &path,
const int mode) const;

mutable MakerTable m_makers;
CacheHint m_cacheHint;
ReadHint m_readHint;
Expand All @@ -72,6 +85,7 @@ namespace edm::storage {
unsigned int m_timeout;
unsigned int m_debugLevel;
LocalFileSystem m_lfs;
std::vector<std::unique_ptr<StorageProxyMaker>> m_storageProxyMakers_;
static StorageFactory s_instance;
};
} // namespace edm::storage
Expand Down
Loading