Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 58 additions & 7 deletions FWCore/Services/plugins/SimpleMemoryCheck.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,6 @@
#include <atomic>

namespace edm {
class EventID;
class Timestamp;

namespace service {
struct smapsInfo {
smapsInfo() : private_(), pss_() {}
Expand Down Expand Up @@ -95,6 +92,9 @@ namespace edm {

void postEndJob();

void startSamplingThread();
void stopSamplingThread();

private:
ProcInfo fetch();
smapsInfo fetchSmaps();
Expand Down Expand Up @@ -126,6 +126,10 @@ namespace edm {
bool jobReportOutputOnly_;
bool monitorPssAndPrivate_;
std::atomic<int> count_;
unsigned int sampleEveryNSeconds_;
std::optional<std::thread> samplingThread_;
std::atomic<bool> stopThread_ = false;
std::atomic<edm::EventID> mostRecentlyStartedEvent_;

//smaps
edm::propagate_const<FILE*> smapsFile_;
Expand Down Expand Up @@ -337,6 +341,7 @@ namespace edm {
jobReportOutputOnly_(iPS.getUntrackedParameter<bool>("jobReportOutputOnly")),
monitorPssAndPrivate_(iPS.getUntrackedParameter<bool>("monitorPssAndPrivate")),
count_(),
sampleEveryNSeconds_(iPS.getUntrackedParameter<unsigned int>("sampleEveryNSeconds")),
smapsFile_(nullptr),
smapsLineBuffer_(nullptr),
smapsLineBufferLen_(0),
Expand All @@ -350,6 +355,22 @@ namespace edm {

openFiles();

if (sampleEveryNSeconds_ > 0) {
if (oncePerEventMode_) {
throw edm::Exception(edm::errors::Configuration)
<< "'sampleEventNSeconds' and 'oncePerEventMode' cannot be used together";
}
if (moduleSummaryRequested_) {
throw edm::Exception(edm::errors::Configuration)
<< "'sampleEventNSeconds' and 'moduleSummaryRequested' cannot be used together";
}
iReg.watchPostBeginJob(this, &SimpleMemoryCheck::startSamplingThread);
iReg.watchPreEndJob(this, &SimpleMemoryCheck::stopSamplingThread);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to listen the early termination signals too, or do we call PreEndJob after those?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess with #45434 we try to call EndJob if it is possible in any way (alternative being the job terminating via std::terminate() or similar)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that the EndJob transition should be run after early termination.

Copy link
Contributor

@wddgit wddgit Sep 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree also. That is what it should be doing.

iReg.watchPostEndJob(this, &SimpleMemoryCheck::postEndJob);
iReg.watchPreEvent([this](auto const& iContext) { mostRecentlyStartedEvent_.store(iContext.eventID()); });
return;
}

if (!oncePerEventMode_) { // default, prints on increases
iReg.watchPreSourceConstruction(this, &SimpleMemoryCheck::preSourceConstruction);
iReg.watchPostSourceConstruction(this, &SimpleMemoryCheck::postSourceConstruction);
Expand Down Expand Up @@ -395,13 +416,22 @@ namespace edm {

void SimpleMemoryCheck::fillDescriptions(ConfigurationDescriptions& descriptions) {
ParameterSetDescription desc;
desc.addUntracked<int>("ignoreTotal", 1);
desc.addUntracked<int>("ignoreTotal", 1)
->setComment("Number of events/samples to finish before starting measuring and reporting.");
desc.addUntracked<unsigned int>("sampleEveryNSeconds", 0)
->setComment(
"Use a special thread to sample memory at the set rate. A value of 0 means no sampling. This option "
"cannot be used with 'oncePerEventMode' or 'moduleMemorySummary'.");
desc.addUntracked<bool>("showMallocInfo", false);
desc.addUntracked<bool>("oncePerEventMode", false);
desc.addUntracked<bool>("oncePerEventMode", false)
->setComment(
"Only check memory at the end of each event. Not as useful in multi-threaded job as other running events "
"contribute.");
desc.addUntracked<bool>("jobReportOutputOnly", false);
desc.addUntracked<bool>("monitorPssAndPrivate", false);
desc.addUntracked<bool>("moduleMemorySummary", false);
desc.addUntracked<bool>("dump", false);
desc.addUntracked<bool>("moduleMemorySummary", false)
->setComment(
"Track significant memory events for each module. This does not work well in multi-threaded jobs.");
descriptions.add("SimpleMemoryCheck", desc);
}

Expand Down Expand Up @@ -467,6 +497,27 @@ namespace edm {
}
}

void SimpleMemoryCheck::startSamplingThread() {
samplingThread_ = std::thread{[this]() {
while (not stopThread_) {
std::this_thread::sleep_for(std::chrono::duration<unsigned int>(sampleEveryNSeconds_));
++count_;
update();
if (monitorPssAndPrivate_) {
currentSmaps_ = fetchSmaps();
}
auto e = mostRecentlyStartedEvent_.load();
andPrint("sampling", "", "");
updateEventStats(e);
updateMax();
}
}};
}
void SimpleMemoryCheck::stopSamplingThread() {
stopThread_ = true;
samplingThread_->join();
}

void SimpleMemoryCheck::postEndJob() {
if (not jobReportOutputOnly_) {
LogAbsolute("MemoryReport") // changelog 1
Expand Down