-
Notifications
You must be signed in to change notification settings - Fork 4.6k
Improve Framework behavior after exceptions in begin/end transitions (Job, Stream, ProcessBlock) #45434
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve Framework behavior after exceptions in begin/end transitions (Job, Stream, ProcessBlock) #45434
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -75,7 +75,9 @@ | |
| #include "FWCore/MessageLogger/interface/JobReport.h" | ||
| #include "FWCore/MessageLogger/interface/MessageLogger.h" | ||
| #include "FWCore/ServiceRegistry/interface/Service.h" | ||
| #include "FWCore/ServiceRegistry/interface/ServiceRegistry.h" | ||
| #include "FWCore/ServiceRegistry/interface/ServiceRegistryfwd.h" | ||
| #include "FWCore/ServiceRegistry/interface/ServiceToken.h" | ||
| #include "FWCore/ServiceRegistry/interface/StreamContext.h" | ||
| #include "FWCore/Concurrency/interface/FunctorTask.h" | ||
| #include "FWCore/Concurrency/interface/WaitingTaskHolder.h" | ||
|
|
@@ -88,8 +90,10 @@ | |
| #include "FWCore/Utilities/interface/propagate_const.h" | ||
| #include "FWCore/Utilities/interface/thread_safety_macros.h" | ||
|
|
||
| #include <exception> | ||
| #include <map> | ||
| #include <memory> | ||
| #include <mutex> | ||
| #include <set> | ||
| #include <string> | ||
| #include <vector> | ||
|
|
@@ -111,8 +115,6 @@ namespace edm { | |
| class PathStatusInserter; | ||
| class EndPathStatusInserter; | ||
| class PreallocationConfiguration; | ||
| class WaitingTaskHolder; | ||
|
|
||
| class ConditionalTaskHelper; | ||
|
|
||
| namespace service { | ||
|
|
@@ -123,7 +125,6 @@ namespace edm { | |
| public: | ||
| typedef std::vector<std::string> vstring; | ||
| typedef std::vector<Path> TrigPaths; | ||
| typedef std::vector<Path> NonTrigPaths; | ||
| typedef std::shared_ptr<HLTGlobalStatus> TrigResPtr; | ||
| typedef std::shared_ptr<HLTGlobalStatus const> TrigResConstPtr; | ||
| typedef std::shared_ptr<Worker> WorkerPtr; | ||
|
|
@@ -162,7 +163,7 @@ namespace edm { | |
| bool cleaningUpAfterException = false); | ||
|
|
||
| void beginStream(); | ||
| void endStream(); | ||
| void endStream(ExceptionCollector& collector, std::mutex& collectorMutex) noexcept; | ||
|
|
||
| StreamID streamID() const { return streamID_; } | ||
|
|
||
|
|
@@ -306,12 +307,9 @@ namespace edm { | |
| void preScheduleSignal(StreamContext const*) const; | ||
|
|
||
| template <typename T> | ||
| void postScheduleSignal(StreamContext const*, ServiceWeakToken const&, std::exception_ptr&) const noexcept; | ||
| void postScheduleSignal(StreamContext const*, std::exception_ptr&) const noexcept; | ||
|
|
||
| void handleException(StreamContext const&, | ||
| ServiceWeakToken const&, | ||
| bool cleaningUpAfterException, | ||
| std::exception_ptr&) const noexcept; | ||
| void handleException(StreamContext const&, bool cleaningUpAfterException, std::exception_ptr&) const noexcept; | ||
|
|
||
| WorkerManager workerManagerBeginEnd_; | ||
| WorkerManager workerManagerRuns_; | ||
|
|
@@ -370,11 +368,15 @@ namespace edm { | |
| auto doneTask = make_waiting_task([this, iHolder = std::move(iHolder), cleaningUpAfterException, weakToken]( | ||
| std::exception_ptr const* iPtr) mutable { | ||
| std::exception_ptr excpt; | ||
| if (iPtr) { | ||
| excpt = *iPtr; | ||
| handleException(streamContext_, weakToken, cleaningUpAfterException, excpt); | ||
| } | ||
| postScheduleSignal<T>(&streamContext_, weakToken, excpt); | ||
| { | ||
| ServiceRegistry::Operate op(weakToken.lock()); | ||
|
|
||
| if (iPtr) { | ||
| excpt = *iPtr; | ||
| handleException(streamContext_, cleaningUpAfterException, excpt); | ||
| } | ||
| postScheduleSignal<T>(&streamContext_, excpt); | ||
| } // release service token before calling doneWaiting | ||
| iHolder.doneWaiting(excpt); | ||
| }); | ||
|
|
||
|
|
@@ -391,7 +393,10 @@ namespace edm { | |
| preScheduleSignal<T>(&streamContext_); | ||
| workerManager->resetAll(); | ||
| } catch (...) { | ||
| h.doneWaiting(std::current_exception()); | ||
| // Just remember the exception at this point, | ||
| // let the destructor of h call doneWaiting() so the | ||
| // ServiceRegistry::Operator object is destroyed first | ||
| h.presetTaskAsFailed(std::current_exception()); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you explain what is achieved by this change? AFAIU it moves the signaling of completion from here to wherever the lambda object is destructed.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is good to let the ServiceRegistry::Operate object go out of scope and be destroyed before WaitingTaskHolder::doneWaiting is called. I think there is a very small chance of major problems if the ServiceToken stays around until the near the end of the job when things are being shutdown and destroyed.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add a comment in the code along "doneWaiting() signaling is left to the the destructor of h in order to ServiceRegistry::Operator object to be destructed first"?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. Slightly reworded the comment. |
||
| return; | ||
| } | ||
|
|
||
|
|
@@ -430,13 +435,9 @@ namespace edm { | |
|
|
||
| template <typename T> | ||
| void StreamSchedule::postScheduleSignal(StreamContext const* streamContext, | ||
| ServiceWeakToken const& weakToken, | ||
| std::exception_ptr& excpt) const noexcept { | ||
| try { | ||
| convertException::wrap([this, &weakToken, streamContext]() { | ||
| ServiceRegistry::Operate op(weakToken.lock()); | ||
| T::postScheduleSignal(actReg_.get(), streamContext); | ||
| }); | ||
| convertException::wrap([this, streamContext]() { T::postScheduleSignal(actReg_.get(), streamContext); }); | ||
| } catch (cms::Exception& ex) { | ||
| if (not excpt) { | ||
| std::ostringstream ost; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the activation of Services here, instead of separately in
handleException()andpostScheduleSignal(), a simplification, or is there a deeper reason?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this PR, beginStream and endStream are now using those functions. They were not using them before. As currently implemented the service token is not available in beginStream and endStream and the services are turned on at a higher level for those. I wanted to share that code for those transitions also. Partially, it is also a simplification.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, thanks.