14#include "GaudiKernel/IScheduler.h"
15#include "GaudiKernel/ServiceHandle.h"
18#include "valgrind/valgrind.h"
23 if (RUNNING_ON_VALGRIND) {
24 ATH_MSG_INFO(
"Detected running on valgrind. Disabling algorithm timeout");
26 return StatusCode::SUCCESS;
33 incSvc->addListener(
this,
"EndAlgorithms", 0);
35 return StatusCode::SUCCESS;
43 [[maybe_unused]]
static const bool initThread = [&](){
52 *m_eventStartTime.get(ctx) = clock_t::now();
54 return StatusCode::SUCCESS;
60 if (inc.type() ==
"EndAlgorithms") {
61 ATH_MSG_DEBUG(
"Resetting event timeout for slot " << inc.context().slot());
63 *m_eventStartTime.get(inc.context()) = {};
70 if (m_thread.joinable() && !
m_stopped.exchange(
true)) {
77 return StatusCode::SUCCESS;
83 ATH_MSG_INFO(std::format(
"Setting per-event timeout of {}",
84 std::chrono::duration<double>(
m_timeout)));
87 const std::chrono::nanoseconds wakeup_interval =
92 while ( stop_signal.wait_for(wakeup_interval) == std::future_status::timeout ) {
95 const auto now = clock_t::now();
96 for (EventContext::ContextID_t slot = 0;
97 const auto& startTime : m_eventStartTime) {
99 if (startTime.time_since_epoch().count() > 0 && now > startTime +
m_timeout) {
115 const EventContext ctx(0, slot);
121 const std::string
msg = std::format(
"Event timeout ({}) in slot {} reached",
122 std::chrono::duration<double>(
m_timeout), slot);
131 if (schedulerSvc.retrieve().isSuccess()) {
132 schedulerSvc->dumpState();
143 if ( coreDumpSvc.retrieve().isSuccess() ) {
147 std::cerr <<
msg << std::endl;
#define ATH_CHECK
Evaluate an expression and check for errors.
Interface of a core dump service.
void setTimeout(Timeout &instance)
Set timeout.
static Timeout & instance()
Get reference to Timeout singleton.
virtual void setCoreDumpInfo(const std::string &name, const std::string &value) override
Set a name/value pair in the core dump record.
Algorithm to monitor event timeouts.
std::promise< void > m_stop_thread
Signal to stop watchdog thread.
void handleTimeout(EventContext::ContextID_t slot)
Handle timeout.
Gaudi::Property< bool > m_dumpState
virtual void handle(const Incident &inc) override
std::mutex m_handleMutex
Mutex for handleTimeout.
virtual StatusCode stop() override
Gaudi::Property< bool > m_abort
std::atomic< bool > m_stopped
Has the watchdog thread already been stopped? (Prevents the stop promise from being signalled twice.)
Gaudi::Property< unsigned long long > m_timeoutProp
void timeoutThread()
Watchdog thread.
virtual StatusCode execute(const EventContext &ctx) const override
std::chrono::nanoseconds m_timeout
Timeout property as duration.
SG::SlotSpecificObj< clock_t::time_point > m_eventStartTime ATLAS_THREAD_SAFE
Start time of each event per slot.
Gaudi::Property< unsigned long long > m_checkInterval
virtual StatusCode initialize() override
Algorithm to monitor event timeouts.