19 #include <sys/types.h>
21 #include <sys/sysinfo.h>
23 #include <mach/task.h>
24 #include <mach/mach_init.h>
35 #include "Gaudi/Property.h"
36 #include "GaudiKernel/IAlgorithm.h"
37 #include "GaudiKernel/IIncidentSvc.h"
38 #include "GaudiKernel/IAlgContextSvc.h"
39 #include "GaudiKernel/IAlgExecStateSvc.h"
40 #include "GaudiKernel/ServiceHandle.h"
41 #include "GaudiKernel/System.h"
42 #include "GaudiKernel/ConcurrencyFlags.h"
43 #include "GaudiKernel/EventContext.h"
// Horizontal separator: a run of dashes terminated by a newline. Further down
// it is emitted both through raw write() calls (together with strlen()) and
// through the log() stream to frame sections of the diagnostic output.
56 const char*
const horizLine =
"-------------------------------------------------------------------------------------\n";
59 if (
sig == SIGINT ) {
61 std::cout << std::endl;
62 std::cerr <<
"Athena CRITICAL stopped by user interrupt\n";
87 std::ostream* ostr(&std::cout);
// Upper bound on the number of times this code path may be entered.
102 const int maxcalls = 64;
// Static atomic counter: the count is kept across invocations and is
// incremented atomically.
103 static std::atomic<int> ncalls (0);
// Once the incremented count reaches the limit, end the process at once via
// _exit() with status 98.
104 if (++ncalls >= maxcalls) _exit (98);
110 pthread_t
self = pthread_self();
111 std::lock_guard<std::mutex> lock (tidlist_mutex);
112 for (
size_t i = 0;
i < ntids;
i++) {
113 if (pthread_equal (
self, tids[
i]))
return;
115 if (ntids == maxcalls) _exit (98);
116 tids[ntids++] =
self;
120 static std::atomic<int> inThreads = 0;
125 if (
sig == SIGALRM) {
127 log() <<
"Received SIGALRM. Aborting job..." << std::endl;
133 log() <<
"Received SIGALRM. Terminating job..." << std::endl;
145 const timespec one_second { 1, 0 };
147 unsigned int waits = 0;
148 while (!threadMutex.try_lock()) {
149 nanosleep (&one_second,
nullptr);
150 if (++waits > timeoutSeconds) _exit (97);
155 if ( timeoutSeconds > 0 && (
sig == SIGSEGV ||
sig == SIGBUS ||
sig == SIGABRT) ) {
157 alarm(timeoutSeconds);
163 write (1, horizLine, strlen(horizLine));
164 const char*
msg =
"Producing (fast) stack trace...\n";
166 write (1, horizLine, strlen(horizLine));
184 log() << horizLine <<
"Producing stack trace (can be slow, check gdb process)...\n"
186 gSystem->StackTrace();
194 log() << horizLine <<
"Invoking previous signal handler (can be slow, check gdb process)...\n"
196 if ( oact.sa_flags & SA_SIGINFO ) {
199 else if (oact.sa_handler != SIG_DFL && oact.sa_handler != SIG_IGN ) {
200 oact.sa_handler(
sig);
203 log() <<
"Could not invoke previous signal handler" << std::endl;
208 threadMutex.unlock();
214 unsigned int waits = 0;
215 while (inThreads > 0 && waits < timeoutSeconds) {
216 nanosleep (&one_second,
nullptr);
220 log() <<
"Aborting job... " << std::endl;
237 base_class(
name, pSvcLocator )
243 m_dumpCoreFile.declareUpdateHandler(&CoreDumpSvc::propertyHandler,
this);
244 m_stackTrace.declareUpdateHandler(&CoreDumpSvc::propertyHandler,
this);
248 m_killOnSigInt.declareUpdateHandler(&CoreDumpSvc::propertyHandler,
this);
259 void CoreDumpSvc::propertyHandler(Gaudi::Details::PropertyBase&
p)
267 const std::string
val =
p.toString();
268 if (
val==
"stdout" ) {
269 CoreDumpSvcHandler::ostr = &std::cout;
271 else if (
val==
"stderr" ) {
272 CoreDumpSvcHandler::ostr = &std::cerr;
284 ATH_MSG_INFO(
"could not convert [" <<
p.toString() <<
"] to integer");
316 if ( installSignalHandler().isFailure() ) {
318 return StatusCode::FAILURE;
323 if ( !incSvc.retrieve().isSuccess() ) {
327 incSvc->addListener(
this, IncidentType::BeginRun);
328 incSvc->addListener(
this, IncidentType::BeginEvent);
329 incSvc->addListener(
this, IncidentType::EndRun);
330 incSvc->addListener(
this, IncidentType::EndEvent);
331 incSvc->addListener(
this,
"StoreCleared");
334 return StatusCode::SUCCESS;
339 auto numSlots = std::max<size_t>(1, Gaudi::Concurrency::ConcurrencyFlags::numConcurrentEvents());
342 return StatusCode::SUCCESS;
349 if ( uninstallSignalHandler().isFailure() ) {
351 return StatusCode::FAILURE;
354 return StatusCode::SUCCESS;
371 auto slot = ctx.valid() ? ctx.slot() : 0;
382 << m_coreDumpStream.value()
383 << (m_dumpCoreFile ?
". Will try to produce a core dump file on exit." :
".")
394 std::ostringstream
os;
396 const time_t
now =
time(
nullptr);
398 os <<
"-------------------------------------------------------------------------------------" <<
"\n";
399 os <<
"Core dump from " <<
name() <<
" on " << System::hostName()
400 <<
" at " << ctime_r(&
now, buf) ;
407 os <<
"Caught signal " << signo
408 <<
"(" << strsignal(signo) <<
"). Details: "
423 << std::hex <<
m_siginfo->si_ptr <<
")" << std::dec <<
"\n";
// Report memory figures in megabytes: the page counts held in `s` (declared
// outside this excerpt) are multiplied by the system page size in bytes, as
// returned by sysconf(_SC_PAGESIZE), and divided by 1024 twice.
429 const long pagesz = sysconf(_SC_PAGESIZE);
430 os <<
" vmem = " <<
s.vm_pages*pagesz/1024./1024. <<
" MB\n"
431 <<
" rss = " <<
s.rss_pages*pagesz/1024./1024. <<
" MB\n";
439 if ( 0 == sysinfo(&
sys) ) {
441 const float mem_units =
sys.mem_unit/(1024.*1024.);
442 os <<
" total-ram = " <<
sys.totalram * mem_units <<
" MB\n"
443 <<
" free-ram = " <<
sys.freeram * mem_units <<
" MB\n"
444 <<
" buffer-ram= " <<
sys.bufferram* mem_units <<
" MB\n"
445 <<
" total-swap= " <<
sys.totalswap* mem_units <<
" MB\n"
446 <<
" free-swap = " <<
sys.freeswap * mem_units <<
" MB\n";
// Only for SIGILL, SIGFPE, SIGSEGV and SIGBUS: print the si_addr field of the
// stored siginfo with the stream switched to hex, then switch back to decimal.
// NOTE(review): m_siginfo is dereferenced here without a visible null check;
// presumably an enclosing guard outside this excerpt covers it - confirm.
452 if (signo == SIGILL || signo == SIGFPE || signo == SIGSEGV || signo == SIGBUS)
453 os <<
" addr = " << std::hex <<
m_siginfo->si_addr << std::dec <<
"\n";
461 SmartIF<IAlgExecStateSvc> algExecStateSvc;
462 SmartIF<IAlgContextSvc> algContextSvc;
465 if (Gaudi::Concurrency::ConcurrencyFlags::numConcurrentEvents() > 0) {
466 algExecStateSvc = service(
"AlgExecStateSvc",
false);
469 algContextSvc = service(
"AlgContextSvc",
false);
476 std::string currentAlg;
477 if (algExecStateSvc) {
478 ATH_MSG_DEBUG(
"Using AlgExecStateSvc to determine current algorithm(s)");
481 auto states = algExecStateSvc->algExecStates(EventContext(0,
t));
482 for (
const auto& kv :
states) {
483 if (kv.second.state()==AlgExecState::State::Executing)
484 currentAlg += (kv.first +
" ");
487 catch (
const GaudiException&) {
488 ATH_MSG_INFO(
"No information from AlgExecStateSvc because no algorithm was executed yet.");
491 else if (algContextSvc) {
492 ATH_MSG_DEBUG(
"Using AlgContextSvc to determine current algorithm");
493 IAlgorithm*
alg = algContextSvc->currentAlg();
494 if (
alg) currentAlg =
alg->name();
497 ATH_MSG_WARNING(
"AlgExecStateSvc or AlgContextSvc not available. Cannot determine current algorithm.");
// If no algorithm name was collected above, report the placeholder "<NONE>".
500 if (currentAlg.empty()) currentAlg =
"<NONE>";
// One line per slot: the slot index t in a 3-character field, followed by the
// algorithm name(s); std::endl also flushes the stream.
501 os <<
"Slot " << std::setw(3) <<
t <<
" : Current algorithm = " << currentAlg << std::endl;
505 if (!
sys.LastInc.empty()) {
506 os <<
" : Last Incident = " <<
sys.LastInc << std::endl
507 <<
" : Event ID = " <<
sys.EvId << std::endl;
513 for (
auto &
s : usr) {
514 os <<
" : (usr) " <<
s.first <<
" = " <<
s.second << std::endl;
520 os <<
"Algorithm stack: ";
521 if ( algContextSvc->algorithms().empty() )
os <<
"<EMPTY>" <<
"\n";
524 for (
auto alg : algContextSvc->algorithms()) {
525 if (
alg)
os <<
" " <<
alg->name() <<
"\n";
531 os <<
"| AtlasBaseDir : " << std::setw(66) <<
getenv(
"AtlasBaseDir") <<
" |\n";
532 os <<
"| AtlasVersion : " << std::setw(66) <<
getenv(
"AtlasVersion") <<
" |\n";
533 os <<
"| BINARY_TAG : " << std::setw(66) <<
getenv(
"BINARY_TAG") <<
" |\n";
535 os <<
" Note: to see line numbers in below stacktrace you might consider running following :\n";
536 os <<
" atlasAddress2Line --file <logfile>\n";
538 SmartIF<IAthenaSummarySvc> iass{service(
"AthenaSummarySvc",
false)};
540 iass->addSummary(
"CoreDumpSvc",
os.str());
542 iass->createSummary().ignore();
555 auto slot = incident.context().valid() ? incident.context().slot() : 0;
558 currRec.LastInc = incident.source() +
":" + incident.type();
// Render the event ID of the incident's context as text through its stream
// insertion operator and store the resulting string in the current record.
560 std::ostringstream oss;
561 oss << incident.context().eventID();
562 currRec.EvId = oss.str();
564 if (incident.type()==IncidentType::BeginEvent) {
568 }
else if (incident.type() ==
"StoreCleared") {
// Take a copy of the stored event ID, write its first character back onto
// itself, and assign the copy back to the record.
// NOTE(review): the self-assignment changes no data; presumably it is
// deliberate (e.g. to force the copy to own a fresh buffer when the store is
// cleared) - confirm before "simplifying" it away.
570 auto newstr = currRec.EvId;
573 newstr[0] = newstr[0];
574 currRec.EvId = newstr;
589 std::ostringstream oss;
591 for (
auto sig : m_signals) {
593 if (sig<1 || sig>SIGRTMAX) {
598 oss <<
sig <<
"(" << strsignal(
sig) <<
") ";
604 struct sigaction sigact;
605 memset (&sigact, 0,
sizeof(sigact));
608 sigact.sa_flags = SA_SIGINFO + SA_ONSTACK;
612 <<
": " << strerror(errno));
613 return StatusCode::FAILURE;
618 return StatusCode::SUCCESS;
631 int ret = sigaction(kv.first, &(kv.second),
nullptr);
633 sc = StatusCode::FAILURE;
634 ATH_MSG_WARNING(
"Error on uninstalling handler for signal " << kv.first
635 <<
": " << strerror(errno));
649 std::vector<uint8_t>& stack =
s_stack;
651 stack.resize (
std::max (SIGSTKSZ, MINSIGSTKSZ) + 2*1024*1024);
653 ss.ss_sp = stack.data();
655 ss.ss_size = stack.size();
656 int ret = sigaltstack (&
ss,
nullptr);