ATLAS Offline Software
Loading...
Searching...
No Matches
CoreDumpSvcHandler Namespace Reference

Signal handler for CoreDumpSvc. More...

Typedefs

typedef std::map< int, struct sigaction > SigHandler_t

Functions

bool callOldHandler (true)
 forward calls to old handlers?
bool dumpCoreFile (false)
 dump core file on exit?
bool stackTrace (false)
 produce stack trace?
bool fastStackTrace (false)
 produce fast stack trace using CxxUtils/Seal
CoreDumpSvc * coreDumpSvc (nullptr)
 pointer to CoreDumpSvc
std::ostream &log ATLAS_NOT_THREAD_SAFE ()
 convenience method for logging
void action ATLAS_NOT_THREAD_SAFE (int sig, siginfo_t *info, void *extra)
 Signal handler for the CoreDumpSvc.

Variables

SigHandler_t oldSigHandler
 old signal handlers

Detailed Description

Signal handler for CoreDumpSvc.

All information accessible from the signal handler is in this namespace. It carries a pointer to the CoreDumpSvc instance. Therefore no static methods are needed in the CoreDumpSvc to provide a function pointer.

Typedef Documentation

◆ SigHandler_t

typedef std::map<int, struct sigaction> CoreDumpSvcHandler::SigHandler_t

Definition at line 80 of file CoreDumpSvc.cxx.

Function Documentation

◆ ATLAS_NOT_THREAD_SAFE() [1/2]

std::ostream &log CoreDumpSvcHandler::ATLAS_NOT_THREAD_SAFE ( void )

convenience method for logging

Definition at line 90 of file CoreDumpSvc.cxx.

◆ ATLAS_NOT_THREAD_SAFE() [2/2]

void action CoreDumpSvcHandler::ATLAS_NOT_THREAD_SAFE ( int sig,
siginfo_t * info,
void * extra )

Signal handler for the CoreDumpSvc.

Definition at line 95 of file CoreDumpSvc.cxx.

96 {
97 // Careful: don't do anything here that might allocate memory.
98
99 // Protect against recursion.
100 // We originally used a thread_local here --- but accessing
101 // a thread_local can result in a call to malloc.
102
103 const int maxcalls = 64;
104 static std::atomic<int> ncalls (0);
105 if (++ncalls >= maxcalls) _exit (98);
106
107 static std::mutex tidlist_mutex;
108 static size_t ntids ATLAS_THREAD_SAFE = 0;
109 static pthread_t tids[maxcalls] ATLAS_THREAD_SAFE;
110 {
111 pthread_t self = pthread_self();
112 std::lock_guard<std::mutex> lock (tidlist_mutex);
113 for (size_t i = 0; i < ntids; i++) {
114 if (pthread_equal (self, tids[i])) return;
115 }
116 if (ntids == maxcalls) _exit (98);
117 tids[ntids++] = self;
118 }
119
120 // Count the number of threads trying to dump.
121 static std::atomic<int> inThreads = 0;
122 ++inThreads;
123
124 const unsigned int timeoutSeconds = static_cast<unsigned int>(round(coreDumpSvc->m_timeout * 1e-9));
125
126 if ( sig == SIGALRM) {
127 if (dumpCoreFile) {
128 log() << "Received SIGALRM. Aborting job..." << std::endl;
129 // Restore default abort handler that should create a core file
130 Athena::Signal::revert (SIGABRT);
131 std::abort();
132 }
133 else {
134 log() << "Received SIGALRM. Terminating job..." << std::endl;
135 _exit(97); // exit without raising any further signals
136 }
137 }
138
139 // Only allow one thread past at a time.
140 // Try to assume as little as possible about the state of the library.
141 // We don't want to hang forever here, but we also don't want
142 // to call any library functions that might use signals under the hood.
143 // So use nanosleep() to do the delay --- that's defined to be
144 // independent of signals.
145 static std::mutex threadMutex;
146 const timespec one_second { 1, 0 };
147 {
148 unsigned int waits = 0;
149 while (!threadMutex.try_lock()) {
150 nanosleep (&one_second, nullptr);
151 if (++waits > timeoutSeconds) _exit (97);
152 }
153 }
154
155 // setup timeout
156 if ( timeoutSeconds > 0 && (sig == SIGSEGV || sig == SIGBUS || sig == SIGABRT) ) {
157 // This will trigger SIGALRM, which we then handle ourselves above
158 alarm(timeoutSeconds);
159 }
160
161 // Do fast stack trace before anything that might touch the heap.
162 // For extra paranoia, avoid iostreams/stdio and use write() directly.
163 if (fastStackTrace) {
164 write (1, horizLine, strlen(horizLine));
165 const char* msg = "Producing (fast) stack trace...\n";
166 write (1, msg, strlen (msg));
167 write (1, horizLine, strlen(horizLine));
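168 Athena::Signal::fatalDump (sig, info, extra,
169 Athena::DebugAids::stacktraceFd(),
170 Athena::Signal::FATAL_DUMP_SIG +
171 Athena::Signal::FATAL_DUMP_CONTEXT +
172 Athena::Signal::FATAL_DUMP_STACK);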
173 write (1, "\n", 1);
174 }
175
176 std::cout.flush();
177 std::cerr.flush();
178
179 if (coreDumpSvc) {
180 coreDumpSvc->setSigInfo(info);
181 coreDumpSvc->print();
182 }
183
184 if (gSystem && stackTrace) {
185 log() << horizLine << "Producing stack trace (can be slow, check gdb process)...\n"
186 << horizLine << std::flush;
187 gSystem->StackTrace();
188 log() << std::endl;
189 }
190
191 if (callOldHandler) {
192 // Call previous signal handler
193 // Need to distinguish between the two different types
194 const struct sigaction& oact = oldSigHandler[sig];
195 log() << horizLine << "Invoking previous signal handler (can be slow, check gdb process)...\n"
196 << horizLine << std::flush;
197 if ( oact.sa_flags & SA_SIGINFO ) {
198 oact.sa_sigaction(sig, info, extra);
199 }
200 else if (oact.sa_handler != SIG_DFL && oact.sa_handler != SIG_IGN ) {
201 oact.sa_handler(sig);
202 }
203 else {
204 log() << "Could not invoke previous signal handler" << std::endl;
205 }
206 }
207
208 // This thread is done dumping.
209 threadMutex.unlock();
210 --inThreads;
211
212 if (coreDumpSvc && (sig == SIGSEGV || sig == SIGBUS || sig == SIGABRT) ) {
213 // Don't terminate the program while there are other threads
214 // trying to dump (but don't wait forever either).
215 unsigned int waits = 0;
216 while (inThreads > 0 && waits < timeoutSeconds) {
217 nanosleep (&one_second, nullptr);
218 }
219
220 if (dumpCoreFile) {
221 log() << "Aborting job... " << std::endl;
222 // Restore default abort handler that should create a core file
223 Athena::Signal::revert (SIGABRT);
224 std::abort();
225 }
226
227 // Exit now on a fatal signal; otherwise, we can hang.
228 _exit (99);
229 }
230 }
#define ATLAS_THREAD_SAFE
static IOFD stacktraceFd(IOFD fd=IOFD_INVALID)
Set and return the file descriptor for stack trace output.
static const int FATAL_DUMP_CONTEXT
Option to make fataldump(int, siginfo_t *, void *) (invoked by fatal(int, siginfo_t *,...
Definition SealSignal.h:138
static void revert(int sig)
Revert the signal number sig back to its default behaviour.
static const int FATAL_DUMP_SIG
Option to make fataldump(int, siginfo_t *, void *) (invoked by fatal(int, siginfo_t *,...
Definition SealSignal.h:126
static const int FATAL_DUMP_STACK
Option to make fataldump(int, siginfo_t *, void *) (invoked by fatal(int, siginfo_t *,...
Definition SealSignal.h:130
CoreDumpSvc * coreDumpSvc(nullptr)
pointer to CoreDumpSvc
SigHandler_t oldSigHandler
old signal handlers
bool dumpCoreFile(false)
dump core file on exit?
bool callOldHandler(true)
forward calls to old handlers?
bool stackTrace(false)
produce stack trace?
bool fastStackTrace(false)
produce fast stack trace using CxxUtils/Seal
float round(const float toRound, const unsigned int decimals)
Definition Mdt.cxx:27
MsgStream & msg
Definition testRead.cxx:32

◆ callOldHandler()

bool CoreDumpSvcHandler::callOldHandler ( true )

forward calls to old handlers?

◆ coreDumpSvc()

CoreDumpSvc * CoreDumpSvcHandler::coreDumpSvc ( nullptr )

pointer to CoreDumpSvc

◆ dumpCoreFile()

bool CoreDumpSvcHandler::dumpCoreFile ( false )

dump core file on exit?

◆ fastStackTrace()

bool CoreDumpSvcHandler::fastStackTrace ( false )

produce fast stack trace using CxxUtils/Seal

◆ stackTrace()

bool CoreDumpSvcHandler::stackTrace ( false )

produce stack trace?

Variable Documentation

◆ oldSigHandler

SigHandler_t CoreDumpSvcHandler::oldSigHandler

old signal handlers

Definition at line 82 of file CoreDumpSvc.cxx.