ATLAS Offline Software
Loading...
Searching...
No Matches
FPEAuditor.cxx
Go to the documentation of this file.
1
2
3/*
4 Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
5*/
6
7// FPEAuditor.cxx
8// Implementation file for class FPEAuditor
9// Author: S.Binet<binet@cern.ch>
11
12#include <stdexcept>
13#include <mutex>
14#include <atomic>
15
16// FrameWork includes
17#include "GaudiKernel/INamedInterface.h"
18#include "GaudiKernel/EventContext.h"
19
20#include "FPEAuditor.h"
21
23
24// C includes
25#include <fenv.h>
26
27
// File-scope state for the FPE auditor, grouped in namespace FPEAudit.
// NOTE(review): several lines of this namespace are absent from this listing.
28namespace FPEAudit {
// Array bound used for the s_array_O / s_array_I / s_array_D pointer buffers.
29 const int MAXARRAY=100;
35
37
// Handler bookkeeping flag; FPEAuditor::after() sets it to false once "CoreDumpSvc" has initialized.
38 std::atomic<bool> s_handlerInstalled = false;
// NOTE(review): presumably marks that the SIGFPE handler has been switched off;
// no write to it is visible in this listing -- confirm.
39 std::atomic<bool> s_handlerDisabled = false;
// Receives the previously registered SIGFPE action when sigaction() is called.
40 struct sigaction s_oldactHandler ATLAS_THREAD_SAFE;
41
// Mutex and lock-guard alias; their users are not visible in this listing.
42 std::mutex s_mutex;
43 typedef std::lock_guard<std::mutex> lock_t;
44}
45
46
47#if defined(__linux__) && ( defined(__i386__) || defined(__x86_64__) )
48# include "FPEAudit_linux.icc"
49#else
50# include "FPEAudit_dummy.icc"
51#endif
52
53
55// Public methods:
57
59
60// Constructors
// Constructor: forwards the name and service locator to the AthCommonMsg<Auditor>
// base, value-initializes m_env and declares the two configurable properties.
// NOTE(review): some member initializers appear to be missing from this listing.
62FPEAuditor::FPEAuditor( const std::string& name,
63 ISvcLocator* pSvcLocator ) :
64 AthCommonMsg<Auditor> ( name, pSvcLocator ),
66 m_env(),
68{
// Number of FPE stack traces to print; the help text states the default is zero.
69 declareProperty("NStacktracesOnFPE", m_NstacktracesOnFPE,
70 "Flag to configure, how many stack traces in case of FPE are printed. "
71 "Default: Zero, just report that FPE has happened. "
72 "After collecting the stacktrace, the code has to modify the mcontext_t "
73 "struct to ignore FPEs for the rest of the processing of the algorithm/service "
74 "This part is highly non-portable!" );
75
// Upper bound on the number of stack entries printed per trace.
76 declareProperty("Nstacklines", m_Nstacklines,"Limit how deep we dive into the stack");
77}
78
79// Destructor
81#include <fstream>
82#include <iomanip>
// Destructor body (the signature line is absent from this listing).
// Nothing is done here; only a commented-out debug message remains.
84{
85 //m_msg << MSG::DEBUG << "Calling destructor" << endmsg;
86}
87
// Presumably the body of FPEAuditor::initialize(); the signature line and at
// least one statement are absent from this listing -- confirm against the full source.
89{
// Zero every per-category FPE counter.
90 for ( unsigned int i(0); i<FPEAUDITOR_ARRAYSIZE; ++i )
91 m_CountFPEs[i]=0;
92 //ATH_MSG_INFO("==> initialize");
93 // add a fake node here because we may come alive while the AuditorSvc is
94 // still initializing (so one edge is left orphaned)
96
// Always reports success.
97 return StatusCode::SUCCESS;
98}
99
// Presumably the body of FPEAuditor::finalize(); the signature line is absent
// from this listing -- confirm against the full source.
// Logs the job-wide counts of OVERFLOW, INVALID and DIVBYZERO exceptions.
101{
102 ATH_MSG_INFO("FPE summary for this job");
103 ATH_MSG_INFO(" FPE OVERFLOWs : " << m_CountFPEs[FPEAUDITOR_OVERFLOW] );
104 ATH_MSG_INFO(" FPE INVALIDs : " << m_CountFPEs[FPEAUDITOR_INVALID]);
105 ATH_MSG_INFO(" FPE DIVBYZEROs : " << m_CountFPEs[FPEAUDITOR_DIVBYZERO]);
106
// NOTE(review): the contents of this inner scope are missing from the listing.
107 {
111 }
112
// Always reports success.
113 return StatusCode::SUCCESS;
114}
115
// Presumably the body of FPEAuditor::InstallHandler(); the signature line is
// absent from this listing -- confirm against the full source.
// Saves the floating-point environment and, except on aarch64, registers
// FPEAudit::fpe_sig_action as the SIGFPE handler.
117{
118 // Save the current FP environment.
119 fegetenv (&m_env);
120
// On aarch64 only a warning is emitted; the remaining lines of this branch are not shown.
121#ifdef __aarch64__
122 ATH_MSG_WARNING("Collecting stack traces for FPES is not supported on aarch64");
126#else
// Build a zeroed sigaction that uses the three-argument (SA_SIGINFO) handler form.
127 struct sigaction act;
128 memset(&act, 0, sizeof act);
129 act.sa_sigaction = FPEAudit::fpe_sig_action;
130 act.sa_flags = SA_SIGINFO;
// Register it for SIGFPE; the previous action is stored in FPEAudit::s_oldactHandler.
131 if ( sigaction(SIGFPE, &act, &FPEAudit::s_oldactHandler) != 0 )
132 {
133 ATH_MSG_WARNING ("Printing stacktraces on FPE requested, but unable to install signal handler ! Switched off !");
135 }
// NOTE(review): an `else` line appears to be missing between these two blocks in this listing.
137 {
138 ATH_MSG_INFO ("Installed Signalhandler !");
139 FPEAudit::unmask_fpe();
140 }
// Mark the three per-thread stack-trace buffers as empty by nulling their first slot.
141 FPEAudit::s_tlsdata.s_array_O[0]=NULL;
142 FPEAudit::s_tlsdata.s_array_I[0]=NULL;
143 FPEAudit::s_tlsdata.s_array_D[0]=NULL;
144
146#endif
147}
148
// Presumably the body of FPEAuditor::UninstallHandler(); the signature line and
// one statement are absent from this listing -- confirm against the full source.
150{
151 ATH_MSG_INFO("uninstalling SignalHandler");
153
// Clear all pending exception flags, then restore the environment saved in m_env.
154 feclearexcept(FE_ALL_EXCEPT);
155 fesetenv (&m_env);
// Mask floating-point exceptions again via the platform-specific helper.
156 FPEAudit::mask_fpe();
157
158 // feenableexcept (0);
159 // fedisableexcept (FE_ALL_EXCEPT);
160}
161
// Auditor hook called before a step of a component.
// @param event  name of the step; compared against IAuditor::Execute below
// The component name and the event context are unused.
162void FPEAuditor::before(const std::string& event, const std::string& /*name*/,
163 const EventContext&)
164{
// Open a new bookkeeping node for the step that is about to run.
165 add_fpe_node();
166
// NOTE(review): the body of the Execute branch is missing from this listing.
167 if ( event==IAuditor::Execute ) {
173 }
174 }
175 }
176}
177
// Auditor hook called after a step of a component.
// @param event  name of the step; compared against IAuditor::Initialize below
// @param name   name of the audited component
// @param ctx    event context, forwarded to report_fpe()
// The StatusCode argument is unused.
178void FPEAuditor::after(const std::string& event, const std::string& name,
179 const EventContext& ctx, const StatusCode&)
180{
// Report what was raised during the step, then close its bookkeeping node.
181 report_fpe(event, name, ctx);
182 pop_fpe_node();
183
// NOTE(review): several lines of the Initialize branch are missing from this listing.
184 if ( event==IAuditor::Initialize ) {
186 // CoreDumpSvc can also install a FPE handler, grrr.
187 if (name == "CoreDumpSvc") FPEAudit::s_handlerInstalled = false;
191 }
192 }
193}
194
// Report floating-point exceptions currently flagged in the FP status word.
// @param step    name of the step that just ran (used in the log message)
// @param caller  name of the component that ran it (used in the log message)
// @param ctx     event context; when valid, the event number is appended to the message
// For each of OVERFLOW, INVALID and DIVBYZERO a WARNING is logged and, if stack
// traces are enabled and one was captured for this thread, the trace is printed.
// NOTE(review): several lines of this function are missing from this listing.
197void
198FPEAuditor::report_fpe(const std::string& step,
199 const std::string& caller,
200 const EventContext& ctx)
201{
202 // store current list of FPE flags which were raised before
203 int raised = fetestexcept(FE_OVERFLOW | FE_INVALID | FE_DIVBYZERO);
204 if (raised) {
// Optional " on event N" suffix, only built when the context is valid.
205 std::stringstream evStr;
206 if (ctx.valid()) {
207 evStr << " on event " << ctx.eventID().event_number();
208 }
209
210 if (raised & FE_OVERFLOW) {
211 ATH_MSG_WARNING("FPE OVERFLOW in [" << step << "] of [" << caller << "]" << evStr.str() <<
212 " " << m_NstacktracesOnFPE << " " << FPEAudit::s_tlsdata.s_array_O[0]
213 );
// Print up to m_Nstacklines captured entries, stopping at the first NULL;
// each printed slot is reset to NULL afterwards.
215 if ( m_NstacktracesOnFPE && FPEAudit::s_tlsdata.s_array_O[0] != NULL )
216 {
217 for (unsigned int j = 0; j < m_Nstacklines; j++)
218 {
220 if (FPEAudit::s_tlsdata.s_array_O[j]==NULL) break;
221 this->msg(MSG::INFO) << "FPE stacktrace " << j << " :\n";
222 FPEAudit::resolve(FPEAudit::s_tlsdata.s_array_O[j],this->msg());
223 FPEAudit::s_tlsdata.s_array_O[j]=NULL;
224 this->msg(MSG::INFO) << endmsg;
225 }
226 }
227 }
228 if (raised & FE_INVALID) {
229 ATH_MSG_WARNING("FPE INVALID in [" << step << "] of [" << caller << "]" << evStr.str()
230 << " " << m_NstacktracesOnFPE << " " << FPEAudit::s_tlsdata.s_array_I[0]
231 );
233 }
// NOTE(review): as listed, this INVALID stack-trace dump sits outside the
// FE_INVALID check above, unlike the OVERFLOW and DIVBYZERO cases -- confirm
// against the full source whether that is intended.
234 if ( m_NstacktracesOnFPE && FPEAudit::s_tlsdata.s_array_I[0] != NULL )
235 {
236 for (unsigned int j = 0; j < m_Nstacklines; j++)
237 {
239 if (FPEAudit::s_tlsdata.s_array_I[j]==NULL) break;
240 this->msg(MSG::INFO) << "FPE stacktrace " << j << " :\n";
241 FPEAudit::resolve(FPEAudit::s_tlsdata.s_array_I[j],this->msg());
242 FPEAudit::s_tlsdata.s_array_I[j]=NULL;
243 this->msg(MSG::INFO) << endmsg;
244 }
245 }
246 if (raised & FE_DIVBYZERO) {
247 ATH_MSG_WARNING("FPE DIVBYZERO in [" << step << "] of [" << caller << "]" << evStr.str()
248 << " " << m_NstacktracesOnFPE << " " << FPEAudit::s_tlsdata.s_array_D[0]
249 );
// Same trace dump as for OVERFLOW, using the DIVBYZERO buffer.
251 if ( m_NstacktracesOnFPE && FPEAudit::s_tlsdata.s_array_D[0] != NULL )
252 {
254 for (unsigned int j = 0; j < m_Nstacklines; j++)
255 {
256 if (FPEAudit::s_tlsdata.s_array_D[j]==NULL) break;
257 this->msg(MSG::INFO) << "FPE stacktrace " << j << " :\n";
258 FPEAudit::resolve(FPEAudit::s_tlsdata.s_array_D[j],this->msg());
259 FPEAudit::s_tlsdata.s_array_D[j]=NULL;
260 this->msg(MSG::INFO) << endmsg;
261 }
262 }
263 }
264
265
// Decrement the remaining-exceptions budget; when it reaches zero a notice is
// written to stderr. NOTE(review): the rest of this condition and the action
// taken are missing from this listing.
267 if ( --m_nexceptions == 0
270 {
271 fprintf(stderr, "too many SIGFPE detected, will be uninstalling signal handler\n");
273 }
274 }
275}
276
// Presumably FPEAuditor::add_fpe_node(); the line carrying the function name is
// absent from this listing -- confirm against the full source.
// Saves the currently raised FPE flags on the per-thread stack and clears the
// FP status word.
277void
279{
280 // get current list of FPE flags so far
281 int raised = fetestexcept(FE_OVERFLOW | FE_INVALID | FE_DIVBYZERO);
// New node: first = flags raised before this step, second = 0.
282 s_fpe_stack.push_back(std::make_pair(raised, 0));
283
284 // clear FPE status word
285 feclearexcept(FE_ALL_EXCEPT);
286
// NOTE(review): the condition opening this block is missing from the listing.
288 // Make sure exceptions have been masked off if the handler has been
289 // disabled. Yes, this is already done in the signal handler.
290 // But when TBB starts a new task, it resets the FPU control words
291 // to values that were copied when the thread pool was initialized.
292 // FIXME: Do it properly with TBB? Not sure that the current
293 // interfaces allow it.
294 FPEAudit::mask_fpe();
295 }
296}
297
// Presumably FPEAuditor::pop_fpe_node(); the line carrying the function name is
// absent from this listing -- confirm against the full source.
// Removes the top node of the per-thread FPE stack and merges its saved flags
// into the node below it.
// Throws std::runtime_error if the stack is empty.
298void
300{
301 if (s_fpe_stack.empty()) {
302 ATH_MSG_ERROR("inconsistent fpe-stack !");
303 throw std::runtime_error("inconsistent fpe-stack");
304 }
305
306 // restore fpe stack info
307 int raised = s_fpe_stack.back().first;
308 s_fpe_stack.pop_back();
309
310 // consolidate
// OR the popped node's saved flags into the `second` field of the new top node.
311 if (!s_fpe_stack.empty()) {
312 s_fpe_stack.back().second |= raised;
313 }
// NOTE(review): the condition guarding this unmask call is missing from the listing.
315 FPEAudit::unmask_fpe();
316 }
317}
#define endmsg
#define ATH_MSG_ERROR(x)
#define ATH_MSG_INFO(x)
#define ATH_MSG_WARNING(x)
Define macros for attributes used to control the static checker.
std::atomic< int > m_nexceptions
Definition FPEAuditor.h:98
unsigned int m_NstacktracesOnFPE
Definition FPEAuditor.h:85
@ FPEAUDITOR_INVALID
Definition FPEAuditor.h:81
@ FPEAUDITOR_ARRAYSIZE
Definition FPEAuditor.h:81
@ FPEAUDITOR_DIVBYZERO
Definition FPEAuditor.h:81
@ FPEAUDITOR_OVERFLOW
Definition FPEAuditor.h:81
std::atomic< unsigned int > m_CountFPEs[FPEAUDITOR_ARRAYSIZE]
Definition FPEAuditor.h:83
virtual StatusCode finalize() override
virtual void after(const std::string &event, const std::string &name, const EventContext &ctx, const StatusCode &sc) override
void InstallHandler()
std::list< FpeNode_t > FpeStack_t
Definition FPEAuditor.h:76
FPEAuditor(const std::string &name, ISvcLocator *pSvcLocator)
Constructor.
static thread_local FpeStack_t s_fpe_stack
a stack of FPE exceptions which have been raised
Definition FPEAuditor.h:79
virtual void before(const std::string &event, const std::string &name, const EventContext &ctx) override
Auditor hooks.
virtual ~FPEAuditor()
Destructor.
void add_fpe_node()
add an fpe node
fenv_t m_env
The FP environment before we initialize.
Definition FPEAuditor.h:96
virtual StatusCode initialize() override
Gaudi hooks.
void report_fpe(const std::string &step, const std::string &caller, const EventContext &ctx)
report fpes which happened during step 'step' on behalf of 'caller'
void UninstallHandler()
unsigned int m_Nstacklines
Definition FPEAuditor.h:87
void pop_fpe_node()
pop an fpe node
thread_local FPEAuditTLSData s_tlsdata
std::lock_guard< std::mutex > lock_t
std::mutex s_mutex
struct sigaction s_oldactHandler ATLAS_THREAD_SAFE
std::atomic< bool > s_handlerDisabled
std::atomic< bool > s_handlerInstalled
const int MAXARRAY
void * s_array_D[MAXARRAY]
void * s_array_O[MAXARRAY]
void * s_array_I[MAXARRAY]