ATLAS Offline Software
FPEAuditor.cxx
Go to the documentation of this file.
1 
3 /*
4  Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
5 */
6 
7 // FPEAuditor.cxx
8 // Implementation file for class FPEAuditor
9 // Author: S.Binet<binet@cern.ch>
11 
12 #include <stdexcept>
13 #include <mutex>
14 #include <atomic>
15 
16 // FrameWork includes
17 #include "GaudiKernel/INamedInterface.h"
18 #include "GaudiKernel/EventContext.h"
19 
20 #include "FPEAuditor.h"
21 
23 
24 // C includes
25 #include <fenv.h>
26 
27 
28 namespace FPEAudit {
29  const int MAXARRAY=100;
30  struct FPEAuditTLSData {
34  };
35 
36  thread_local FPEAuditTLSData s_tlsdata;
37 
38  std::atomic<bool> s_handlerInstalled = false;
39  std::atomic<bool> s_handlerDisabled = false;
40  struct sigaction s_oldactHandler ATLAS_THREAD_SAFE;
41 
43  typedef std::lock_guard<std::mutex> lock_t;
44 }
45 
46 
47 #if defined(__linux__) && ( defined(__i386__) || defined(__x86_64__) )
48 # include "FPEAudit_linux.icc"
49 #else
50 # include "FPEAudit_dummy.icc"
51 #endif
52 
53 
55 // Public methods:
57 
59 
60 // Constructors
62 FPEAuditor::FPEAuditor( const std::string& name,
63  ISvcLocator* pSvcLocator ) :
64  AthCommonMsg<Auditor> ( name, pSvcLocator ),
65  m_CountFPEs(),
66  m_env(),
67  m_nexceptions(0)
68 {
69  declareProperty("NStacktracesOnFPE", m_NstacktracesOnFPE,
70  "Flag to configure, how many stack traces in case of FPE are printed. "
71  "Default: Zero, just report that FPE has happened. "
72  "After collecting the stacktrace, the code has to modify the mcontext_t "
73  "struct to ignore FPEs for the rest of the processing of the algorithm/service "
74  "This part is highly non-portable!" );
75 
76  declareProperty("Nstacklines", m_Nstacklines,"Limit how deep we dive into the stack");
77 }
78 
79 // Destructor
81 #include <fstream>
82 #include <iomanip>
84 {
85  //m_msg << MSG::DEBUG << "Calling destructor" << endmsg;
86 }
87 
89 {
90  for ( unsigned int i(0); i<FPEAUDITOR_ARRAYSIZE; ++i )
91  m_CountFPEs[i]=0;
92  //ATH_MSG_INFO("==> initialize");
93  // add a fake node here because we may come alive while the AuditorSvc is
94  // still initializing (so one edge is left orphaned)
95  add_fpe_node();
96 
97  return StatusCode::SUCCESS;
98 }
99 
// Body of FPEAuditor::finalize(): logs the per-type FPE totals accumulated in
// m_CountFPEs (overflow, invalid, divide-by-zero) and returns SUCCESS.
// NOTE(review): the signature line (source line 100) and source lines 108-110
// inside the scoped block below are absent from this listing; what that block
// does cannot be told from here — confirm against the repository.
101 {
102  ATH_MSG_INFO("FPE summary for this job");
103  ATH_MSG_INFO(" FPE OVERFLOWs : " << m_CountFPEs[FPEAUDITOR_OVERFLOW] );
104  ATH_MSG_INFO(" FPE INVALIDs : " << m_CountFPEs[FPEAUDITOR_INVALID]);
105  ATH_MSG_INFO(" FPE DIVBYZEROs : " << m_CountFPEs[FPEAUDITOR_DIVBYZERO]);
106 
107  {
111  }
112 
113  return StatusCode::SUCCESS;
114 }
115 
// Body of FPEAuditor::InstallHandler(): saves the current floating-point
// environment in m_env, then (on non-aarch64 builds) registers
// FPEAudit::fpe_sig_action as the SA_SIGINFO handler for SIGFPE, keeping the
// previous disposition in FPEAudit::s_oldactHandler.
// NOTE(review): the signature line (source line 116) and source lines
// 123-125, 134, 141-143 and 145 are absent from this listing, so the state
// updates on the aarch64 path, on sigaction() failure and at the end of the
// function are not visible here.
117 {
118  // Save the current FP environment.
119  fegetenv (&m_env);
120 
121 #ifdef __aarch64__
122  ATH_MSG_WARNING("Collecting stack traces for FPES is not supported on aarch64");
126 #else
// Start from a zeroed sigaction and request the three-argument handler form.
127  struct sigaction act;
128  memset(&act, 0, sizeof act);
129  act.sa_sigaction = FPEAudit::fpe_sig_action;
130  act.sa_flags = SA_SIGINFO;
131  if ( sigaction(SIGFPE, &act, &FPEAudit::s_oldactHandler) != 0 )
132  {
133  ATH_MSG_WARNING ("Printing stacktraces on FPE requested, but unable to install signal handler ! Switched off !");
135  }
// FPEs are only unmasked (so that they trap) when stack traces were requested.
136  if ( m_NstacktracesOnFPE )
137  {
138  ATH_MSG_INFO ("Installed Signalhandler !");
139  FPEAudit::unmask_fpe();
140  }
144 
146 #endif
147 }
148 
// Body of FPEAuditor::UninstallHandler(): clears all pending FP exception
// flags, restores the environment that InstallHandler() saved in m_env, and
// masks FPEs again through FPEAudit::mask_fpe().
// NOTE(review): the signature line (source line 149) and source line 152 are
// absent from this listing; restoring the previous SIGFPE disposition is not
// visible in the lines shown — confirm against the repository.
150 {
151  ATH_MSG_INFO("uninstalling SignalHandler");
153 
154  feclearexcept(FE_ALL_EXCEPT);
155  fesetenv (&m_env);
156  FPEAudit::mask_fpe();
157 
158  // feenableexcept (0);
159  // fedisableexcept (FE_ALL_EXCEPT);
160 }
161 
// Auditor hook run before a step: pushes a fresh node onto the per-thread FPE
// stack (which also clears the FP status flags), and for Execute steps reaches
// a call to InstallHandler(). The second (name) and third (context) arguments
// are unused.
// NOTE(review): source lines 168-170 and 172, which open the nested scopes
// closed at lines 173-175 and presumably guard the InstallHandler() call, are
// absent from this listing.
162 void FPEAuditor::before(const std::string& event, const std::string& /*name*/,
163  const EventContext&)
164 {
165  add_fpe_node();
166 
167  if ( event==IAuditor::Execute ) {
171  InstallHandler();
173  }
174  }
175  }
176 }
177 
// Auditor hook run after a step: reports any FPE flags raised during the step
// on behalf of `name`, then pops the node pushed by before(). The StatusCode
// argument is unused. After an Initialize step the handler may be installed
// (again).
// NOTE(review): source lines 185, 188 and 190 are absent from this listing;
// they open the scopes closed at lines 191-192 and presumably guard the
// InstallHandler() call.
178 void FPEAuditor::after(const std::string& event, const std::string& name,
179  const EventContext& ctx, const StatusCode&)
180 {
181  report_fpe(event, name, ctx);
182  pop_fpe_node();
183 
184  if ( event==IAuditor::Initialize ) {
186  // CoreDumpSvc can also install a FPE handler, grrr.
// Clearing the flag marks our handler as not installed once CoreDumpSvc has
// finished initializing.
187  if (name == "CoreDumpSvc") FPEAudit::s_handlerInstalled = false;
189  InstallHandler();
191  }
192  }
193 }
194 
// Report the floating-point exceptions raised during `step` of `caller`.
//
// Tests the FE_OVERFLOW / FE_INVALID / FE_DIVBYZERO status flags. For each
// raised flag a WARNING naming the step and caller is emitted, with
// " on event <number>" appended when `ctx` is valid. When stack traces are
// enabled (m_NstacktracesOnFPE non-zero) and the matching per-thread address
// array has a non-NULL first entry, up to m_Nstacklines entries are resolved
// and printed at INFO level, stopping at the first NULL entry.
//
// NOTE(review): source lines 212, 214, 219, 223, 232, 238, 242, 250, 253, 259,
// 266, 268-269 and 272 are absent from this listing, so the counter updates
// and the exact end-of-function condition are not visible here.
// NOTE(review): std::stringstream needs <sstream>, which is not among the
// includes visible in this listing — presumably pulled in transitively.
197 void
198 FPEAuditor::report_fpe(const std::string& step,
199  const std::string& caller,
200  const EventContext& ctx)
201 {
202  // store current list of FPE flags which were raised before
203  int raised = fetestexcept(FE_OVERFLOW | FE_INVALID | FE_DIVBYZERO);
204  if (raised) {
205  std::stringstream evStr;
206  if (ctx.valid()) {
207  evStr << " on event " << ctx.eventID().event_number();
208  }
209 
210  if (raised & FE_OVERFLOW) {
211  ATH_MSG_WARNING("FPE OVERFLOW in [" << step << "] of [" << caller << "]" << evStr.str() <<
213  );
// NOTE(review): j is bounded only by m_Nstacklines and the NULL check; the
// arrays hold FPEAudit::MAXARRAY entries — confirm m_Nstacklines cannot exceed
// that. The same applies to the two loops further down.
215  if ( m_NstacktracesOnFPE && FPEAudit::s_tlsdata.s_array_O[0] != NULL )
216  {
217  for (unsigned int j = 0; j < m_Nstacklines; j++)
218  {
220  if (FPEAudit::s_tlsdata.s_array_O[j]==NULL) break;
221  this->msg(MSG::INFO) << "FPE stacktrace " << j << " :\n";
222  FPEAudit::resolve(FPEAudit::s_tlsdata.s_array_O[j],this->msg());
224  this->msg(MSG::INFO) << endmsg;
225  }
226  }
227  }
228  if (raised & FE_INVALID) {
229  ATH_MSG_WARNING("FPE INVALID in [" << step << "] of [" << caller << "]" << evStr.str()
230  << " " << m_NstacktracesOnFPE << " " << FPEAudit::s_tlsdata.s_array_I[0]
231  );
233  }
// NOTE(review): as far as the visible lines show, this stack-trace block sits
// after the brace closing the FE_INVALID test, so it runs whenever any of the
// three flags is raised — unlike the OVERFLOW and DIVBYZERO cases, where the
// block is nested inside the flag test. Confirm whether this is intended.
234  if ( m_NstacktracesOnFPE && FPEAudit::s_tlsdata.s_array_I[0] != NULL )
235  {
236  for (unsigned int j = 0; j < m_Nstacklines; j++)
237  {
239  if (FPEAudit::s_tlsdata.s_array_I[j]==NULL) break;
240  this->msg(MSG::INFO) << "FPE stacktrace " << j << " :\n";
241  FPEAudit::resolve(FPEAudit::s_tlsdata.s_array_I[j],this->msg());
243  this->msg(MSG::INFO) << endmsg;
244  }
245  }
246  if (raised & FE_DIVBYZERO) {
247  ATH_MSG_WARNING("FPE DIVBYZERO in [" << step << "] of [" << caller << "]" << evStr.str()
248  << " " << m_NstacktracesOnFPE << " " << FPEAudit::s_tlsdata.s_array_D[0]
249  );
251  if ( m_NstacktracesOnFPE && FPEAudit::s_tlsdata.s_array_D[0] != NULL )
252  {
254  for (unsigned int j = 0; j < m_Nstacklines; j++)
255  {
256  if (FPEAudit::s_tlsdata.s_array_D[j]==NULL) break;
257  this->msg(MSG::INFO) << "FPE stacktrace " << j << " :\n";
258  FPEAudit::resolve(FPEAudit::s_tlsdata.s_array_D[j],this->msg());
260  this->msg(MSG::INFO) << endmsg;
261  }
262  }
263  }
264 
265 
// Decrements the remaining stack-trace budget; the rest of this condition and
// the action taken besides the stderr message are on lines missing from this
// listing.
267  if ( --m_nexceptions == 0
270  {
271  fprintf(stderr, "too many SIGFPE detected, will be uninstalling signal handler\n");
273  }
274  }
275 }
276 
// FPEAuditor::add_fpe_node(): pushes a (currently-raised-flags, 0) pair onto
// the thread-local s_fpe_stack and then clears all FP exception flags, so the
// step about to run starts with a clean status word.
// NOTE(review): the signature line (source line 278) and source line 287 are
// absent from this listing; line 287 opens the scope closed at line 295 and,
// judging by the comment below, presumably tests whether the handler has been
// disabled — confirm.
277 void
279 {
280  // get current list of FPE flags so far
281  int raised = fetestexcept(FE_OVERFLOW | FE_INVALID | FE_DIVBYZERO);
282  s_fpe_stack.push_back(std::make_pair(raised, 0));
283 
284  // clear FPE status word
285  feclearexcept(FE_ALL_EXCEPT);
286 
288  // Make sure exceptions have been masked off if the handler has been
289  // disabled. Yes, this is already done in the signal handler.
290  // But when TBB starts a new task, it resets the FPU control words
291  // to values that were copied when the thread pool was initialized.
292  // FIXME: Do it properly with TBB? Not sure that the current
293  // interfaces allow it.
294  FPEAudit::mask_fpe();
295  }
296 }
297 
// FPEAuditor::pop_fpe_node(): removes the top node of the thread-local
// s_fpe_stack and ORs the flags stored in its first slot into the second slot
// of the new top node, if one remains.
// Throws std::runtime_error (after logging an ERROR) when the stack is
// already empty.
// NOTE(review): the signature line (source line 299) and source line 314 are
// absent from this listing; line 314 opens the scope closed at line 316 and
// presumably guards the unmask_fpe() call — confirm.
298 void
300 {
301  if (s_fpe_stack.empty()) {
302  ATH_MSG_ERROR("inconsistent fpe-stack !");
303  throw std::runtime_error("inconsistent fpe-stack");
304  }
305 
306  // restore fpe stack info
307  int raised = s_fpe_stack.back().first;
308  s_fpe_stack.pop_back();
309 
310  // consolidate
311  if (!s_fpe_stack.empty()) {
312  s_fpe_stack.back().second |= raised;
313  }
315  FPEAudit::unmask_fpe();
316  }
317 }
python.CreateTierZeroArgdict.resolve
def resolve(tag)
utility functions
Definition: CreateTierZeroArgdict.py:73
ATH_MSG_INFO
#define ATH_MSG_INFO(x)
Definition: AthMsgStreamMacros.h:31
FPEAuditor::FPEAUDITOR_INVALID
@ FPEAUDITOR_INVALID
Definition: FPEAuditor.h:81
BeamSpot::mutex
std::mutex mutex
Definition: InDetBeamSpotVertex.cxx:18
FPEAuditor::add_fpe_node
void add_fpe_node()
add an fpe node
Definition: FPEAuditor.cxx:278
FPEAudit
Definition: FPEAuditor.cxx:28
FPEAuditor::m_nexceptions
std::atomic< int > m_nexceptions
Definition: FPEAuditor.h:98
get_generator_info.stderr
stderr
Definition: get_generator_info.py:40
python.RatesEmulationExample.lock
lock
Definition: RatesEmulationExample.py:148
FPEAuditor::after
virtual void after(const std::string &event, const std::string &name, const EventContext &ctx, const StatusCode &sc) override
Definition: FPEAuditor.cxx:178
FPEAudit::lock_t
std::lock_guard< std::mutex > lock_t
Definition: FPEAuditor.cxx:43
FPEAuditor::~FPEAuditor
virtual ~FPEAuditor()
Destructor.
Definition: FPEAuditor.cxx:83
FPEAudit::s_handlerDisabled
std::atomic< bool > s_handlerDisabled
Definition: FPEAuditor.cxx:39
LArPulseShapeRunConfig.Execute
Execute
Definition: LArPulseShapeRunConfig.py:62
FPEAuditor.h
ATH_MSG_ERROR
#define ATH_MSG_ERROR(x)
Definition: AthMsgStreamMacros.h:33
event
POOL::TEvent event(POOL::TEvent::kClassAccess)
FPEAudit::FPEAuditTLSData
Definition: FPEAuditor.cxx:30
lumiFormat.i
int i
Definition: lumiFormat.py:85
FPEAuditor::FPEAUDITOR_DIVBYZERO
@ FPEAUDITOR_DIVBYZERO
Definition: FPEAuditor.h:81
FPEAuditor::pop_fpe_node
void pop_fpe_node()
pop an fpe node
Definition: FPEAuditor.cxx:299
endmsg
#define endmsg
Definition: AnalysisConfig_Ntuple.cxx:63
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
FPEAuditor::report_fpe
void report_fpe(const std::string &step, const std::string &caller, const EventContext &ctx)
report fpes which happened during step 'step' on behalf of 'caller'
Definition: FPEAuditor.cxx:198
FPEAuditor::FpeStack_t
std::list< FpeNode_t > FpeStack_t
Definition: FPEAuditor.h:76
FPEAudit_linux.icc
FPEAudit::s_handlerInstalled
std::atomic< bool > s_handlerInstalled
Definition: FPEAuditor.cxx:38
FPEAuditor::InstallHandler
void InstallHandler()
Definition: FPEAuditor.cxx:116
FPEAuditor::finalize
virtual StatusCode finalize() override
Definition: FPEAuditor.cxx:100
FPEAudit::MAXARRAY
const int MAXARRAY
Definition: FPEAuditor.cxx:29
FPEAudit_dummy.icc
FPEAudit::s_mutex
std::mutex s_mutex
Definition: FPEAuditor.cxx:42
name
std::string name
Definition: Control/AthContainers/Root/debug.cxx:240
FPEAuditor::FPEAuditor
FPEAuditor(const std::string &name, ISvcLocator *pSvcLocator)
Constructor.
Definition: FPEAuditor.cxx:62
FPEAuditor::initialize
virtual StatusCode initialize() override
Gaudi hooks.
Definition: FPEAuditor.cxx:88
jobOptions.Initialize
Initialize
Definition: jobOptions.pA.py:28
FPEAuditor::m_Nstacklines
unsigned int m_Nstacklines
Definition: FPEAuditor.h:87
FPEAuditor::before
virtual void before(const std::string &event, const std::string &name, const EventContext &ctx) override
Auditor hooks.
Definition: FPEAuditor.cxx:162
AthCommonMsg
Definition: AthCommonMsg.h:19
ATH_MSG_WARNING
#define ATH_MSG_WARNING(x)
Definition: AthMsgStreamMacros.h:32
python.Constants.INFO
int INFO
Definition: Control/AthenaCommon/python/Constants.py:15
AthCommonMsg< Gaudi::Auditor >::msg
MsgStream & msg() const
Definition: AthCommonMsg.h:24
FPEAudit::FPEAuditTLSData::s_array_I
void * s_array_I[MAXARRAY]
Definition: FPEAuditor.cxx:32
FPEAudit::FPEAuditTLSData::s_array_D
void * s_array_D[MAXARRAY]
Definition: FPEAuditor.cxx:33
LArCellBinning.step
step
Definition: LArCellBinning.py:158
FPEAuditor::m_CountFPEs
std::atomic< unsigned int > m_CountFPEs[FPEAUDITOR_ARRAYSIZE]
Definition: FPEAuditor.h:83
FPEAuditor::m_env
fenv_t m_env
The FP environment before we initialize.
Definition: FPEAuditor.h:96
FPEAuditor::s_fpe_stack
static thread_local FpeStack_t s_fpe_stack
a stack of FPE exceptions which have been raised
Definition: FPEAuditor.h:79
FPEAudit::s_tlsdata
thread_local FPEAuditTLSData s_tlsdata
Definition: FPEAuditor.cxx:36
FPEAudit::ATLAS_THREAD_SAFE
struct sigaction s_oldactHandler ATLAS_THREAD_SAFE
Definition: FPEAuditor.cxx:40
checker_macros.h
Define macros for attributes used to control the static checker.
FPEAuditor::m_NstacktracesOnFPE
unsigned int m_NstacktracesOnFPE
Definition: FPEAuditor.h:85
FPEAuditor::FPEAUDITOR_ARRAYSIZE
@ FPEAUDITOR_ARRAYSIZE
Definition: FPEAuditor.h:81
FPEAudit::FPEAuditTLSData::s_array_O
void * s_array_O[MAXARRAY]
Definition: FPEAuditor.cxx:31
FPEAuditor::FPEAUDITOR_OVERFLOW
@ FPEAUDITOR_OVERFLOW
Definition: FPEAuditor.h:81
FPEAuditor::UninstallHandler
void UninstallHandler()
Definition: FPEAuditor.cxx:149