ATLAS Offline Software
FPEAuditor.cxx
Go to the documentation of this file.
1 
3 /*
4  Copyright (C) 2002-2023 CERN for the benefit of the ATLAS collaboration
5 */
6 
7 // FPEAuditor.cxx
8 // Implementation file for class FPEAuditor
9 // Author: S.Binet<binet@cern.ch>
11 
12 #include <stdexcept>
13 #include <mutex>
14 #include <atomic>
15 
16 // FrameWork includes
17 #include "GaudiKernel/INamedInterface.h"
18 #include "GaudiKernel/EventContext.h"
19 #include "GaudiKernel/ThreadLocalContext.h"
20 
21 #include "FPEAuditor.h"
22 
24 
25 // C includes
26 #include <fenv.h>
27 
28 
// File-scope state used by FPEAuditor and by the platform-specific
// FPEAudit_*.icc code that is included right after this namespace.
29 namespace FPEAudit {
// Capacity of each per-thread address array held in FPEAuditTLSData.
30  const int MAXARRAY=100;
// Per-thread storage whose s_array_O / s_array_I / s_array_D entries are read
// by FPEAuditor::report_fpe and handed to FPEAudit::resolve when stack traces
// are printed.
31  struct FPEAuditTLSData {
35  };
36 
// One instance per thread.
37  thread_local FPEAuditTLSData s_tlsdata;
38 
// Process-wide flags. s_handlerInstalled is reset to false by
// FPEAuditor::after() once "CoreDumpSvc" has been initialized.
// NOTE(review): s_handlerDisabled is presumably set by the signal handler in
// the included .icc file -- confirm there.
39  std::atomic<bool> s_handlerInstalled = false;
40  std::atomic<bool> s_handlerDisabled = false;
// Receives the previous SIGFPE disposition when InstallHandler() calls sigaction().
41  struct sigaction s_oldactHandler ATLAS_THREAD_SAFE;
42 
// Shorthand for a scoped lock on a std::mutex.
44  typedef std::lock_guard<std::mutex> lock_t;
45 }
46 
47 
48 #if defined(__linux__) && ( defined(__i386__) || defined(__x86_64__) )
49 # include "FPEAudit_linux.icc"
50 #else
51 # include "FPEAudit_dummy.icc"
52 #endif
53 
54 
56 // Public methods:
58 
60 
61 // Constructors
// Constructor.
//   name        : auditor instance name, forwarded to the AthCommonMsg<Auditor> base
//   pSvcLocator : service locator, forwarded to the base
// Value-initializes the FPE counters and the saved FP environment, starts
// m_nexceptions at 0, and declares the two configurable properties
// "NStacktracesOnFPE" and "Nstacklines".
63 FPEAuditor::FPEAuditor( const std::string& name,
64  ISvcLocator* pSvcLocator ) :
65  AthCommonMsg<Auditor> ( name, pSvcLocator ),
66  m_CountFPEs(),
67  m_env(),
68  m_nexceptions(0)
69 {
// Number of stack traces to print; a non-zero value is what makes
// InstallHandler() unmask FPE traps.
70  declareProperty("NStacktracesOnFPE", m_NstacktracesOnFPE,
71  "Flag to configure, how many stack traces in case of FPE are printed. "
72  "Default: Zero, just report that FPE has happened. "
73  "After collecting the stacktrace, the code has to modify the mcontext_t "
74  "struct to ignore FPEs for the rest of the processing of the algorithm/service "
75  "This part is highly non-portable!" );
76 
// Upper bound on the number of stack-trace entries report_fpe() prints per FPE.
77  declareProperty("Nstacklines", m_Nstacklines,"Limit how deep we dive into the stack");
78 }
79 
80 // Destructor
82 #include <fstream>
83 #include <iomanip>
85 {
// Destructor body: nothing is released here; the only statement is a
// disabled debug message.
86  //m_msg << MSG::DEBUG << "Calling destructor" << endmsg;
87 }
88 
90 {
// initialize(): zero every FPE counter, push a first bookkeeping node onto
// the per-thread FPE stack (reason given in the note below), and report success.
91  for ( unsigned int i(0); i<FPEAUDITOR_ARRAYSIZE; ++i )
92  m_CountFPEs[i]=0;
93  //ATH_MSG_INFO("==> initialize");
94  // add a fake node here because we may come alive while the AuditorSvc is
95  // still initializing (so one edge is left orphaned)
96  add_fpe_node();
97 
98  return StatusCode::SUCCESS;
99 }
100 
102 {
// finalize(): log the job-wide totals of overflow, invalid-operation and
// divide-by-zero FPEs from m_CountFPEs at INFO level, then report success.
103  ATH_MSG_INFO("FPE summary for this job");
104  ATH_MSG_INFO(" FPE OVERFLOWs : " << m_CountFPEs[FPEAUDITOR_OVERFLOW] );
105  ATH_MSG_INFO(" FPE INVALIDs : " << m_CountFPEs[FPEAUDITOR_INVALID]);
106  ATH_MSG_INFO(" FPE DIVBYZEROs : " << m_CountFPEs[FPEAUDITOR_DIVBYZERO]);
107 
// NOTE(review): the statements inside this inner scope are not visible here;
// presumably handler teardown under a lock -- confirm against the full file.
108  {
112  }
113 
114  return StatusCode::SUCCESS;
115 }
116 
118 {
// InstallHandler(): remember the current floating-point environment in m_env
// so UninstallHandler() can restore it, then (everywhere except aarch64)
// register FPEAudit::fpe_sig_action as the SIGFPE handler.
119  // Save the current FP environment.
120  fegetenv (&m_env);
121 
// Stack-trace collection is not available on aarch64; only a warning is issued.
122 #ifdef __aarch64__
123  ATH_MSG_WARNING("Collecting stack traces for FPES is not supported on aarch64");
127 #else
// SA_SIGINFO selects the three-argument handler form (sa_sigaction).
128  struct sigaction act;
129  memset(&act, 0, sizeof act);
130  act.sa_sigaction = FPEAudit::fpe_sig_action;
131  act.sa_flags = SA_SIGINFO;
// The previously installed SIGFPE action is kept in FPEAudit::s_oldactHandler.
132  if ( sigaction(SIGFPE, &act, &FPEAudit::s_oldactHandler) != 0 )
133  {
134  ATH_MSG_WARNING ("Printing stacktraces on FPE requested, but unable to install signal handler ! Switched off !");
136  }
// FPE traps are unmasked only when stack traces were requested.
137  if ( m_NstacktracesOnFPE )
138  {
139  ATH_MSG_INFO ("Installed Signalhandler !");
140  FPEAudit::unmask_fpe();
141  }
145 
147 #endif
148 }
149 
151 {
// UninstallHandler(): announce the removal, clear all pending FP exception
// flags, restore the FP environment saved in m_env by InstallHandler(), and
// mask FPE traps again.
152  ATH_MSG_INFO("uninstalling SignalHandler");
154 
155  feclearexcept(FE_ALL_EXCEPT);
156  fesetenv (&m_env);
157  FPEAudit::mask_fpe();
158 
159  // feenableexcept (0);
160  // fedisableexcept (FE_ALL_EXCEPT);
161 }
162 
// 'before' hook for standard auditor events.
//   evt : the standard event about to run on the component
//   (the component pointer is unused)
// Always opens a new FPE bookkeeping node. For Execute events the code below
// can additionally reach InstallHandler().
// NOTE(review): the conditions guarding that call are not visible here;
// presumably a stack-trace-requested / install-once check -- confirm.
163 void FPEAuditor::before(StandardEventType evt, INamedInterface*)
164 {
165  add_fpe_node();
166 
167  if ( evt==IAuditor::Execute ) {
171  InstallHandler();
173  }
174  }
175  }
176 }
177 
// 'after' hook for standard auditor events.
//   evt  : the standard event that just finished
//   comp : the component it ran on; its name is used in the report
//   (the StatusCode argument is unused)
// Reports any FPE flags raised during the event, then closes the matching
// bookkeeping node. After an Initialize event the code below can also reach
// InstallHandler().
// NOTE(review): the conditions guarding that call are not visible here -- confirm.
178 void FPEAuditor::after(StandardEventType evt, INamedInterface* comp, const StatusCode&)
179 {
180  report_fpe(toStr(evt), comp->name());
181  pop_fpe_node();
182 
183  if ( evt==IAuditor::Initialize ) {
// Resetting the flag marks our handler as no longer installed once
// CoreDumpSvc has been initialized.
185  // CoreDumpSvc can also install a FPE handler, grrr.
186  if (comp->name() == "CoreDumpSvc") FPEAudit::s_handlerInstalled = false;
188  InstallHandler();
190  }
191  }
192 }
193 
// 'before' hook for custom auditor events: only opens a new FPE bookkeeping
// node; both arguments are unused.
194 void FPEAuditor::before(CustomEventTypeRef /*evt*/,
195  const std::string& /*caller*/)
196 {
197  add_fpe_node();
198 }
199 
// 'after' hook for custom auditor events.
//   evt    : the custom event, passed to report_fpe() as the step label
//   caller : name reported as the source of any FPE
//   (the StatusCode argument is unused)
// Reports FPE flags raised during the event, then closes the bookkeeping node
// opened by the matching before().
200 void FPEAuditor::after(CustomEventTypeRef evt,
201  const std::string& caller,
202  const StatusCode&)
203 {
204  report_fpe(evt, caller);
205  pop_fpe_node();
206 }
207 
// Report the floating-point exceptions currently flagged in the FP status word.
//   step   : label of the processing step, printed in the warning
//   caller : name of the component on whose behalf the step ran
// For each of OVERFLOW, INVALID and DIVBYZERO that is raised, a WARNING naming
// step and caller is emitted, with the event number appended when the current
// EventContext is valid. When m_NstacktracesOnFPE is non-zero and the calling
// thread has a recorded trace in FPEAudit::s_tlsdata, up to m_Nstacklines
// entries are resolved through FPEAudit::resolve and printed at INFO level.
210 void
211 FPEAuditor::report_fpe(const std::string& step,
212  const std::string& caller)
213 {
214  // store current list of FPE flags which were raised before
215  int raised = fetestexcept(FE_OVERFLOW | FE_INVALID | FE_DIVBYZERO);
216  if (raised) {
217  // FIXME: Gaudi should pass context to the auditors.
218  std::stringstream evStr;
219  const EventContext& ctx = Gaudi::Hive::currentContext();
220  if (ctx.valid()) {
221  evStr << " on event " << ctx.eventID().event_number();
222  }
223 
224  if (raised & FE_OVERFLOW) {
225  ATH_MSG_WARNING("FPE OVERFLOW in [" << step << "] of [" << caller << "]" << evStr.str() <<
227  );
229  if ( m_NstacktracesOnFPE && FPEAudit::s_tlsdata.s_array_O[0] != NULL )
230  {
// Print entries until the first NULL slot or m_Nstacklines, whichever comes first.
// NOTE(review): the bound is m_Nstacklines while the array holds
// FPEAudit::MAXARRAY entries -- confirm the property cannot exceed that.
231  for (unsigned int j = 0; j < m_Nstacklines; j++)
232  {
234  if (FPEAudit::s_tlsdata.s_array_O[j]==NULL) break;
235  this->msg(MSG::INFO) << "FPE stacktrace " << j << " :\n";
236  FPEAudit::resolve(FPEAudit::s_tlsdata.s_array_O[j],this->msg());
238  this->msg(MSG::INFO) << endmsg;
239  }
240  }
241  }
242  if (raised & FE_INVALID) {
243  ATH_MSG_WARNING("FPE INVALID in [" << step << "] of [" << caller << "]" << evStr.str()
244  << " " << m_NstacktracesOnFPE << " " << FPEAudit::s_tlsdata.s_array_I[0]
245  );
247  }
// NOTE(review): as written, this trace block sits after the closing brace of
// the FE_INVALID test, unlike the OVERFLOW and DIVBYZERO cases -- confirm
// whether that is intended.
248  if ( m_NstacktracesOnFPE && FPEAudit::s_tlsdata.s_array_I[0] != NULL )
249  {
250  for (unsigned int j = 0; j < m_Nstacklines; j++)
251  {
253  if (FPEAudit::s_tlsdata.s_array_I[j]==NULL) break;
254  this->msg(MSG::INFO) << "FPE stacktrace " << j << " :\n";
255  FPEAudit::resolve(FPEAudit::s_tlsdata.s_array_I[j],this->msg());
257  this->msg(MSG::INFO) << endmsg;
258  }
259  }
260  if (raised & FE_DIVBYZERO) {
261  ATH_MSG_WARNING("FPE DIVBYZERO in [" << step << "] of [" << caller << "]" << evStr.str()
262  << " " << m_NstacktracesOnFPE << " " << FPEAudit::s_tlsdata.s_array_D[0]
263  );
265  if ( m_NstacktracesOnFPE && FPEAudit::s_tlsdata.s_array_D[0] != NULL )
266  {
268  for (unsigned int j = 0; j < m_Nstacklines; j++)
269  {
270  if (FPEAudit::s_tlsdata.s_array_D[j]==NULL) break;
271  this->msg(MSG::INFO) << "FPE stacktrace " << j << " :\n";
272  FPEAudit::resolve(FPEAudit::s_tlsdata.s_array_D[j],this->msg());
274  this->msg(MSG::INFO) << endmsg;
275  }
276  }
277  }
278 
279 
// m_nexceptions is counted down here; reaching zero leads to the stderr
// notice below.
// NOTE(review): the rest of this condition and the follow-up action are not
// visible here -- confirm what is done after the message.
281  if ( --m_nexceptions == 0
284  {
285  fprintf(stderr, "too many SIGFPE detected, will be uninstalling signal handler\n");
287  }
288  }
289 }
290 
// add_fpe_node(): open a bookkeeping node for the step about to run.
// The overflow / invalid / divide-by-zero flags raised so far are pushed onto
// the per-thread s_fpe_stack as (flags, 0), and the FP status word is then
// cleared so that only exceptions from the upcoming step are seen afterwards.
291 void
293 {
294  // get current list of FPE flags so far
295  int raised = fetestexcept(FE_OVERFLOW | FE_INVALID | FE_DIVBYZERO);
296  s_fpe_stack.push_back(std::make_pair(raised, 0));
297 
298  // clear FPE status word
299  feclearexcept(FE_ALL_EXCEPT);
300 
// NOTE(review): the condition opening this scope is not visible here;
// per the comment below it tests whether the handler has been disabled.
302  // Make sure exceptions have been masked off if the handler has been
303  // disabled. Yes, this is already done in the signal handler.
304  // But when TBB starts a new task, it resets the FPU control words
305  // to values that were copied when the thread pool was initialized.
306  // FIXME: Do it properly with TBB? Not sure that the current
307  // interfaces allow it.
308  FPEAudit::mask_fpe();
309  }
310 }
311 
// pop_fpe_node(): close the most recent bookkeeping node on the per-thread
// s_fpe_stack. The flags stored in the popped node are OR-ed into the second
// field of the node beneath it, if there is one.
// Throws std::runtime_error (after logging an error) when the stack is empty.
312 void
314 {
315  if (s_fpe_stack.empty()) {
316  ATH_MSG_ERROR("inconsistent fpe-stack !");
317  throw std::runtime_error("inconsistent fpe-stack");
318  }
319 
320  // restore fpe stack info
321  int raised = s_fpe_stack.back().first;
322  s_fpe_stack.pop_back();
323 
324  // consolidate
325  if (!s_fpe_stack.empty()) {
326  s_fpe_stack.back().second |= raised;
327  }
// NOTE(review): the condition guarding this unmask is not visible here -- confirm.
329  FPEAudit::unmask_fpe();
330  }
331 }
plotting.yearwise_luminosity_vs_mu.comp
comp
Definition: yearwise_luminosity_vs_mu.py:24
python.CreateTierZeroArgdict.resolve
def resolve(tag)
utility functions
Definition: CreateTierZeroArgdict.py:73
ATH_MSG_INFO
#define ATH_MSG_INFO(x)
Definition: AthMsgStreamMacros.h:31
FPEAuditor::FPEAUDITOR_INVALID
@ FPEAUDITOR_INVALID
Definition: FPEAuditor.h:109
BeamSpot::mutex
std::mutex mutex
Definition: InDetBeamSpotVertex.cxx:18
FPEAuditor::add_fpe_node
void add_fpe_node()
add an fpe node
Definition: FPEAuditor.cxx:292
FPEAudit
Definition: FPEAuditor.cxx:29
FPEAuditor::before
virtual void before(StandardEventType evt, INamedInterface *comp) override
Definition: FPEAuditor.cxx:163
Ringer::toStr
const char * toStr(SegmentationType e)
Transform enumeration types to string.
FPEAuditor::m_nexceptions
std::atomic< int > m_nexceptions
Definition: FPEAuditor.h:126
get_generator_info.stderr
stderr
Definition: get_generator_info.py:40
LArG4FSStartPointFilter.evt
evt
Definition: LArG4FSStartPointFilter.py:42
FPEAudit::lock_t
std::lock_guard< std::mutex > lock_t
Definition: FPEAuditor.cxx:44
FPEAuditor::~FPEAuditor
virtual ~FPEAuditor()
Destructor.
Definition: FPEAuditor.cxx:84
FPEAuditor::report_fpe
void report_fpe(const std::string &step, const std::string &caller)
report fpes which happened during step 'step' on behalf of 'caller'
Definition: FPEAuditor.cxx:211
FPEAudit::s_handlerDisabled
std::atomic< bool > s_handlerDisabled
Definition: FPEAuditor.cxx:40
LArPulseShapeRunConfig.Execute
Execute
Definition: LArPulseShapeRunConfig.py:62
FPEAuditor.h
ATH_MSG_ERROR
#define ATH_MSG_ERROR(x)
Definition: AthMsgStreamMacros.h:33
FPEAudit::FPEAuditTLSData
Definition: FPEAuditor.cxx:31
lumiFormat.i
int i
Definition: lumiFormat.py:92
FPEAuditor::FPEAUDITOR_DIVBYZERO
@ FPEAUDITOR_DIVBYZERO
Definition: FPEAuditor.h:109
FPEAuditor::pop_fpe_node
void pop_fpe_node()
pop an fpe node
Definition: FPEAuditor.cxx:313
endmsg
#define endmsg
Definition: AnalysisConfig_Ntuple.cxx:63
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
FPEAuditor::FpeStack_t
std::list< FpeNode_t > FpeStack_t
Definition: FPEAuditor.h:104
FPEAuditor::after
virtual void after(StandardEventType evt, INamedInterface *comp, const StatusCode &sc) override
Definition: FPEAuditor.cxx:178
FPEAudit_linux.icc
FPEAudit::s_handlerInstalled
std::atomic< bool > s_handlerInstalled
Definition: FPEAuditor.cxx:39
FPEAuditor::InstallHandler
void InstallHandler()
Definition: FPEAuditor.cxx:117
FPEAuditor::finalize
virtual StatusCode finalize() override
Definition: FPEAuditor.cxx:101
FPEAudit::MAXARRAY
const int MAXARRAY
Definition: FPEAuditor.cxx:30
FPEAudit_dummy.icc
FPEAudit::s_mutex
std::mutex s_mutex
Definition: FPEAuditor.cxx:43
name
std::string name
Definition: Control/AthContainers/Root/debug.cxx:192
FPEAuditor::FPEAuditor
FPEAuditor(const std::string &name, ISvcLocator *pSvcLocator)
Constructor.
Definition: FPEAuditor.cxx:63
FPEAuditor::initialize
virtual StatusCode initialize() override
Gaudi hooks.
Definition: FPEAuditor.cxx:89
jobOptions.Initialize
Initialize
Definition: jobOptions.pA.py:28
FPEAuditor::m_Nstacklines
unsigned int m_Nstacklines
Definition: FPEAuditor.h:115
AthCommonMsg
Definition: AthCommonMsg.h:19
ATH_MSG_WARNING
#define ATH_MSG_WARNING(x)
Definition: AthMsgStreamMacros.h:32
AthCommonMsg< Auditor >::msg
MsgStream & msg() const
Definition: AthCommonMsg.h:24
FPEAudit::FPEAuditTLSData::s_array_I
void * s_array_I[MAXARRAY]
Definition: FPEAuditor.cxx:33
FPEAudit::FPEAuditTLSData::s_array_D
void * s_array_D[MAXARRAY]
Definition: FPEAuditor.cxx:34
LArCellBinning.step
step
Definition: LArCellBinning.py:158
declareProperty
#define declareProperty(n, p, h)
Definition: BaseFakeBkgTool.cxx:15
FPEAuditor::m_CountFPEs
std::atomic< unsigned int > m_CountFPEs[FPEAUDITOR_ARRAYSIZE]
Definition: FPEAuditor.h:111
FPEAuditor::m_env
fenv_t m_env
The FP environment before we initialize.
Definition: FPEAuditor.h:124
FPEAuditor::s_fpe_stack
static thread_local FpeStack_t s_fpe_stack
a stack of FPE exceptions which have been raised
Definition: FPEAuditor.h:107
FPEAudit::s_tlsdata
thread_local FPEAuditTLSData s_tlsdata
Definition: FPEAuditor.cxx:37
FPEAudit::ATLAS_THREAD_SAFE
struct sigaction s_oldactHandler ATLAS_THREAD_SAFE
Definition: FPEAuditor.cxx:41
checker_macros.h
Define macros for attributes used to control the static checker.
FPEAuditor::m_NstacktracesOnFPE
unsigned int m_NstacktracesOnFPE
Definition: FPEAuditor.h:113
FPEAuditor::FPEAUDITOR_ARRAYSIZE
@ FPEAUDITOR_ARRAYSIZE
Definition: FPEAuditor.h:109
FPEAudit::FPEAuditTLSData::s_array_O
void * s_array_O[MAXARRAY]
Definition: FPEAuditor.cxx:32
FPEAuditor::FPEAUDITOR_OVERFLOW
@ FPEAUDITOR_OVERFLOW
Definition: FPEAuditor.h:109
FPEAuditor::UninstallHandler
void UninstallHandler()
Definition: FPEAuditor.cxx:150