ATLAS Offline Software
CoreDumpSvc.h
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3 */
4 
5 #ifndef ATHENASERVICES_COREDUMPSVC_H
6 #define ATHENASERVICES_COREDUMPSVC_H 1
7 
8 // System includes
9 #include <signal.h>
10 #include <string>
11 #include <atomic>
12 #include <tbb/concurrent_unordered_map.h>
13 
14 // Package includes
16 
17 // FrameWork includes
20 #include "GaudiKernel/IIncidentListener.h"
21 #include "EventInfo/EventID.h"
22 
23 
24 namespace CoreDumpSvcHandler {  // Signal handler for CoreDumpSvc.
25  void action ATLAS_NOT_THREAD_SAFE ( int sig, siginfo_t *info, void* extra );  ///< Handler entry point (declaration only); receives the signal number, its siginfo_t and an opaque extra pointer. Marked ATLAS_NOT_THREAD_SAFE.
26 }
27 
41 class CoreDumpSvc : public extends<AthService,  // Service to print additional information before a crash.
42  ICoreDumpSvc,
43  IIncidentListener> {
44 
45 protected:
46  friend void CoreDumpSvcHandler::action( int sig, siginfo_t *info, void* extra );  // The signal handler is a friend, so it can reach the non-public members below.
47 
50 
51 public:
52 
54  CoreDumpSvc( const std::string& name, ISvcLocator* pSvcLocator ) ATLAS_CTORDTOR_NOT_THREAD_SAFE;  ///< Constructor taking the service name and the service locator.
55 
58 
60 
61  virtual void setCoreDumpInfo( const std::string& name, const std::string& value ) override;  ///< Set a name/value pair in the core dump record.
63 
65  virtual void setCoreDumpInfo( const EventContext& ctx, const std::string& name, const std::string& value ) override;  ///< Overload that additionally takes an EventContext. NOTE(review): presumably selects the record of that context's slot - confirm in CoreDumpSvc.cxx.
66 
68  virtual std::string dump() const override;  ///< Print all core dump records; returns a std::string.
70 
71 
73 
75  virtual StatusCode start() override;  ///< Override of start(); defined in CoreDumpSvc.cxx.
77 
79  virtual void handle( const Incident& incident ) override;  ///< Incident listener.
81 
82 
83 private:
84  struct sysDumpRec{  // Element type of m_sysCoreDumps: two strings per record.
85  std::string LastInc;  ///< NOTE(review): presumably the last incident seen - confirm.
86  std::string EvId;  ///< NOTE(review): presumably the event identifier - confirm.
87  };
88  typedef tbb::concurrent_unordered_map<std::string,std::string > UserCore_t;  ///< Concurrent string-to-string map; element type of m_usrCoreDumps.
89  std::vector<UserCore_t> m_usrCoreDumps;  ///< User defined core dump info.
90  std::vector<sysDumpRec> m_sysCoreDumps;  ///< Core dump info collected by this service.
91  siginfo_t* m_siginfo{nullptr};  ///< Pointer to siginfo_t struct (set by signal handler).
92  std::atomic<EventID::event_number_t> m_eventCounter{0};  ///< Event counter.
93 
94  thread_local static std::vector<uint8_t> s_stack;  ///< Per-thread byte buffer. NOTE(review): presumably the alternate signal stack installed by setAltStack() - confirm.
95 
97 
98  Gaudi::Property<std::vector<int>> m_signals{this, "Signals", {SIGSEGV,SIGBUS,SIGILL,SIGFPE,SIGALRM},
99  "List of signals to catch"};  ///< Defaults to SIGSEGV, SIGBUS, SIGILL, SIGFPE and SIGALRM.
100 
101  Gaudi::Property<bool> m_callOldHandler{this, "CallOldHandler", true,
102  "Call previous signal handler"};
103 
104  Gaudi::Property<bool> m_dumpCoreFile{this, "DumpCoreFile", false,
105  "Produce a core dump file if resource limits (ulimit -c) allow"};
106 
107  Gaudi::Property<bool> m_stackTrace{this, "StackTrace", false,
108  "Produce (gdb) stack trace on crash. Useful if no other signal handler is used"};
109 
110  Gaudi::Property<bool> m_fastStackTrace{this, "FastStackTrace", false,
111  "Produce fast stack trace of current thread"};
112 
113  Gaudi::Property<std::string> m_coreDumpStream{this, "CoreDumpStream", "stdout",
114  "Stream to use for core dump [stdout,stderr]"};
115 
116  Gaudi::Property<int> m_fatalHandlerFlags{this, "FatalHandler", 0,
117  "Flags given to the fatal handler this service installs\n"
118  "if the flag is zero, no additional fatal handler is installed."};
119 
120  Gaudi::Property<double> m_timeout{this, "TimeOut", 30.0*60*1e9,
121  "Terminate job after it this reaches the time out in Wallclock time, "
122  "usually due to hanging during stack unwinding. Timeout given in nanoseconds despite seconds precision"};  ///< Default is 30 minutes, expressed in nanoseconds.
123 
124  Gaudi::Property<bool> m_killOnSigInt{this, "KillOnSigInt",true, "Terminate job on SIGINT (aka Ctrl-C)"};
125 
126 
128 
130  void propertyHandler ATLAS_NOT_THREAD_SAFE (Gaudi::Details::PropertyBase& p);  ///< Property handler.
131 
134 
137 
139  StatusCode installSignalHandler ATLAS_NOT_THREAD_SAFE ();  ///< Install signal handlers.
140 
142  StatusCode uninstallSignalHandler ATLAS_NOT_THREAD_SAFE ();  ///< Uninstall signal handlers.
143 
145  void setAltStack();  ///< Set up an alternate stack for the current thread.
146 };
147 
148 
149 #endif
grepfile.info
info
Definition: grepfile.py:38
python.tests.PyTestsLib.finalize
def finalize(self)
_info( "content of StoreGate..." ) self.sg.dump()
Definition: PyTestsLib.py:50
CoreDumpSvc::sysDumpRec::EvId
std::string EvId
Definition: CoreDumpSvc.h:86
CoreDumpSvc::sysDumpRec::LastInc
std::string LastInc
Definition: CoreDumpSvc.h:85
CoreDumpSvc::start
virtual StatusCode start() override
Definition: CoreDumpSvc.cxx:337
siginfo_t
Definition: SealSignal.h:77
CoreDumpSvc::m_fastStackTrace
Gaudi::Property< bool > m_fastStackTrace
Definition: CoreDumpSvc.h:110
CoreDumpSvc::ATLAS_NOT_THREAD_SAFE
StatusCode installSignalHandler ATLAS_NOT_THREAD_SAFE()
Install signal handlers.
CoreDumpSvc::m_sysCoreDumps
std::vector< sysDumpRec > m_sysCoreDumps
Core dump info collected by this service
Definition: CoreDumpSvc.h:90
initialize
void initialize()
Definition: run_EoverP.cxx:894
CoreDumpSvc::m_callOldHandler
Gaudi::Property< bool > m_callOldHandler
Definition: CoreDumpSvc.h:101
CoreDumpSvc::~CoreDumpSvc
virtual ~CoreDumpSvc() ATLAS_CTORDTOR_NOT_THREAD_SAFE
Destructor.
Definition: CoreDumpSvc.cxx:254
CoreDumpSvc::m_coreDumpStream
Gaudi::Property< std::string > m_coreDumpStream
Definition: CoreDumpSvc.h:113
athena.value
value
Definition: athena.py:124
CoreDumpSvc::ATLAS_NOT_THREAD_SAFE
virtual StatusCode initialize ATLAS_NOT_THREAD_SAFE() override
CoreDumpSvc::setAltStack
void setAltStack()
Set up an alternate stack for the current thread.
Definition: CoreDumpSvc.cxx:647
const
bool const RAWDATA *ch2 const
Definition: LArRodBlockPhysicsV0.cxx:560
CoreDumpSvc::m_eventCounter
std::atomic< EventID::event_number_t > m_eventCounter
Event counter.
Definition: CoreDumpSvc.h:92
CoreDumpSvc::UserCore_t
tbb::concurrent_unordered_map< std::string, std::string > UserCore_t
Definition: CoreDumpSvc.h:88
CoreDumpSvc::ATLAS_NOT_THREAD_SAFE
void propertyHandler ATLAS_NOT_THREAD_SAFE(Gaudi::Details::PropertyBase &p)
Property handler.
CoreDumpSvc::ATLAS_NOT_THREAD_SAFE
void print ATLAS_NOT_THREAD_SAFE()
Print core dump records to configured stream.
CoreDumpSvc::setCoreDumpInfo
virtual void setCoreDumpInfo(const std::string &name, const std::string &value) override
Set a name/value pair in the core dump record.
Definition: CoreDumpSvc.cxx:364
CoreDumpSvc
Service to print additional information before a crash.
Definition: CoreDumpSvc.h:43
CoreDumpSvc::sysDumpRec
Definition: CoreDumpSvc.h:84
EventID.h
This class provides a unique identification for each event, in terms of run/event number and/or a tim...
python.utils.AtlRunQueryDQUtils.p
p
Definition: AtlRunQueryDQUtils.py:210
CoreDumpSvc::m_timeout
Gaudi::Property< double > m_timeout
Definition: CoreDumpSvc.h:120
CoreDumpSvc::m_siginfo
siginfo_t * m_siginfo
Pointer to siginfo_t struct (set by signal handler)
Definition: CoreDumpSvc.h:91
CoreDumpSvcHandler::ATLAS_NOT_THREAD_SAFE
std::ostream &log ATLAS_NOT_THREAD_SAFE()
convenience method for logging
Definition: CoreDumpSvc.cxx:89
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
python.BuildSignatureFlags.sig
sig
Definition: BuildSignatureFlags.py:218
CoreDumpSvc::CoreDumpSvc
CoreDumpSvc()
Default constructor (do not use)
CoreDumpSvc::dump
virtual std::string dump() const override
Print all core dump records.
Definition: CoreDumpSvc.cxx:392
CoreDumpSvc::handle
virtual void handle(const Incident &incident) override
Incident listener.
Definition: CoreDumpSvc.cxx:552
CoreDumpSvc::setSigInfo
void setSigInfo(siginfo_t *info)
Set pointer to siginfo_t struct.
Definition: CoreDumpSvc.h:136
CoreDumpSvcHandler
Signal handler for CoreDumpSvc.
Definition: CoreDumpSvc.cxx:78
print
void print(char *figname, TCanvas *c1)
Definition: TRTCalib_StrawStatusPlots.cxx:25
python.handimod.extra
int extra
Definition: handimod.py:522
CoreDumpSvc::m_signals
Gaudi::Property< std::vector< int > > m_signals
List of signals to catch. (The brief "Alternate stack for signal handler." shown here belongs to s_stack.)
Definition: CoreDumpSvc.h:98
CoreDumpSvc::m_dumpCoreFile
Gaudi::Property< bool > m_dumpCoreFile
Definition: CoreDumpSvc.h:104
private
#define private
Definition: DetDescrConditionsDict_dict_fixes.cxx:13
name
std::string name
Definition: Control/AthContainers/Root/debug.cxx:228
CoreDumpSvc::m_killOnSigInt
Gaudi::Property< bool > m_killOnSigInt
Definition: CoreDumpSvc.h:124
CoreDumpSvc::m_stackTrace
Gaudi::Property< bool > m_stackTrace
Definition: CoreDumpSvc.h:107
python.CaloScaleNoiseConfig.action
action
Definition: CaloScaleNoiseConfig.py:77
CoreDumpSvc::m_usrCoreDumps
std::vector< UserCore_t > m_usrCoreDumps
User defined core dump info.
Definition: CoreDumpSvc.h:89
AthService.h
ATLAS_CTORDTOR_NOT_THREAD_SAFE
#define ATLAS_CTORDTOR_NOT_THREAD_SAFE
Definition: checker_macros.h:213
ICoreDumpSvc.h
Interface of a core dump service.
CoreDumpSvc::s_stack
static thread_local std::vector< uint8_t > s_stack
Definition: CoreDumpSvc.h:94
checker_macros.h
Define macros for attributes used to control the static checker.
CoreDumpSvc::m_fatalHandlerFlags
Gaudi::Property< int > m_fatalHandlerFlags
Definition: CoreDumpSvc.h:116
CoreDumpSvc::ATLAS_NOT_THREAD_SAFE
StatusCode uninstallSignalHandler ATLAS_NOT_THREAD_SAFE()
Uninstall signal handlers.