ATLAS Offline Software
PerfMonMTSvc.h
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3 */
4 
5 /*
6  * @authors: Alaettin Serhan Mete, Hasan Ozturk - alaettin.serhan.mete@cern.ch, haozturk@cern.ch
7  */
8 
9 #ifndef PERFMONCOMPS_PERFMONMTSVC_H
10 #define PERFMONCOMPS_PERFMONMTSVC_H
11 
12 // Thread-safety-checker
14 
15 // Framework includes
17 #include "GaudiKernel/IIncidentListener.h"
18 
19 // PerfMonKernel includes
21 
22 // PerfMonComps includes
23 #include "LinFitSglPass.h"
24 #include "PerfMonMTUtils.h"
25 
26 // Containers
27 #include <set>
28 #include <map>
29 #include <vector>
30 
31 
32 // Input/Output includes
33 #include <nlohmann/json.hpp>
34 
35 // Other Libraries
36 
37 
38 #include <memory>
39 #include <mutex>
40 
/// Service that audits job steps and components and reports the collected
/// measurements to the log and, when enabled, to a JSON file.
/// It derives from AthService and implements the IPerfMonMTSvc and
/// IIncidentListener interfaces.
/// NOTE(review): this listing omits several declarations that the
/// implementation file refers to (e.g. report2Log_ComponentLevel,
/// report2JsonFile_Summary/_ComponentLevel/_EventLevel, the Snapshots enum,
/// m_measurementSnapshots, m_measurementEvents, m_eventLevelData,
/// m_mutex_capture, m_compLevelDataMap*, m_fit_vmem, m_fit_pss) - consult the
/// full header before editing.
41 class PerfMonMTSvc : public extends<AthService, IPerfMonMTSvc, IIncidentListener> {
42  public:
    /// Standard Gaudi Service constructor.
44  PerfMonMTSvc(const std::string& name, ISvcLocator* pSvcLocator);
45 
46  // Destructor
47  virtual ~PerfMonMTSvc() = default;
48 
    /// Incident service handle for post-finalize.
50  virtual void handle( const Incident& incident ) override;
51 
    /// Standard Gaudi Service initialization.
53  virtual StatusCode initialize() override;
54 
    /// Standard Gaudi Service finalization.
56  virtual StatusCode finalize() override;
57 
    /// Start Auditing.
59  virtual void startAud(const std::string& stepName, const std::string& compName) override;
60 
    /// Stop Auditing.
62  virtual void stopAud(const std::string& stepName, const std::string& compName) override;
63 
    /// Snapshot Auditing: Take snapshots at the beginning and at the end of each step.
65  void startSnapshotAud(const std::string& stepName, const std::string& compName);
66  void stopSnapshotAud(const std::string& stepName, const std::string& compName);
67 
    /// Component Level Auditing: Take measurements at the beginning and at the end of each component call.
    /// These overloads additionally take the EventContext of the call.
69  void startCompAud(const std::string& stepName, const std::string& compName, const EventContext& ctx);
70  void stopCompAud(const std::string& stepName, const std::string& compName, const EventContext& ctx);
71 
    /// Report the results.
73  void report();
74 
    /// Report to log.
    /// The report2Log_* methods below each cover one section of the log report
    /// (presumably invoked from report2Log - confirm in PerfMonMTSvc.cxx).
76  void report2Log();
77  void report2Log_Description() const;
79  void report2Log_EventLevel_instant() const;
80  void report2Log_EventLevel();
81  void report2Log_Summary(); // TODO: make it const
82  void report2Log_CpuInfo() const;
83  void report2Log_EnvInfo() const;
84 
    /// Report to the JSON File.
86  void report2JsonFile();
90 
    /// A few helper functions.
92  void aggregateSlotData();
93  void divideData2Steps();
94 
    // Format a time / memory measurement as a string.
    // NOTE(review): input units and the scaling rules are not visible in this
    // header - see the definitions in PerfMonMTSvc.cxx.
95  std::string scaleTime(double timeMeas) const;
96  std::string scaleMem(int64_t memMeas) const;
97 
    /// A few helper methods to get system information.
    /// These should be carried to PerfMonMTUtils at some point.
100  std::string get_info_from_file(const std::string& fileName, const std::string& fieldName) const;
101  std::string get_cpu_model_info() const;
102  int get_cpu_core_info() const;
103  uint64_t get_memory_info() const;
104 
    // Build the (step, component) key type used by the component-level data maps.
105  PMonMT::StepComp generate_state(const std::string& stepName, const std::string& compName) const;
106 
107  private:
110 
113 
    /// Do event loop monitoring.
115  Gaudi::Property<bool> m_doEventLoopMonitoring{
116  this, "doEventLoopMonitoring", true,
117  "True if event loop monitoring is enabled, false o/w. Event loop monitoring may cause a decrease in the "
118  "performance due to the usage of locks."};
    /// Do component level monitoring.
120  Gaudi::Property<bool> m_doComponentLevelMonitoring{
121  this, "doComponentLevelMonitoring", false,
122  "True if component level monitoring is enabled, false o/w. Component monitoring may cause a decrease in the "
123  "performance due to the usage of locks."};
    /// Report results to JSON.
125  Gaudi::Property<bool> m_reportResultsToJSON{this, "reportResultsToJSON", true, "Report results into the json file."};
    /// Name of the JSON file.
127  Gaudi::Property<std::string> m_jsonFileName{this, "jsonFileName", "PerfMonMTSvc_result.json",
128  "Name of the JSON file that contains the results."};
    /// Print detailed tables.
130  Gaudi::Property<bool> m_printDetailedTables{this, "printDetailedTables", true,
131  "Print detailed component-level metrics."};
    /// Lower limit (in number of events) for the memory fit.
133  Gaudi::Property<uint64_t> m_memFitLowerLimit{
134  this, "memFitLowerLimit", 25,
135  "Lower limit (in number of events) for the memory fit."};
    /// Frequency of event level monitoring.
137  Gaudi::Property<uint64_t> m_checkPointThreshold{
138  this, "checkPointThreshold", 30,
139  "Least amount of time (in seconds) between event-level checks."};
    /// Offset for the wall-time, comes from configuration.
    // NOTE(review): the description string below misspells "milliseconds"; it is
    // runtime text, so it is left untouched here.
141  Gaudi::Property<double> m_wallTimeOffset{this, "wallTimeOffset", 0, "Job start wall time in miliseconds."};
    /// Print the top N components.
143  Gaudi::Property<int> m_printNComps{
144  this, "printNComps", 50, "Maximum number of components to be printed."};
    /// Get the number of threads.
146  Gaudi::Property<int> m_numberOfThreads{this, "numberOfThreads", 1, "Number of threads in the job."};
    /// Get the number of slots.
148  Gaudi::Property<int> m_numberOfSlots{this, "numberOfSlots", 1, "Number of slots in the job."};
    /// Set the number of messages for the event-level report.
150  Gaudi::Property<uint64_t> m_eventLoopMsgLimit{this, "eventLoopMsgLimit", 10, "Maximum number of event-level messages."};
151 
    /// Exclude some common components from monitoring.
    /// The set holds the names of sequences, event-loop managers and this service itself.
155  const std::set<std::string> m_exclusionSet = {"AthMasterSeq", "AthAlgEvtSeq", "AthAllAlgSeq", "AthAlgSeq", "AthOutSeq",
156  "AthCondSeq", "AthBeginSeq", "AthEndSeq", "AthenaEventLoopMgr", "AthenaHiveEventLoopMgr", "AthMpEvtLoopMgr", "AthenaMtesEventLoopMgr",
157  "PerfMonMTSvc"};
158 
    /// Snapshots data.
    // m_snapshotStepNames lists the step names in order; presumably it parallels the
    // Snapshots enum (CONFIGURE ... FINALIZE), which is not shown in this listing - TODO confirm.
161  std::vector<PMonMT::SnapshotData> m_snapshotData;
162  std::vector<std::string> m_snapshotStepNames = {"Configure", "Initialize", "FirstEvent", "Execute", "Finalize"};
164 
165  // Store event level measurements
167 
168  // Lock for capturing event loop measurements
170 
    // NOTE(review): std::atomic is used below but <atomic> is not among the includes
    // visible in this listing; it presumably arrives transitively - confirm.
171  // Are we processing the first event?
172  std::atomic<bool> m_isFirstEvent;
173 
174  // Count the number of events processed
175  std::atomic<uint64_t> m_eventCounter;
176 
177  // Instant event-loop report counter
178  std::atomic<uint64_t> m_eventLoopMsgCounter;
179 
180  // The last event-level measurement time in seconds
181  std::atomic<double> m_checkPointTime;
182 
183  // This flag protects against double stopping the event loop measurement
184  std::atomic<bool> m_isEvtLoopStopped;
185 
186  /*
187  * Data structure to store component level measurements
188  */
    // data_map_t maps a (step, component) key to a raw ComponentData pointer
    // (presumably non-owning - verify), while data_map_unique_t holds the
    // ComponentData through std::unique_ptr and therefore owns it.
189  typedef std::map<PMonMT::StepComp, PMonMT::ComponentData*> data_map_t;
190  typedef std::map<PMonMT::StepComp, std::unique_ptr<PMonMT::ComponentData>> data_map_unique_t;
191  // Here I'd prefer to use SG::SlotSpecificObj<data_map_t>
192  // However, w/ invalid context it seems to segfault
193  // Can investigate in the future, for now std::vector should be OK
195 
196  // m_compLevelDataMap is divided into following maps and these are stored in the m_stdoutVec_serial.
197  // There should be a more clever way!
    // Vector of owning maps; given the SlotSpecificObj remark above this is
    // presumably one map per slot - TODO confirm.
198  std::vector<data_map_unique_t> m_compLevelDataMapVec; // all
205 
206  std::vector<data_map_t> m_stdoutVec_serial;
207 
208  // Leak estimates
211 
212  // Estimate CPU efficiency
213  int getCpuEfficiency() const;
214 
215 }; // class PerfMonMTSvc
216 
217 #endif // PERFMONCOMPS_PERFMONMTSVC_H
PerfMonMTSvc::m_doComponentLevelMonitoring
Gaudi::Property< bool > m_doComponentLevelMonitoring
Do component level monitoring.
Definition: PerfMonMTSvc.h:120
PerfMonMTSvc::report2JsonFile
void report2JsonFile()
Report to the JSON File.
Definition: PerfMonMTSvc.cxx:616
PerfMonMTSvc::m_isFirstEvent
std::atomic< bool > m_isFirstEvent
Definition: PerfMonMTSvc.h:172
PerfMonMTSvc::stopSnapshotAud
void stopSnapshotAud(const std::string &stepName, const std::string &compName)
Definition: PerfMonMTSvc.cxx:235
LinFitSglPass.h
PerfMonMTSvc::report2Log_Description
void report2Log_Description() const
Definition: PerfMonMTSvc.cxx:420
PerfMonMTSvc::divideData2Steps
void divideData2Steps()
Definition: PerfMonMTSvc.cxx:818
PerfMonMTSvc::getCpuEfficiency
int getCpuEfficiency() const
Definition: PerfMonMTSvc.cxx:355
AddEmptyComponent.compName
compName
Definition: AddEmptyComponent.py:32
PerfMon::LinFitSglPass
Definition: LinFitSglPass.h:22
PerfMonMTSvc::report2Log_ComponentLevel
void report2Log_ComponentLevel()
Definition: PerfMonMTSvc.cxx:437
PerfMonMTSvc::Snapshots
Snapshots
Definition: PerfMonMTSvc.h:163
PerfMonMTSvc::m_compLevelDataMap_fin
data_map_t m_compLevelDataMap_fin
Definition: PerfMonMTSvc.h:202
PerfMonMTSvc::m_numberOfThreads
Gaudi::Property< int > m_numberOfThreads
Get the number of threads.
Definition: PerfMonMTSvc.h:146
PerfMonMTSvc::m_measurementSnapshots
PMonMT::SnapshotMeasurement m_measurementSnapshots
Measurement to capture snapshots.
Definition: PerfMonMTSvc.h:109
PerfMonMTSvc::m_compLevelDataMap
data_map_t m_compLevelDataMap
Definition: PerfMonMTSvc.h:194
PerfMonMTSvc::m_motherPID
int m_motherPID
Snapshots data.
Definition: PerfMonMTSvc.h:160
PerfMonMTSvc::report2Log_CpuInfo
void report2Log_CpuInfo() const
Definition: PerfMonMTSvc.cxx:587
PerfMonMTSvc::report2Log_EnvInfo
void report2Log_EnvInfo() const
Definition: PerfMonMTSvc.cxx:601
PMonMT::EventLevelData
Definition: PerfMonMTUtils.h:196
json
nlohmann::json json
Definition: HistogramDef.cxx:9
PerfMonMTSvc::initialize
virtual StatusCode initialize() override
Standard Gaudi Service initialization.
Definition: PerfMonMTSvc.cxx:49
PerfMonMTSvc::m_eventCounter
std::atomic< uint64_t > m_eventCounter
Definition: PerfMonMTSvc.h:175
PerfMonMTSvc::get_cpu_core_info
int get_cpu_core_info() const
Definition: PerfMonMTSvc.cxx:928
BeamSpot::mutex
std::mutex mutex
Definition: InDetBeamSpotVertex.cxx:18
PerfMonMTSvc::~PerfMonMTSvc
virtual ~PerfMonMTSvc()=default
PerfMonMTSvc::m_checkPointTime
std::atomic< double > m_checkPointTime
Definition: PerfMonMTSvc.h:181
PerfMonMTSvc::m_wallTimeOffset
Gaudi::Property< double > m_wallTimeOffset
Offset for the wall-time, comes from configuration.
Definition: PerfMonMTSvc.h:141
PerfMonMTSvc::report2JsonFile_ComponentLevel
void report2JsonFile_ComponentLevel(nlohmann::json &j) const
Definition: PerfMonMTSvc.cxx:724
PMonMT::SnapshotMeasurement
Definition: PerfMonMTUtils.h:170
PerfMonMTSvc::m_fit_vmem
PerfMon::LinFitSglPass m_fit_vmem
Definition: PerfMonMTSvc.h:209
PerfMonMTSvc::handle
virtual void handle(const Incident &incident) override
Incident service handle for post-finalize.
Definition: PerfMonMTSvc.cxx:107
PMonMT::StepComp
Definition: PerfMonMTUtils.h:63
PerfMonMTSvc::get_cpu_model_info
std::string get_cpu_model_info() const
Definition: PerfMonMTSvc.cxx:923
PerfMonMTSvc::stopCompAud
void stopCompAud(const std::string &stepName, const std::string &compName, const EventContext &ctx)
Definition: PerfMonMTSvc.cxx:297
PerfMonMTSvc::CONFIGURE
@ CONFIGURE
Definition: PerfMonMTSvc.h:163
PerfMonMTSvc::m_measurementEvents
PMonMT::SnapshotMeasurement m_measurementEvents
Measurement to capture events.
Definition: PerfMonMTSvc.h:112
PerfMonMTSvc::m_eventLoopMsgLimit
Gaudi::Property< uint64_t > m_eventLoopMsgLimit
Set the number of messages for the event-level report.
Definition: PerfMonMTSvc.h:150
PerfMonMTSvc::FINALIZE
@ FINALIZE
Definition: PerfMonMTSvc.h:163
PerfMonMTSvc::report2Log_EventLevel
void report2Log_EventLevel()
Definition: PerfMonMTSvc.cxx:500
PerfMonMTSvc::startSnapshotAud
void startSnapshotAud(const std::string &stepName, const std::string &compName)
Snapshot Auditing: Take snapshots at the beginning and at the end of each step.
Definition: PerfMonMTSvc.cxx:217
PerfMonMTSvc::m_isEvtLoopStopped
std::atomic< bool > m_isEvtLoopStopped
Definition: PerfMonMTSvc.h:184
PerfMonMTSvc::report2Log_Summary
void report2Log_Summary()
Definition: PerfMonMTSvc.cxx:538
PerfMonMTSvc::m_compLevelDataMap_ini
data_map_t m_compLevelDataMap_ini
Definition: PerfMonMTSvc.h:199
IPerfMonMTSvc.h
PerfMonMTSvc::m_compLevelDataMap_evt
data_map_t m_compLevelDataMap_evt
Definition: PerfMonMTSvc.h:201
PerfMonMTSvc::m_snapshotData
std::vector< PMonMT::SnapshotData > m_snapshotData
Definition: PerfMonMTSvc.h:161
PerfMonMTSvc::INITIALIZE
@ INITIALIZE
Definition: PerfMonMTSvc.h:163
PerfMonMTSvc::report2Log_EventLevel_instant
void report2Log_EventLevel_instant() const
Definition: PerfMonMTSvc.cxx:483
FortranAlgorithmOptions.fileName
fileName
Definition: FortranAlgorithmOptions.py:13
PerfMonMTSvc::startAud
virtual void startAud(const std::string &stepName, const std::string &compName) override
Start Auditing.
Definition: PerfMonMTSvc.cxx:183
PerfMonMTSvc::m_fit_pss
PerfMon::LinFitSglPass m_fit_pss
Definition: PerfMonMTSvc.h:210
PerfMonMTSvc::report2Log
void report2Log()
Report to log.
Definition: PerfMonMTSvc.cxx:397
PerfMonMTSvc::m_reportResultsToJSON
Gaudi::Property< bool > m_reportResultsToJSON
Report results to JSON.
Definition: PerfMonMTSvc.h:125
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
PerfMonMTUtils.h
PerfMonMTSvc::m_jsonFileName
Gaudi::Property< std::string > m_jsonFileName
Name of the JSON file.
Definition: PerfMonMTSvc.h:127
taskman.fieldName
fieldName
Definition: taskman.py:492
PerfMonMTSvc::finalize
virtual StatusCode finalize() override
Standard Gaudi Service finalization.
Definition: PerfMonMTSvc.cxx:97
PerfMonMTSvc::report2JsonFile_EventLevel
void report2JsonFile_EventLevel(nlohmann::json &j) const
Definition: PerfMonMTSvc.cxx:749
PerfMonMTSvc::m_eventLoopMsgCounter
std::atomic< uint64_t > m_eventLoopMsgCounter
Definition: PerfMonMTSvc.h:178
PerfMonMTSvc
Definition: PerfMonMTSvc.h:41
xAOD::uint64_t
uint64_t
Definition: EventInfo_v1.cxx:123
PerfMonMTSvc::scaleMem
std::string scaleMem(int64_t memMeas) const
Definition: PerfMonMTSvc.cxx:865
PerfMonMTSvc::scaleTime
std::string scaleTime(double timeMeas) const
Definition: PerfMonMTSvc.cxx:841
PerfMonMTSvc::get_memory_info
uint64_t get_memory_info() const
Definition: PerfMonMTSvc.cxx:934
PerfMonMTSvc::NSNAPSHOTS
@ NSNAPSHOTS
Definition: PerfMonMTSvc.h:163
PerfMonMTSvc::stopAud
virtual void stopAud(const std::string &stepName, const std::string &compName) override
Stop Auditing.
Definition: PerfMonMTSvc.cxx:202
PerfMonMTSvc::report2JsonFile_Summary
void report2JsonFile_Summary(nlohmann::json &j) const
Definition: PerfMonMTSvc.cxx:654
PerfMonMTSvc::m_snapshotStepNames
std::vector< std::string > m_snapshotStepNames
Definition: PerfMonMTSvc.h:162
PerfMonMTSvc::m_exclusionSet
const std::set< std::string > m_exclusionSet
Exclude some common components from monitoring. In the future this might be converted to an inclusion s...
Definition: PerfMonMTSvc.h:155
name
std::string name
Definition: Control/AthContainers/Root/debug.cxx:228
PerfMonMTSvc::m_mutex_capture
std::mutex m_mutex_capture
Definition: PerfMonMTSvc.h:169
PerfMonMTSvc::m_numberOfSlots
Gaudi::Property< int > m_numberOfSlots
Get the number of slots.
Definition: PerfMonMTSvc.h:148
PerfMonMTSvc::m_printNComps
Gaudi::Property< int > m_printNComps
Print the top N components.
Definition: PerfMonMTSvc.h:143
PerfMonMTSvc::m_printDetailedTables
Gaudi::Property< bool > m_printDetailedTables
Print detailed tables.
Definition: PerfMonMTSvc.h:130
PerfMonMTSvc::aggregateSlotData
void aggregateSlotData()
A few helper functions.
Definition: PerfMonMTSvc.cxx:785
PerfMonMTSvc::m_stdoutVec_serial
std::vector< data_map_t > m_stdoutVec_serial
Definition: PerfMonMTSvc.h:206
PerfMonMTSvc::m_eventLevelData
PMonMT::EventLevelData m_eventLevelData
Definition: PerfMonMTSvc.h:166
PerfMonMTSvc::PerfMonMTSvc
PerfMonMTSvc(const std::string &name, ISvcLocator *pSvcLocator)
Standard Gaudi Service constructor.
Definition: PerfMonMTSvc.cxx:34
PerfMonMTSvc::m_doEventLoopMonitoring
Gaudi::Property< bool > m_doEventLoopMonitoring
Do event loop monitoring.
Definition: PerfMonMTSvc.h:115
PerfMonMTSvc::EXECUTE
@ EXECUTE
Definition: PerfMonMTSvc.h:163
PerfMonMTSvc::m_checkPointThreshold
Gaudi::Property< uint64_t > m_checkPointThreshold
Frequency of event level monitoring.
Definition: PerfMonMTSvc.h:137
PerfMonMTSvc::m_compLevelDataMap_1stevt
data_map_t m_compLevelDataMap_1stevt
Definition: PerfMonMTSvc.h:200
PerfMonMTSvc::m_memFitLowerLimit
Gaudi::Property< uint64_t > m_memFitLowerLimit
Lower limit (in number of events) for the memory fit.
Definition: PerfMonMTSvc.h:133
AthService.h
PerfMonMTSvc::FIRSTEVENT
@ FIRSTEVENT
Definition: PerfMonMTSvc.h:163
checker_macros.h
Define macros for attributes used to control the static checker.
PerfMonMTSvc::get_info_from_file
std::string get_info_from_file(const std::string &fileName, const std::string &fieldName) const
A few helper methods to get system information. These should be carried to PerfMonMTUtils at some poin...
Definition: PerfMonMTSvc.cxx:895
PerfMonMTSvc::m_compLevelDataMapVec
std::vector< data_map_unique_t > m_compLevelDataMapVec
Definition: PerfMonMTSvc.h:198
PerfMonMTSvc::m_compLevelDataMap_cbk
data_map_t m_compLevelDataMap_cbk
Definition: PerfMonMTSvc.h:204
PerfMonMTSvc::data_map_unique_t
std::map< PMonMT::StepComp, std::unique_ptr< PMonMT::ComponentData > > data_map_unique_t
Definition: PerfMonMTSvc.h:190
PerfMonMTSvc::data_map_t
std::map< PMonMT::StepComp, PMonMT::ComponentData * > data_map_t
Definition: PerfMonMTSvc.h:189
PerfMonMTSvc::generate_state
PMonMT::StepComp generate_state(const std::string &stepName, const std::string &compName) const
Definition: PerfMonMTSvc.cxx:775
PerfMonMTSvc::report
void report()
Report the results.
Definition: PerfMonMTSvc.cxx:384
PerfMonMTSvc::startCompAud
void startCompAud(const std::string &stepName, const std::string &compName, const EventContext &ctx)
Component Level Auditing: Take measurements at the beginning and at the end of each component call.
Definition: PerfMonMTSvc.cxx:254
PerfMonMTSvc::m_compLevelDataMap_plp
data_map_t m_compLevelDataMap_plp
Definition: PerfMonMTSvc.h:203