16base_class(name, pSvcLocator),
45 ATH_MSG_WARNING(
"numConcurrentEvents() == 0. This is a misconfiguration, probably coming from running from pickle. "
46 "Setting local m_eventSlots to a 'large' number until this is fixed to allow the job to proceed.");
61 return StatusCode::SUCCESS;
70 ATH_MSG_INFO(
"Calling hashes2file, saving dump of job's HLT hashing dictionary to disk.");
72 return StatusCode::SUCCESS;
85 std::unique_lock lockUnique(
m_slotMutex[ context.slot() ] );
101 return StatusCode::SUCCESS;
114 std::shared_lock lockShared(
m_slotMutex[ context.slot() ] );
122 << (
type == AuditType::Before ?
"BEGAN" :
"ENDED") <<
" at " << now.microsecondsSinceEpoch());
136 return StatusCode::SUCCESS;
143 if (
type == AuditType::Before) {
147 std::this_thread::get_id(),
149 static_cast<uint32_t
>(context.slot())
155 tbb::concurrent_hash_map<std::thread::id, AlgorithmIdentifier, ThreadHashCompare>::accessor acc;
160 }
else if (
type == AuditType::After) {
166 ATH_MSG_ERROR(
"Only expecting AuditType::Before or AuditType::After");
167 return StatusCode::FAILURE;
171 return StatusCode::SUCCESS;
184 tbb::concurrent_hash_map<std::thread::id, AlgorithmIdentifier, ThreadHashCompare>::const_accessor acc;
188 ATH_MSG_WARNING(
"Cannot find algorithm on this thread (id=" << std::this_thread::get_id() <<
"). Request "<< payload <<
" won't be monitored");
189 return StatusCode::SUCCESS;
192 theAlg = acc->second;
198 std::shared_lock lockShared(
m_slotMutex[ context.slot() ] );
202 return StatusCode::SUCCESS;
213 return StatusCode::SUCCESS;
228 std::unique_lock lockUnique(
m_slotMutex[ context.slot() ] );
233 uint64_t eventStopTime = 0;
237 tbb::concurrent_hash_map<AlgorithmIdentifier, TrigTimeStamp, AlgorithmIdentifierHashCompare>::const_accessor stopTimeAcessor;
242 eventStopTime = stopTimeAcessor->second.microsecondsSinceEpoch();
247 uint64_t eventStartTime = 0;
251 tbb::concurrent_hash_map<AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_accessor startAcessor;
256 eventStartTime = startAcessor->second.m_algStartTime.microsecondsSinceEpoch();
261 tbb::concurrent_hash_map< AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_iterator beginIt;
262 tbb::concurrent_hash_map< AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_iterator endIt;
263 tbb::concurrent_hash_map< AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_iterator it;
266 ATH_MSG_DEBUG(
"Monitored event with " << std::distance(beginIt, endIt) <<
" AlgorithmPayload objects.");
268 std::map<size_t, size_t> aiToHandleIndex;
269 for (it = beginIt; it != endIt; ++it) {
272 uint64_t startTime = ap.m_algStartTime.microsecondsSinceEpoch();
275 uint64_t stopTime = eventStopTime;
277 tbb::concurrent_hash_map<AlgorithmIdentifier, TrigTimeStamp, AlgorithmIdentifierHashCompare>::const_accessor stopTimeAcessor;
281 stopTime = stopTimeAcessor->second.microsecondsSinceEpoch();
289 if (startTime > stopTime) {
290 ATH_MSG_VERBOSE(
"Disregard start-time:" << startTime <<
" > stop-time:" << stopTime
300 if (startTime > eventStopTime) {
304 if (stopTime > eventStopTime) {
306 <<
" truncating its ending time stamp from " << stopTime <<
" to " << eventStopTime);
307 stopTime = eventStopTime;
313 if (stopTime < eventStartTime) {
317 if (startTime < eventStartTime) {
319 <<
" truncating its starting time stamp from " << startTime <<
" to " << eventStartTime);
320 startTime = eventStartTime;
325 costOutputHandle->push_back(
tc );
328 const uint32_t threadID =
static_cast<uint32_t
>( std::hash< std::thread::id >()(ap.m_algThreadID) );
329 uint32_t threadEnumerator = 0;
333 const std::unordered_map<uint32_t, uint32_t>::const_iterator mapIt =
m_threadToCounterMap.find(threadID);
338 threadEnumerator = mapIt->second;
346 result &=
tc->setDetail(
"thread", threadEnumerator);
347 result &=
tc->setDetail(
"thash", threadID);
348 result &=
tc->setDetail(
"slot", ap.m_slot);
349 result &=
tc->setDetail(
"roi", ap.m_algROIID);
350 result &=
tc->setDetail(
"start", startTime);
351 result &=
tc->setDetail(
"stop", stopTime);
352 if (!result)
ATH_MSG_WARNING(
"Failed to append one or more details to trigger cost TC");
354 aiToHandleIndex[ai.
m_hash] = costOutputHandle->size() - 1;
363 for (ROBConstIt it = beginRob; it != endRob; ++it) {
364 size_t aiHash = it->first.m_hash;
366 if (aiToHandleIndex.count(aiHash) == 0) {
373 rosOutputHandle->push_back(
tc);
376 std::vector<uint32_t> robs_id;
377 std::vector<uint32_t> robs_size;
378 std::vector<unsigned> robs_history;
379 std::vector<unsigned short> robs_status;
387 robs_id.push_back(rob.second.rob_id);
388 robs_size.push_back(rob.second.rob_size);
389 robs_history.push_back(rob.second.rob_history);
390 robs_status.push_back(rob.second.isStatusOk());
394 result &=
tc->setDetail(
"alg_idx", aiToHandleIndex[aiHash]);
395 result &=
tc->setDetail(
"lvl1ID", robData.
lvl1ID);
396 result &=
tc->setDetail<std::vector<uint32_t>>(
"robs_id", robs_id);
397 result &=
tc->setDetail<std::vector<uint32_t>>(
"robs_size", robs_size);
398 result &=
tc->setDetail<std::vector<unsigned>>(
"robs_history", robs_history);
399 result &=
tc->setDetail<std::vector<unsigned short>>(
"robs_status", robs_status);
401 result &=
tc->setDetail(
"stop", robData.
end_time);
403 if (!result)
ATH_MSG_WARNING(
"Failed to append one or more details to trigger cost ROS TC");
407 if (
msg().level() <= MSG::VERBOSE) {
422 return StatusCode::SUCCESS;
433 return StatusCode::SUCCESS;
436 std::unique_lock lockUnique(
m_slotMutex[context.slot()]);
438 tbb::concurrent_hash_map< AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_iterator beginIt;
439 tbb::concurrent_hash_map< AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_iterator endIt;
440 tbb::concurrent_hash_map< AlgorithmIdentifier, AlgorithmPayload, AlgorithmIdentifierHashCompare>::const_iterator it;
444 std::map<uint64_t, std::string, std::greater<uint64_t>> timeToAlgMap;
446 for (it = beginIt; it != endIt; ++it) {
451 if (ai.
m_realSlot != context.slot())
continue;
453 uint64_t startTime = ap.m_algStartTime.microsecondsSinceEpoch();
454 uint64_t stopTime = 0;
456 tbb::concurrent_hash_map<AlgorithmIdentifier, TrigTimeStamp, AlgorithmIdentifierHashCompare>::const_accessor stopTimeAcessor;
461 stopTime = stopTimeAcessor->second.microsecondsSinceEpoch();
466 if (stopTime == 0)
continue;
468 timeToAlgMap[stopTime-startTime] = ai.
m_caller;
472 report =
"Timeout detected with the following algorithms consuming the most time: ";
474 for(
const std::pair<const uint64_t, std::string>& p : timeToAlgMap){
476 report += p.second +
" (" + std::to_string(std::lround(p.first/1e3)) +
" ms)";
478 if (algCounter >= 5){
484 return StatusCode::SUCCESS;
492 ATH_MSG_DEBUG(
"All events are monitored - event will not be discarded");
493 return StatusCode::SUCCESS;
499 std::unique_lock lockUnique(
m_slotMutex[ context.slot() ] );
506 return StatusCode::SUCCESS;
514 return StatusCode::FAILURE;
516 return StatusCode::SUCCESS;
524 if (roi)
return static_cast<int32_t
>(roi->
roiId());
544 return static_cast<size_t>( std::hash< std::thread::id >()(thread) );
#define ATH_CHECK
Evaluate an expression and check for errors.
#define ATH_MSG_VERBOSE(x)
#define ATH_MSG_WARNING(x)
Maintain a set of objects, one per slot.
virtual void lock()=0
Interface to allow an object to lock itself when made const in SG.
const IRoiDescriptor * roiDescriptor() const
Get cached pointer to View's Region of Interest Descriptor or nullptr if not describing a View.
Describes the API of the Region of Interest geometry.
virtual unsigned int roiId() const =0
identifiers
static const std::string hash2string(HLTHash, const std::string &category="TE")
hash function translating identifiers into names (via internal dictionary)
static void hashes2file(const std::string &fileName="hashes2string.txt")
debugging output of internal dictionary
virtual StatusCode processAlg(const EventContext &context, const std::string &caller, const AuditType type) override
Implementation of ITrigCostSvc::processAlg.
Gaudi::Property< bool > m_monitorAllEvents
Gaudi::Property< bool > m_saveHashes
Gaudi::Property< std::string > m_costFinalizeAlgName
std::mutex m_globalMutex
Used to protect all-slot modifications.
TrigCostDataStore< AlgorithmPayload > m_algStartInfo
Thread-safe store of algorithm start payload.
StatusCode checkSlot(const EventContext &context) const
Sanity check that the job is respecting the number of slots which were declared at config time.
TrigCostDataStore< TrigTimeStamp > m_algStopTime
Thread-safe store of algorithm stop times.
Gaudi::Property< bool > m_enableMultiSlot
virtual StatusCode initialize() override
Initialise, create enough storage to store m_eventSlots.
size_t m_eventSlots
Number of concurrent processing slots.
virtual ~TrigCostSvc()
Destructor.
std::unique_ptr< std::shared_mutex[] > m_slotMutex
Used to control and protect whole-table operations.
virtual StatusCode monitorROS(const EventContext &context, robmonitor::ROBDataMonitorStruct payload) override
Implementation of ITrigCostSvc::monitorROS.
virtual bool isMonitoredEvent(const EventContext &context, const bool includeMultiSlot=true) const override
std::unique_ptr< std::atomic< bool >[] > m_eventMonitored
Used to cache if the event in a given slot is being monitored.
virtual StatusCode endEvent(const EventContext &context, SG::WriteHandle< xAOD::TrigCompositeContainer > &costOutputHandle, SG::WriteHandle< xAOD::TrigCompositeContainer > &rosOutputHandle) override
Implementation of ITrigCostSvc::endEvent.
int32_t getROIID(const EventContext &context)
@brief Internal function to return the RoI ID from an extended event context
virtual StatusCode discardEvent(const EventContext &context) override
Discard a cost monitored event.
Gaudi::Property< std::string > m_costSupervisorAlgName
TrigCostDataStore< std::vector< robmonitor::ROBDataMonitorStruct > > m_rosData
Thread-safe store of ROS data.
TrigCostSvc(const std::string &name, ISvcLocator *pSvcLocator)
Standard ATLAS Service constructor.
Gaudi::Property< size_t > m_masterSlot
virtual StatusCode finalize() override
Finalize, act on m_saveHashes.
virtual StatusCode generateTimeoutReport(const EventContext &context, std::string &report) override
StatusCode monitor(const EventContext &context, const AlgorithmIdentifier &ai, const TrigTimeStamp &now, const AuditType type)
Internal call to save monitoring data for a given AlgorithmIdentifier.
size_t m_threadCounter
Count how many unique thread IDs we have seen.
tbb::concurrent_hash_map< std::thread::id, AlgorithmIdentifier, ThreadHashCompare > m_threadToAlgMap
Keeps track of what is running right now in each thread.
std::unordered_map< uint32_t, uint32_t > m_threadToCounterMap
Map thread's hash ID to a counting numeral.
virtual StatusCode startEvent(const EventContext &context, const bool enableMonitoring=true) override
Implementation of ITrigCostSvc::startEvent.
Utility class to measure time duration in AthenaMT. The pattern when it is useful: AlgA tags the begin...
The structure which is used to monitor the ROB data request in L2. It is created for every addROBData ...
uint64_t start_time
map of ROBs requested
std::map< const uint32_t, robmonitor::ROBDataStruct > requested_ROBs
name of requesting algorithm
uint64_t end_time
start time of ROB request (microsec since epoch)
const ExtendedEventContext & getExtendedEventContext(const EventContext &ctx)
Retrieve an extended context from a context object.
bool hasExtendedEventContext(const EventContext &ctx)
Test whether a context object has an extended context installed.
size_t getNSlots()
Return the number of event slots.
TrigComposite_v1 TrigComposite
Declare the latest version of the class.
Static hash and equal members as required by tbb::concurrent_hash_map.
static AlgorithmIdentifier make(const EventContext &context, const std::string &caller, MsgStream &msg, const int16_t slotOverride=-1)
Construct an AlgorithmIdentifier.
Small structure to hold an algorithm's name and store, plus some details on its EventView.
std::string m_caller
Name of the algorithm.
std::string m_store
Name of the algorithm's store.
TrigConf::HLTHash callerHash(MsgStream &msg) const
size_t m_slotToSaveInto
The slot which is used for the purposes of recording data on this algorithm's execution.
static constexpr int16_t s_noView
Constant value used to express an Algorithm which is not running in a View.
TrigConf::HLTHash storeHash(MsgStream &msg) const
size_t m_realSlot
The actual slot of the algorithm.
size_t m_hash
Hash of algorithm + store + realSlot.
StatusCode isValid() const
int16_t m_viewID
If not within an event view, then m_viewID = s_noView = -1.
Small structure to wrap the various values stored for an algorithm just before it starts to execute.
static bool equal(const std::thread::id &x, const std::thread::id &y)
static size_t hash(const std::thread::id &thread)