ATLAS Offline Software
CostMetadataUtil.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
4 #
5 
6 '''
7 @file CostMetadataUtil.py
8 @brief Helper functions to create cost metadata json file based on input ntuple
9  and histogram under/overflows
10 '''
11 import xml.etree.ElementTree as ET
12 
13 import datetime
14 
15 from AthenaCommon.Logging import logging
16 log = logging.getLogger('CostAnalysisPostProcessing')
17 
18 
def saveMetadata(inputFile, argsMetadata={}, processingWarnings=[], doTRPDetails=False, loglevel=3, maxRanges=5):
    ''' @brief Save metadata from ntuple to json file

    @param inputFile          Opened ROOT file containing the "metadata" TTree
    @param argsMetadata       Dict with user-supplied details; keys read here:
                              "userDetails", "jira", "amiTag", "readOKSDetails", "partition"
    @param processingWarnings List of histogram under/overflow warnings to store
    @param doTRPDetails       If True also read deadtime/pileup details from TRP
    @param loglevel           Logging level applied to this module's logger
    @param maxRanges          Max number of lumiblock ranges forwarded to the TRP reader

    Writes the collected metadata to 'metadata.json' in the working directory.
    Returns None when the input file has no "metadata" tree.
    '''
    import json

    log.level = loglevel

    metatree = inputFile.Get("metadata")
    if metatree is None:
        return None

    metatree.GetEntry(0)
    metadata = []

    metadata.append({'runNumber' : metatree.runNumber})
    metadata.append({'Details' : argsMetadata["userDetails"]})
    metadata.append({'JIRA' : argsMetadata["jira"]})
    metadata.append({'AMITag' : argsMetadata["amiTag"]})
    # BUGFIX: this previously tested `"ProcessedRanges" in metadata`, which compared
    # the string against the single-key dicts appended above and could never be true.
    # The intent is to store the branch when the ntuple provides it.
    if hasattr(metatree, "ProcessedRanges"):
        metadata.append({'ProcessedRanges' : str(metatree.ProcessedRanges)})

    # Config keys: prefer AMI when a tag was supplied, otherwise fall back to COOL
    if argsMetadata["amiTag"]:
        metadata += readHLTConfigKeysFromAMI(argsMetadata["amiTag"])
    else:
        metadata += readHLTConfigKeysFromCOOL(metatree.runNumber)

    if metatree.hostname and argsMetadata["readOKSDetails"]:
        metadata.append({'OKS configuration' : addOKSDetails(str(metatree.hostname), metatree.runNumber, argsMetadata["partition"])})
    elif metatree.hostname:
        metadata.append({'Hostnames' : str(metatree.hostname)})

    metadata.append({'AtlasCostProcessingProject' : str(metatree.AtlasProject)})
    metadata.append({'AtlasCostProcessingVersion' : str(metatree.AtlasVersion)})

    metadata.append({'ChainMonitor' : metatree.ChainMonitor})
    metadata.append({'AlgorithmMonitor' : metatree.AlgorithmMonitor})
    metadata.append({'AlgorithmClassMonitor' : metatree.AlgorithmClassMonitor})
    metadata.append({'ROSMonitor' : metatree.ROSMonitor})
    metadata.append({'GlobalsMonitor' : metatree.GlobalsMonitor})
    metadata.append({'ThreadMonitor' : metatree.ThreadMonitor})

    metadata.append({'AdditionalHashMap' : str(metatree.AdditionalHashMap)})
    metadata.append({'DoEBWeighting' : metatree.DoEBWeighting})
    metadata.append({'BaseEventWeight' : metatree.BaseEventWeight})

    if doTRPDetails:
        # First run with new physics deadtime item https://gitlab.cern.ch/atlas-tdaq-oks/p1/tdaq-10-00-00/-/commit/31c7c6e6b9f3c796c97cf4a61e76818d7da410df
        if metatree.runNumber >= 452028:
            dtItem = "L1_eEM26M--enabled"
        else:
            dtItem = "L1_TAU8--enabled"
        detailsPerLb = readDetailsFromTRP(inputFile, metatree.runNumber, maxRanges, dtItem)
        if detailsPerLb:
            for detail in detailsPerLb["Global"]:
                metadata.append({detail : detailsPerLb["Global"][detail]})
            metadata.append({"LumiblockDetails" : detailsPerLb["PerLb"]})
            # metadata[1] is the 'Details' entry appended above; append a summary there
            if metadata[1]['Details']:
                metadata[1]['Details'] += " "
            else:
                metadata[1]['Details'] = ""
            metadata[1]['Details'] += "Monitored time {4}h: {0} - {1} max <mu> {2} deadtime {3}".format(
                detailsPerLb["Global"]["DataRangeStart"], detailsPerLb["Global"]["DataRangeEnd"],
                detailsPerLb["Global"]["GlobalMaxPileup"], detailsPerLb["Global"]["GlobalMeanDeadtime"],
                detailsPerLb["Global"]["DataRangeDuration"])
        else:
            log.error("Reading lumiblock details for TRP failed!")

    metadata.append({'Histogram under/overflows' : processingWarnings})

    metadata.append({'HLTMenu' : json.loads(str(metatree.HLTMenu))})

    with open('metadata.json', 'w') as outMetaFile:
        metafile = {}
        metafile['text'] = 'metadata'
        metafile['children'] = metadata
        json.dump(obj=metafile, fp=outMetaFile, indent=2, sort_keys=True)
96 
def createOverflowSummary(warnings):
    ''' @brief Build a per-summary tally of histogram under/overflow warnings
    '''
    tally = {}
    log.debug("Received %s warnings", len(warnings))
    for warning in warnings:
        fullName = warning.split(" ")[-1]
        # Histogram type is the last two underscore-separated tokens of the name
        histType = "_".join(fullName.split("_")[-2:])
        summary = fullName.split("HLT")[0] + "HLT"

        if "LumiBlock" in summary:
            # format LumiBlock_000XX_SummaryName... -> drop the first two tokens
            summary = summary.split('_', 2)[2]
        elif "All" in summary:
            # format All_SummaryName... -> drop the leading token
            summary = summary.split('_', 1)[1]

        key = summary + "_" + histType
        tally[key] = tally.get(key, 0) + 1

    statLines = [
        "{0}: {1} histograms with over/underflows".format(name, count)
        for name, count in tally.items()
    ]

    return {"Summary": statLines}
126 
127 
def ignoreUnderflow(histogramName):
    ''' @brief Decide whether the underflow check should skip this histogram
    '''

    # Time histograms of filter steps are expected to underflow - skip them
    isFilterTimeHist = ("FStep" in histogramName) and ("Time" in histogramName)
    if isFilterTimeHist:
        log.debug("Filter %s underflow will be ignored", histogramName)

    return isFilterTimeHist
138 
139 
def addOKSDetails(hostname, runNumber, partition):
    ''' @brief Retrieve additional run metadata from the OKS repository

    @param hostname  Comma-separated racks, or pc-tdq-tpu computer names
    @param runNumber Run number used to pick the OKS git tag
    @param partition Partition name, "ATLAS" or "TDAQ"

    @return List (one entry per rack) of lists of single-key dicts with
            Forks/Threads/Slots settings; [] when OKS data is unavailable.
    '''
    oksMetadata = []

    # Clone the OKS configuration for this run from the TDAQ repository
    oksTag = "r{0}@{1}".format(runNumber, partition)
    log.info("Cloning tdaq-09-04-00 Tag " + oksTag)
    import os
    os.system("git clone https://gitlab.cern.ch/atlas-tdaq-oks/p1/tdaq-09-04-00.git --branch " + oksTag + " --single-branch")

    # Browse OKS
    try:
        if partition == "TDAQ":
            partitionRoot = ET.parse('tdaq-09-04-00/daq/partitions/TDAQ.data.xml').getroot()
            hltRoot = ET.parse('tdaq-09-04-00/daq/segments/HLT/HLT-TDAQ.data.xml').getroot()
        elif partition == "ATLAS":
            partitionRoot = ET.parse('tdaq-09-04-00/combined/partitions/ATLAS.data.xml').getroot()
            hltRoot = ET.parse('tdaq-09-04-00/daq/segments/HLT/HLT-internal.data.xml').getroot()
        else:
            # BUGFIX: an unexpected partition previously left partitionRoot/hltRoot
            # unbound and crashed with a NameError further down
            log.warning("Unknown partition {0} - OKS details will not be read".format(partition))
            os.system("rm -rf tdaq-09-04-00")
            return []
    except FileNotFoundError as e:
        log.warning("OKS files not available: {0}".format(e))
        # BUGFIX: remove the cloned repository also on this early exit
        os.system("rm -rf tdaq-09-04-00")
        return []

    # Read F/T/S (Forks/Threads/Slots) per rack
    racksToComp = dict()
    if 'pc' in hostname:
        # Convert computer names to the set of racks hosting them
        for computerName in hostname.split(","):
            rackName = findRackForComputer(computerName)
            if rackName not in racksToComp:
                racksToComp[rackName] = list()
            racksToComp[rackName].append(computerName)

        hostname = ",".join(racksToComp.keys())

    for rack in hostname.split(","):
        hltApplication = findHLTApplication(partitionRoot, hltRoot, rack, partition)

        metadataDict = [{'Hostname' : rack},
                        {'Forks' : hltRoot.findall("./*[@id='{0}']/*[@name='numForks']".format(hltApplication))[0].get("val")},
                        {'Threads' : hltRoot.findall("./*[@id='{0}']/*[@name='numberOfAthenaMTThreads']".format(hltApplication))[0].get("val")},
                        {'Slots' : hltRoot.findall("./*[@id='{0}']/*[@name='numberOfEventSlots']".format(hltApplication))[0].get("val")}]

        if rack in racksToComp:
            metadataDict.append({'Computers' : str(racksToComp[rack])})

        oksMetadata.append(metadataDict)

    # Cleanup cloned repository
    os.system("rm -rf tdaq-09-04-00")
    return oksMetadata
192 
193 
def findHLTApplication(partitionRoot, hltRoot, hostname, partitionName="ATLAS"):
    ''' @brief Find HLT application based on hostname and disabled segments

    @param partitionRoot Root element of the parsed partition OKS XML
    @param hltRoot       Root element of the parsed HLT segment OKS XML
    @param hostname      Rack id to match; when empty, all template segments are considered
    @param partitionName NOTE(review): accepted but never read in this function

    @return id attribute of the HLTRCApplication of the first enabled segment
    '''
    segments = []

    if hostname:
        # Find segment based on hostname
        # NOTE(review): the trailing ".../..." looks like an extraction-mangled
        # parent-axis step (likely "/../..") - confirm against the upstream file
        for segment in hltRoot.findall("./*[@class='TemplateSegment']/*[@name='Racks']/*[@id='{0}'].../...".format(hostname)):
            segments.append(segment.get("id"))
    else:
        # Get available segments
        segments = []
        for segment in hltRoot.findall("./*[@class='Segment']/*[@name='Segments']/*[@class='TemplateSegment']"):
            segments.append(segment.get("id"))

    log.debug("Found segments {0}".format(segments))

    # Drop segments listed as disabled in the partition
    for segment in partitionRoot.findall("./*[@class='Partition']/*[@name='Disabled']/*[@class='TemplateSegment']"):
        if segment.get("id") in segments:
            segments.remove(segment.get("id"))

    if len(segments) > 1:
        log.warning("Found more than one enabled segment, will use {0}".format(segments[0]))

    # NOTE(review): raises IndexError if no enabled segment remains
    return hltRoot.findall("./*[@id='{0}']/*[@name='Applications']/*[@class='HLTRCApplication']".format(segments[0]))[0].get("id")
220 
221 
def findRackForComputer(computerName):
    ''' Map a pc-tdq-tpu computer hostname onto its rack name '''

    import re
    match = re.search(r'pc-tdq-tpu-(.*?)\.cern\.ch', computerName)
    if not match:
        log.warning("Cannot retrieve rack number from {0}".format(computerName))
        return ""

    # The first two digits of the machine number identify the rack
    rackDigits = match.group(1)[0:2]
    return "tpu-rack-{0}".format(rackDigits)
234 
235 
def readHLTConfigKeysFromCOOL(runNumber):
    '''
    @brief Read HLT configuration keys for a run from COOL.

    Returns list of config keys read from COOL:
        DB - database alias
        Release - release
        SMK - Super Master Key
        HLTPSK - HLT Prescale keys
        LVL1PSK - L1 Prescale keys
    '''
    # BUGFIX: the `def readHLTConfigKeysFromCOOL(runNumber):` line was missing
    # from this copy of the file (the docstring and body were orphaned); restored
    # to match the signature used by saveMetadata().

    configMetadata = []

    from TrigConfStorage.TriggerCoolUtil import TriggerCoolUtil
    dbconn = TriggerCoolUtil.GetConnection("CONDBR2")
    configKeys = TriggerCoolUtil.getHLTConfigKeys(dbconn, [[runNumber, runNumber]])

    if configKeys and runNumber in configKeys:
        configKeys = configKeys[runNumber]

        configMetadata.append({'DB' : configKeys['DB']})
        configMetadata.append({'Release' : configKeys['REL']})
        configMetadata.append({'SMK' : configKeys['SMK']})

        # Prescale keys come from separate COOL folders
        configMetadata.append({'HLTPSK' : str(TriggerCoolUtil.getHLTPrescaleKeys(dbconn, [[runNumber, runNumber]])[runNumber]['HLTPSK2'])})
        configMetadata.append({'LVL1PSK' : str(TriggerCoolUtil.getL1ConfigKeys(dbconn, [[runNumber, runNumber]])[runNumber]['LVL1PSK'])})

    else:
        log.warning("Config keys not found in COOL")

    return configMetadata
266 
267 
268 
def readHLTConfigKeysFromAMI(amiTag):
    '''
    @brief Read HLT configuration keys from an AMI tag.

    Returns list of config keys read from AMI Tag:
        DB - database alias
        Release - release
        SMK - Super Master Key
        HLTPSK - HLT Prescale keys
        LVL1PSK - L1 Prescale keys
    '''
    # BUGFIX: the `def readHLTConfigKeysFromAMI(amiTag):` line was missing from
    # this copy of the file (the docstring and body were orphaned); restored to
    # match the signature used by saveMetadata().

    configMetadata = []

    try:
        import pyAMI.client
        import pyAMI.atlas.api as AtlasAPI
    except ModuleNotFoundError:
        log.warning("Unable to import AMIClient from pyAMI. Maybe you didn't do localSetupPyAMI?")
        return configMetadata

    amiclient = pyAMI.client.Client('atlas')
    AtlasAPI.init()

    # Retrieve the (cached) AMI tag info record for the given tag
    command = [ 'AMIGetAMITagInfo', '-amiTag="%s"' % amiTag, '-cached' ]
    amiTagInfo = amiclient.execute(command, format = 'dict_object').get_rows('amiTagInfo')[0]

    configMetadata.append({'Release' : amiTagInfo['SWReleaseCache']})
    # The DB* keys are optional in the AMI record - store None when absent
    configMetadata.append({'SMK' : amiTagInfo['DBsmkey'] if "DBsmkey" in amiTagInfo else None})
    configMetadata.append({'DB' : amiTagInfo['DBserver'] if "DBserver" in amiTagInfo else None})
    configMetadata.append({'HLTPSK' : amiTagInfo['DBhltpskey'] if "DBhltpskey" in amiTagInfo else None})
    configMetadata.append({'LVL1PSK' : amiTagInfo['DBl1pskey'] if "DBl1pskey" in amiTagInfo else None})

    return configMetadata
301 
302 
def readDetailsFromTRP(inputFile, runNumber, maxRanges, itemName="L1_eEM26M--enabled", server="https://atlasop.cern.ch"):
    ''' @brief Read per-lumiblock-range deadtime and pileup details from COOL and PBeast.

    @param inputFile ROOT file whose top-level directories are time ranges, each
                     containing a "Global" summary with LumiBlock_* subdirectories
    @param runNumber Run number used for the COOL /TRIGGER/LUMI/LBLB time lookup
    @param maxRanges NOTE(review): accepted but never read in this function
    @param itemName  TRP item whose deadtime ("DT") series is read from PBeast
    @param server    PBeast server URL

    @return {"Global": summary dict, "PerLb": per-range details dict}, or {} on failure
    '''
    log.info("Reading run details from TRP")

    import ROOT

    lumiBlockDict = {} # Mapping of range to lumiblocks in the range

    # Collect the LumiBlock_* directory names under each range's Global table
    for timeRange in inputFile.GetListOfKeys():
        rangeObj = timeRange.ReadObj()
        if not rangeObj.IsA().InheritsFrom(ROOT.TDirectory.Class()): continue # Skip metadata TTree
        rangeName = rangeObj.GetName()

        for table in rangeObj.GetListOfKeys():
            tableObj = table.ReadObj()
            if "Global" not in tableObj.GetName(): continue # Find Global summary

            dirKey = set(key.ReadObj().GetName() for key in tableObj.GetListOfKeys() if key.ReadObj().GetName().startswith('LumiBlock'))
            lumiBlockDict[rangeName] = sorted(dirKey)

    if not lumiBlockDict:
        log.error("No lumiblocks were found in the input file")
        return {}

    # Read start and stop timestamps for lumiblock ranges
    from DQUtils.sugar import RunLumi
    from time import ctime
    from PyCool import cool
    from TrigConfStorage.TriggerCoolUtil import TriggerCoolUtil
    dbconn = TriggerCoolUtil.GetConnection("CONDBR2")

    lbRangeTsDict = {} # Timestamps for lumiblock range

    f = dbconn.getFolder( "/TRIGGER/LUMI/LBLB" )
    for lbRange in lumiBlockDict:
        # Lumiblock names sort lexicographically (zero-padded), so min/max give
        # the first/last block of the range
        startLb = int(min(lumiBlockDict[lbRange]).replace('LumiBlock_', ''))
        endLb = int(max(lumiBlockDict[lbRange]).replace('LumiBlock_', ''))
        log.debug("For range {0} first lumiblock is {1} and last {2}".format(lbRange, startLb, endLb))

        since = RunLumi(runNumber, startLb)
        until = RunLumi(runNumber, endLb)

        objs = f.browseObjects(since, until, cool.ChannelSelection(0))
        objs.goToNext()
        objCurrRef = objs.currentRef()
        # COOL stores ns; divide to microseconds
        startTime = int(objCurrRef.payload()["StartTime"]/1000)

        # Advance to the last object in the selection; its EndTime is the range end.
        # NOTE(review): indentation reconstructed from a flattened dump - confirm
        # that only the currentRef() update belongs to the loop body.
        while objs.goToNext():
            objCurrRef = objs.currentRef()

        endTime = int(objCurrRef.payload()["EndTime"]/1000)

        lbRangeTsDict[lbRange] = {"start": startTime, "end" : endTime}

        log.debug("Read start and end of range {0} from COOL: {1} - {2}".format(lbRange, ctime(startTime/1E6).replace(' ','_'), ctime(endTime/1E6).replace(' ','_')))


    # Read details from PBeast
    lbRangeDetailsDict = {}
    physicsDeadtimeGlobal = []
    pileupGlobal = []
    try:
        import libpbeastpy
        pbeast = libpbeastpy.ServerProxy(server)

        for lbRange in lbRangeTsDict:
            lbStart = lbRangeTsDict[lbRange]["start"]
            lbEnd = lbRangeTsDict[lbRange]["end"]

            # Deadtime
            physicsDeadtimeTRP = pbeast.get_data('ATLAS', 'L1_Rate', 'DT', 'ISS_TRP.' + itemName, False, lbStart, lbEnd, 0, True)

            if len(physicsDeadtimeTRP) == 0:
                log.error("Deadtime not found for item {0} for range {1}".format(itemName, lbRange))
                physicsDeadtimeAvg = -1
            else:
                physicsDeadtimeTRP = physicsDeadtimeTRP[0].data['ISS_TRP.' + itemName]
                physicsDeadtimeArray = []
                for entry in physicsDeadtimeTRP:
                    # Read only values between timestamps - pbeast returns one timestamp earlier and one later
                    if entry.ts < lbStart or entry.ts > lbEnd:
                        continue
                    if type(entry.value) is not float: # None type
                        continue

                    physicsDeadtimeArray.append(entry.value)
                    physicsDeadtimeGlobal.append(entry.value)

                # Default 1. (not -1) when no samples survived the filtering
                physicsDeadtimeAvg = sum(physicsDeadtimeArray)/len(physicsDeadtimeArray) if len(physicsDeadtimeArray) > 0 else 1.

            # Pileup
            pileupPbeast = pbeast.get_data('OLC', 'OCLumi', 'Mu', 'OLC.OLCApp/ATLAS_PREFERRED_LBAv_PHYS', False, lbStart, lbEnd)[0].data['OLC.OLCApp/ATLAS_PREFERRED_LBAv_PHYS']
            pileupArr = []
            for entry in pileupPbeast:
                if entry.ts < lbStart or entry.ts > lbEnd:
                    continue
                if type(entry.value) is not float: # None type
                    continue

                pileupArr.append(entry.value)
                pileupGlobal.append(entry.value)

            pileupAvg = sum(pileupArr)/len(pileupArr) if len(pileupArr) > 0 else -1
            # NOTE(review): min()/max() raise ValueError when pileupArr is empty,
            # although pileupAvg guards that case - confirm intended behavior
            lbRangeDetailsDict[lbRange] = {"avgPileup" : round(pileupAvg, 3), "minPileup" : round(min(pileupArr), 3),
                                           "maxPileup" : round(max(pileupArr), 3), "deadtime" : round(physicsDeadtimeAvg, 3)}

    except ImportError as e:
        log.error("The pbeast python library was not found! Remember to setup tdaq release!")
        log.debug(e)
        return {}
    except RuntimeError as e:
        # PBeast signals SSO/authentication problems via RuntimeError messages
        if "Sign in to your account" in str(e):
            log.error("PBeast authentication failed! Remember to export pbeast server sso: export PBEAST_SERVER_SSO_SETUP_TYPE=AutoUpdateKerberos")
        elif "cannot create CERN SSO cookie" in str(e):
            log.error("PBeast authentication requires the cookies, please setup")
        else:
            log.error("Error when reading from Pbeast! ")
        log.debug(e)
        return {}

    log.debug("The final lumiblock dictionary is {0}".format(lbRangeDetailsDict))

    physicsDeadtimeGlobalAvg = sum(physicsDeadtimeGlobal)/len(physicsDeadtimeGlobal) if len(physicsDeadtimeGlobal) > 0 else 1.
    pileupGlobalAvg = sum(pileupGlobal)/len(pileupGlobal) if len(pileupGlobal) > 0 else 1.

    # Range names sort lexicographically; timestamps are microseconds -> seconds
    startTs = lbRangeTsDict[min(lbRangeTsDict.keys())]["start"]/1E6
    endTs = lbRangeTsDict[max(lbRangeTsDict.keys())]["end"]/1E6
    monitoredTime = datetime.timedelta(seconds=(int(endTs - startTs)))
    # NOTE(review): min()/max() on pileupGlobal raise ValueError if it is empty
    additionalDetails = {
        "DataRangeStart" : ctime(startTs),
        "DataRangeEnd" : ctime(endTs),
        "DataRangeDuration" : "{0}:{1}".format(int(monitoredTime.total_seconds()//3600), int((monitoredTime.total_seconds()%3600)//60)),
        "GlobalMeanPileup" : round(pileupGlobalAvg, 3),
        "GlobalMinPileup" : round(min(pileupGlobal), 3),
        "GlobalMaxPileup" : round(max(pileupGlobal), 3),
        "GlobalMeanDeadtime" : round(physicsDeadtimeGlobalAvg, 3)
    }

    return {"Global" : additionalDetails, "PerLb" : lbRangeDetailsDict}
replace
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition: hcg.cxx:307
python.CostMetadataUtil.ignoreUnderflow
def ignoreUnderflow(histogramName)
Definition: CostMetadataUtil.py:128
vtune_athena.format
format
Definition: vtune_athena.py:14
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
max
constexpr double max()
Definition: ap_fixedTest.cxx:33
min
constexpr double min()
Definition: ap_fixedTest.cxx:26
MuonGM::round
float round(const float toRound, const unsigned int decimals)
Definition: Mdt.cxx:27
dumpHVPathFromNtuple.append
bool append
Definition: dumpHVPathFromNtuple.py:91
python.sugar.runlumi.RunLumi
RunLumi
Definition: runlumi.py:131
python.CostMetadataUtil.readHLTConfigKeysFromCOOL
def readHLTConfigKeysFromCOOL(runNumber)
Definition: CostMetadataUtil.py:236
convertTimingResiduals.sum
sum
Definition: convertTimingResiduals.py:55
python.CostMetadataUtil.addOKSDetails
def addOKSDetails(hostname, runNumber, partition)
Definition: CostMetadataUtil.py:140
python.CostMetadataUtil.findHLTApplication
def findHLTApplication(partitionRoot, hltRoot, hostname, partitionName="ATLAS")
Definition: CostMetadataUtil.py:194
histSizes.list
def list(name, path='/')
Definition: histSizes.py:38
DerivationFramework::TriggerMatchingUtils::sorted
std::vector< typename T::value_type > sorted(T begin, T end)
Helper function to create a sorted vector from an unsorted one.
CxxUtils::set
constexpr std::enable_if_t< is_bitmask_v< E >, E & > set(E &lhs, E rhs)
Convenience function to set bits in a class enum bitmask.
Definition: bitmask.h:232
python.CostMetadataUtil.findRackForComputer
def findRackForComputer(computerName)
Definition: CostMetadataUtil.py:222
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
python.CostMetadataUtil.readDetailsFromTRP
def readDetailsFromTRP(inputFile, runNumber, maxRanges, itemName="L1_eEM26M--enabled", server="https://atlasop.cern.ch")
Definition: CostMetadataUtil.py:303
python.CostMetadataUtil.saveMetadata
def saveMetadata(inputFile, argsMetadata={}, processingWarnings=[], doTRPDetails=False, loglevel=3, maxRanges=5)
Definition: CostMetadataUtil.py:19
Trk::open
@ open
Definition: BinningType.h:40
python.CaloScaleNoiseConfig.type
type
Definition: CaloScaleNoiseConfig.py:78
get
T * get(TKey *tobj)
get a TObject* from a TKey* (why can't a TObject be a TKey?)
Definition: hcg.cxx:127
python.CostMetadataUtil.readHLTConfigKeysFromAMI
def readHLTConfigKeysFromAMI(amiTag)
Definition: CostMetadataUtil.py:269
str
Definition: BTagTrackIpAccessor.cxx:11
python.CostMetadataUtil.createOverflowSummary
def createOverflowSummary(warnings)
Definition: CostMetadataUtil.py:97
Trk::split
@ split
Definition: LayerMaterialProperties.h:38