ATLAS Offline Software
checkTriggerxAOD.py
#!/usr/bin/env python
#
# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
#
# This is a modified version of PyUtils/bin/checkFile.py. It has been taught
# how to sum up the sizes of all the branches belonging to a single xAOD
# object/container.
#
# This is a modified version of checkxAOD.py that provides trigger-specific
# information in an organized way.

__author__ = "Sebastien Binet <binet@cern.ch>, " \
             "Attila Krasznahorkay <Attila.Krasznahorkay@cern.ch>, " \
             "RD Schaffer <R.D.Schaffer@cern.ch>, " \
             "Ryan White <ryan.white@cern.ch>"
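
# Typical invocation (the input file name below is just a placeholder):
#
#   checkTriggerxAOD.py -f my.xAOD.file.pool.root
#   checkTriggerxAOD.py -f my.xAOD.file.pool.root -c sizes.csv -v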

import sys
import os
import re
import operator

from optparse import OptionParser

import TrigEDMConfig.TriggerEDM as edm
import PyUtils.PoolFile as PF

def categorizeData(categData):
    """Return the PoolRecord objects from categData, sorted by disk size."""
    categorizedData = []
    for br in categData.keys():
        categorizedData += [ categData[ br ] ]
        pass
    sorter = PF.PoolRecord.Sorter.DiskSize
    categorizedData.sort( key = operator.attrgetter( sorter ) )
    return categorizedData
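
# A minimal sketch of how categorizeData is used further down. The category
# name and the numbers are illustrative only; the PoolRecord arguments are
# (name, memSize, diskSize, memSizeNoZip, nEntries, dirType):
#
#   categData = { "Jet" : PF.PoolRecord( "Jet", 100.0, 50.0, 120.0, 10, "B" ) }
#   records = categorizeData( categData )  # sorted, smallest disk size first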

def compareCategoryData(trigData,recoData):
    """Print per-event trigger vs. offline disk sizes for matching categories."""
    # Map each trigger EDM category onto the corresponding offline category:
    trDict = {'Trig_Egamma':'egamma',
              'Trig_Muon':'Muon',
              'Trig_MET':'MET',
              'Trig_Tau':'tau',
              'Trig_Bjet':'BTag',
              'Trig_Jet':'Jet',
              'Trig_InDet':'InDet'}
    td = categorizeData(trigData)
    rd = categorizeData(recoData)

    print( "=" * 80 )
    print( " Categorized data" )
    print( "=" * 80 )
    print( "     Trigger      Offline  Trig/Offline  Trig Category  Offline Cat." )
    print( "-" * 80 )
    for d in td:
        for r in rd:
            if d.name in trDict:
                # Compare the names by value, not by object identity:
                if trDict[d.name] == r.name:
                    dsPerEvt = d.diskSize/poolFile.dataHeader.nEntries
                    rsPerEvt = r.diskSize/poolFile.dataHeader.nEntries
                    frac = dsPerEvt/rsPerEvt
                    print( "%12.3f kb %12.3f kb %12.3f %12s %12s" % ( dsPerEvt, rsPerEvt, frac, d.name, r.name ) )
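
# Only the seven trigger categories listed in trDict are compared. One row of
# the comparison table would look like (values illustrative only):
#
#       12.345 kb       98.765 kb        0.125     Trig_Jet          Jet
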
def printCategoryData(categData,diskTotal):
    """Print the per-event disk size and AOD fraction of each category,
    followed by the same numbers as CSV lines."""
    categorizedData = categorizeData(categData)

    print( "=" * 80 )
    print( " Categorized data" )
    print( "=" * 80 )
    print( "  Disk Size/Evt     Fraction  Category Name" )
    print( "-" * 80 )
    totDiskSize = 0.0
    frac = 0.0
    ds = []
    dsFrac = []
    dsName = []
    for d in categorizedData:
        dsPerEvt = d.diskSize / poolFile.dataHeader.nEntries
        dsPerEvtFrac = d.diskSize / diskTotal
        totDiskSize += dsPerEvt
        frac += dsPerEvtFrac
        ds += [dsPerEvt]
        dsFrac += [dsPerEvtFrac]
        dsName += [d.name]
        print( "%12.3f kb %12.3f %s" % ( dsPerEvt, dsPerEvtFrac, d.name ) )
        pass
    print( "%12.3f kb %12.3f %s" % ( totDiskSize , frac, "Total" ) )
    ds += [totDiskSize]
    dsFrac += [frac]
    dsName += ["Total"]

    print( "" )
    print( "=" * 80 )
    print( "CSV for categories disk size/evt and fraction:" )
    # Print comma-separated lists in descending order of size:
    print(",".join(dsName[::-1]))
    b = ['{:<0.3f}'.format(i) for i in ds[::-1]]
    print(",".join(b))
    b = ['{:<0.3f}'.format(i) for i in dsFrac[::-1]]
    print(",".join(b))
    print( "=" * 80 )
    print( "" )
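
# The CSV block prints three comma-separated lines, largest category first
# (names and values illustrative only):
#
#   Total,Jet,egamma,...
#   250.000,80.000,40.000,...
#   1.000,0.320,0.160,...
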
def sortTrigData(orderedData,diskTotal):
    """Group the trigger containers by their TrigEDMConfig category and
    print a per-category table of sizes."""
    orderTrigData = {}
    memSize = 0.0
    diskSize = 0.0
    for d in orderedData:
        catName = edm.getCategory(d.name).strip()
        # Skip containers that are not part of the trigger EDM:
        if catName == 'NOTFOUND': continue
        if catName in orderTrigData:
            orderTrigData[ catName ].append(d)
        else:
            orderTrigData[ catName ] = [d]
            pass
        pass
    for k in orderTrigData:
        # Print a header:
        print( "" )
        print( "=" * 100 )
        print(' Trigger EDM group: %s' % (k))
        print( "=" * 100 )
        print( PF.PoolOpts.HDR_FORMAT %
               ( "Mem Size", "Disk Size", "Size/Evt", "Compression",
                 "Items", "Container Name (Type)" ) )
        print( "-" * 100 )
        memSize = 0.0
        diskSize = 0.0
        for d in orderTrigData[k]:
            memSize = memSize + d.memSize
            diskSize = diskSize + d.diskSize
            print( PF.PoolOpts.ROW_FORMAT %
                   ( d.memSize,
                     d.diskSize,
                     ( d.diskSize / poolFile.dataHeader.nEntries ),
                     ( d.memSize / d.diskSize ),
                     d.nEntries,
                     d.name ) )
            pass
        print( "-" * 100 )
        print( " %s Total size/evt %12.3f AOD fraction %12.3f" % (k,diskSize/poolFile.dataHeader.nEntries,diskSize/diskTotal) )
        print( "=" * 100 )
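
# edm.getCategory comes from TrigEDMConfig.TriggerEDM and maps a container
# name to its trigger EDM group, for example (container name and return
# value illustrative):
#
#   edm.getCategory( "HLT_xAOD__JetContainer_a4tcemsubjesFS" )  # e.g. 'Jet'
#   edm.getCategory( "CaloCalTopoClusters" )  # 'NOTFOUND' for non-trigger names
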
def printAuxDynVars(dynvars):
    """Print the dynamic auxiliary variables collected for each container."""
    for key,items in dynvars.items():
        print( "=" * 80 )
        print(key)
        print( "=" * 80 )
        for var in items:
            print('%s'%var)
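
# dynvars maps a container name to the list of its dynamic auxiliary
# branches, e.g. (names illustrative):
#
#   { "HLT_xAOD__MuonContainer_MuonEFInfo" :
#       [ "HLT_xAOD__MuonContainer_MuonEFInfoAuxDyn.pt", ... ] }
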
if __name__ == "__main__":

    parser = OptionParser( usage = "usage: %prog [-f] my.xAOD.file.pool.root" )
    p = parser.add_option
    p( "-f",
       "--file",
       dest = "fileName",
       help = "The path to the POOL file to analyze" )
    p( "-c",
       "--csv",
       dest = "csvFileName",
       help = "Output CSV file name, to use with spreadsheets" )
    p( "-v",
       "--vars",
       action = "store_true",
       dest = "printVars",
       # store_true needs a False default for the flag to have an effect:
       default = False,
       help = "Print the dynamic auxiliary variables" )

    ( options, args ) = parser.parse_args()

    # Set up categorization matching strings:
    categoryStrings = {
        "MetaData" : ["^DataHeader", "(.*)_mems$", "(.*)_timings$", "^Token$", "^RawInfoSummaryForTag$"],
        "Trig" : ["^HLT", "^LVL1", "^xTrig", "^Trig", "^CTP_Decision", "^TrigInDetTrackTruthMap", "^TrigNavigation", ".*TriggerTowers", "TileTTL1MBTS", "^TileL2Cnt", "RoIBResult", "^_TRIGGER", "^L1TopoRawData"],
        "MET" : ["^MET", "^METMAP", "JEMEtSums"],
        "EvtId" : ["^ByteStreamEventInfo", "^EventInfo", "^McEventInfo", "^LumiBlockN", "^EventWeight", "^RunNumber", "^ConditionsRun", "^EventTime", "^BunchId", "^EventNumber", "^IsTestBeam", "^IsSimulation", "^IsCalibration", "^AvgIntPerXing", "^ActualIntPerXing", "^RandomNumber"],
        "tau" : ["^Tau", "^CombinedStauTrackParticles", "^ExtrapolatedStauTrackParticles", "^finalTauPi0s", "^DiTauJets"],
        "PFO" : ["(.*)EventShape$", "^AntiKt4EMPFlowJets", "^JetETMissChargedParticleFlowObjects", "^JetETMissNeutralParticleFlowObjects"],
        "egamma" : ["^GSF", "^ForwardElectron", "^egamma", "^Electron", "^Photon"],
        "Muon" : ["^Muon", "^TileMuObj", "^MS", "^SlowMuons", "^Staus", "(.*)MuonTrackParticles$", "MUCTPI_RDO", "^RPC", "^TGC", "^MDT", "^CSC", ".*MuonMeasurements$", "^ExtrapolatedMuonTracks", "^CombinedMuonTracks"],
        "BTag" : ["^BTag"],
        "InDet" : ["^InDet", "^PrimaryVertices", "^ComTime_TRT", "^Pixel", "^TRT", "^SCT", "^BCM", "^CTP", "^Tracks", "^ResolvedForwardTracks", "^SplitClusterAmbiguityMap"],
        "Jet" : ["^CamKt", "^AntiKt", "^Jet", "^LCOriginTopoClusters", "^EMOriginTopoClusters"],
        "CaloTopo" : ["CaloCalTopoCluster"],
        "Calo" : ["^LAr", "^AllCalo", "^AODCellContainer", "^MBTSContainer", "^CaloCompactCellContainer", "^E4prContainer", "^TileCellVec", "^TileDigits"],
        "Truth" : ["^Truth", "Truth$", "TruthMap$", "TruthCollection$", "^PRD_MultiTruth", "TracksTruth$", ".*TrackTruth$", "TrackTruthCollection"]
    }
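
    # Each container name is later matched against these patterns with
    # re.match, and the first matching category wins. For example
    # (container name illustrative):
    #
    #   re.match( "^AntiKt", "AntiKt4EMTopoJets" )   # -> category "Jet"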

    fileNames = []

    if len( args ) > 0:
        fileNames = [ arg for arg in args if arg[ 0 ] != "-" ]
        pass

    if options.fileName is None and len( fileNames ) == 0:
        parser.print_help()
        sys.exit( 1 )

    if options.fileName is not None:
        fileName = os.path.expandvars( os.path.expanduser( options.fileName ) )
        fileNames.append( fileName )
        pass

    # Remove any duplicate file names:
    fileNames = set( fileNames )
    # Check the consistency with the CSV output:
    if len( fileNames ) > 1 and options.csvFileName:
        print( "WARNING  CSV output is only available when processing a single "
               "input file" )
        pass

    # Loop over the specified file(s):
    for fileName in fileNames:

        # Open the file:
        poolFile = PF.PoolFile( fileName )

        # Loop over all the branches of the file, and sum up the information
        # about them in a smart way...
        summedData = {}
        categData = {}
        categTrigData = {}
        categTrigDynVars = {}
        for d in poolFile.data:
            # Skip metadata/TAG/etc. branches:
            # if d.dirType != "B": continue
            # The name of this branch:
            brName = d.name
            # Check if this is a static auxiliary store:
            m = re.match( r"(.*)Aux\..*", d.name )
            if m:
                # Yes, it is. And the name of the main object/container is:
                brName = m.group( 1 )
                pass
            # Check if this is a dynamic auxiliary variable:
            m = re.match( r"(.*)AuxDyn\..*", d.name )
            if m:
                # It is. Construct the name of the main object/container,
                # and remember the variable if it belongs to the trigger EDM:
                brName = m.group( 1 )
                if edm.getCategory(d.name) != 'NOTFOUND':
                    if brName in categTrigDynVars:
                        categTrigDynVars[brName].append(d.name)
                    else:
                        categTrigDynVars[brName] = [d.name]
                pass
            # Check if we already know this container:
            if brName in summedData:
                summedData[ brName ].memSize += d.memSize
                summedData[ brName ].diskSize += d.diskSize
            else:
                summedData[ brName ] = \
                    PF.PoolRecord( brName,
                                   d.memSize,
                                   d.diskSize,
                                   d.memSizeNoZip,
                                   d.nEntries,
                                   d.dirType )
                pass
            pass

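        # The two regular expression matches above rely on the xAOD branch
        # naming convention, e.g. (container name illustrative):
        #
        #   ElectronsAux.       -> static aux store of "Electrons"
        #   ElectronsAuxDyn.pt  -> dynamic aux variable of "Electrons"
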
        # Order the records by size:
        orderedData = []
        for br in summedData.keys():
            orderedData += [ summedData[ br ] ]
            pass
        sorter = PF.PoolRecord.Sorter.DiskSize
        orderedData.sort( key = operator.attrgetter( sorter ) )

        # Access the CollectionTree directly:
        import ROOT
        tfile = ROOT.TFile.Open( fileName )
        ttree = tfile.Get( "CollectionTree" )

        # Now, let's print the event-wise info that we gathered:
        memSize = 0.0
        diskSize = 0.0
        for d in orderedData:
            # Keep branches with either the same number of entries as the
            # number of events, or the special tlp branches with extra event
            # information:
            mtlp = re.match( "(.*)_tlp.$", d.name ) or re.match( "(.*)DataHeader(.*)", d.name )
            if d.nEntries != poolFile.dataHeader.nEntries and not mtlp: continue

            br = ttree.GetBranch( d.name )
            d_name = d.name
            if br:
                m = re.match( "(.*)_[pv]._", d.name )
                m1 = re.match( "(.*)_tlp._", d.name )
                m2 = re.match( "(.*)_v.>_", d.name )
                m3 = re.match( "([a-zA-Z]+)_(.*_[lL]inks?)", d.name )
                if m:
                    nameType = "%s (%s)" % ( d.name[m.end():], br.GetClassName() )
                    d_name = d.name[m.end():]
                elif m1:
                    nameType = "%s (%s)" % ( d.name[m1.end():], br.GetClassName() )
                    d_name = d.name[m1.end():]
                elif m2:
                    nameType = "%s (%s)" % ( d.name[m2.end():], br.GetClassName() )
                    d_name = d.name[m2.end():]
                elif m3:
                    nameType = "%s (%s)" % ( m3.group(2), br.GetClassName() )
                    d_name = m3.group(2)
                else:
                    nameType = "%s (%s)" % ( d.name, br.GetClassName() )
            else:
                m = re.match( "(.*)_v._", d.name )
                m1 = re.match( "(.*)(_tlp.$)", d.name )
                if m:
                    nameType = "%s (%s)" % ( d.name[m.end():], (d.name[:m.end()-1]) )
                    d_name = d.name[m.end():]
                elif m1:
                    nt = m1.group(1).replace("_",":") + m1.group(2)
                    n = m1.group(1).replace("_",":")
                    nameType = "%s (%s)" % ( n, nt )
                    d_name = n
                else:
                    nameType = "%s (%s)" % ( d.name, "()" )

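            # The regular expressions above strip the persistent version tag
            # from a branch name, e.g. (branch name illustrative):
            #
            #   "xAOD::ElectronContainer_v1_Electrons"  ->  "Electrons"
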
            # Find the category of this container:
            found = False
            catName = '*Unknown*'
            for categ in categoryStrings:
                for pattern in categoryStrings[ categ ]:
                    m = None
                    try:
                        m = re.match(pattern, d_name)
                    except TypeError:
                        pass
                    if m:
                        found = True
                        catName = categ
                        break
                    pass
                # Stop at the first matching category:
                if found:
                    break
                pass
            if not found:
                # print( "Did not find category for:", d.name, d_name, br )
                pass
            # Add the category to the name/type:
            nameType += ' [' + catName + ']'

            # Now sum up the sizes according to the category.
            # Check if we already know this category:
            if catName in categData:
                categData[ catName ].memSize += d.memSize
                categData[ catName ].diskSize += d.diskSize
            else:
                categData[ catName ] = \
                    PF.PoolRecord( catName,
                                   d.memSize,
                                   d.diskSize,
                                   d.memSizeNoZip,
                                   d.nEntries,
                                   d.dirType )
                pass

            # Now repeat to add the trigger categories (from TrigEDMConfig):
            found = False
            catName = '*Unknown*'
            if edm.getCategory(d_name) != 'NOTFOUND':
                found = True
                catName = 'Trig_' + edm.getCategory(d_name)
                nameType += ' [' + catName + ']'
                if catName in categTrigData:
                    categTrigData[ catName ].memSize += d.memSize
                    categTrigData[ catName ].diskSize += d.diskSize
                else:
                    categTrigData[ catName ] = \
                        PF.PoolRecord( catName,
                                       d.memSize,
                                       d.diskSize,
                                       d.memSizeNoZip,
                                       d.nEntries,
                                       d.dirType )
                    pass
                pass

            # The normal per-container printout is commented out; only the
            # categorized summaries below are printed.
            #print( PF.PoolOpts.ROW_FORMAT %
            #       ( d.memSize,
            #         d.diskSize,
            #         ( d.diskSize / poolFile.dataHeader.nEntries ),
            #         ( d.memSize / d.diskSize ),
            #         d.nEntries,
            #         nameType ) )
            memSize = memSize + d.memSize
            diskSize = diskSize + d.diskSize
            pass
        # Now print out the categorized information:
        sortTrigData(orderedData, diskSize)
        printCategoryData(categData, diskSize)
        print( " Trigger Group Data " )
        printCategoryData(categTrigData, diskSize)
        compareCategoryData(categTrigData, categData)

        # Now print the info about the metadata:
        print( "=" * 80 )
        print( " Meta data" )
        print( "=" * 80 )
        print( "       Mem Size       Disk Size  Container Name" )
        print( "-" * 80 )
        memSize = 0.0
        diskSize = 0.0
        for d in orderedData:
            # Keep only the branches that were excluded from the event-wise
            # listing above:
            mtlp = re.match( "(.*)_tlp.$", d.name ) or re.match( "(.*)DataHeader(.*)", d.name )
            if d.nEntries == poolFile.dataHeader.nEntries or mtlp: continue
            print( "%12.3f kb %12.3f kb  %s" %
                   ( d.memSize, d.diskSize, d.name ) )
            memSize = memSize + d.memSize
            diskSize = diskSize + d.diskSize
            pass
        print( "-" * 80 )
        print( "%12.3f kb %12.3f kb  %s" %
               ( memSize, diskSize, "Total" ) )
        print( "=" * 80 )
        print( "=" * 80 )
        if options.printVars:
            printAuxDynVars(categTrigDynVars)

        # Write out a CSV file if one was requested:
        if options.csvFileName and ( len( fileNames ) == 1 ):
            # Open the output file; csv.writer needs a text-mode file opened
            # with newline="" in Python 3:
            import csv
            with open( options.csvFileName, "w", newline = "" ) as f:
                writer = csv.writer( f )
                # Set up the formatting of the file:
                writer.writerow( [ "Name (Type)", "Size/Evt" ] )
                # Write all entries to it:
                for d in orderedData:
                    # Skip metadata items:
                    if d.nEntries != poolFile.dataHeader.nEntries: continue
                    # Construct the name of the entry:
                    nameType = "%s (%s)" % \
                               ( d.name, ttree.GetBranch( d.name ).GetClassName() )
                    # Write the entry:
                    writer.writerow( [ nameType, d.diskSize / d.nEntries ] )
                    pass
                pass
            pass

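        # The resulting CSV would start like this (values illustrative only):
        #
        #   Name (Type),Size/Evt
        #   Electrons (xAOD::ElectronContainer),1.234
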
        if len(fileNames) > 1:
            print()
        pass # loop over fileNames

    print("## Bye.")
    sys.exit( 0 )