ATLAS Offline Software
checkTriggerxAOD.py
#!/usr/bin/env python
#
# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
#
# This is a modified version of PyUtils/bin/checkFile.py. It has been taught
# how to sum up the sizes of all the branches belonging to a single xAOD
# object/container.
#
# This is a modified version of checkxAOD.py to provide trigger-specific
# information in an organized way.

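# Example usage (the file names are illustrative):
#
#   checkTriggerxAOD.py AOD.pool.root
#   checkTriggerxAOD.py -f AOD.pool.root -c sizes.csv
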
__author__ = "Sebastien Binet <binet@cern.ch>, " \
             "Attila Krasznahorkay <Attila.Krasznahorkay@cern.ch>, " \
             "RD Schaffer <R.D.Schaffer@cern.ch>, " \
             "Ryan White <ryan.white@cern.ch>"

import sys
import os
import re

from optparse import OptionParser
import TrigEDMConfig.TriggerEDM as edm

# Note: the helper functions below rely on two module-level names that are
# set up in the __main__ block: "PF" (the PyUtils.PoolFile module) and
# "poolFile" (the PF.PoolFile object currently being processed).
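# TrigEDMConfig.TriggerEDM provides getCategory(), used below to map a
# container name onto its trigger EDM group; it returns 'NOTFOUND' for names
# that are not part of the trigger EDM.
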
def categorizeData(categData):
    """Return the PoolRecords from the categData dictionary as a list,
    sorted by their size on disk."""
    categorizedData = []
    for br in categData.keys():
        categorizedData += [ categData[ br ] ]
        pass
    sorter = PF.PoolRecord.Sorter.DiskSize
    import operator
    categorizedData.sort( key = operator.attrgetter( sorter ) )
    return categorizedData

def compareCategoryData(trigData,recoData):
    """Print a per-event size comparison of each trigger EDM category with
    the corresponding offline category."""
    # Map each trigger category onto the offline category it should be
    # compared against:
    trDict = {'Trig_Egamma':'egamma',
              'Trig_Muon':'Muon',
              'Trig_MET':'MET',
              'Trig_Tau':'tau',
              'Trig_Bjet':'BTag',
              'Trig_Jet':'Jet',
              'Trig_InDet':'InDet'}
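    # For example, with this mapping the per-event size of the Trig_Egamma
    # containers is printed side by side with that of the offline egamma
    # containers, together with their ratio.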
    td = categorizeData(trigData)
    rd = categorizeData(recoData)


    print( "=" * 80 )
    print( " Categorized data" )
    print( "=" * 80 )
    print( "     Trigger      Offline  Trig/Offline  Trig Category  Offline Category" )
    print( "-" * 80 )
    for d in td:
        for r in rd:
            if d.name in trDict:
                if trDict[d.name] == r.name:
                    dsPerEvt = d.diskSize/poolFile.dataHeader.nEntries
                    rsPerEvt = r.diskSize/poolFile.dataHeader.nEntries
                    frac = dsPerEvt/rsPerEvt
                    print( "%12.3f kb %12.3f kb %12.3f %12s %12s" % ( dsPerEvt, rsPerEvt, frac, d.name, r.name ) )

def printCategoryData(categData,diskTotal):
    """Print the per-event disk size of each category together with its
    fraction of the total, followed by the same numbers in CSV form."""
    categorizedData = categorizeData(categData)

    print( "=" * 80 )
    print( " Categorized data" )
    print( "=" * 80 )
    print( "     Disk Size     Fraction  Category Name" )
    print( "-" * 80 )
    totDiskSize = 0.0
    frac = 0.0
    ds = []
    dsFrac = []
    dsName = []
    for d in categorizedData:
        dsPerEvt = d.diskSize / poolFile.dataHeader.nEntries
        dsPerEvtFrac = d.diskSize / diskTotal
        totDiskSize += dsPerEvt
        frac += dsPerEvtFrac
        ds += [dsPerEvt]
        dsFrac += [dsPerEvtFrac]
        dsName += [d.name]
        print( "%12.3f kb %12.3f %s" % ( dsPerEvt, dsPerEvtFrac, d.name ) )
        pass
    print( "%12.3f kb %12.3f %s" % ( totDiskSize , frac, "Total" ) )
    ds += [totDiskSize]
    dsFrac += [frac]
    dsName += ["Total"]

    print( "" )
    print( "=" * 80 )
    print( "CSV for categories disk size/evt and fraction:" )
    # Print out comma-separated lists in descending order of size:
    print(",".join(dsName[::-1]))
    b = ['{:<0.3f}'.format(i) for i in ds[::-1]]
    print(",".join(b))
    b = ['{:<0.3f}'.format(i) for i in dsFrac[::-1]]
    print(",".join(b))
    print( "=" * 80 )
    print( "" )

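# The CSV block printed above comes out as three lines: category names, sizes
# per event, and fractions, each in descending order of size. For example
# (values purely illustrative):
#
#   Total,Trig,egamma,Jet
#   400.000,120.000,90.000,60.000
#   1.000,0.300,0.225,0.150
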
def sortTrigData(orderedData,diskTotal):
    """Group the trigger containers by their TrigEDMConfig category and
    print a detailed size table for each group."""
    # Sort and print the trigger containers
    orderTrigData = {}
    memSize = 0.0
    diskSize = 0.0
    for d in orderedData:
        catName = edm.getCategory(d.name).strip()
        if(catName == 'NOTFOUND'): continue
        if catName in orderTrigData.keys():
            orderTrigData[ catName ].append(d)
        else:
            orderTrigData[ catName ] = [d]
        pass
    for k in orderTrigData:
        # Print a header:
        print( "" )
        print( "=" * 100 )
        print(' Trigger EDM group: %s' % (k))
        print( "=" * 100 )
        print( PF.PoolOpts.HDR_FORMAT %
               ( "Mem Size", "Disk Size", "Size/Evt", "Compression",
                 "Items", "Container Name (Type)" ) )
        print( "-" * 100 )
        memSize = 0.0
        diskSize = 0.0
        for d in orderTrigData[k]:
            memSize = memSize + d.memSize
            diskSize = diskSize + d.diskSize
            print( PF.PoolOpts.ROW_FORMAT %
                   ( d.memSize,
                     d.diskSize,
                     ( d.diskSize / poolFile.dataHeader.nEntries ),
                     ( d.memSize / d.diskSize ),
                     d.nEntries,
                     d.name ) )
        print( "-" * 100 )
        print( " %s Total size/evt %12.3f AOD fraction %12.3f" % (k,diskSize/poolFile.dataHeader.nEntries,diskSize/diskTotal) )
        print( "=" * 100 )

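# sortTrigData thus prints one table per trigger EDM group, each introduced by
# a " Trigger EDM group: <name>" banner and closed with the group's total size
# per event and its fraction of the AOD.
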
def printAuxDynVars(dynvars):
    """Print the dynamic auxiliary variables collected for each trigger
    container."""
    for key,items in dynvars.items():
        print( "=" * 80 )
        print(key)
        print( "=" * 80 )
        for var in items:
            print('%s'%var)


if __name__ == "__main__":

    parser = OptionParser( usage = "usage: %prog [-f] my.xAOD.file.pool.root" )
    p = parser.add_option
    p( "-f",
       "--file",
       dest = "fileName",
       help = "The path to the POOL file to analyze" )
    p( "-c",
       "--csv",
       dest = "csvFileName",
       help = "Output CSV file name, to use with spreadsheets" )
    # Note: since the default is True, this store_true flag cannot actually
    # be turned off from the command line:
    p( "-v",
       "--vars",
       action = "store_true",
       dest = "printVars",
       default = True,
       help = "print dynamic variables" )

    ( options, args ) = parser.parse_args()

    # Set up categorization matching strings:
    categoryStrings = {
        "MetaData" : ["^DataHeader", "(.*)_mems$", "(.*)_timings$", "^Token$", "^RawInfoSummaryForTag$"],
        "Trig" : ["^HLT", "^LVL1", "^xTrig", "^Trig", "^CTP_Decision", "^TrigInDetTrackTruthMap", "^TrigNavigation", ".*TriggerTowers", "TileTTL1MBTS", "^TileL2Cnt", "RoIBResult", "^_TRIGGER", "^L1TopoRawData"],
        "MET" : ["^MET", "^METMAP", "JEMEtSums"],
        "EvtId" : ["^ByteStreamEventInfo", "^EventInfo", "^McEventInfo", "^LumiBlockN", "^EventWeight", "^RunNumber", "^ConditionsRun", "^EventTime", "^BunchId", "^EventNumber", "^IsTestBeam", "^IsSimulation", "^IsCalibration", "^AvgIntPerXing", "^ActualIntPerXing", "^RandomNumber"],
        "tau" : ["^Tau", "^CombinedStauTrackParticles", "^ExtrapolatedStauTrackParticles", "^finalTauPi0s", "^DiTauJets"],
        "PFO" : ["(.*)EventShape$", "^AntiKt4EMPFlowJets", "^JetETMissChargedParticleFlowObjects", "^JetETMissNeutralParticleFlowObjects"],
        "egamma" : ["^GSF", "^ForwardElectron", "^egamma", "^Electron", "^Photon"],
        "Muon" : ["^Muon", "^TileMuObj", "^MS", "^SlowMuons", "^Staus", "(.*)MuonTrackParticles$", "MUCTPI_RDO", "^RPC", "^TGC", "^MDT", "^CSC", ".*MuonMeasurements$", "^ExtrapolatedMuonTracks", "^CombinedMuonTracks"],
        "BTag" : ["^BTag"],
        "InDet" : ["^InDet", "^PrimaryVertices", "^ComTime_TRT", "^Pixel", "^TRT", "^SCT", "^BCM", "^CTP", "^Tracks", "^ResolvedForwardTracks", "^SplitClusterAmbiguityMap"],
        "Jet" : ["^CamKt", "^AntiKt", "^Jet", "^LCOriginTopoClusters", "^EMOriginTopoClusters"],
        "CaloTopo" : ["CaloCalTopoCluster"],
        "Calo" : ["^LAr", "^AllCalo", "^AODCellContainer", "^MBTSContainer", "^CaloCompactCellContainer", "^E4prContainer", "^TileCellVec", "^TileDigits"],
        "Truth" : ["^Truth", "Truth$", "TruthMap$", "TruthCollection$", "^PRD_MultiTruth", "TracksTruth$", ".*TrackTruth$", "TrackTruthCollection"]
    }
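    # Each container name is matched against these patterns in turn and the
    # first category with a matching pattern wins. For example, a container
    # called "ElectronCollection" (a hypothetical name) would fall into the
    # "egamma" category via its "^Electron" pattern.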

    fileNames = []

    if len( args ) > 0:
        fileNames = [ arg for arg in args if arg[ 0 ] != "-" ]
        pass

    if options.fileName is None and len( fileNames ) == 0:
        parser.print_help()
        sys.exit( 1 )

    if options.fileName is not None:
        fileName = os.path.expandvars( os.path.expanduser( options.fileName ) )
        fileNames.append( fileName )
        pass

    fileNames = set( fileNames )
    # Check the consistency with the CSV output:
    if len( fileNames ) > 1 and options.csvFileName:
        print( "WARNING CSV output is only available when processing a single "
               "input file" )
        pass

    # Loop over the specified file(s):
    for fileName in fileNames:

        # Open the file:
        import PyUtils.PoolFile as PF
        poolFile = PF.PoolFile( fileName )

        # Loop over all the branches of the file, and sum up the information
        # about them in a smart way...
        summedData = {}
        categData = {}
        categTrigData = {}
        categTrigDynVars = {}
        for d in poolFile.data:
            # Skip metadata/TAG/etc. branches:
            # if d.dirType != "B": continue
            # The name of this branch:
            brName = d.name
            # Check if this is a static auxiliary store:
            m = re.match( r"(.*)Aux\..*", d.name )
            if m:
                # Yes, it is. And the name of the main object/container is:
                brName = m.group( 1 )
                pass
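            # (For instance, a hypothetical static store branch named
            # "ElectronsAux." would be booked under the container name
            # "Electrons".)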
            # Check if this is a dynamic auxiliary variable:
            m = re.match( r"(.*)AuxDyn\..*", d.name )
            if m:
                # Oh yes, it is. Let's construct the name of the main
                # object/container:
                brName = m.group( 1 )
                if edm.getCategory(d.name) != 'NOTFOUND':
                    if brName in categTrigDynVars:
                        categTrigDynVars[brName].append(d.name)
                    else:
                        categTrigDynVars[brName]=[d.name]
                pass
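            # (Similarly, a hypothetical dynamic variable branch named
            # "ElectronsAuxDyn.ptvarcone20" would be recorded under
            # "Electrons", so that printAuxDynVars can later list all dynamic
            # variables of a container together.)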
            # Check if we already know this container:
            if brName in summedData.keys():
                summedData[ brName ].memSize += d.memSize
                summedData[ brName ].diskSize += d.diskSize
            else:
                summedData[ brName ] = \
                    PF.PoolRecord( brName,
                                   d.memSize,
                                   d.diskSize,
                                   d.memSizeNoZip,
                                   d.nEntries,
                                   d.dirType )
                pass
            pass

        # Order the records by size:
        orderedData = []
        for br in summedData.keys():
            orderedData += [ summedData[ br ] ]
            pass
        sorter = PF.PoolRecord.Sorter.DiskSize
        import operator
        orderedData.sort( key = operator.attrgetter( sorter ) )

        # Access the CollectionTree directly:
        import ROOT
        tfile = ROOT.TFile.Open( fileName )
        ttree = tfile.Get( "CollectionTree" )

        # Now, let's print the event-wise info that we gathered:
        memSize = 0.0
        diskSize = 0.0
        for d in orderedData:
            # Keep branches with either the same number of entries as the
            # number of events, or the special tlp branches with extra event
            # information:
            mtlp = re.match( "(.*)_tlp.$", d.name ) or re.match( "(.*)DataHeader(.*)", d.name )
            if d.nEntries != poolFile.dataHeader.nEntries and not mtlp: continue
            # print d.name

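            # Branch names often carry a persistent version infix such as
            # "_p1_", "_v1_" or "_tlp1_"; the matches below strip it off to
            # recover a readable container name. (The exact infixes present
            # depend on the file.)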
            br = ttree.GetBranch( d.name )
            d_name = d.name
            if br:
                m = re.match( "(.*)_[pv]._", d.name )
                m1 = re.match( "(.*)_tlp._", d.name )
                m2 = re.match( "(.*)_v.>_", d.name )
                m3 = re.match( "([a-zA-Z]+)_(.*_[lL]inks?)", d.name )
                if m:
                    nameType = "%s (%s)" % ( d.name[m.end():], br.GetClassName() )
                    d_name = d.name[m.end():]
                elif m1:
                    nameType = "%s (%s)" % ( d.name[m1.end():], br.GetClassName() )
                    d_name = d.name[m1.end():]
                elif m2:
                    nameType = "%s (%s)" % ( d.name[m2.end():], br.GetClassName() )
                    d_name = d.name[m2.end():]
                elif m3:
                    nameType = "%s (%s)" % ( m3.group(2), br.GetClassName() )
                    d_name = m3.group(2)
                else:
                    nameType = "%s (%s)" % ( d.name, br.GetClassName() )
            else:
                # There is no TBranch of this name; reconstruct the type name
                # from the persistent branch name itself:
                m = re.match( "(.*)_v._", d.name )
                m1 = re.match( "(.*)(_tlp.$)", d.name )
                # print "match",m,m1
                if m:
                    nameType = "%s (%s)" % ( d.name[m.end():], (d.name[:m.end()-1]) )
                    d_name = d.name[m.end():]
                elif m1:
                    # print "m1:",m1.group(),m1.group(1)
                    nt = m1.group(1).replace("_",":") + m1.group(2)
                    n = m1.group(1).replace("_",":")
                    nameType = "%s (%s)" % ( n, nt )
                    d_name = n
                else:
                    nameType = "%s (%s)" % ( d.name, "()" )

            # Find category:
            found = False
            catName = '*Unknown*'
            for categ in categoryStrings:
                for pattern in categoryStrings[ categ ]:
                    m = None
                    try:
                        m = re.match(pattern, d_name)
                    except TypeError:
                        pass
                    if m:
                        found = True
                        catName = categ
                        break
                    pass
                # Stop at the first category with a matching pattern:
                if found:
                    break
                pass
            if not found:
                # print "Did not find category for:", d.name, d_name, br
                pass
            # Add the category to the name/type:
            nameType += ' [' + catName + ']'

            # Now sum up the sizes according to the category.
            # Check if we already know this category:
            if catName in categData.keys():
                categData[ catName ].memSize += d.memSize
                categData[ catName ].diskSize += d.diskSize
            else:
                categData[ catName ] = \
                    PF.PoolRecord( catName,
                                   d.memSize,
                                   d.diskSize,
                                   d.memSizeNoZip,
                                   d.nEntries,
                                   d.dirType )
                pass
            pass

            # Now repeat for the trigger categories (from TrigEDMConfig):
            found = False
            catName = '*Unknown*'
            if edm.getCategory(d_name) != 'NOTFOUND':
                found = True
                catName = 'Trig_'+edm.getCategory(d_name)
                nameType += ' [' + catName + ']'
                if catName in categTrigData.keys():
                    categTrigData[ catName ].memSize += d.memSize
                    categTrigData[ catName ].diskSize += d.diskSize
                else:
                    categTrigData[ catName ] = \
                        PF.PoolRecord( catName,
                                       d.memSize,
                                       d.diskSize,
                                       d.memSizeNoZip,
                                       d.nEntries,
                                       d.dirType )
                    pass
                pass

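            # (edm.getCategory returns the trigger EDM group of the container,
            # e.g. "Egamma" or "Muon"; the "Trig_" prefix keeps these separate
            # from the offline categories above.)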
            # The normal per-container print-out is commented out here:
            #print( PF.PoolOpts.ROW_FORMAT %
            #       ( d.memSize,
            #         d.diskSize,
            #         ( d.diskSize / poolFile.dataHeader.nEntries ),
            #         ( d.memSize / d.diskSize ),
            #         d.nEntries,
            #         nameType ) )
            memSize = memSize + d.memSize
            diskSize = diskSize + d.diskSize
            pass
        # Now print out the categorized information:
        sortTrigData(orderedData,diskSize)
        printCategoryData(categData,diskSize)
        print( " Trigger Group Data " )
        printCategoryData(categTrigData,diskSize)
        compareCategoryData(categTrigData,categData)

        # Now print the info about the metadata:
        print( "=" * 80 )
        print( " Meta data" )
        print( "=" * 80 )
        print( "    Mem Size     Disk Size  Container Name" )
        print( "-" * 80 )
        memSize = 0.0
        diskSize = 0.0
        for d in orderedData:
            mtlp = re.match( "(.*)_tlp.$", d.name ) or re.match( "(.*)DataHeader(.*)", d.name )
            if d.nEntries == poolFile.dataHeader.nEntries or mtlp: continue
            print( "%12.3f kb %12.3f kb %s" %
                   ( d.memSize, d.diskSize, d.name ) )
            memSize = memSize + d.memSize
            diskSize = diskSize + d.diskSize
            pass
        print( "-" * 80 )
        print( "%12.3f kb %12.3f kb %s" %
               ( memSize, diskSize, "Total" ) )
        print( "=" * 80 )
        print( "=" * 80 )
        if options.printVars: printAuxDynVars(categTrigDynVars)

        # Write out a CSV file if one was requested:
        if options.csvFileName and ( len( fileNames ) == 1 ):
            # Open the output file in text mode, as the csv module expects:
            import csv
            with open( options.csvFileName, "w", newline = "" ) as f:
                writer = csv.writer( f )
                # Set up the formatting of the file:
                writer.writerow( [ "Name (Type)", "Size/Evt" ] )
                # Write all entries to it:
                for d in orderedData:
                    # Skip metadata items:
                    if d.nEntries != poolFile.dataHeader.nEntries: continue
                    # Construct the name of the entry:
                    nameType = "%s (%s)" % \
                        ( d.name, ttree.GetBranch( d.name ).GetClassName() )
                    # Write the entry:
                    writer.writerow( [ nameType, d.diskSize / d.nEntries ] )
                    pass
                pass
            pass

        if len(fileNames) > 1:
            print()
        pass # loop over fileNames

    print("## Bye.")
    sys.exit( 0 )