__author__ = "Sebastien Binet <binet@cern.ch>, " \
             "Attila Krasznahorkay <Attila.Krasznahorkay@cern.ch>, " \
             "RD Schaffer <R.D.Schaffer@cern.ch>, " \
             "Ryan White <ryan.white@cern.ch>"
# Standard-library and ROOT imports needed by the code below:
import os
import re
import csv
import operator

import ROOT

from optparse import OptionParser
import TrigEDMConfig.TriggerEDM as edm
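# TrigEDMConfig.TriggerEDM provides getCategory(), which maps a container
# name to its trigger EDM category and returns 'NOTFOUND' for non-trigger
# containers; it drives the trigger categorization below.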
    for br in categData.keys():
        categorizedData += [ categData[ br ] ]

    sorter = PF.PoolRecord.Sorter.DiskSize
    categorizedData.sort( key = operator.attrgetter( sorter ) )
    return categorizedData
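# trDict maps each trigger EDM category onto the offline category whose size
# it is compared against in the trigger/offline table below.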
trDict = { 'Trig_Egamma': 'egamma',
    print( " Categorized data" )
    print( " Trigger Offline Trigger/Offline Category Name Offline" )

        if trDict[ d.name ] == r.name:
            dsPerEvt = d.diskSize / poolFile.dataHeader.nEntries
            rsPerEvt = r.diskSize / poolFile.dataHeader.nEntries
            frac = dsPerEvt / rsPerEvt
            print( "%12.3f kb %12.3f kb %12.3f %12s %12s" %
                   ( dsPerEvt, rsPerEvt, frac, d.name, r.name ) )
    print( " Categorized data" )
    print( " Disk Size Fraction Category Name" )

    for d in categorizedData:
        dsPerEvt = d.diskSize / poolFile.dataHeader.nEntries
        dsPerEvtFrac = d.diskSize / diskTotal
        totDiskSize += dsPerEvt
        dsFrac += [ dsPerEvtFrac ]
        print( "%12.3f kb %12.3f %s" % ( dsPerEvt, dsPerEvtFrac, d.name ) )
    print( "%12.3f kb %12.3f %s" % ( totDiskSize, frac, "Total" ) )
    print( "CSV for categories disk size/evt and fraction:" )

    # NOTE: printing of the joined lists is assumed here.
    b = [ '{:<0.3f}'.format( i ) for i in ds[ ::-1 ] ]
    print( ",".join( b ) )
    b = [ '{:<0.3f}'.format( i ) for i in dsFrac[ ::-1 ] ]
    print( ",".join( b ) )
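    # Group the ordered records by trigger EDM category for the per-group
    # tables that follow.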
    for d in orderedData:
        catName = edm.getCategory( d.name ).strip()
        if catName == 'NOTFOUND':
            continue
        if catName in orderTrigData.keys():
            orderTrigData[ catName ].append( d )
        else:
            orderTrigData[ catName ] = [ d ]
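    # One table per trigger EDM group, using the same header/row formats as
    # PyUtils.PoolFile, closed by the group's total size per event and its
    # fraction of the whole AOD.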
    for k in orderTrigData:
        print( ' Trigger EDM group: %s' % ( k ) )
        print( PF.PoolOpts.HDR_FORMAT %
               ( "Mem Size", "Disk Size", "Size/Evt", "Compression",
                 "Items", "Container Name (Type)" ) )
        memSize = 0.0   # per-group counters; reset assumed here
        diskSize = 0.0
        for d in orderTrigData[ k ]:
            memSize = memSize + d.memSize
            diskSize = diskSize + d.diskSize
            # Row fields assumed to mirror the header columns above:
            print( PF.PoolOpts.ROW_FORMAT %
                   ( d.memSize,
                     d.diskSize,
                     ( d.diskSize / poolFile.dataHeader.nEntries ),
                     ( d.memSize / d.diskSize ),
                     d.nEntries,
                     d.name ) )
        print( " %s Total size/evt %12.3f AOD fraction %12.3f" %
               ( k, diskSize / poolFile.dataHeader.nEntries,
                 diskSize / diskTotal ) )
    for key, items in dynvars.items():
        # Loop body assumed: report each container with its dynamic variables.
        print( "%s : %s" % ( key, items ) )
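# Script entry point: parse the command line, then categorize and summarize
# the branches of every input file.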
if __name__ == "__main__":

    parser = OptionParser( usage = "usage: %prog [-f] my.xAOD.file.pool.root" )
    p = parser.add_option
    # NOTE: the option strings "-f/--file", "--csv" and "--dynvars" are
    # assumed; the dest and help values are as given in the original.
    p( "-f", "--file",
       dest = "fileName",
       help = "The path to the POOL file to analyze" )
    p( "--csv",
       dest = "csvFileName",
       help = "Output CSV file name, to use with spreadsheets" )
    p( "--dynvars",
       action = "store_true",
       default = False,
       help = "print dynamic variables" )
    ( options, args ) = parser.parse_args()
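    # Regular expressions assigning each (type-stripped) branch name to an
    # offline category; a branch belongs to the first category with a
    # matching pattern.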
176 "MetaData" : [
"^DataHeader",
"(.*)_mems$",
"(.*)_timings$",
"^Token$",
"^RawInfoSummaryForTag$"],
177 "Trig" : [
"^HLT",
"^LVL1",
"^xTrig",
"^Trig",
"^CTP_Decision",
"^TrigInDetTrackTruthMap",
"^TrigNavigation",
".*TriggerTowers",
"TileTTL1MBTS",
"^TileL2Cnt",
"RoIBResult",
"^_TRIGGER",
"^L1TopoRawData"],
178 "MET" : [
"^MET",
"^METMAP",
"JEMEtSums"],
179 "EvtId" : [
"^ByteStreamEventInfo",
"^EventInfo",
"^McEventInfo",
"^LumiBlockN",
"^EventWeight",
"^RunNumber",
"^ConditionsRun",
"^EventTime",
"^BunchId",
"^EventNumber",
"^IsTestBeam",
"^IsSimulation",
"^IsCalibration",
"^AvgIntPerXing",
"^ActualIntPerXing",
"^RandomNumber"],
180 "tau" : [
"^Tau",
"^CombinedStauTrackParticles",
"^ExtrapolatedStauTrackParticles",
"^finalTauPi0s",
"^DiTauJets"],
181 "PFO" : [
"(.*)EventShape$",
"^AntiKt4EMPFlowJets",
"^JetETMissChargedParticleFlowObjects",
"^JetETMissNeutralParticleFlowObjects"],
182 "egamma" : [
"^GSF",
"^ForwardElectron",
"^egamma",
"^Electron",
"^Photon"],
183 "Muon" : [
"^Muon",
"^TileMuObj",
"^MS",
"^SlowMuons",
"^Staus",
"(.*)MuonTrackParticles$",
"MUCTPI_RDO",
"^RPC",
"^TGC",
"^MDT",
"^CSC",
".*MuonMeasurements$",
"^ExtrapolatedMuonTracks",
"^CombinedMuonTracks"],
185 "InDet" : [
"^InDet",
"^PrimaryVertices",
"^ComTime_TRT",
"^Pixel",
"^TRT",
"^SCT",
"^BCM",
"^CTP",
"^Tracks",
"^ResolvedForwardTracks",
"^SplitClusterAmbiguityMap"],
186 "Jet" : [
"^CamKt",
"^AntiKt",
"^Jet",
"^LCOriginTopoClusters",
"^EMOriginTopoClusters"],
187 "CaloTopo" : [
"CaloCalTopoCluster"],
188 "Calo" : [
"^LAr",
"^AllCalo",
"^AODCellContainer",
"^MBTSContainer",
"^CaloCompactCellContainer",
"^E4prContainer",
"^TileCellVec",
"^TileDigits"],
189 "Truth" : [
"^Truth",
"Truth$",
"TruthMap$",
"TruthCollection$",
"^PRD_MultiTruth",
"TracksTruth$",
".*TrackTruth$",
"TrackTruthCollection"]
    fileNames = [ arg for arg in args if arg[ 0 ] != "-" ]

    if options.fileName is None and len( fileNames ) == 0:
        str( parser.print_help() or "" )

    if options.fileName is not None:
        fileName = os.path.expandvars( os.path.expanduser( options.fileName ) )
        fileNames.append( fileName )

    fileNames = set( fileNames )

    if len( fileNames ) > 1 and options.csvFileName:
        print( "WARNING CSV output is only available when processing a single "
               "input file" )

    for fileName in fileNames:
        import PyUtils.PoolFile as PF
        poolFile = PF.PoolFile( fileName )
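        # PF.PoolFile inspects the file with ROOT and exposes one PoolRecord
        # per branch, carrying the memSize/diskSize/nEntries attributes that
        # get summed below.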
        categTrigDynVars = {}
        for d in poolFile.data:
            # Sum each auxiliary store branch into its parent container:
            brName = d.name
            m = re.match( r"(.*)Aux\..*", d.name )
            if m:
                brName = m.group( 1 )
            # Record dynamic auxiliary variables per container as well:
            m = re.match( r"(.*)AuxDyn\..*", d.name )
            if m:
                brName = m.group( 1 )
                if edm.getCategory( d.name ) != 'NOTFOUND':
                    if brName in categTrigDynVars:
                        categTrigDynVars[ brName ].append( d.name )
                    else:
                        categTrigDynVars[ brName ] = [ d.name ]
            if brName in summedData.keys():
                summedData[ brName ].memSize += d.memSize
                summedData[ brName ].diskSize += d.diskSize
            else:
                # Remaining PoolRecord constructor arguments are assumed:
                summedData[ brName ] = \
                    PF.PoolRecord( brName,
                                   d.memSize,
                                   d.diskSize,
                                   d.memSizeNoZip,
                                   d.nEntries,
                                   d.dirType )
        orderedData = []
        for br in summedData.keys():
            orderedData += [ summedData[ br ] ]

        sorter = PF.PoolRecord.Sorter.DiskSize
        orderedData.sort( key = operator.attrgetter( sorter ) )
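        # Re-open the file directly with ROOT: the branch class names needed
        # for the "(Type)" part of the printout come from CollectionTree.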
        tfile = ROOT.TFile.Open( fileName )
        ttree = tfile.Get( "CollectionTree" )
        for d in orderedData:
            # Keep event-level branches; let the persistent-format helper
            # (_tlp) and DataHeader branches through as well:
            mtlp = re.match( "(.*)_tlp.$", d.name ) or \
                   re.match( "(.*)DataHeader(.*)", d.name )
            if d.nEntries != poolFile.dataHeader.nEntries and not mtlp:
                continue
            br = ttree.GetBranch( d.name )
            d_name = d.name
            if br:
                # Strip the persistent version decoration off the name:
                m  = re.match( "(.*)_[pv]._", d.name )
                m1 = re.match( "(.*)_tlp._", d.name )
                m2 = re.match( "(.*)_v.>_", d.name )
                m3 = re.match( "([a-zA-Z]+)_(.*_[lL]inks?)", d.name )
                if m:
                    nameType = "%s (%s)" % ( d.name[ m.end(): ],
                                             br.GetClassName() )
                    d_name = d.name[ m.end(): ]
                elif m1:
                    nameType = "%s (%s)" % ( d.name[ m1.end(): ],
                                             br.GetClassName() )
                    d_name = d.name[ m1.end(): ]
                elif m2:
                    nameType = "%s (%s)" % ( d.name[ m2.end(): ],
                                             br.GetClassName() )
                    d_name = d.name[ m2.end(): ]
                elif m3:
                    nameType = "%s (%s)" % ( m3.group( 2 ),
                                             br.GetClassName() )
                else:
                    nameType = "%s (%s)" % ( d.name, br.GetClassName() )
            else:
                # No such branch in CollectionTree: derive the type name from
                # the branch name itself:
                m  = re.match( "(.*)_v._", d.name )
                m1 = re.match( "(.*)(_tlp.$)", d.name )
                if m:
                    nameType = "%s (%s)" % ( d.name[ m.end(): ],
                                             ( d.name[ :m.end() - 1 ] ) )
                    d_name = d.name[ m.end(): ]
                elif m1:
                    n = m1.group( 1 )  # assumed display name
                    nt = m1.group( 1 ).replace( "_", ":" ) + m1.group( 2 )
                    nameType = "%s (%s)" % ( n, nt )
                else:
                    nameType = "%s (%s)" % ( d.name, "()" )
            catName = '*Unknown*'
            for categ in categoryStrings:
                for pattern in categoryStrings[ categ ]:
                    m = re.match( pattern, d_name )
                    if m:
                        catName = categ
                        break
                if catName != '*Unknown*':
                    break
            nameType += ' [' + catName + ']'
            if catName in categData.keys():
                categData[ catName ].memSize += d.memSize
                categData[ catName ].diskSize += d.diskSize
            else:
                # Remaining PoolRecord constructor arguments are assumed:
                categData[ catName ] = \
                    PF.PoolRecord( catName,
                                   d.memSize,
                                   d.diskSize,
                                   d.memSizeNoZip,
                                   d.nEntries,
                                   d.dirType )
            catName = '*Unknown*'
            if edm.getCategory( d_name ) != 'NOTFOUND':
                catName = 'Trig_' + edm.getCategory( d_name )
            nameType += ' [' + catName + ']'
            if catName in categTrigData.keys():
                categTrigData[ catName ].memSize += d.memSize
                categTrigData[ catName ].diskSize += d.diskSize
            else:
                categTrigData[ catName ] = \
                    PF.PoolRecord( catName,
                                   d.memSize,
                                   d.diskSize,
                                   d.memSizeNoZip,
                                   d.nEntries,
                                   d.dirType )
            memSize = memSize + d.memSize
            diskSize = diskSize + d.diskSize

        print( " Trigger Group Data " )

        print( " Meta data" )

        print( " Mem Size Disk Size Container Name" )
        memSize = 0.0   # metadata counters; reset assumed here
        diskSize = 0.0
        for d in orderedData:
            mtlp = re.match( "(.*)_tlp.$", d.name ) or \
                   re.match( "(.*)DataHeader(.*)", d.name )
            if d.nEntries == poolFile.dataHeader.nEntries or mtlp:
                continue
            print( "%12.3f kb %12.3f kb %s" %
                   ( d.memSize, d.diskSize, d.name ) )
            memSize = memSize + d.memSize
            diskSize = diskSize + d.diskSize
        print( "%12.3f kb %12.3f kb %s" %
               ( memSize, diskSize, "Total" ) )
        if options.csvFileName and ( len( fileNames ) == 1 ):
            # csv.writer needs a text-mode file in Python 3:
            with open( options.csvFileName, "w", newline = "" ) as f:
                writer = csv.writer( f )
                writer.writerow( [ "Name (Type)", "Size/Evt" ] )
                for d in orderedData:
                    if d.nEntries != poolFile.dataHeader.nEntries:
                        continue
                    nameType = "%s (%s)" % \
                        ( d.name, ttree.GetBranch( d.name ).GetClassName() )
                    writer.writerow( [ nameType, d.diskSize / d.nEntries ] )
    if len( fileNames ) > 1: