__author__ = "Sebastien Binet <binet@cern.ch>, " \
             "Attila Krasznahorkay <Attila.Krasznahorkay@cern.ch>, " \
             "RD Schaffer <R.D.Schaffer@cern.ch>"
import sys
import os
import re
import csv
import operator
from optparse import OptionParser
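# This script sums up the sizes of all the branches that belong to a single
# xAOD object/container in a POOL file, then prints the totals per container
# and per category, optionally also writing the per-event sizes to a CSV file.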
if __name__ == "__main__":
    parser = OptionParser( usage = "usage: %prog [-f] my.xAOD.file.pool.root" )
    parser.add_option( "-f", "--file",
                       dest = "fileName",
                       help = "The path to the POOL file to analyze" )
    # The "-c/--csv" option name is a plausible reconstruction; the dest
    # names are fixed by their use below:
    parser.add_option( "-c", "--csv",
                       dest = "csvFileName",
                       help = "Output CSV file name, to use with spreadsheets" )
    ( options, args ) = parser.parse_args()
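    # Example invocations (assuming the script is installed as checkxAOD.py;
    # the file names are illustrative):
    #
    #   checkxAOD.py my.xAOD.file.pool.root
    #   checkxAOD.py -f my.xAOD.file.pool.root -c sizes.csv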
44 "MetaData" : [
"^DataHeader",
"(.*)_mems$",
"(.*)_timings$",
"^Token$",
"^RawInfoSummaryForTag$",
"^index_ref$"],
45 "Trig" : [
"^HLT",
"^LVL1",
"^L1",
"^xTrig",
"^Trig",
"^CTP_Decision",
"^TrigInDetTrackTruthMap",
"^TrigNavigation",
".*TriggerTowers",
"TileTTL1MBTS",
"^TileL2Cnt",
"RoIBResult",
"^_TRIGGER",
"^L1TopoRawData",
"BunchConfKey"],
46 "MET" : [
"^MET",
"^METMAP",
"JEMEtSums"],
47 "EvtId" : [
"^ByteStreamEventInfo",
"^EventInfo",
"^McEventInfo",
"^LumiBlockN",
"^EventWeight",
"^RunNumber",
"^ConditionsRun",
"^EventTime",
"^BunchId",
"^EventNumber",
"^IsTestBeam",
"^IsSimulation",
"^IsCalibration",
"^AvgIntPerXing",
"^ActualIntPerXing",
"^RandomNumber",
"^McChannel"],
48 "tau" : [
"^Tau",
"^DiTauJets"],
49 "PFO" : [
"(.*)EventShape$",
"^AntiKt4EMPFlowJets",
"^JetETMissChargedParticleFlowObjects",
"^JetETMissNeutralParticleFlowObjects",
"^CHS(.*)ChargedParticleFlowObjects",
"^CHSNeutralParticleFlowObjects",
"^JetETMissLCNeutralParticleFlowObjects",
"^Global(.*)ParticleFlowObjects"],
50 "egamma" : [
"^GSF",
"^ForwardElectron",
"^egamma",
"^Electron(?!.*Ring)",
"^Photon(?!.*Ring)"],
51 "Muon" : [
"^Muon",
"^TileMuObj",
"^MS",
"^SlowMuons",
".*Stau",
"(.*)MuonTrackParticles$",
"MUCTPI_RDO",
"^RPC",
"^TGC",
"^MDT",
"^CSC",
"^sTGC",
"^MM",
".*MuonMeasurements$",
"^ExtrapolatedMuonTracks",
"^CombinedMuonTracks",
"^NCB_MuonSegments",
"^UnAssocMuonSegments",
"^EMEO_Muons",
"^EMEO_MuonSpectrometerTrackParticles",
"^xAODNSWSegments"],
54 "InDet" : [
"^InDet",
"^PrimaryVertices",
"^ComTime_TRT",
"^Pixel",
"^TRT",
"^SCT",
"^BCM",
"^CTP",
"^Tracks",
"^ResolvedForwardTracks",
"^SplitClusterAmbiguityMap",
"^SoftBVrt",
"^BLMHits",
"^FourLeptonVertices"],
56 "Jet" : [
"^CamKt",
"^AntiKt",
"^Jet(?!.*ParticleFlowObjects$)",
"^LCOriginTopoClusters",
"^EMOriginTopoClusters"],
57 "CaloTopo" : [
"CaloCalTopoCluster",
"CaloCalFwdTopoTowers"],
58 "Calo" : [
"^LAr",
"^AllCalo",
"^AODCellContainer",
"^MBTSContainer",
"^CaloCompactCellContainer",
"^CaloEntryLayer",
"^E4prContainer",
"^TileHitVec",
"^TileCellVec",
"^TileDigits",
"^MBTSHits"],
59 "Truth" : [
"^Truth",
"Truth$",
"TruthMap$",
"TruthCollection$",
"^PRD_MultiTruth",
"TracksTruth$",
".*TrackTruth$",
"TrackTruthCollection",
"^HardScatter",
"BornLeptons",
".*ExitLayer$",
".*EntryLayer$"],
61 "LRT" : [
"^LRT",
"(.*)LRT$",
"(.*)LRTTrackParticles$",
"(.*)LargeD0TrackParticles$"],
62 "caloringer" : [
"(.*)Ring"],
63 "AnalysisElectrons" : [
"^AnalysisElectrons" ],
64 "AnalysisTauJets" : [
"^AnalysisTauJets" ],
65 "AnalysisPhotons" : [
"^AnalysisPhotons" ],
66 "AnalysisMuons" : [
"^AnalysisMuons" ],
67 "AnalysisJets" : [
"^AnalysisJets" ],
68 "AnalysisHLT" : [
"^AnalysisHLT" ],
69 "AnalysisTrigMatch" : [
"^AnalysisTrigMatch" ],
70 "AnalysisLargeRJets" : [
"^AnalysisLargeRJets" ],
71 "AnalysisSiHitElectrons" : [
"^AnalysisSiHitElectrons" ],
    # Positional arguments are treated as input file names:
    fileNames = [ arg for arg in args if arg[ 0 ] != "-" ]
    if options.fileName is None and len( fileNames ) == 0:
        str( parser.print_help() or "" )
        sys.exit( 1 )
    if options.fileName is not None:
        fileName = os.path.expandvars( os.path.expanduser( options.fileName ) )
        fileNames.append( fileName )
    # Remove duplicate file names:
    fileNames = set( fileNames )
    if len( fileNames ) > 1 and options.csvFileName:
        print( "WARNING CSV output is only available when processing a single "
               "input file" )
    # Pattern matching the (static or dynamic) auxiliary-store part of a
    # branch name:
    auxvarptn = re.compile( r"Aux(?:Dyn)?(?:\.|:)" )
    # Loop over the specified file(s):
    for fileName in fileNames:

        # Open the file:
        import PyUtils.PoolFile as PF
        poolFile = PF.PoolFile( fileName )
        # Loop over all branches, summing the branches that belong to the
        # same object/container:
        summedData = {}
        for d in poolFile.data:
            brName = d.name
            # Auxiliary variables are booked under their parent container:
            m = auxvarptn.search( d.name )
            if m:
                brName = d.name[:m.start()]
            # Add the branch to the summary, creating a record if needed
            # (PoolRecord argument order as in PyUtils.PoolFile):
            if brName in summedData.keys():
                summedData[ brName ].memSize  += d.memSize
                summedData[ brName ].diskSize += d.diskSize
            else:
                summedData[ brName ] = \
                    PF.PoolRecord( brName, d.memSize, d.diskSize,
                                   d.memSizeNoZip, d.nEntries, d.dirType )
            # Keep the type name up to date, warning about inconsistencies:
            if summedData[ brName ].typeName and \
               summedData[ brName ].typeName != d.typeName:
                print( f"WARNING: Reset typeName {summedData[ brName ].typeName!r}"
                       f" -> {d.typeName!r} for {brName}", file = sys.stderr )
            summedData[ brName ].typeName = d.typeName
        # Sort the summed branches by disk size:
        orderedData = list( summedData.values() )
        sorter = PF.PoolRecord.Sorter.DiskSize
        orderedData.sort( key = operator.attrgetter( sorter ) )
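        # PF.PoolRecord.Sorter.DiskSize holds the name of the record attribute
        # to sort on, so attrgetter() orders the records by their on-disk
        # size, smallest first.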
        # Print the summed information, event data first:
        print( " Event data" )
        print( PF.PoolOpts.HDR_FORMAT %
               ( "Mem Size", "Disk Size", "Size/Evt", "Compression",
                 "Items", "Container Name (Type)" ) )
        # Print the event-wise info, collecting per-category sums as we go:
        categData = {}
        memSize  = 0.0
        diskSize = 0.0
        for d in orderedData:
            # Keep branches with the event-data entry count, plus the
            # "_tlp" and DataHeader branches:
            mtlp = re.search( "_tlp.$", d.name ) or "DataHeader" in d.name
            if d.nEntries != poolFile.dataHeader.nEntries and not mtlp:
                continue
            # Build the "name (type)" string for printing:
            colTypeName = d.typeName
            if colTypeName:
                for ptn in ( "(?:_[pv]._|_tlp._|_v.>_)(.*)",
                             "^[a-zA-Z]+_(.*_[lL]inks?)" ):
                    m = re.search( ptn, d.name )
                    if m:
                        d_name = m.group( 1 )
                        break
                else:
                    m = re.search( "_tlp.$", d.name )
                    if m:
                        d_name = d.name[:m.start()].replace( "_", ":" )
                    else:
                        d_name = d.name
                nameType = "%s (%s)" % ( d_name, colTypeName )
            else:
                # No type name available; try to infer it from the branch name:
                m = re.search( "_v._", d.name )
                if m:
                    d_name = d.name[m.end():]
                    nameType = "%s (%s)" % ( d_name, d.name[:m.end()-1] )
                else:
                    m = re.search( "_tlp.$", d.name )
                    if m:
                        d_name = d.name[:m.start()].replace( "_", ":" )
                        nameType = "%s (%s)" % ( d_name, d_name + m.group() )
                    else:
                        d_name = d.name
                        nameType = "%s (%s)" % ( d.name, "()" )
            # Determine the category of the container:
            catName = '*Unknown*'
            found = False
            for categ in reversed( categoryStrings.keys() ):
                for pattern in categoryStrings[ categ ]:
                    if re.match( pattern, d_name.replace( "Bkg_", "" ) ):
                        catName = categ
                        found = True
                        break
                if found:
                    break
            # Add the category to the printed name:
            nameType += ' [' + catName + ']'
            # Sum up the sizes per category as well:
            if catName in categData.keys():
                categData[ catName ].memSize  += d.memSize
                categData[ catName ].diskSize += d.diskSize
            else:
                categData[ catName ] = \
                    PF.PoolRecord( catName, d.memSize, d.diskSize,
                                   d.memSizeNoZip, d.nEntries, d.dirType )
            # Print the row, and add the container to the totals:
            print( PF.PoolOpts.ROW_FORMAT %
                   ( d.memSize, d.diskSize,
                     ( d.diskSize / poolFile.dataHeader.nEntries ),
                     ( d.memSize / d.diskSize ),
                     d.nEntries, nameType ) )
            memSize = memSize + d.memSize
            diskSize = diskSize + d.diskSize
        print( PF.PoolOpts.ROW_FORMAT %
               ( memSize, diskSize,
                 ( diskSize / poolFile.dataHeader.nEntries ),
                 ( memSize / diskSize ),
                 poolFile.dataHeader.nEntries, "Total" ) )
        # Sort the categories by disk size:
        categorizedData = list( categData.values() )
        sorter = PF.PoolRecord.Sorter.DiskSize
        categorizedData.sort( key = operator.attrgetter( sorter ) )
        print( " Categorized data" )
        print( " Disk Size Fraction Category Name" )
        # Print the categorized data, remembering the values for the CSV
        # printout below:
        totDiskSize = 0.0
        frac        = 0.0
        ds          = []
        dsFrac      = []
        dsName      = []
        for d in categorizedData:
            dsPerEvt     = d.diskSize / poolFile.dataHeader.nEntries
            dsPerEvtFrac = d.diskSize / diskSize
            totDiskSize += dsPerEvt
            frac        += dsPerEvtFrac
            ds          += [dsPerEvt]
            dsFrac      += [dsPerEvtFrac]
            dsName      += [d.name]
            print( "%12.3f kb %12.3f %s" % ( dsPerEvt, dsPerEvtFrac, d.name ) )
        print( "%12.3f kb %12.3f %s" % ( totDiskSize, frac, "Total" ) )
        ds     += [totDiskSize]
        dsFrac += [frac]
        dsName += ["Total"]
        # Print the same values as comma-separated lists:
        print( "CSV for categories disk size/evt and fraction:" )
        print( ",".join( reversed( dsName ) ) )
        b = [ '{:.3f}'.format( i ) for i in reversed( ds ) ]
        print( ",".join( b ) )
        b = [ '{:.3f}'.format( i ) for i in reversed( dsFrac ) ]
        print( ",".join( b ) )
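        # The lists were filled in ascending disk-size order, with "Total"
        # appended last, so the reversed() calls above put the total and the
        # largest categories first in the CSV lines.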
        # Finally, print the metadata:
        print( " Meta data" )
        print( " Mem Size Disk Size Container Name" )
        memSize  = 0.0
        diskSize = 0.0
        for d in orderedData:
            mtlp = re.search( "_tlp.$", d.name ) or "DataHeader" in d.name
            if d.nEntries == poolFile.dataHeader.nEntries or mtlp:
                continue
            print( "%12.3f kb %12.3f kb %s" %
                   ( d.memSize, d.diskSize, d.name ) )
            memSize = memSize + d.memSize
            diskSize = diskSize + d.diskSize
        print( "%12.3f kb %12.3f kb %s" %
               ( memSize, diskSize, "Total" ) )
        # Write out a CSV file with the event-data sizes, if one was requested:
        if options.csvFileName and ( len( fileNames ) == 1 ):
            args = { 'newline' : '' }
            with open( options.csvFileName, "w", **args ) as f:
                writer = csv.writer( f )
                # The header of the CSV file:
                writer.writerow( [ "Name (Type)", "Size/Evt" ] )
                # Write one row per event-data container:
                for d in orderedData:
                    # Skip the metadata items:
                    if d.nEntries != poolFile.dataHeader.nEntries:
                        continue
                    # Skip entries without a type name:
                    colTypeName = d.typeName
                    if not colTypeName:
                        continue
                    nameType = "%s (%s)" % ( d.name, colTypeName )
                    writer.writerow( [ nameType, d.diskSize / d.nEntries ] )
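                # The values written here are in the same units as the tables
                # printed above (labelled "kb").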
        # Leave an empty line between the printouts of multiple files:
        if len( fileNames ) > 1:
            print( "" )