__author__ = "Sebastien Binet <binet@cern.ch>, " \
    "Attila Krasznahorkay <Attila.Krasznahorkay@cern.ch>, " \
    "RD Schaffer <R.D.Schaffer@cern.ch>"
import sys
import os
import re
import csv
import operator
from optparse import OptionParser

if __name__ == "__main__":
    parser = OptionParser( usage = "usage: %prog [OPTION]... my.xAOD.file.pool.root" )
    parser.add_option( "--si",
                       action = "store_true", dest = "siUnits",
                       help = "print sizes in kB, i.e., in units of 1000 bytes"
                       " (default: print sizes in KiB, i.e., in units of 1024 bytes)" )
    parser.add_option( "-f", "--file", dest = "fileName",
                       help = "The path to the POOL file to analyze" )
    parser.add_option( "-c", "--csv", dest = "csvFileName",
                       help = "Output CSV file name, to use with spreadsheets" )
    ( options, args ) = parser.parse_args()
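    # Example invocation (a sketch: "checkxAOD.py" stands in for this
    # script's name, the file names are illustrative, and the flags are the
    # ones defined above):
    #
    #   checkxAOD.py --si -c sizes.csv my.xAOD.file.pool.root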
    # Regular expressions for putting the containers into categories. Note
    # that the matching loop below walks these keys in *reverse* order, so
    # later entries take precedence: e.g. a name containing "Ring" is claimed
    # by "caloringer" before the "egamma" patterns (whose "(?!.*Ring)"
    # lookaheads exclude it) are even tried.
    categoryStrings = {
        "MetaData" : ["^DataHeader", "(.*)_mems$", "(.*)_timings$", "^Token$",
                      "^RawInfoSummaryForTag$", "^index_ref$"],
        "Trig"     : ["^HLT", "^LVL1", "^L1", "^xTrig", "^Trig",
                      "^CTP_Decision", "^TrigInDetTrackTruthMap",
                      "^TrigNavigation", ".*TriggerTowers", "TileTTL1MBTS",
                      "^TileL2Cnt", "RoIBResult", "^_TRIGGER",
                      "^L1TopoRawData", "BunchConfKey"],
        "MET"      : ["^MET", "^METMAP", "JEMEtSums"],
        "EvtId"    : ["^ByteStreamEventInfo", "^EventInfo", "^McEventInfo",
                      "^LumiBlockN", "^EventWeight", "^RunNumber",
                      "^ConditionsRun", "^EventTime", "^BunchId",
                      "^EventNumber", "^IsTestBeam", "^IsSimulation",
                      "^IsCalibration", "^AvgIntPerXing", "^ActualIntPerXing",
                      "^RandomNumber", "^McChannel"],
        "tau"      : ["^Tau", "^DiTauJets"],
        "PFO"      : ["(.*)EventShape$", "^AntiKt4EMPFlowJets",
                      "^JetETMissChargedParticleFlowObjects",
                      "^JetETMissNeutralParticleFlowObjects",
                      "^CHS(.*)ChargedParticleFlowObjects",
                      "^CHSNeutralParticleFlowObjects",
                      "^JetETMissLCNeutralParticleFlowObjects",
                      "^Global(.*)ParticleFlowObjects"],
        "egamma"   : ["^GSF", "^ForwardElectron", "^egamma",
                      "^Electron(?!.*Ring)", "^Photon(?!.*Ring)"],
        "Muon"     : ["^Muon", "^TileMuObj", "^MS", "^SlowMuons", ".*Stau",
                      "(.*)MuonTrackParticles$", "MUCTPI_RDO", "^RPC", "^TGC",
                      "^MDT", "^CSC", "^sTGC", "^MM", ".*MuonMeasurements$",
                      "^ExtrapolatedMuonTracks", "^CombinedMuonTracks",
                      "^NCB_MuonSegments", "^UnAssocMuonSegments",
                      "^EMEO_Muons", "^EMEO_MuonSpectrometerTrackParticles",
                      "^xAODNSWSegments"],
        "InDet"    : ["^InDet", "^PrimaryVertices", "^ComTime_TRT", "^Pixel",
                      "^TRT", "^SCT", "^BCM", "^CTP", "^Tracks",
                      "^ResolvedForwardTracks", "^SplitClusterAmbiguityMap",
                      "^SoftBVrt", "^BLMHits", "^FourLeptonVertices"],
        "ACTS"     : [".*Acts.*"],
        "Jet"      : ["^CamKt", "^AntiKt", "^Jet(?!.*ParticleFlowObjects$)",
                      "^LCOriginTopoClusters", "^EMOriginTopoClusters"],
        "CaloTopo" : ["CaloCalTopoCluster", "CaloCalFwdTopoTowers"],
        "Calo"     : ["^LAr", "^AllCalo", "^AODCellContainer",
                      "^MBTSContainer", "^CaloCompactCellContainer",
                      "^CaloEntryLayer", "^E4prContainer", "^TileHitVec",
                      "^TileCellVec", "^TileDigits", "^MBTSHits"],
        "Truth"    : ["^Truth", "Truth$", "TruthMap$", "TruthCollection$",
                      "^PRD_MultiTruth", "TracksTruth$", ".*TrackTruth$",
                      "TrackTruthCollection", "^HardScatter", "BornLeptons",
                      ".*ExitLayer$", ".*EntryLayer$"],
        "LRT"      : ["^LRT", "(.*)LRT$", "(.*)LRTTrackParticles$",
                      "(.*)LargeD0TrackParticles$"],
        "caloringer" : ["(.*)Ring"],
        "AnalysisElectrons" : ["^AnalysisElectrons"],
        "AnalysisTauJets" : ["^AnalysisTauJets"],
        "AnalysisPhotons" : ["^AnalysisPhotons"],
        "AnalysisMuons" : ["^AnalysisMuons"],
        "AnalysisJets" : ["^AnalysisJets"],
        "AnalysisHLT" : ["^AnalysisHLT"],
        "AnalysisTrigMatch" : ["^AnalysisTrigMatch"],
        "AnalysisLargeRJets" : ["^AnalysisLargeRJets"],
        "AnalysisSiHitElectrons" : ["^AnalysisSiHitElectrons"],
    }
    fileNames = [ arg for arg in args if arg[ 0 ] != "-" ]
    if options.fileName is None and len( fileNames ) == 0:
        parser.print_help()
        sys.exit( 1 )
    if options.fileName is not None:
        fileName = os.path.expandvars( os.path.expanduser( options.fileName ) )
        fileNames.append( fileName )
    # Remove duplicate file names:
    fileNames = set( fileNames )
    if len( fileNames ) > 1 and options.csvFileName:
        print( "WARNING CSV output is only available when processing a single "
               "input file" )
    import PyUtils.PoolFile as PF

    # Label for the size units used in the printouts:
    sizeUnits = "kB" if options.siUnits else "KiB"

    # Pattern identifying the branches of static/dynamic auxiliary variables:
    auxvarptn = re.compile( r"Aux(?:Dyn)?(?:\.|:)" )
    # Process each input file:
    for fileName in fileNames:

        # Open the file and collect the size information from it:
        poolFile = PF.PoolFile( fileName )

        # Sum up the branches belonging to the same xAOD object/container:
        summedData = {}
        for d in poolFile.data:
            # If this is an auxiliary variable, book it under its parent
            # container's name:
            m = auxvarptn.search( d.name )
            if m:
                brName = d.name[:m.start()]
            else:
                brName = d.name
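            # E.g. for a branch called "ElectronsAuxDyn.pt" the pattern
            # matches "AuxDyn.", so the part before the match, "Electrons",
            # names the parent container (the names are illustrative).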
            # Add the sizes to an existing record, or create a new one:
            if brName in summedData.keys():
                summedData[ brName ].memSize  += d.memSize
                summedData[ brName ].diskSize += d.diskSize
            else:
                summedData[ brName ] = \
                    PF.PoolRecord( brName,
                                   d.memSize,
                                   d.diskSize,
                                   d.memSizeNoZip,
                                   d.nEntries,
                                   d.dirType )
                # Remember the type name seen on this first branch:
                summedData[ brName ].typeName = d.typeName
            # Keep the type name consistent across the summed branches:
            if summedData[ brName ].typeName and \
               summedData[ brName ].typeName != d.typeName:
                print(f"WARNING: Reset typeName {summedData[ brName ].typeName!r}"
                      f" -> {d.typeName!r} for {brName}", file=sys.stderr)
                summedData[ brName ].typeName = d.typeName
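            # This way e.g. "Electrons", "ElectronsAux." and
            # "ElectronsAuxDyn.pt" all end up accumulated in one
            # "Electrons" record.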
        # Sort the records by disk size:
        orderedData = [rec for rec in summedData.values()]
        sorter = PF.PoolRecord.Sorter.DiskSize
        orderedData.sort( key = operator.attrgetter( sorter ) )
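        # The sort is ascending, so the biggest consumers end up at the
        # bottom of the printed table, just above the totals line.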
        # Print a header for the per-container event-data table:
        print( " Event data" )
        print(f'{"Mem Size":^16} {"Disk Size":^16} {"Size/Evt":^16} {"Compression":>12}'
              f' {"Items":>8} Container Name (Type) [Category]')
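        # A branch counts as event data if its entry count equals the number
        # of events in the file; DataHeader and "_tlp" technical branches are
        # kept in this table as well. Everything else shows up later under
        # "Meta data".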
        memSize = 0.0
        diskSize = 0.0
        categData = {}
        for d in orderedData:
            mtlp = re.search( "_tlp.$", d.name ) or "DataHeader" in d.name
            if d.nEntries != poolFile.dataHeader.nEntries and not mtlp:
                continue
            # Assemble a pretty-printed "name (type)" string:
            colTypeName = d.typeName
            if colTypeName:
                for ptn in ("(?:_[pv]._|_tlp._|_v.>_)(.*)",
                            "^[a-zA-Z]+_(.*_[lL]inks?)"):
                    m = re.search(ptn, d.name)
                    if m:
                        d_name = m.group(1)
                        break
                else:
                    m = re.search( "_tlp.$", d.name )
                    if m:
                        d_name = d.name[:m.start()].replace( "_", ":" )
                    else:
                        d_name = d.name
                nameType = "%s (%s)" % (d_name, colTypeName)
            else:
                m = re.search( "_v._", d.name )
                if m:
                    d_name = d.name[m.end():]
                    nameType = "%s (%s)" % ( d_name, (d.name[:m.end()-1]) )
                else:
                    m = re.search( "_tlp.$", d.name )
                    if m:
                        d_name = d.name[:m.start()].replace( "_", ":" )
                        nameType = "%s (%s)" % (d_name, d_name + m.group())
                    else:
                        d_name = d.name
                        nameType = "%s (%s)" % ( d.name, "()" )
            # Find the category of this container (later dictionary entries
            # take precedence, hence the reversed iteration):
            for categ in reversed( categoryStrings.keys() ):
                for pattern in categoryStrings[ categ ]:
                    # An optional "Bkg_" prefix is ignored in the matching:
                    if re.match( pattern, d_name.replace( "Bkg_", "" ) ):
                        catName = categ
                        break
                else:
                    continue
                break
            else:
                catName = '*Unknown*'
            # Append the category name to the printed string:
            nameType += ' [' + catName + ']'
            # Sum up the sizes per category as well:
            if catName in categData.keys():
                categData[ catName ].memSize  += d.memSize
                categData[ catName ].diskSize += d.diskSize
            else:
                categData[ catName ] = \
                    PF.PoolRecord( catName,
                                   d.memSize,
                                   d.diskSize,
                                   d.memSizeNoZip,
                                   d.nEntries,
                                   d.dirType )
            # Print the per-container information; "Compression" is the ratio
            # of the in-memory size to the on-disk size:
            print(f"{d.memSize:12.3f} {sizeUnits:3} {d.diskSize:12.3f} {sizeUnits:3}"
                  f" {d.diskSize / poolFile.dataHeader.nEntries:12.3f} {sizeUnits:3}"
                  f" {d.memSize / d.diskSize:12.3f} {d.nEntries:8d} {nameType:s}")
            memSize = memSize + d.memSize
            diskSize = diskSize + d.diskSize
        # Print the event-data totals:
        print(f"{memSize:12.3f} {sizeUnits:3} {diskSize:12.3f} {sizeUnits:3}"
              f" {diskSize / poolFile.dataHeader.nEntries:12.3f} {sizeUnits:3}"
              f" {memSize / diskSize:12.3f} {poolFile.dataHeader.nEntries:8d} Total")
        # Sort the categorized data by disk size:
        categorizedData = list(categData.values())
        sorter = PF.PoolRecord.Sorter.DiskSize
        categorizedData.sort( key = operator.attrgetter( sorter ) )

        # Print a header for the categorized data:
        print( " Categorized data" )
        print(f'{"Disk Size/Evt":^16} {"Fraction":8} Category Name')
        # Print the per-category information, collecting the values for the
        # CSV printout along the way:
        totDiskSize = 0.0
        frac = 0.0
        ds = []
        dsFrac = []
        dsName = []
        for d in categorizedData:
            dsPerEvt = d.diskSize / poolFile.dataHeader.nEntries
            dsPerCatFrac = d.diskSize / diskSize
            totDiskSize += dsPerEvt
            frac += dsPerCatFrac
            ds += [dsPerEvt]
            dsFrac += [dsPerCatFrac]
            dsName += [d.name]
            print(f"{dsPerEvt:12.3f} {sizeUnits:3} {dsPerCatFrac:8.3f} {d.name:s}")
        print(f"{totDiskSize:12.3f} {sizeUnits:3} {frac:8.3f} Total")
        # Print the same information as comma-separated values, for easy
        # pasting into a spreadsheet (reversed, so the biggest category
        # comes first):
        print( "CSV for categories disk size/evt and fraction:" )
        print( ",".join( reversed( dsName ) ) )
        b = ['{:.3f}'.format(i) for i in reversed(ds)]
        print( ",".join( b ) )
        b = ['{:.3f}'.format(i) for i in reversed(dsFrac)]
        print( ",".join( b ) )
        # Print a header for the metadata:
        print( " Meta data" )
        print(f'{"Mem Size":^16} {"Disk Size":^16} Container Name')
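        # Metadata here means every branch whose entry count differs from the
        # number of events in the file, i.e. the complement of the event-data
        # selection above.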
        memSize = 0.0
        diskSize = 0.0
        for d in orderedData:
            mtlp = re.search( "_tlp.$", d.name ) or "DataHeader" in d.name
            # Skip the event-data branches this time around:
            if d.nEntries == poolFile.dataHeader.nEntries or mtlp:
                continue
            print(f"{d.memSize:12.3f} {sizeUnits:3} {d.diskSize:12.3f} {sizeUnits:3} {d.name:s}")
            memSize = memSize + d.memSize
            diskSize = diskSize + d.diskSize
        # Print the metadata totals:
        print(f"{memSize:12.3f} {sizeUnits:3} {diskSize:12.3f} {sizeUnits:3} Total")
        # Write the event-data sizes into a CSV file if one was requested
        # (only available when processing a single input file):
        if options.csvFileName and ( len( fileNames ) == 1 ):
            args = { 'newline' : '' }
            with open( options.csvFileName, "w", **args ) as f:
                writer = csv.writer( f )
                # Write a header row:
                writer.writerow( [ "Name (Type)", "Size/Evt" ] )
                # Write a row for each event-data container:
                for d in orderedData:
                    # Only event data is written into the CSV file:
                    if d.nEntries != poolFile.dataHeader.nEntries:
                        continue
                    colTypeName = d.typeName
                    if not colTypeName:
                        continue
                    nameType = "%s (%s)" % \
                        ( d.name, colTypeName )
                    writer.writerow( [ nameType, d.diskSize / d.nEntries ] )
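            # The resulting CSV looks something like this (the container and
            # the size value are illustrative):
            #
            #   Name (Type),Size/Evt
            #   Electrons (xAOD::ElectronContainer),1.234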
        # Leave an empty line between the printouts of multiple files:
        if len(fileNames) > 1:
            print( "" )