11 __author__ =
"Sebastien Binet <binet@cern.ch>, " \
12 "Attila Krasznahorkay <Attila.Krasznahorkay@cern.ch>, " \
13 "RD Schaffer R.D.Schaffer@cern.ch"
20 from optparse
import OptionParser
# Script entry point: command-line tool that analyses the content sizes
# of an xAOD POOL file (the tables are printed further below).
22 if __name__ ==
"__main__":
24 parser = OptionParser( usage =
"usage: %prog [-f] my.xAOD.file.pool.root" )
# NOTE(review): the parser.add_option(...) call lines themselves are
# elided from this view; only their help strings are visible.  The
# options read later are options.fileName and options.csvFileName.
29 help =
"The path to the POOL file to analyze" )
33 help =
"Output CSV file name, to use with spreadsheets" )
# Parse argv; positional args are candidate input file names.
34 ( options, args ) = parser.parse_args()
# Container-name regexes grouped by reporting category; a container's
# name (with any leading "Bkg_" stripped) is matched against these lists
# in the categorisation loop further below, iterating categories in
# reverse order.
# NOTE(review): the opening 'categoryStrings = {' assignment and a few
# category entries (original lines 52-53, 55, 60) are elided from this view.
# Bookkeeping / file-level metadata branches.
44 "MetaData" : [
"^DataHeader",
"(.*)_mems$",
"(.*)_timings$",
"^Token$",
"^RawInfoSummaryForTag$",
"^index_ref$"],
# Trigger-system containers (HLT, LVL1/L1, RoIB, ...).
45 "Trig" : [
"^HLT",
"^LVL1",
"^L1",
"^xTrig",
"^Trig",
"^CTP_Decision",
"^TrigInDetTrackTruthMap",
"^TrigNavigation",
".*TriggerTowers",
"TileTTL1MBTS",
"^TileL2Cnt",
"RoIBResult",
"^_TRIGGER",
"^L1TopoRawData",
"BunchConfKey"],
# Missing transverse energy containers.
46 "MET" : [
"^MET",
"^METMAP",
"JEMEtSums"],
# Event identification / bookkeeping quantities.
47 "EvtId" : [
"^ByteStreamEventInfo",
"^EventInfo",
"^McEventInfo",
"^LumiBlockN",
"^EventWeight",
"^RunNumber",
"^ConditionsRun",
"^EventTime",
"^BunchId",
"^EventNumber",
"^IsTestBeam",
"^IsSimulation",
"^IsCalibration",
"^AvgIntPerXing",
"^ActualIntPerXing",
"^RandomNumber",
"^McChannel"],
# Tau and di-tau containers.
48 "tau" : [
"^Tau",
"^DiTauJets"],
# Particle-flow objects and event-shape containers.
49 "PFO" : [
"(.*)EventShape$",
"^AntiKt4EMPFlowJets",
"^JetETMissChargedParticleFlowObjects",
"^JetETMissNeutralParticleFlowObjects",
"^CHS(.*)ChargedParticleFlowObjects",
"^CHSNeutralParticleFlowObjects",
"^JetETMissLCNeutralParticleFlowObjects",
"^Global(.*)ParticleFlowObjects"],
# Electron/photon containers; the negative lookaheads keep *Ring*
# containers out of this category (they match "caloringer" instead).
50 "egamma" : [
"^GSF",
"^ForwardElectron",
"^egamma",
"^Electron(?!.*Ring)",
"^Photon(?!.*Ring)"],
# Muon-spectrometer and combined-muon containers.
51 "Muon" : [
"^Muon",
"^TileMuObj",
"^MS",
"^SlowMuons",
".*Stau",
"(.*)MuonTrackParticles$",
"MUCTPI_RDO",
"^RPC",
"^TGC",
"^MDT",
"^CSC",
"^sTGC",
"^MM",
".*MuonMeasurements$",
"^ExtrapolatedMuonTracks",
"^CombinedMuonTracks",
"^NCB_MuonSegments",
"^UnAssocMuonSegments",
"^EMEO_Muons",
"^EMEO_MuonSpectrometerTrackParticles",
"^xAODNSWSegments"],
# Inner-detector tracking containers.
54 "InDet" : [
"^InDet",
"^PrimaryVertices",
"^ComTime_TRT",
"^Pixel",
"^TRT",
"^SCT",
"^BCM",
"^CTP",
"^Tracks",
"^ResolvedForwardTracks",
"^SplitClusterAmbiguityMap",
"^SoftBVrt",
"^BLMHits"],
# Jet collections; the lookahead leaves Jet*ParticleFlowObjects to "PFO".
56 "Jet" : [
"^CamKt",
"^AntiKt",
"^Jet(?!.*ParticleFlowObjects$)",
"^LCOriginTopoClusters",
"^EMOriginTopoClusters"],
# Topological calorimeter clusters / towers.
57 "CaloTopo" : [
"CaloCalTopoCluster",
"CaloCalFwdTopoTowers"],
# Calorimeter cell/digit-level containers.
58 "Calo" : [
"^LAr",
"^AllCalo",
"^AODCellContainer",
"^MBTSContainer",
"^CaloCompactCellContainer",
"^CaloEntryLayer",
"^E4prContainer",
"^TileHitVec",
"^TileCellVec",
"^TileDigits",
"^MBTSHits"],
# Monte-Carlo truth containers.
59 "Truth" : [
"^Truth",
"Truth$",
"TruthMap$",
"TruthCollection$",
"^PRD_MultiTruth",
"TracksTruth$",
".*TrackTruth$",
"TrackTruthCollection",
"^HardScatter",
"BornLeptons",
".*ExitLayer$",
".*EntryLayer$"],
# Large-radius / large-d0 tracking containers.
61 "LRT" : [
"^LRT",
"(.*)LRT$",
"(.*)LRTTrackParticles$",
"(.*)LargeD0TrackParticles$"],
# Anything containing "Ring" (calorimeter ringer containers).
62 "caloringer" : [
"(.*)Ring"],
# Analysis-level object collections, one category per collection name.
63 "AnalysisElectrons" : [
"^AnalysisElectrons" ],
64 "AnalysisTauJets" : [
"^AnalysisTauJets" ],
65 "AnalysisPhotons" : [
"^AnalysisPhotons" ],
66 "AnalysisMuons" : [
"^AnalysisMuons" ],
67 "AnalysisJets" : [
"^AnalysisJets" ],
68 "AnalysisHLT" : [
"^AnalysisHLT" ],
69 "AnalysisTrigMatch" : [
"^AnalysisTrigMatch" ],
70 "AnalysisLargeRJets" : [
"^AnalysisLargeRJets" ],
# Positional arguments that do not start with "-" are input file names.
76 fileNames = [ arg
for arg
in args
if arg[ 0 ] !=
"-" ]
# Nothing to do: no -f option and no positional files -> show the help.
79 if options.fileName
is None and len( fileNames ) == 0:
80 str( parser.print_help()
or "" )
# A file given via -f is added after "~" and "$VAR" expansion.
83 if options.fileName
is not None:
84 fileName = os.path.expandvars( os.path.expanduser( options.fileName ) )
85 fileNames.append( fileName )
# Drop duplicate file names.
88 fileNames =
set( fileNames )
# CSV output only makes sense for single-file runs (checked again below
# before the CSV file is actually written).
91 if len( fileNames ) > 1
and options.csvFileName:
92 print(
"WARNING CSV output is only available when processing a single "
# Marks the separator of auxiliary-store variable branch names:
# "Aux." / "AuxDyn." / "Aux:" / "AuxDyn:".
97 auxvarptn = re.compile(
r"Aux(?:Dyn)?(?:\.|:)" )
# Per-file processing: open each POOL file and print its size summaries.
99 for fileName
in fileNames:
# Project-local helper module (not stdlib); it provides the PoolFile /
# PoolRecord / PoolOpts APIs used throughout this loop body.
102 import PyUtils.PoolFile
as PF
103 poolFile = PF.PoolFile( fileName )
# Fold the per-variable auxiliary-store branches ("...Aux." / "...AuxDyn.")
# into one record per parent container, keyed by the name prefix.
# NOTE(review): the initialisation of summedData and the branch that
# selects aux-variable names are elided from this view.
109 for d
in poolFile.data:
115 m = auxvarptn.search( d.name )
# Container name = everything before the "Aux"/"AuxDyn" separator.
118 brName = d.name[:m.start()]
120 if brName
in summedData.keys():
121 summedData[ brName ].memSize += d.memSize
122 summedData[ brName ].diskSize += d.diskSize
124 summedData[ brName ] = \
125 PF.PoolRecord( brName,
# Aux branches of one container are expected to share a type name; warn
# on stderr when they disagree, keeping the last type name seen.
133 if summedData[ brName ].typeName
and \
134 summedData[ brName ].typeName != d.typeName:
135 print(f
"WARNING: Reset typeName {summedData[ brName ].typeName!r}"
136 f
" -> {d.typeName!r} for {brName}", file=sys.stderr)
137 summedData[ brName ].typeName = d.typeName
# Order the merged records by on-disk size for the printout.
141 orderedData = [rec
for rec
in summedData.values()]
142 sorter = PF.PoolRecord.Sorter.DiskSize
143 orderedData.sort( key = operator.attrgetter( sorter ) )
# ---- Event-data table -------------------------------------------------
148 print(
" Event data" )
150 print( PF.PoolOpts.HDR_FORMAT %
151 (
"Mem Size",
"Disk Size",
"Size/Evt",
"Compression",
152 "Items",
"Container Name (Type)" ) )
158 for d
in orderedData:
# "_tlp<char>"-suffixed branches and the DataHeader are treated as event
# data even when their entry count differs from the DataHeader's.
161 mtlp = re.search(
"_tlp.$", d.name )
or "DataHeader" in d.name
# Branches whose entry count differs from the event count are metadata;
# they are reported in the "Meta data" table further below instead.
162 if d.nEntries != poolFile.dataHeader.nEntries
and not mtlp:
continue
164 colTypeName = d.typeName
# Try to recover a readable transient name from the persistified branch
# name (version / persistent-tag infixes such as _p1_, _v2_, _tlp1_).
166 for ptn
in (
"(?:_[pv]._|_tlp._|_v.>_)(.*)",
"^[a-zA-Z]+_(.*_[lL]inks?)"):
167 m = re.search(ptn, d.name)
172 m = re.search(
"_tlp.$", d.name)
174 d_name = d.name[:m.start()].
replace(
"_",
":")
177 nameType =
"%s (%s)" % (d_name, colTypeName)
179 m = re.search(
"_v._", d.name )
181 d_name = d.name[m.end():]
182 nameType =
"%s (%s)" % ( d_name, (d.name[:m.end()-1]) )
184 m = re.search(
"_tlp.$", d.name)
186 d_name = d.name[:m.start()].
replace(
"_",
":")
187 nameType =
"%s (%s)" % (d_name, d_name + m.group())
# Fallback label when no type information is available.
190 nameType =
"%s (%s)" % ( d.name,
"()" )
# Categorise: match the cleaned name (any "Bkg_" prefix removed) against
# the regex lists in categoryStrings, iterating categories in reverse.
193 for categ
in reversed(categoryStrings.keys()):
194 for pattern
in categoryStrings[ categ ]:
195 if re.match(pattern, d_name.replace(
"Bkg_",
"")):
204 catName =
'*Unknown*'
# Append the category tag to the printed label.
206 nameType +=
' [' + catName +
']'
# Accumulate per-category memory/disk totals.
210 if catName
in categData.keys():
211 categData[ catName ].memSize += d.memSize
212 categData[ catName ].diskSize += d.diskSize
214 categData[ catName ] = \
215 PF.PoolRecord( catName,
# Per-container row: sizes, size per event, compression (mem/disk ratio).
224 print( PF.PoolOpts.ROW_FORMAT %
227 ( d.diskSize / poolFile.dataHeader.nEntries ),
228 ( d.memSize / d.diskSize ),
# Running totals over all event-data containers.
231 memSize = memSize + d.memSize
232 diskSize = diskSize + d.diskSize
# Totals row for the event-data table.
235 print( PF.PoolOpts.ROW_FORMAT %
238 ( diskSize / poolFile.dataHeader.nEntries ),
240 poolFile.dataHeader.nEntries,
# ---- Categorized-data summary ----------------------------------------
246 categorizedData =
list(categData.values())
247 sorter = PF.PoolRecord.Sorter.DiskSize
248 categorizedData.sort( key = operator.attrgetter( sorter ) )
251 print(
" Categorized data" )
253 print(
" Disk Size Fraction Category Name" )
260 for d
in categorizedData:
# Disk size per event and this category's fraction of the total.
261 dsPerEvt = d.diskSize / poolFile.dataHeader.nEntries
262 dsPerEvtFrac = d.diskSize / diskSize
263 totDiskSize += dsPerEvt
266 dsFrac += [dsPerEvtFrac]
268 print(
"%12.3f kb %12.3f %s" % ( dsPerEvt, dsPerEvtFrac, d.name ) )
270 print(
"%12.3f kb %12.3f %s" % ( totDiskSize , frac,
"Total" ) )
# Spreadsheet-friendly dump of the same per-category numbers.
277 print(
"CSV for categories disk size/evt and fraction:" )
279 print (
",".
join(reversed(dsName)))
280 b = [
'{:.3f}'.
format(i)
for i
in reversed(ds)]
282 b = [
'{:.3f}'.
format(i)
for i
in reversed(dsFrac)]
# ---- Meta-data table --------------------------------------------------
289 print(
" Meta data" )
291 print(
" Mem Size Disk Size Container Name" )
297 for d
in orderedData:
# Inverse of the event-data selection above: keep only branches whose
# entry count differs from the event count (and are not _tlp/DataHeader).
298 mtlp = re.search(
"_tlp.$", d.name )
or "DataHeader" in d.name
299 if d.nEntries == poolFile.dataHeader.nEntries
or mtlp:
continue
300 print(
"%12.3f kb %12.3f kb %s" %
301 ( d.memSize, d.diskSize, d.name ) )
302 memSize = memSize + d.memSize
303 diskSize = diskSize + d.diskSize
306 print(
"%12.3f kb %12.3f kb %s" %
307 ( memSize, diskSize,
"Total" ) )
# ---- Optional CSV output (single input file only) ---------------------
311 if options.csvFileName
and ( len( fileNames ) == 1 ):
# newline='' is the csv-module recommended open mode for output files.
314 args = {
'newline' :
''}
315 with open( options.csvFileName,
"w", **args )
as f:
316 writer = csv.writer( f )
318 writer.writerow( [
"Name (Type)",
"Size/Evt" ] )
320 for d
in orderedData:
# Event-data branches only.
322 if d.nEntries != poolFile.dataHeader.nEntries:
continue
324 colTypeName = d.typeName
325 if not colTypeName:
continue
326 nameType =
"%s (%s)" % \
327 ( d.name, colTypeName )
329 writer.writerow( [ nameType, d.diskSize / d.nEntries ] )
# Multi-file summary follows; its body is outside this view.
334 if len(fileNames) > 1: