ATLAS Offline Software
Loading...
Searching...
No Matches
Control/xAODRootAccess/scripts/checkxAOD.py
Go to the documentation of this file.
1#!/usr/bin/env python
2#
3# Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
4#
5# This is a standalone implementation of the xAOD checking script. It just needs
6# a functional ROOT installation to work.
7#
8# @author Attila Krasznahorkay <Attila.Krasznahorkay@cern.ch>
9#
10
11# Import(s):
12import os, re, operator, ROOT
13from optparse import OptionParser
14
15
def main():
    """C(++) style main function.

    Parses the command line, then prints the size breakdown of each of the
    specified xAOD files using printFileInfo(...).

    @returns 0 on success, 1 if no input file was specified
    """

    # Set up the command line option parser:
    parser = OptionParser( usage = "usage: %prog [-f] xAOD.pool.root" )
    parser.add_option( "-f", "--file",
                       dest = "fileName",
                       help = "The path to the xAOD file to analyse" )
    ( options, args ) = parser.parse_args()

    # Set up categorization matching strings. Containers are assigned to a
    # category by matching their names against these regular expressions.
    categoryStrings = {
        "MetaData" : ["^DataHeader", "(.*)_mems$", "(.*)_timings$", "^Token$", "^RawInfoSummaryForTag$"],
        "Trig" : ["^HLT", "^LVL1", "^xTrig", "^Trig", "^CTP_Decision", "^TrigInDetTrackTruthMap", "^TrigNavigation", ".*TriggerTowers", "TileTTL1MBTS", "^TileL2Cnt", "RoIBResult"],
        "MET" : ["^MET", "^METMAP", "JEMEtSums"],
        "EvtId" : ["^ByteStreamEventInfo", "^EventInfo", "^McEventInfo", "^LumiBlockN", "^EventWeight", "^RunNumber", "^ConditionsRun", "^EventTime", "^BunchId", "^EventNumber"],
        "tau" : ["^Tau", "^DiTauJets"],
        "PFO" : ["(.*)EventShape$", "^AntiKt4EMPFlowJets", "^JetETMissChargedParticleFlowObjects", "^JetETMissNeutralParticleFlowObjects", "^JetETMissLCNeutralParticleFlowObjects"],
        "egamma" : ["^GSF", "^ForwardElectron", "^egamma", "^Electron", "^Photon"],
        "Muon" : ["^Muon", "^TileMuObj", "^MS", "^SlowMuons", ".*Stau", "(.*)MuonTrackParticles$", "MUCTPI_RDO", "^RPC", "^TGC", "^MDT", "^CSC", "^sTGC", "^MM", ".*MuonMeasurements$", "^ExtrapolatedMuonTracks", "^CombinedMuonTracks"],
        "BTag" : ["^BTag"],
        "InDet" : ["^InDet", "^PrimaryVertices", "^ComTime_TRT", "^Pixel", "^TRT", "^SCT", "^BCM", "^CTP", "^Tracks", "^ResolvedForwardTracks", "^SplitClusterAmbiguityMap"],
        "Jet" : ["^CamKt", "^AntiKt", "^Jet"],
        "CaloTopo" : ["CaloCalTopoCluster"],
        "Calo" : ["^LAr", "^AODCellContainer", "^MBTSContainer", "^CaloCompactCellContainer", "^E4prContainer", "^TileCellVec", "^TileDigits"],
        "Truth" : ["^Truth", "Truth$", "TruthMap$", "TruthCollection$", "^PRD_MultiTruth", "TracksTruth$", ".*TrackTruth$", "TrackTruthCollection"]
    }

    # Get the file name(s), taking all options into account. Positional
    # arguments that look like options are ignored:
    fileNames = [ arg for arg in args if arg[ 0 ] != "-" ]
    if ( options.fileName is None ) and ( not fileNames ):
        parser.print_help()
        return 1
    if options.fileName is not None:
        fileNames.append(
            os.path.expandvars( os.path.expanduser( options.fileName ) ) )
    # Remove any duplicate file names:
    fileNames = set( fileNames )

    # Set up ROOT, silencing info/warning messages:
    ROOT.gErrorIgnoreLevel = ROOT.kError

    # Loop over the files:
    for fileName in fileNames:
        # Print their contents using the helper function:
        printFileInfo( fileName, categoryStrings )

    # Return gracefully:
    return 0
72
73
78
79 def __init__( self, name, memSize, diskSize, nEntries ):
80 # Call the base class's constructor:
81 object.__init__( self )
82 # Remember the properties:
83 self._name = name
84 self._memSize = memSize
85 self._diskSize = diskSize
86 self._nEntries = nEntries
87 return
88
89 def name( self ):
90 return self._name
91
92 def memSize( self ):
93 return float( self._memSize ) / 1024.
94
95 def diskSize( self ):
96 return float( self._diskSize ) / 1024.
97
98 def diskSizePerEntry( self ):
99 return self.diskSize() / self._nEntries
100
101 def compression( self ):
102 return self.memSize() / self.diskSize()
103
104 def nEntries( self ):
105 return self._nEntries
106
107
def printFileInfo( fileName, categoryStrings ):
    """Function printing information about a single xAOD file.

    Prints a per-container size table for the file's CollectionTree,
    followed by a per-category summary and a CSV dump of the categories.

    @param fileName        Path of the xAOD file to analyse
    @param categoryStrings Dictionary of category name -> list of regular
                           expression patterns matching container names
    @raises RuntimeError   If the file can't be opened, the CollectionTree
                           is missing, or a branch holds an inconsistent
                           number of entries
    """

    # Open the file:
    f = ROOT.TFile.Open( fileName, "READ" )
    if not f or f.IsZombie():
        # NOTE: the original code raised plain strings, which is a TypeError
        # in Python 3. Use a real exception type instead.
        raise RuntimeError( "Couldn't open file %s" % fileName )

    # Get the main event tree from the file:
    t = f.Get( "CollectionTree" )
    if not t:
        raise RuntimeError( "Couldn't find 'CollectionTree' in file %s" %
                            fileName )

    # The collected data:
    infoForCont = {}
    categData = {}

    # The entries in the TTree, for cross-checking:
    entries = t.GetEntries()

    # Patterns "decoding" static/dynamic auxiliary branch names, compiled
    # once outside of the branch loop:
    auxPattern    = re.compile( r"(.*)Aux\..*" )
    auxDynPattern = re.compile( r"(.*)AuxDyn\..*" )

    # Collect the payload sizes of all branches, summing up interface,
    # static auxiliary and dynamic auxiliary branches under one name:
    branches = t.GetListOfBranches()
    for i in range( branches.GetEntries() ):
        # Get the branch:
        branch = branches.At( i )
        # A little security check:
        if branch.GetEntries() != entries:
            raise RuntimeError( "Found %i entries in branch %s instead of %i" %
                                ( branch.GetEntries(), branch.GetName(),
                                  entries ) )
        # "Decode" the name of the branch:
        brName = branch.GetName()
        # Check if this is a static auxiliary branch:
        m = auxPattern.match( branch.GetName() )
        if m:
            brName = m.group( 1 )
        # Check if this is a dynamic auxiliary branch:
        m = auxDynPattern.match( branch.GetName() )
        if m:
            brName = m.group( 1 )
        # Accumulate the information that we need:
        if brName in infoForCont:
            infoForCont[ brName ]._memSize  += branch.GetTotBytes( "*" )
            infoForCont[ brName ]._diskSize += branch.GetZipBytes( "*" )
        else:
            infoForCont[ brName ] = ContainerInfo( brName,
                                                   branch.GetTotBytes( "*" ),
                                                   branch.GetZipBytes( "*" ),
                                                   entries )

    # Sort the collected info based on the on-disk size of the containers:
    orderedData = sorted( infoForCont.values(),
                          key = operator.attrgetter( "_diskSize" ) )

    # Finally, print the collected information:
    memSize = 0.0
    diskSize = 0.0
    print( "=" * 120 )
    print( " File: %s" % fileName )
    print( "-" * 120 )
    print( " Memory size Disk Size Size/Event Compression "
           "Entries Name (Type)" )
    print( "-" * 120 )
    for d in orderedData:
        # Try to get the type of the object/container:
        intBr = t.GetBranch( d.name() )
        className = "<!!!Interface missing!!!>"
        if intBr:
            className = intBr.GetClassName()
        # The name and type of the branch in question:
        nameType = "%s (%s)" % ( d.name(), className )

        # Find the category of this container. Note that all categories are
        # checked, so if several of them match, the *last* matching one in
        # the dictionary wins. (Behaviour preserved from the original code.)
        catName = '*Unknown*'
        for categ in categoryStrings:
            for pattern in categoryStrings[ categ ]:
                try:
                    m = re.match( pattern, d.name() )
                except TypeError:
                    # Skip patterns that are not valid strings:
                    continue
                if m:
                    catName = categ
                    break
        # Add the category to the name/type string:
        nameType += ' [' + catName + ']'

        # Now sum up the sizes according to the category:
        if catName in categData:
            categData[ catName ]._memSize  += d._memSize
            categData[ catName ]._diskSize += d._diskSize
        else:
            categData[ catName ] = \
                ContainerInfo( catName,
                               d._memSize,
                               d._diskSize,
                               d._nEntries )
        memSize  = memSize + d.memSize()
        diskSize = diskSize + d.diskSize()

        print( " %12.2f kB %12.2f kB %7.2f kB/event %5.2f %8i %s" %
               ( d.memSize(), d.diskSize(),
                 d.diskSizePerEntry(),
                 d.compression(),
                 d.nEntries(), nameType ) )
    print( "-" * 120 )
    # Guard against an empty tree when printing the per-event total:
    print( " %12.2f kB %12.2f kB %7.2f kB/event Total" %
           ( memSize, diskSize, ( diskSize / entries ) if entries else 0.0 ) )
    print( "=" * 120 )
    print( "" )

    # Now print out the categorized information, ordered by on-disk size:
    categorizedData = sorted( categData.values(),
                              key = operator.attrgetter( "_diskSize" ) )

    print( "=" * 80 )
    print( " Categorized data" )
    print( "=" * 80 )
    print( " Disk Size Fraction Category Name" )
    print( "-" * 80 )
    totDiskSize = 0.0
    frac = 0.0
    ds = []
    dsFrac = []
    dsName = []
    for d in categorizedData:
        dsPerEvt     = d.diskSize() / d.nEntries()
        dsPerEvtFrac = d.diskSize() / diskSize
        totDiskSize += dsPerEvt
        frac        += dsPerEvtFrac
        ds.append( dsPerEvt )
        dsFrac.append( dsPerEvtFrac )
        dsName.append( d.name() )
        print( "%12.3f kb %12.3f %s" % ( dsPerEvt, dsPerEvtFrac, d.name() ) )
    print( "%12.3f kb %12.3f %s" % ( totDiskSize , frac, "Total" ) )
    ds.append( totDiskSize )
    dsFrac.append( frac )
    dsName.append( "Total" )

    print( "" )
    print( "=" * 80 )
    print( "CSV for categories disk size/evt and fraction:" )
    # Print out comma separated lists in descending order of size:
    print( ",".join( dsName[ ::-1 ] ) )
    b = [ '{:<0.3f}'.format( i ) for i in ds[ ::-1 ] ]
    print( ",".join( b ) )
    b = [ '{:<0.3f}'.format( i ) for i in dsFrac[ ::-1 ] ]
    print( ",".join( b ) )
    print( "=" * 80 )
    print( "" )

    # Close the file (the original leaked the TFile handle):
    f.Close()
    return
292
# Execute the main() function when the script is run directly:
if __name__ == "__main__":
    import sys
    sys.exit( main() )
void print(char *figname, TCanvas *c1)
Class for holding information about a single container.
__init__(self, name, memSize, diskSize, nEntries)
Constructor.
nEntries(self)
The number of entries saved for this container.
memSize(self)
Size of the container in memory in kilobytes.
compression(self)
Compression factor for the container.
diskSizePerEntry(self)
Average size of the container per event in kilobytes.
diskSize(self)
Size of the container on disk in kilobytes.
STL class.
main()
C(++) style main function.
printFileInfo(fileName, categoryStrings)
Function printing information about a single xAOD file.