ATLAS Offline Software
Control/xAODRootAccess/scripts/checkxAOD.py
#!/usr/bin/env python
#
# Copyright (C) 2002-2022 CERN for the benefit of the ATLAS collaboration
#
# This is a standalone implementation of the xAOD checking script. It just needs
# a functional ROOT installation to work.
#
# @author Attila Krasznahorkay <Attila.Krasznahorkay@cern.ch>
#
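# Example invocation (the input file names below are only placeholders):
#
#    checkxAOD.py -f DAOD_PHYS.example.pool.root
#
# or with one or more files passed as positional arguments:
#
#    checkxAOD.py xAOD1.pool.root xAOD2.pool.root
#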

# Import(s):
import os, re, operator, ROOT
from optparse import OptionParser


## C(++) style main function.
def main():

    # Set up the command line option parser:
    parser = OptionParser( usage = "usage: %prog [-f] xAOD.pool.root" )
    parser.add_option( "-f", "--file",
                       dest = "fileName",
                       help = "The path to the xAOD file to analyse" )
    ( options, args ) = parser.parse_args()

    # Set up categorization matching strings:
    categoryStrings = {
        "MetaData" : [ "^DataHeader", "(.*)_mems$", "(.*)_timings$", "^Token$", "^RawInfoSummaryForTag$" ],
        "Trig" : [ "^HLT", "^LVL1", "^xTrig", "^Trig", "^CTP_Decision", "^TrigInDetTrackTruthMap", "^TrigNavigation", ".*TriggerTowers", "TileTTL1MBTS", "^TileL2Cnt", "RoIBResult" ],
        "MET" : [ "^MET", "^METMAP", "JEMEtSums" ],
        "EvtId" : [ "^ByteStreamEventInfo", "^EventInfo", "^McEventInfo", "^LumiBlockN", "^EventWeight", "^RunNumber", "^ConditionsRun", "^EventTime", "^BunchId", "^EventNumber" ],
        "tau" : [ "^Tau", "^DiTauJets" ],
        "PFO" : [ "(.*)EventShape$", "^AntiKt4EMPFlowJets", "^JetETMissChargedParticleFlowObjects", "^JetETMissNeutralParticleFlowObjects", "^JetETMissLCNeutralParticleFlowObjects" ],
        "egamma" : [ "^GSF", "^ForwardElectron", "^egamma", "^Electron", "^Photon" ],
        "Muon" : [ "^Muon", "^TileMuObj", "^MS", "^SlowMuons", ".*Stau", "(.*)MuonTrackParticles$", "MUCTPI_RDO", "^RPC", "^TGC", "^MDT", "^CSC", "^sTGC", "^MM", ".*MuonMeasurements$", "^ExtrapolatedMuonTracks", "^CombinedMuonTracks" ],
        "BTag" : [ "^BTag" ],
        "InDet" : [ "^InDet", "^PrimaryVertices", "^ComTime_TRT", "^Pixel", "^TRT", "^SCT", "^BCM", "^CTP", "^Tracks", "^ResolvedForwardTracks", "^SplitClusterAmbiguityMap" ],
        "Jet" : [ "^CamKt", "^AntiKt", "^Jet" ],
        "CaloTopo" : [ "CaloCalTopoCluster" ],
        "Calo" : [ "^LAr", "^AODCellContainer", "^MBTSContainer", "^CaloCompactCellContainer", "^E4prContainer", "^TileCellVec", "^TileDigits" ],
        "Truth" : [ "^Truth", "Truth$", "TruthMap$", "TruthCollection$", "^PRD_MultiTruth", "TracksTruth$", ".*TrackTruth$", "TrackTruthCollection" ]
    }
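    # As an illustration (hypothetical container name): an interface container
    # called "AntiKt4EMTopoJets" matches the "^AntiKt" pattern above, so its
    # branches end up summed under the "Jet" category in the final report.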
    # Get the file name(s), taking all options into account:
    fileNames = []
    if len( args ) > 0:
        fileNames = [ arg for arg in args if arg[ 0 ] != "-" ]
        pass
    if ( options.fileName is None ) and ( len( fileNames ) == 0 ):
        parser.print_help()
        return 1
    if options.fileName is not None:
        fileNames.append(
            os.path.expandvars( os.path.expanduser( options.fileName ) ) )
        pass
    fileNames = set( fileNames )

    # Set up ROOT:
    ROOT.gErrorIgnoreLevel = ROOT.kError

    # Loop over the files:
    for fileName in fileNames:
        # Print their contents using the helper function:
        printFileInfo( fileName, categoryStrings )
        pass

    # Return gracefully:
    return 0


## Class for holding information about a single container.
class ContainerInfo( object ):

    ## Constructor.
    def __init__( self, name, memSize, diskSize, nEntries ):
        # Call the base class's constructor:
        object.__init__( self )
        # Remember the properties:
        self._name = name
        self._memSize = memSize
        self._diskSize = diskSize
        self._nEntries = nEntries
        return

    ## The name of the container.
    def name( self ):
        return self._name

    ## Size of the container in memory in kilobytes.
    def memSize( self ):
        return float( self._memSize ) / 1024.

    ## Size of the container on disk in kilobytes.
    def diskSize( self ):
        return float( self._diskSize ) / 1024.

    ## Average size of the container per event in kilobytes.
    def diskSizePerEntry( self ):
        return self.diskSize() / self._nEntries

    ## Compression factor for the container.
    def compression( self ):
        return self.memSize() / self.diskSize()

    ## The number of entries saved for this container.
    def nEntries( self ):
        return self._nEntries


## Function printing information about a single xAOD file.
def printFileInfo( fileName, categoryStrings ):

    # Open the file:
    f = ROOT.TFile.Open( fileName, "READ" )
    if not f or f.IsZombie():
        raise RuntimeError( "Couldn't open file %s" % fileName )

    # Get the main event tree from the file:
    t = f.Get( "CollectionTree" )
    if not t:
        raise RuntimeError( "Couldn't find 'CollectionTree' in file %s" %
                            fileName )

    # The collected data:
    infoForCont = {}
    categData = {}

    # The entries in the TTree, for cross-checking:
    entries = t.GetEntries()

    # Get all the branches of the file:
    branches = t.GetListOfBranches()
    for i in range( branches.GetEntries() ):
        # Get the branch:
        branch = branches.At( i )
        # A little security check:
        if branch.GetEntries() != entries:
            raise RuntimeError( "Found %i entries in branch %s instead of %i" %
                                ( branch.GetEntries(), branch.GetName(),
                                  entries ) )
        # "Decode" the name of the branch:
        brName = branch.GetName()
        # Check if this is a static auxiliary branch:
        m = re.match( r"(.*)Aux\..*", branch.GetName() )
        if m:
            brName = m.group( 1 )
            pass
        # Check if this is a dynamic auxiliary branch:
        m = re.match( r"(.*)AuxDyn\..*", branch.GetName() )
        if m:
            brName = m.group( 1 )
            pass
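        # For example (hypothetical branch names): a static auxiliary branch
        # "ElectronsAux.pt" and a dynamic one "ElectronsAuxDyn.charge" are both
        # accumulated under the interface name "Electrons" below.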
        # Get the information that we need:
        if brName in infoForCont.keys():
            infoForCont[ brName ]._memSize += branch.GetTotBytes( "*" )
            infoForCont[ brName ]._diskSize += branch.GetZipBytes( "*" )
        else:
            infoForCont[ brName ] = ContainerInfo( brName,
                                                   branch.GetTotBytes( "*" ),
                                                   branch.GetZipBytes( "*" ),
                                                   entries )
            pass
        pass

    # Sort the collected info based on the on-disk size of the containers:
    orderedData = []
    for cName in infoForCont.keys():
        orderedData += [ infoForCont[ cName ] ]
        pass
    orderedData.sort( key = operator.attrgetter( "_diskSize" ) )

    # Finally, print the collected information:
    memSize = 0.0
    diskSize = 0.0
    print( "=" * 120 )
    print( " File: %s" % fileName )
    print( "-" * 120 )
    print( " Memory size Disk Size Size/Event Compression "
           "Entries Name (Type)" )
    print( "-" * 120 )
    for d in orderedData:
        # Try to get the type of the object/container:
        intBr = t.GetBranch( d.name() )
        className = "<!!!Interface missing!!!>"
        if intBr:
            className = intBr.GetClassName()
            pass
        # The name and type of the branch in question:
        nameType = "%s (%s)" % ( d.name(), className )

        # Find category:
        found = False
        catName = '*Unknown*'
        # for pair in categoryStrings:
        for categ in categoryStrings:
            for pattern in categoryStrings[ categ ]:
                # print d.name, d_name, pair, type(d.name), type(d_name), type(pair[0])
                m = None
                try:
                    m = re.match( pattern, d.name() )
                except TypeError:
                    pass
                if m:
                    found = True
                    catName = categ
                    break
                # print d.name, pair[1]
                pass
            if not found:
                # print "Did not find category for:", d.name, d_name, br
                pass
            pass
        # Add on category to name/type:
        nameType += ' [' + catName + ']'

        # Now sum up the sizes according to the category.
        # Check if we already know this category:
        if catName in categData.keys():
            categData[ catName ]._memSize += d._memSize
            categData[ catName ]._diskSize += d._diskSize
        else:
            categData[ catName ] = \
                ContainerInfo( catName,
                               d._memSize,
                               d._diskSize,
                               d._nEntries )
            pass
        pass
        memSize = memSize + d.memSize()
        diskSize = diskSize + d.diskSize()

        print( " %12.2f kB %12.2f kB %7.2f kB/event %5.2f %8i %s" %
               ( d.memSize(), d.diskSize(),
                 d.diskSizePerEntry(),
                 d.compression(),
                 d.nEntries(), nameType ) )
        pass
    print( "-" * 120 )
    print( " %12.2f kB %12.2f kB %7.2f kB/event Total" %
           ( memSize, diskSize, diskSize / entries ) )
    print( "=" * 120 )
    print( "" )

    # Now print out the categorized information.
    # Order the records by size:
    categorizedData = []
    for br in categData.keys():
        categorizedData += [ categData[ br ] ]
        pass
    categorizedData.sort( key = operator.attrgetter( "_diskSize" ) )

    print( "=" * 80 )
    print( " Categorized data" )
    print( "=" * 80 )
    print( " Disk Size Fraction Category Name" )
    print( "-" * 80 )
    totDiskSize = 0.0
    frac = 0.0
    ds = []
    dsFrac = []
    dsName = []
    for d in categorizedData:
        # print d.name(), d.diskSize(), d.nEntries()
        dsPerEvt = d.diskSize() / d.nEntries()
        dsPerEvtFrac = d.diskSize() / diskSize
        totDiskSize += dsPerEvt
        frac += dsPerEvtFrac
        ds += [ dsPerEvt ]
        dsFrac += [ dsPerEvtFrac ]
        dsName += [ d.name() ]
        print( "%12.3f kB %12.3f %s" % ( dsPerEvt, dsPerEvtFrac, d.name() ) )
        pass
    print( "%12.3f kB %12.3f %s" % ( totDiskSize, frac, "Total" ) )
    ds += [ totDiskSize ]
    dsFrac += [ frac ]
    dsName += [ "Total" ]

    print( "" )
    print( "=" * 80 )
    print( "CSV for categories disk size/evt and fraction:" )
    # Print out a comma-separated list in descending order:
    print( ",".join( dsName[::-1] ) )
    b = [ '{:<0.3f}'.format( i ) for i in ds[::-1] ]
    print( ",".join( b ) )
    b = [ '{:<0.3f}'.format( i ) for i in dsFrac[::-1] ]
    print( ",".join( b ) )
    print( "=" * 80 )
    print( "" )

    return

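# Note: printFileInfo() can also be called on its own from an interactive
# PyROOT session, for instance (the file name here is only a placeholder):
#
#    import checkxAOD
#    checkxAOD.printFileInfo( "DAOD_PHYS.example.pool.root", {} )
#
# Passing an empty category dictionary simply puts every container into the
# '*Unknown*' category of the summary.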
# Run the main function in "normal mode":
if __name__ == "__main__":
    import sys
    sys.exit( main() )