ATLAS Offline Software
D3PDSizeSummary.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration
4 
5 
6 import ROOT
7 import os
8 import sys
9 import re
10 from optparse import OptionParser
11 
12 #ROOT.gROOT.Macro( os.path.expanduser( '~/RootUtils/rootlogon.C' ) )
13 
# Command-line interface: one positional argument (the input ROOT file) plus
# the switches below.  The parsed ``options`` object is read as a module-level
# global by the classes defined further down in this file.
usage = "usage: %prog [options] input_file"
parser = OptionParser(usage=usage)
parser.add_option("-s", "--subgroups",
                  action="store_true", dest="subgroups", default=False,
                  help="Include additional subgroups for primary branch groups")
parser.add_option("-T", "--TeX",
                  action="store_true", dest="TeX", default=False,
                  help="Print branch sizes in TeX format")
parser.add_option("-p", "--plots",
                  action="store_true", dest="plots", default=False,
                  help="Create pie plots with fractional file size for branch groups")
parser.add_option("-v", "--debug",
                  action="store_true", dest="debug", default=False,
                  help="Print verbose output (for debugging)")
parser.add_option("-f", "--file",
                  action="store", dest="filename", default="pie", type="string",
                  help="Filename for output pie chart")
parser.add_option("-e", "--ext",
                  action="store", dest="extension", default="png", type="string",
                  help="Filetype for output pie chart")
# ``args`` keeps the positional arguments; the script uses args[0] as input.
(options, args) = parser.parse_args()
23 
24 
# Sub-classification tables, keyed by branch-group name.
#
# Each table is an ordered list of [subgroup name, patterns] pairs, where
# ``patterns`` is a whitespace-separated string of regular-expression
# fragments.  Branchgroup.get_subgroup() tries the pairs in order, anchoring
# every fragment as '<group>_<fragment>$', and stops at the first match, so
# more specific entries must come before more general ones.  A leading '*' on
# the subgroup name makes get_subgroup() print the matching branch name and
# strip the '*' before using the name.
subgroups = {
    'el': [
        ['Basic', ('n E Et pt m eta phi px py pz charge author '
                   'isEM loose medium mediumIso tight tightIso')],
        ['PID', ('electronweight electronbgweight '
                 'isolationlikelihood.* softeweight '
                 'softebgweight neuralnet Hmatrix Hmatrix5 '
                 'adaboost softeneuralnet ')],
        ['Conv', '.*conv.* .*Conv.*'],
        ['Truth', 'type origin truth_.*'],
        ['Calo', ('Ethad Ethad1 f1 f1core Emins1 fside Emax2 '
                  'ws3 wstot emaxs1 deltaEs E233 E237 E277 '
                  'weta2 f3 f3core rphiallcalo reta rphi '
                  'deltaEmax2 pos7')],
        ['Iso', 'Etcone.* ptcone.* Etring.* isIso'],
        ['Track', ('deltaeta1 deltaeta2 deltaphi2 deltaphiRescaled '
                   'track.* n.*Hits n.*Holes n.*Outliers '
                   'vert.* hastrack TRT.*Ratio etacorrmag')],
        ['Pointing', 'zvertex errz etap depth'],
        ['Brem', 'brem.* refitted.* hasbrem'],
        ['Cluster', 'Es[0-3] etas[0-3] phis[0-3] cl_.*'],
        ['Forward', ('firstEdens cellmaxfrac longitudinal '
                     'secondlambda lateral secondR centerlambda')],
        ['Rawcl', 'rawcl_.*'],
        ['Jettruth', 'jet_truth_.*'],
        ['Jet', 'jet_.*'],
        ['Trigger', 'L1_.* L2_.* EF.*'],
        # Catch-all: must stay last.
        ['*Other', '.*'],
    ],
}
53 
54 
class Branchgroup:
    """Size bookkeeping for one named group of TTree branches.

    Every branch handed to add() contributes to the branch count, the
    uncompressed size and the on-disk (zipped) size of the group.  When the
    module-level ``options.subgroups`` flag is set, branches are additionally
    accumulated into nested Branchgroup objects selected through the
    module-level ``subgroups`` table.
    """

    # Row templates, filled from the dictionary built by get_dict().
    text_format = '%(name)-10s %(nbr)5s %(nobj)7s %(nobjper)7s %(totsiz)8s %(totsizper)8s %(filesiz)8s %(filesizper)9s %(comp)4s'
    # Subgroup rows drop the '-' flag, so their name is right-justified.
    sg_text_format = text_format.replace('(name)-', '(name)')

    tex_format = '%(name)-10s&%(nbr)5s&%(nobj)7s&%(nobjper)7s&%(totsiz)8s&%(totsizper)8s&%(filesiz)8s&%(filesizper)9s&%(comp)4s \\\\'
    sg_tex_format = tex_format

    # The output flavour is chosen once, when the class body is executed.
    if options.TeX:
        format = tex_format
        sg_format = sg_tex_format
    else:
        format = text_format
        sg_format = sg_text_format

    def __init__(self, name):
        """Create an empty group called *name*."""
        self.name = name
        self.nbr = 0         # number of branches added so far
        self.totbytes = 0    # running sum of GetTotalSize()
        self.filebytes = 0   # running sum of GetZipBytes()
        self.totobj = None   # object count; None until '<name>_n' is added
        self.subgroups = {}  # subgroup name -> Branchgroup

    def get_subgroup(self, b):
        """Return the subgroup that branch *b* belongs to, creating it on demand.

        Returns None when the module-level ``subgroups`` table has no entry
        for this group or when no pattern matches the branch name.
        """
        sglist = subgroups.get(self.name)
        if not sglist:
            return None
        bname = b.GetName()
        for (sgname, pats) in sglist:
            for p in pats.split():
                # Patterns are written relative to the '<group>_' prefix and
                # must match the branch name up to its end.
                if not re.match(self.name + '_' + p + '$', bname):
                    continue
                if sgname[0] == '*':
                    # '*'-flagged subgroups report every branch they absorb.
                    print('*** ' + bname)
                    sgname = sgname[1:]
                sg = self.subgroups.get(sgname)
                if sg is None:
                    sg = Branchgroup(sgname)
                    self.subgroups[sgname] = sg
                return sg
        return None

    def add(self, b):
        """Accumulate the sizes of branch *b* into this group."""
        self.nbr += 1
        self.totbytes += b.GetTotalSize()
        self.filebytes += b.GetZipBytes()
        bname = b.GetName()
        if bname == self.name + '_n':
            # The '<group>_n' branch is treated as the per-entry object
            # count; its values are summed over all entries of the branch.
            self.totobj = 0
            leaf = b.GetLeaf(bname)
            for i in range(b.GetEntries()):
                b.GetEntry(i)
                self.totobj += int(leaf.GetValue())

        if options.subgroups:
            sg = self.get_subgroup(b)
            if sg:
                sg.add(b)

    def get_dict(self, nev):
        """Return the formatted table fields for this group.

        *nev* is the number of events used for the per-event columns.  A
        group for which no '<name>_n' branch was added is assumed to hold one
        object per event; that assumption is stored in self.totobj.

        All values are strings keyed by the field names used in the row
        templates.  Per-event columns are reported as 0 when *nev* is 0, and
        the compression factor as 'n/a' when the on-disk size is 0.
        """
        if self.totobj is None:
            self.totobj = nev

        def per_event(value):
            # Avoid ZeroDivisionError on an empty tree.
            return float(value) / nev if nev else 0.0

        d = {}
        d['name'] = self.name
        d['nbr'] = str(self.nbr)
        d['nobj'] = str(int(self.totobj))
        d['nobjper'] = "%.1f" % per_event(self.totobj)
        d['totsiz'] = "%dk" % int(float(self.totbytes) / 1024 + 0.5)
        d['totsizper'] = "%.2fk" % per_event(float(self.totbytes) / 1024)
        d['filesiz'] = "%dk" % int(float(self.filebytes) / 1024 + 0.5)
        d['filesizper'] = "%.2fk" % per_event(float(self.filebytes) / 1024)
        if self.filebytes:
            d['comp'] = "%.2f" % (float(self.totbytes) / self.filebytes)
        else:
            d['comp'] = "n/a"
        return d

    @staticmethod
    def print_header():
        """Print the column header row (and the TeX table preamble, if any)."""
        if options.TeX:
            print('\\begin{tabular}{|l|r|r|r|r|r|r|r|r|}')
            print('\\hline')
        d = {}
        d['name'] = 'Name'
        d['nbr'] = 'Nbr'
        d['nobj'] = 'Nobj'
        d['nobjper'] = 'Nobj/ev'
        d['totsiz'] = 'Totsz'
        d['totsizper'] = 'Totsz/ev'
        d['filesiz'] = 'Filesz'
        d['filesizper'] = 'Filesz/ev'
        d['comp'] = 'Comp'
        print(Branchgroup.format % d)
        if options.TeX:
            print('\\hline')

    def print_stats(self, nev):
        """Print this group's row followed by its subgroups, largest on disk first."""
        print(Branchgroup.format % self.get_dict(nev))

        # sorted() with reverse=True is stable, like the descending
        # comparison-function sort it replaces.
        by_size = sorted(self.subgroups.values(),
                         key=lambda sg: sg.filebytes, reverse=True)
        for sg in by_size:
            print(Branchgroup.sg_format % sg.get_dict(nev))

    @staticmethod
    def print_trailer():
        """Close the TeX table; prints nothing in plain-text mode."""
        if options.TeX:
            print('\\hline')
            print('\\end{tabular}')
170 
171 
class Filestats:
    """Branch-group size statistics for one TTree of a ROOT file.

    The constructor opens the file, locates the tree and scans all of its
    top-level branches, so a constructed object is ready for
    print_stats_text() and draw_pie().
    """

    # Branches that are collected under the 'EventInfo' group.
    _event_info_branches = ('RunNumber',
                            'EventNumber',
                            'timestamp',
                            'timestamp_ns',
                            'lbn',
                            'bcid',
                            'detmask0',
                            'detmask1',
                            'collcand')

    def __init__(self, fname, treename=None):
        """Open *fname* and gather the statistics.

        *treename* selects the tree to analyse; when it is None the tree is
        looked up by get_treename().
        """
        self.fname = fname
        self.treename = treename
        self.branchgroups = {}
        self.get_stats()

    def get_treename(self):
        """Set self.treename from the file's keys unless it is already set.

        Keys of class 'TTree' other than 'CollectionTree' are candidates; if
        several exist, the last one listed is used.
        """
        if self.treename:
            return
        for kk in self.rfile.GetListOfKeys():
            if kk.GetClassName() == 'TTree' and kk.GetName() != 'CollectionTree':
                self.treename = kk.GetName()

    def get_branchgroup(self, b):
        """Return the Branchgroup that branch *b* belongs to, creating it on demand.

        Grouping rules, in order: the fixed event-level branch names go to
        'EventInfo'; names containing '_' are grouped by the text before the
        first '_'; names ending in 'Error' or 'Flags' go to 'DetFlags'; any
        other branch forms a group of its own.
        """
        bname = b.GetName()
        ll = bname.split('_')
        if bname in self._event_info_branches:
            gname = 'EventInfo'
        elif len(ll) > 1:
            gname = ll[0]
        elif bname.endswith('Error') or bname.endswith('Flags'):
            gname = 'DetFlags'
        else:
            gname = bname
        group = self.branchgroups.get(gname)
        if not group:
            group = Branchgroup(gname)
            self.branchgroups[gname] = group
        return group

    def get_stats(self):
        """Open the file, find the tree and accumulate all branch statistics.

        Raises Exception when the file cannot be opened or the tree cannot be
        found.
        """
        self.file_size = os.stat(self.fname).st_size
        self.rfile = ROOT.TFile.Open(self.fname)
        # TFile.Open signals failure through a null or zombie object rather
        # than an exception; fail here with a readable message.
        if not self.rfile or self.rfile.IsZombie():
            raise Exception("Can't open file " + self.fname + ".")
        self.get_treename()
        if self.treename is None:
            raise Exception("Can't find tree name in file.")
        self.tree = self.rfile.Get(self.treename)
        if not self.tree:
            raise Exception("Can't find tree " + self.treename + " in file.")
        self.nev = self.tree.GetEntries()
        self.scan_branches()

    def scan_branches(self):
        """Add every top-level branch of the tree to its branch group."""
        for b in self.tree.GetListOfBranches():
            self.get_branchgroup(b).add(b)

    def _groups_by_size(self):
        """Return the branch groups ordered by on-disk size, largest first."""
        # sorted() with reverse=True is stable, like the descending
        # comparison-function sort it replaces.
        return sorted(self.branchgroups.values(),
                      key=lambda group: group.filebytes, reverse=True)

    def print_stats_text(self):
        """Print the file summary followed by one table row per branch group."""
        print("Number of events: %s" % self.nev)
        print("File size: %dk" % int(float(self.file_size) / 1024 + 0.5))
        if self.nev:
            print("Overall size/event: %.1fk" % (float(self.file_size) / self.nev / 1024))
        else:
            # Empty tree: a per-event size is undefined.
            print("Overall size/event: n/a")

        Branchgroup.print_header()
        for bg in self._groups_by_size():
            bg.print_stats(self.nev)
        Branchgroup.print_trailer()

    def draw_pie(self):
        """Draw each group's share of the file size as a pie chart and save it.

        The image is written to '<options.filename>.<options.extension>'.
        Only groups above 5% of the file size are labelled, pulled out of
        the pie and listed in the legend.
        """
        if options.debug:
            print("Number of branch groups: %s" % len(self.branchgroups))
            print("Values of branch groups: %s" % (list(self.branchgroups.keys()),))

        blist = self._groups_by_size()
        pie = ROOT.TPie("BranchPie", "", len(self.branchgroups))
        for i, bgroup in enumerate(blist):
            if options.debug:
                print("Branch group #%d, named %s with size %f = %f%%" % (i, bgroup.name, (float(bgroup.filebytes) / 1024 / self.nev), (100. * float(bgroup.filebytes) / float(self.file_size))))

            pie.SetEntryLabel(i, bgroup.name)
            pie.SetEntryVal(i, (100. * float(bgroup.filebytes) / float(self.file_size)))
            pie.SetEntryFillColor(i, i + 2)

        c1 = ROOT.TCanvas('c1', 'c1')

        # Pie placed in the upper part of the canvas, legend below it.
        pie.SetCircle(0.4808696, 0.7940109, 0.2)
        pie.SetTextSize(0.03)
        pie.SetLabelFormat("%txt")
        pie.SetValueFormat("%4.2f")
        pie.SetPercentFormat("%3.1f")
        pie.SetLabelsOffset(0.005)
        pie.SetAngularOffset(265.2655)
        leg = ROOT.TLegend(0.05580866, 0.06554878, 0.785877, 0.4512195)
        for i, bgroup in enumerate(blist):
            if (float(bgroup.filebytes) / float(self.file_size)) > 0.05:
                pie.SetEntryRadiusOffset(i, .07)
                leg.AddEntry(pie.GetSlice(i), "%s : %1.1f%%" % (bgroup.name, 100. * float(bgroup.filebytes) / float(self.file_size)), "f")
            else:
                # Small slices stay in the pie but are left unlabelled.
                pie.SetEntryLabel(i, "")

        leg.SetFillColor(0)
        leg.SetFillStyle(0)

        pie.Draw("3d")
        leg.Draw()

        c1.SaveAs(options.filename + "." + options.extension)
300 
301 
302 
303 
304 
# Script entry point: analyse the file named by the first positional argument.
if not args:
    # Report the problem through optparse (usage message, exit status 2)
    # instead of failing with an IndexError on args[0].
    parser.error("no input file specified")

ff = Filestats(args[0])
ff.print_stats_text()
if options.plots:
    ff.draw_pie()
D3PDSizeSummary.Branchgroup.nbr
nbr
Definition: D3PDSizeSummary.py:71
D3PDSizeSummary.Filestats.__init__
def __init__(self, fname, treename=None)
Definition: D3PDSizeSummary.py:173
D3PDSizeSummary.Branchgroup.print_header
def print_header()
Definition: D3PDSizeSummary.py:136
D3PDSizeSummary.Filestats.fname
fname
Definition: D3PDSizeSummary.py:174
D3PDSizeSummary.Filestats.print_stats_text
def print_stats_text(self)
Definition: D3PDSizeSummary.py:237
D3PDSizeSummary.Branchgroup.print_stats
def print_stats(self, nev)
Definition: D3PDSizeSummary.py:155
D3PDSizeSummary.Branchgroup.get_dict
def get_dict(self, nev)
Definition: D3PDSizeSummary.py:118
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
D3PDSizeSummary.Filestats.tree
tree
Definition: D3PDSizeSummary.py:222
D3PDSizeSummary.Branchgroup.print_trailer
def print_trailer()
Definition: D3PDSizeSummary.py:165
D3PDSizeSummary.Filestats.treename
treename
Definition: D3PDSizeSummary.py:175
D3PDSizeSummary.Branchgroup.subgroups
subgroups
Definition: D3PDSizeSummary.py:75
D3PDSizeSummary.Branchgroup.get_subgroup
def get_subgroup(self, b)
Definition: D3PDSizeSummary.py:79
D3PDSizeSummary.Filestats.get_branchgroup
def get_branchgroup(self, b)
Definition: D3PDSizeSummary.py:190
GetEntries
TGraphErrors * GetEntries(TH2F *histo)
Definition: TRTCalib_makeplots.cxx:4019
D3PDSizeSummary.Filestats.file_size
file_size
Definition: D3PDSizeSummary.py:217
D3PDSizeSummary.Branchgroup.totobj
totobj
Definition: D3PDSizeSummary.py:74
python.Bindings.values
values
Definition: Control/AthenaPython/python/Bindings.py:805
D3PDSizeSummary.Branchgroup.totbytes
totbytes
Definition: D3PDSizeSummary.py:72
D3PDSizeSummary.Filestats.get_stats
def get_stats(self)
Definition: D3PDSizeSummary.py:216
D3PDSizeSummary.Branchgroup.filebytes
filebytes
Definition: D3PDSizeSummary.py:73
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
D3PDSizeSummary.Filestats.branchgroups
branchgroups
Definition: D3PDSizeSummary.py:176
D3PDSizeSummary.Filestats
Definition: D3PDSizeSummary.py:172
histSizes.list
def list(name, path='/')
Definition: histSizes.py:38
D3PDSizeSummary.Branchgroup.__init__
def __init__(self, name)
Definition: D3PDSizeSummary.py:69
D3PDSizeSummary.Filestats.get_treename
def get_treename(self)
Definition: D3PDSizeSummary.py:181
D3PDSizeSummary.Branchgroup.add
def add(self, b)
Definition: D3PDSizeSummary.py:98
D3PDSizeSummary.Filestats.scan_branches
def scan_branches(self)
Definition: D3PDSizeSummary.py:230
D3PDSizeSummary.Filestats.nev
nev
Definition: D3PDSizeSummary.py:225
D3PDSizeSummary.Branchgroup
Definition: D3PDSizeSummary.py:55
D3PDSizeSummary.Filestats.draw_pie
def draw_pie(self)
Definition: D3PDSizeSummary.py:251
D3PDSizeSummary.Branchgroup.name
name
Definition: D3PDSizeSummary.py:70
D3PDSizeSummary.Filestats.rfile
rfile
Definition: D3PDSizeSummary.py:218
python.Bindings.keys
keys
Definition: Control/AthenaPython/python/Bindings.py:798
readCCLHist.float
float
Definition: readCCLHist.py:83