ATLAS Offline Software
hist_file_dump.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3 
4 import ROOT
5 import sys, os
6 import argparse
7 import zlib
8 import json
9 from PyUtils.fprint import _formatFloat
10 from typing import List, Tuple, Callable
11 
def fixprecision(x, precision=15):
    """Reduce a float to a reproducible precision and format it.

    Anything that is not a ``float`` is handed back untouched.  A float is
    split into a binary mantissa and exponent with ``math.frexp``; the
    mantissa is rendered with 12 significant digits (``'%.12g'``) and that
    text is then cut to at most ``precision`` characters.  The character
    count includes a leading sign, the leading digit and the decimal point.
    The shortened mantissa is scaled back by ``2**exponent`` and the result
    is passed through ``_formatFloat``.

    Args:
        x: Value to process; only ``float`` instances are modified.
        precision: Maximum number of characters kept from the mantissa text.

    Returns:
        ``x`` itself when it is not a float, otherwise whatever
        ``_formatFloat`` returns for the reduced-precision value.
    """
    import math

    if isinstance(x, float):
        frac, power = math.frexp(x)
        kept = ('%.12g' % frac)[:precision]
        return _formatFloat(float(kept) * 2**power)
    return x
20 
def jsonfixup(instr, fuzzyarray=False):
    """Normalise the JSON form of an object so that its text hashes stably.

    The input is parsed, a fixed set of members is rewritten, and the result
    is serialised again with sorted keys.

    Args:
        instr: JSON text, either a ``str`` or an object whose ``Data()``
            method returns the text.
        fuzzyarray: When true, ``fBinEntries`` and ``fBinSumw2`` are also
            passed through ``fixprecision``.

    Returns:
        The normalised JSON document as a string with sorted keys.
    """
    text = instr if isinstance(instr, str) else instr.Data()
    j = json.loads(text)

    # These members are very subject to floating point numeric effects, so
    # they are converted to float and passed through fixprecision.
    stat_keys = (
        'fTsumw', 'fTsumwx', 'fTsumw2', 'fTsumwx2', 'fTsumwy', 'fTsumwy2',
        'fTsumwxy', 'fTsumwz', 'fTsumwz2', 'fTsumwxz', 'fTsumwyz',
    )
    for stat in stat_keys:
        if stat in j:
            j[stat] = fixprecision(float(j[stat]))

    # Each entry: (member name, precision handed to fixprecision,
    #              conversion applied to every element first).
    arrkeys: List[Tuple[str, int, Callable]] = [('fSumw2', 15, float)]
    if fuzzyarray:
        arrkeys.extend([('fBinEntries', 15, float), ('fBinSumw2', 15, float)])

    # The treatment of fArray depends on the object type.
    typename = j['_typename'] if '_typename' in j else None
    if typename in ('TH1F', 'TH2F', 'TH3F'):
        arrkeys.append(('fArray', 6, float))
    elif typename in ('TH1C', 'TH1I', 'TH1S', 'TH2C', 'TH2I', 'TH2S',
                      'TH3C', 'TH3I', 'TH3S'):
        # Contents *should* be integers: leave the elements unconverted, in
        # which case fixprecision returns them unchanged.
        arrkeys.append(('fArray', 15, lambda value: value))
    else:
        arrkeys.append(('fArray', 15, float))

    for member, precision, convert in arrkeys:
        if member in j:
            j[member] = [fixprecision(convert(entry), precision)
                         for entry in j[member]]

    # Ignore small layout fluctuations in TTrees.
    if 'fBranches' in j:
        for branch in j['fBranches']['arr']:
            branch['fBasketSeek'] = []

    # The formatting of the opt array changed in newer ROOT versions to be
    # filled with instances of None rather than empty strings; map None back
    # to '' so both forms serialise identically.
    if 'fXaxis' in j and 'fLabels' in j['fXaxis']:
        labels = j['fXaxis']['fLabels']
        if labels is not None and 'opt' in labels:
            opt = labels['opt']
            for idx, flag in enumerate(opt):
                if flag is None:
                    opt[idx] = ''

    return json.dumps(j, sort_keys=True)
62 
# Command-line interface: one positional input file plus options selecting
# the sort order, an optional sub-directory, and what is computed/printed.
parser = argparse.ArgumentParser()
parser.add_argument('filename', help='Input HIST file name')
parser.add_argument(
    '-r', '--rankorder',
    choices=['onfile', 'uncompressed', 'name'],
    default='onfile',
    help='rankorder is "onfile" (default), "uncompressed" or "name"',
)
parser.add_argument('-p', '--path', help='Only look under this directory')
# The remaining options are plain on/off switches, registered in the same
# order as before so the generated help text is unchanged.
for _flag, _helptext in (
    ('--hash', 'Print hashes of objects'),
    ('--metadata', 'Include metadata trees'),
    ('--no_onfile', "Don't show on file size"),
    ('--no_inmem', "Don't show in memory size"),
    ('--tree_entries', "Use more robust hash of TTree branches + entries"),
    ('--fuzzy_histbins', "Allow small variations in histogram bin content"),
):
    parser.add_argument(_flag, action='store_true', help=_helptext)
args = parser.parse_args()
84 
# Sort criterion requested on the command line.
ordering = args.rankorder

# Per-object bookkeeping filled in by dumpdir(), keyed by the object's path:
#   accounting -> (GetObjlen(), GetNbytes() - GetKeylen())
#   hashes     -> hash value (0 when no hash was computed)
#   types      -> class name of the stored object
accounting = {}
hashes = {}
types = {}

# C++ helpers that take a TKey*, loaded through ROOT's interpreter.  Of
# these, only getjson is referenced in the visible part of this file.
for _snippet in (
    "UInt_t bufferhash(TKey* key) { key->SetBuffer(); key->ReadFile(); UInt_t rv = TString::Hash(key->GetBuffer()+key->GetKeylen(), key->GetNbytes()-key->GetKeylen()); key->DeleteBuffer(); return rv; }",
    "void* getbuffer(TKey* key) { key->SetBuffer(); key->ReadFile(); return (void*) (key->GetBuffer()+key->GetKeylen()); }",
    "UInt_t bufferhash2(TKey* key) { TObject* obj = key->ReadObj(); TMessage msg(kMESS_OBJECT); msg.WriteObject(obj); UInt_t rv = TString::Hash(msg.Buffer(), msg.Length()); delete obj; return rv; }",
    "UInt_t bufferhash3(TKey* key) { TObject* obj = key->ReadObj(); UInt_t rv = obj->Hash(); delete obj; return rv; }",
    "TString getjson(TKey* key) { TObject* obj = key->ReadObj(); auto rv = TBufferJSON::ConvertToJSON(obj); delete obj; return rv; }",
):
    ROOT.gInterpreter.LoadText(_snippet)

ROOT.gSystem.Load('libDataQualityUtils')
96 
def fuzzytreehash(tkey):
    """Hash a tree by its structure and entry count instead of its bytes.

    The object is read from ``tkey`` and a signature string is built from
    the space-joined branch names, immediately followed (with no separator)
    by the space-joined ``name + type name`` of every leaf, then a space and
    the number of entries.  The Adler-32 checksum of that string is returned.

    Args:
        tkey: Key whose ``ReadObj()`` yields an object providing
            ``GetListOfBranches()``, ``GetListOfLeaves()`` and
            ``GetEntries()``.

    Returns:
        ``zlib.adler32`` of the encoded signature string.
    """
    tree = tkey.ReadObj()
    branch_part = ' '.join(branch.GetName()
                           for branch in tree.GetListOfBranches())
    leaf_part = ' '.join(leaf.GetName() + leaf.GetTypeName()
                         for leaf in tree.GetListOfLeaves())
    signature = branch_part + leaf_part + ' ' + str(tree.GetEntries())
    checksum = zlib.adler32(signature.encode())
    del tree
    return checksum
105 
def dumpdir(d):
    """Record size, hash and type of every object below directory ``d``.

    For each key that is not a directory, the module-level ``accounting``,
    ``hashes`` and ``types`` dictionaries receive an entry keyed by the
    object's path.  Sub-directories are collected and descended into once
    all keys of the current directory have been handled.

    Args:
        d: Directory-like object providing ``GetPath()`` and
            ``GetListOfKeys()``.
    """
    # Keep only what follows the first ':' of the path, when there is one.
    full_path = d.GetPath()
    thispath = full_path.split(':', 1)[1] if ':' in full_path else full_path

    pending_dirs = []
    for k in d.GetListOfKeys():
        classname = k.GetClassName()
        name = k.GetName()

        # Trees called 'metadata' are skipped unless explicitly requested.
        if classname == 'TTree' and name == 'metadata' and not args.metadata:
            continue
        if classname.startswith('TDirectory'):
            pending_dirs.append(k)
            continue

        if args.tree_entries and classname == 'TTree':
            lhash = fuzzytreehash(k)
        elif args.hash:
            if classname == 'TEfficiency':
                # Normalise the two embedded histograms individually, then
                # serialise the whole object with sorted keys.
                j0 = json.loads(ROOT.getjson(k).Data())
                for subh in ('fPassedHistogram', 'fTotalHistogram'):
                    j0[subh] = json.loads(jsonfixup(json.dumps(j0[subh])))
                fixedjson = json.dumps(j0, sort_keys=True)
            else:
                fixedjson = jsonfixup(ROOT.getjson(k), args.fuzzy_histbins)
            lhash = zlib.adler32(fixedjson.encode())
            # Map a negative checksum into the unsigned 32-bit range.
            if lhash < 0:
                lhash += 2**32
        else:
            lhash = 0

        idxname = os.path.join(thispath, name)
        accounting[idxname] = (k.GetObjlen(), k.GetNbytes() - k.GetKeylen())
        hashes[idxname] = lhash
        types[idxname] = classname

    for subdir_key in pending_dirs:
        dumpdir(subdir_key.ReadObj())
139 
# Open the input file, walk it, then print one line per recorded object.
f = ROOT.TFile.Open(args.filename)
# Stop with a clear message when the file could not be opened, in the same
# way as for an inaccessible --path below, instead of failing later on a
# null or unusable file object.
if not f or f.IsZombie():
    print("Can't open file", args.filename, "- exiting")
    sys.exit(1)
if args.path:
    d = f.Get(args.path.rstrip('/'))
    if not d:
        print("Can't access path", args.path, "- exiting")
        sys.exit(1)
else:
    d = f
dumpdir(d)

# Each item handed to key() is (path, (uncompressed size, on-file size)).
# The remaining fields act as tie-breakers; the list is sorted descending.
if ordering == 'onfile':
    def key(x):
        return (x[1][1], x[1][0], x[0])
elif ordering == 'uncompressed':
    def key(x):
        return (x[1][0], x[1][1], x[0])
else:
    def key(x):
        return (x[0], x[1][1], x[1][0])
sortedl = sorted(accounting.items(), key=key, reverse=True)

# The size fields are formatted into the template first; type, path and
# (optionally) hash are substituted afterwards.
if args.hash:
    print('\n'.join(('%s %s: '
                     + ('%d uncompressed' % b if not args.no_inmem else '')
                     + (', %d on file ' % c if not args.no_onfile else ' ')
                     + '(hash %s)')
                    % (types[a], a, hashes[a]) for a, (b, c) in sortedl)
          )
else:
    print('\n'.join(('%s %s: '
                     + ('%d uncompressed' % b if not args.no_inmem else '')
                     + (', %d on file' % c if not args.no_onfile else ' '))
                    % (types[a], a) for a, (b, c) in sortedl)
          )
170 
DerivationFramework::TriggerMatchingUtils::sorted
std::vector< typename R::value_type > sorted(const R &r, PROJ proj={})
Helper function to create a sorted vector from an unsorted range.
hist_file_dump.jsonfixup
def jsonfixup(instr, fuzzyarray=False)
Definition: hist_file_dump.py:21
hist_file_dump.key
def key(x)
Definition: hist_file_dump.py:151
hist_file_dump.dumpdir
def dumpdir(d)
Definition: hist_file_dump.py:106
AtlasMcWeight::encode
number_type encode(double weight)
Definition: AtlasMcWeight.cxx:65
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:194
print
void print(char *figname, TCanvas *c1)
Definition: TRTCalib_StrawStatusPlots.cxx:25
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
hist_file_dump.fixprecision
def fixprecision(x, precision=15)
Definition: hist_file_dump.py:12
hist_file_dump.fuzzytreehash
def fuzzytreehash(tkey)
Definition: hist_file_dump.py:97
str
Definition: BTagTrackIpAccessor.cxx:11
python.LArMinBiasAlgConfig.float
float
Definition: LArMinBiasAlgConfig.py:65