3 from __future__
import print_function
10 from PyUtils.fprint
import _formatFloat
11 from typing
import List, Tuple, Callable
15 if not isinstance(x, float):
18 mantissa, exponent = math.frexp(x)
19 sm =
'%.12g' % mantissa
20 return _formatFloat (
float(sm[:precision]) * 2**exponent)
23 if not isinstance(instr, str):
28 for badkey
in (
'fTsumw',
'fTsumwx',
'fTsumw2',
'fTsumwx2',
'fTsumwy',
'fTsumwy2',
'fTsumwxy',
29 'fTsumwz',
'fTsumwz2',
'fTsumwxz',
'fTsumwyz' ):
34 arrkeys: List[Tuple[str,int,Callable]] = [(
'fSumw2', 15, float)]
36 arrkeys += [(
'fBinEntries', 15, float), (
'fBinSumw2', 15, float)]
39 if j[
'_typename']
in (
'TH1F',
'TH2F',
'TH3F'):
40 arrkeys.append((
'fArray', 6, float))
41 elif j[
'_typename']
in (
'TH1C',
'TH1I',
'TH1S',
'TH2C',
'TH2I',
'TH2S',
42 'TH3C',
'TH3I',
'TH3S'):
44 arrkeys.append((
'fArray', 15,
lambda x: x))
46 arrkeys.append((
'fArray', 15, float))
48 arrkeys.append((
'fArray', 15, float))
49 for badkey, precision, func
in arrkeys:
51 j[badkey] = [
fixprecision(func(_), precision)
for _
in j[badkey]]
54 for branch
in j[
'fBranches'][
'arr']:
55 branch[
'fBasketSeek'] = []
56 return json.dumps(j, sort_keys=
True)
# Command-line interface: one positional input file plus options that
# control ordering, filtering and what is printed for each object.
parser = argparse.ArgumentParser()
parser.add_argument('filename', help='Input HIST file name')
parser.add_argument(
    '-r', '--rankorder',
    choices=['onfile', 'uncompressed', 'name'],
    default='onfile',
    help='rankorder is "onfile" (default), "uncompressed" or "name"',
)
parser.add_argument('-p', '--path', help='Only look under this directory')

# Boolean switches; every one of them defaults to False.
parser.add_argument(
    '--hash', action='store_true',
    help='Print hashes of objects',
)
parser.add_argument(
    '--metadata', action='store_true',
    help='Include metadata trees',
)
parser.add_argument(
    '--no_onfile', action='store_true',
    help="Don't show on file size",
)
parser.add_argument(
    '--no_inmem', action='store_true',
    help="Don't show in memory size",
)
parser.add_argument(
    '--tree_entries', action='store_true',
    help="Use more robust hash of TTree branches + entries",
)
parser.add_argument(
    '--fuzzy_histbins', action='store_true',
    help="Allow small variations in histogram bin content",
)
# Parse the command line once; the rest of the script reads `args`.
args = parser.parse_args()

# Sort criterion for the final listing ('onfile', 'uncompressed' or 'name').
ordering = args.rankorder

# Per-object bookkeeping, each dict keyed by the object's path:
#   accounting -> (object length, bytes on file minus key header length)
#   hashes     -> hash value computed for the object
#   types      -> class name reported by the object's key
accounting = {}
hashes = {}
types = {}
# Small C++ helpers declared through the ROOT interpreter so that they can
# be called from Python as ROOT.<name>(key).

# bufferhash: hash the key's payload bytes as read from the file (everything
# after the key header), then release the buffer.
ROOT.gInterpreter.LoadText("UInt_t bufferhash(TKey* key) { key->SetBuffer(); key->ReadFile(); UInt_t rv = TString::Hash(key->GetBuffer()+key->GetKeylen(), key->GetNbytes()-key->GetKeylen()); key->DeleteBuffer(); return rv; }")

# getbuffer: read the key and return a pointer to its payload (past the key
# header); the buffer is not deleted here.
ROOT.gInterpreter.LoadText("void* getbuffer(TKey* key) { key->SetBuffer(); key->ReadFile(); return (void*) (key->GetBuffer()+key->GetKeylen()); }")

# bufferhash2: read the object, stream it into a TMessage and hash the
# resulting message buffer.
ROOT.gInterpreter.LoadText("UInt_t bufferhash2(TKey* key) { TObject* obj = key->ReadObj(); TMessage msg(kMESS_OBJECT); msg.WriteObject(obj); UInt_t rv = TString::Hash(msg.Buffer(), msg.Length()); delete obj; return rv; }")

# bufferhash3: read the object and return its own Hash() value.
ROOT.gInterpreter.LoadText("UInt_t bufferhash3(TKey* key) { TObject* obj = key->ReadObj(); UInt_t rv = obj->Hash(); delete obj; return rv; }")

# getjson: read the object and return its TBufferJSON representation.
ROOT.gInterpreter.LoadText("TString getjson(TKey* key) { TObject* obj = key->ReadObj(); auto rv = TBufferJSON::ConvertToJSON(obj); delete obj; return rv; }")

ROOT.gSystem.Load('libDataQualityUtils')
94 rv = zlib.adler32(((
' '.
join(_.GetName()
for _
in t.GetListOfBranches()))
95 + (
' '.
join(_.GetName() + _.GetTypeName()
for _
in t.GetListOfLeaves()))
102 thispath = d.GetPath()
104 thispath = thispath.split(
':', 1)[1]
107 for k
in d.GetListOfKeys():
108 if not args.metadata
and k.GetName() ==
'metadata' and k.GetClassName() ==
'TTree':
110 if k.GetClassName().startswith(
'TDirectory'):
113 if args.tree_entries
and k.GetClassName() ==
'TTree':
116 if k.GetClassName() !=
'TEfficiency':
117 fixedjson =
jsonfixup(ROOT.getjson(k), args.fuzzy_histbins)
119 j0 = json.loads(ROOT.getjson(k).
Data())
120 for subh
in (
'fPassedHistogram',
'fTotalHistogram'):
121 j0[subh] = json.loads(
jsonfixup(json.dumps(j0[subh])))
122 fixedjson = json.dumps(j0, sort_keys=
True)
123 lhash = zlib.adler32(fixedjson.encode())
128 idxname = os.path.join(thispath, k.GetName())
129 accounting[idxname] = (k.GetObjlen(), k.GetNbytes()-k.GetKeylen())
130 hashes[idxname] = lhash
131 types[idxname] = k.GetClassName()
135 f = ROOT.TFile.Open(args.filename)
137 d = f.Get(args.path.rstrip(
'/'))
139 print(
"Can't access path", args.path,
"- exiting")
145 if ordering ==
'onfile':
146 def key(x):
return (x[1][1], x[1][0], x[0])
147 elif ordering ==
'uncompressed':
148 def key(x):
return (x[1][0], x[1][1], x[0])
150 def key(x):
return (x[0], x[1][1], x[1][0])
151 sortedl =
sorted(accounting.items(), key=key, reverse=
True)
154 + (
'%d uncompressed' % b
if not args.no_inmem
else '')
155 + (
', %d on file ' % c
if not args.no_onfile
else ' ')
157 % (types[a], a, hashes[a])
for a, (b, c)
in sortedl)
161 + (
'%d uncompressed' % b
if not args.no_inmem
else '')
162 + (
', %d on file' % c
if not args.no_onfile
else ' '))
163 % (types[a], a)
for a, (b, c)
in sortedl)