ATLAS Offline Software
Loading...
Searching...
No Matches
hist_file_dump.py
Go to the documentation of this file.
1#!/usr/bin/env python
2# Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3
4import ROOT
5import sys, os
6import argparse
7import zlib
8import json
9from PyUtils.fprint import _formatFloat
10from typing import List, Tuple, Callable
11
def fixprecision(x, precision=15):
    """Reduce a float to a reproducible precision and format it.

    Non-float values are returned unchanged.  A float is split with
    ``math.frexp``; the mantissa is rendered with ``'%.12g'``, that text is
    cut to its first ``precision`` characters (a sign and the decimal point
    count towards the limit), and the value is rebuilt from the truncated
    mantissa and the original exponent before being handed to
    ``_formatFloat``.

    Args:
        x: Value to normalise.
        precision: Maximum number of characters kept from the mantissa text.

    Returns:
        ``x`` itself when it is not a float, otherwise whatever
        ``_formatFloat`` returns for the reduced value.
    """
    import math

    if not isinstance(x, float):
        return x

    mantissa, exponent = math.frexp(x)
    mantissa_text = '%.12g' % mantissa
    # ldexp scales by a power of two directly in floating point.  The former
    # ``* 2**exponent`` built a Python int that cannot be converted to float
    # once exponent reaches 1024 (any |x| >= 2**1023), raising OverflowError
    # even when the rebuilt value itself is representable.
    return _formatFloat(math.ldexp(float(mantissa_text[:precision]), exponent))
20
def jsonfixup(instr, fuzzyarray=False):
    """Normalise an object's JSON text so that equivalent objects match.

    The JSON is parsed, selected members are rewritten, and the result is
    re-serialised with sorted keys:

    * the ``fTsumw*`` sums are converted to float and passed through
      ``fixprecision``;
    * ``fSumw2`` and ``fArray`` (plus ``fBinEntries`` and ``fBinSumw2`` when
      ``fuzzyarray`` is set) are passed element-wise through
      ``fixprecision``, with the ``fArray`` precision chosen from
      ``_typename``;
    * ``fBasketSeek`` of every entry in ``fBranches['arr']`` is emptied;
    * ``None`` entries in ``fXaxis['fLabels']['opt']`` become ``''``.

    Args:
        instr: JSON text, or an object whose ``Data()`` method returns it.
        fuzzyarray: Also reduce the precision of ``fBinEntries`` and
            ``fBinSumw2``.

    Returns:
        The normalised JSON as a string with sorted keys.
    """
    if not isinstance(instr, str):
        instr = instr.Data()
    j = json.loads(instr)

    # The following are very subject to floating point numeric effects;
    # they are doubles, so the default precision of fixprecision is used.
    for badkey in ('fTsumw', 'fTsumwx', 'fTsumw2', 'fTsumwx2', 'fTsumwy',
                   'fTsumwy2', 'fTsumwxy', 'fTsumwz', 'fTsumwz2', 'fTsumwxz',
                   'fTsumwyz'):
        if badkey in j:
            j[badkey] = fixprecision(float(j[badkey]))

    # (member, precision, converter applied before fixprecision)
    arrkeys: List[Tuple[str, int, Callable]] = [('fSumw2', 15, float)]
    if fuzzyarray:
        arrkeys += [('fBinEntries', 15, float), ('fBinSumw2', 15, float)]

    # Apply a different precision to fArray depending on the object type.
    typename = j['_typename'] if '_typename' in j else None
    if typename in ('TH1F', 'TH2F', 'TH3F'):
        arrkeys.append(('fArray', 6, float))
    elif typename in ('TH1C', 'TH1I', 'TH1S', 'TH2C', 'TH2I', 'TH2S',
                      'TH3C', 'TH3I', 'TH3S'):
        # Precision is irrelevant here: the contents *should* be integers,
        # so they are passed through without conversion.
        arrkeys.append(('fArray', 15, lambda x: x))
    else:
        arrkeys.append(('fArray', 15, float))

    for badkey, precision, func in arrkeys:
        if badkey in j:
            j[badkey] = [fixprecision(func(value), precision)
                         for value in j[badkey]]

    # The following ignores small layout fluctuations in TTrees.
    if 'fBranches' in j:
        for branch in j['fBranches']['arr']:
            branch['fBasketSeek'] = []

    # The formatting of the opt array changed in newer ROOT versions
    # to be filled with instances of None rather than empty strings.
    # Non-dict values (e.g. a null fXaxis or fLabels) are skipped instead of
    # raising TypeError on the membership test.
    xaxis = j['fXaxis'] if 'fXaxis' in j else None
    labels = xaxis.get('fLabels') if isinstance(xaxis, dict) else None
    if isinstance(labels, dict) and 'opt' in labels:
        labels['opt'] = ['' if entry is None else entry
                         for entry in labels['opt']]

    return json.dumps(j, sort_keys=True)
62
# Command line interface.
parser = argparse.ArgumentParser()
parser.add_argument('filename',
                    help='Input HIST file name')
parser.add_argument('-r', '--rankorder', default='onfile',
                    choices=['onfile', 'uncompressed', 'name'],
                    help='rankorder is "onfile" (default), "uncompressed" or "name"')
parser.add_argument('-p', '--path',
                    help='Only look under this directory')
parser.add_argument('--hash', action='store_true',
                    help='Print hashes of objects')
parser.add_argument('--metadata', action='store_true',
                    help='Include metadata trees')
parser.add_argument('--no_onfile', action='store_true',
                    help="Don't show on file size")
parser.add_argument('--no_inmem', action='store_true',
                    help="Don't show in memory size")
parser.add_argument('--tree_entries', action='store_true',
                    help="Use more robust hash of TTree branches + entries")
parser.add_argument('--fuzzy_histbins', action='store_true',
                    help="Allow small variations in histogram bin content")
args = parser.parse_args()

ordering = args.rankorder

# Per-object bookkeeping filled by dumpdir(), all keyed by the object's path:
#   accounting -> (uncompressed size, on-file size without the key header)
#   hashes     -> hash value (0 when no hash was requested)
#   types      -> class name reported by the key
accounting = {}
hashes = {}
types = {}

# C++ helpers compiled by the ROOT interpreter and then reachable as
# ROOT.<name>.  dumpdir() calls getjson; the other helpers are not referenced
# in the code visible here.
ROOT.gInterpreter.LoadText("UInt_t bufferhash(TKey* key) { key->SetBuffer(); key->ReadFile(); UInt_t rv = TString::Hash(key->GetBuffer()+key->GetKeylen(), key->GetNbytes()-key->GetKeylen()); key->DeleteBuffer(); return rv; }")
ROOT.gInterpreter.LoadText("void* getbuffer(TKey* key) { key->SetBuffer(); key->ReadFile(); return (void*) (key->GetBuffer()+key->GetKeylen()); }")
ROOT.gInterpreter.LoadText("UInt_t bufferhash2(TKey* key) { TObject* obj = key->ReadObj(); TMessage msg(kMESS_OBJECT); msg.WriteObject(obj); UInt_t rv = TString::Hash(msg.Buffer(), msg.Length()); delete obj; return rv; }")
ROOT.gInterpreter.LoadText("UInt_t bufferhash3(TKey* key) { TObject* obj = key->ReadObj(); UInt_t rv = obj->Hash(); delete obj; return rv; }")
ROOT.gInterpreter.LoadText("TString getjson(TKey* key) { TObject* obj = key->ReadObj(); auto rv = TBufferJSON::ConvertToJSON(obj); delete obj; return rv; }")

ROOT.gSystem.Load('libDataQualityUtils')
96
def fuzzytreehash(tkey):
    """Hash a tree by its branch names, leaf names/types and entry count.

    The object is read from ``tkey`` and a single string is assembled from:
    the space-joined branch names, immediately followed (no separator) by the
    space-joined ``name + type name`` of every leaf, then a space and the
    number of entries.  The Adler-32 checksum of that string is returned.

    Args:
        tkey: Key whose ``ReadObj()`` yields an object providing
            ``GetListOfBranches()``, ``GetListOfLeaves()`` and
            ``GetEntries()``.

    Returns:
        The Adler-32 checksum as an integer.
    """
    tree = tkey.ReadObj()
    branch_names = ' '.join(branch.GetName()
                            for branch in tree.GetListOfBranches())
    leaf_descriptions = ' '.join(leaf.GetName() + leaf.GetTypeName()
                                 for leaf in tree.GetListOfLeaves())
    signature = branch_names + leaf_descriptions + ' ' + str(tree.GetEntries())
    return zlib.adler32(signature.encode())
105
def dumpdir(d):
    """Record size, class name and hash for every object below ``d``.

    Walks the keys of ``d``.  Keys whose class name starts with
    ``TDirectory`` are descended into recursively after the other keys of
    this directory have been handled.  For every other key an entry is
    written to the module-level ``accounting``, ``hashes`` and ``types``
    dictionaries, indexed by the directory path joined with the key name.

    The hash is:

    * ``fuzzytreehash(key)`` for ``TTree`` keys when ``--tree_entries`` is set;
    * otherwise, when ``--hash`` is set, the Adler-32 checksum of the
      object's normalised JSON (see ``jsonfixup``);
    * otherwise ``0``.

    A ``TTree`` named ``metadata`` is skipped unless ``--metadata`` is set.

    Args:
        d: Directory-like object providing ``GetPath()`` and
            ``GetListOfKeys()``.
    """
    thispath = d.GetPath()
    if ':' in thispath:
        # Keep only the part after the first ':'.
        thispath = thispath.split(':', 1)[1]

    subdirs = []
    for k in d.GetListOfKeys():
        classname = k.GetClassName()
        if not args.metadata and k.GetName() == 'metadata' and classname == 'TTree':
            continue
        if classname.startswith('TDirectory'):
            subdirs.append(k)
            continue

        if args.tree_entries and classname == 'TTree':
            lhash = fuzzytreehash(k)
        elif args.hash:
            if classname != 'TEfficiency':
                fixedjson = jsonfixup(ROOT.getjson(k), args.fuzzy_histbins)
            else:
                # Normalise the two embedded histograms individually, then
                # serialise the enclosing object with sorted keys.
                j0 = json.loads(ROOT.getjson(k).Data())
                for subh in ('fPassedHistogram', 'fTotalHistogram'):
                    j0[subh] = json.loads(jsonfixup(json.dumps(j0[subh])))
                fixedjson = json.dumps(j0, sort_keys=True)
            # zlib.adler32 always returns an unsigned value on Python 3, so
            # no sign correction is needed.
            lhash = zlib.adler32(fixedjson.encode())
        else:
            lhash = 0

        idxname = os.path.join(thispath, k.GetName())
        accounting[idxname] = (k.GetObjlen(), k.GetNbytes() - k.GetKeylen())
        hashes[idxname] = lhash
        types[idxname] = classname

    for k in subdirs:
        dumpdir(k.ReadObj())
139
# Open the input file and pick the directory to start from.
f = ROOT.TFile.Open(args.filename)
if not f or f.IsZombie():
    # Without this check a file that failed to open only surfaces later as
    # an unrelated error when the directory is accessed.
    print("Can't open file", args.filename, "- exiting")
    sys.exit(1)
if args.path:
    d = f.Get(args.path.rstrip('/'))
    if not d:
        print("Can't access path", args.path, "- exiting")
        sys.exit(1)
else:
    d = f
dumpdir(d)

# Each item is (path, (uncompressed size, on-file size)).  The primary sort
# field follows --rankorder and the remaining fields break ties.
if ordering == 'onfile':
    def key(x): return (x[1][1], x[1][0], x[0])
elif ordering == 'uncompressed':
    def key(x): return (x[1][0], x[1][1], x[0])
else:
    def key(x): return (x[0], x[1][1], x[1][0])
sortedl = sorted(accounting.items(), key=key, reverse=True)

# One line per object: "<class> <path>: <sizes>[(hash <value>)]".
output = []
for name, (uncompressed, onfile) in sortedl:
    line = '%s %s: ' % (types[name], name)
    if not args.no_inmem:
        line += '%d uncompressed' % uncompressed
    if args.hash:
        # Note the trailing blank after "on file" in hash mode: it separates
        # the size from the hash field.
        line += ' ' if args.no_onfile else ', %d on file ' % onfile
        line += '(hash %s)' % hashes[name]
    else:
        line += ' ' if args.no_onfile else ', %d on file' % onfile
    output.append(line)
print('\n'.join(output))
170
@ Data
Definition BaseObject.h:11
void print(char *figname, TCanvas *c1)
fixprecision(x, precision=15)
jsonfixup(instr, fuzzyarray=False)