8 __doc__ =
"diff two ROOT files (containers and sizes)"
9 __author__ =
"Sebastien Binet"
12 import PyUtils.acmdlib
as acmdlib
14 from functools
import cache, reduce
15 from math
import isnan
16 from numbers
import Real
17 from os
import environ
20 g_ALLOWED_MODES = (
'summary',
'semi-detailed',
'detailed')
21 g_ALLOWED_ERROR_MODES = (
'bailout',
'resilient')
27 return args.mode ==
'detailed'
30 return args.mode ==
'summary'
33 return args.error_mode ==
'bailout'
39 _vectypes = {
'std::vector<float>',
40 'std::vector<double>',
42 'std::vector<unsigned int>',
44 'std::vector<unsigned long>',
46 'std::vector<unsigned short>',
48 'std::vector<unsigned char>',
49 'std::vector<long long>',
50 'std::vector<unsigned long long>'}
53 if getattr(
type(
type(v1)),
'__cpp_name__',
None)
not in _vectypes:
55 if type(v1)
is not type(v2):
return -1
57 if sz != v2.size():
return -1
63 if val1 != val2
and not all(
64 [isinstance(_, Real)
and isnan_(_)
for _
in (val1, val2)]):
72 @acmdlib.command(name=
'diff-root')
73 @acmdlib.argument(
'old',
74 help=
'path to the reference ROOT file to analyze')
75 @acmdlib.argument(
'new',
76 help=
'path to the ROOT file to compare to the reference')
77 @acmdlib.argument(
'-t',
'--tree-name',
78 default=
'CollectionTree',
79 help=
'name of the TTree to compare')
80 @acmdlib.argument(
'--branches-of-interest',
83 help=
'set of regex matching names of branches to compare; assumes all if none specified.')
84 @acmdlib.argument(
'--ignore-leaves',
86 default=(
'Token',
'index_ref',
r'(.*)_timings\.(.*)',
r'(.*)_mems\.(.*)',
r'(.*)TrigCostContainer(.*)'),
87 help=
'set of leaves names to ignore from comparison; can be a branch name or a partial leaf name (accepts regex)')
88 @acmdlib.argument(
'--enforce-leaves',
91 help=
'set of leaves names we make sure to compare')
92 @acmdlib.argument(
'--leaves-prefix',
94 help=
'Remove prefix value from all leaves')
95 @acmdlib.argument(
'--known-hacks',
97 default=(
'm_athenabarcode',
'm_token',),
98 help=
'set of leaves which are known to fail (but should be fixed at some point) [default: %(default)s]')
99 @acmdlib.argument(
'--entries',
101 help=
'a list of entries (indices, not event numbers) or an expression (like range(3) or 0,2,1 or 0:3) leading to such a list, to compare.')
102 @acmdlib.argument(
'-v',
'--verbose',
105 help=
"""Enable verbose printout""")
106 @acmdlib.argument(
'--order-trees',
109 help=
"""To order trees according to event numbers""")
110 @acmdlib.argument(
'--exact-branches',
113 help=
"""Only allow exact list of branches present""")
114 @acmdlib.argument(
'--mode',
115 choices=g_ALLOWED_MODES,
118 Enable a particular mode.
119 'summary': only report the number of differences.
120 'semi-detailed': report the number of differences and the leaves that differ.
121 'detailed': display everything.
122 default='%(default)s'.
126 @acmdlib.argument(
'--error-mode',
127 choices=g_ALLOWED_ERROR_MODES,
130 Enable a particular error mode.
131 'bailout': bail out on first error.
132 'resilient': keep running.
133 default='%(default)s'.
137 @acmdlib.argument(
'--nan-equal',
140 help=
"""Compare nan as equal to nan""")
143 """diff two ROOT files (containers and sizes)"""
149 gc.set_threshold (100000)
151 import PyUtils.RootUtils
as ru
152 root = ru.import_root()
155 if 'AtlasProject' in environ
and environ[
'AtlasProject'] ==
'Athena':
157 root.xAOD.ParticleContainer_v1
158 root.xAOD.DiTauJetContainer_v1
160 import PyUtils.Logging
as L
161 msg = L.logging.getLogger(
'diff-root')
163 msg.setLevel(L.logging.VERBOSE)
165 msg.setLevel(L.logging.INFO)
167 from PyUtils.Helpers
import ShutUp
169 if args.entries ==
'':
172 msg.info(
'comparing tree [%s] in files:', args.tree_name)
173 msg.info(
' old: [%s]', args.old)
174 msg.info(
' new: [%s]', args.new)
175 msg.info(
'branches of interest: %s', args.branches_of_interest)
176 msg.info(
'ignore leaves: %s', args.ignore_leaves)
177 msg.info(
'enforce leaves: %s', args.enforce_leaves)
178 msg.info(
'leaves prefix: %s', args.leaves_prefix)
179 msg.info(
'hacks: %s', args.known_hacks)
180 msg.info(
'entries: %s', args.entries)
181 msg.info(
'mode: %s', args.mode)
182 msg.info(
'error mode: %s', args.error_mode)
183 msg.info(
'order trees: %s', args.order_trees)
184 msg.info(
'exact branches: %s', args.exact_branches)
186 import PyUtils.Helpers
as H
188 fold = ru.RootFileDumper(args.old, args.tree_name)
189 fnew = ru.RootFileDumper(args.new, args.tree_name)
def tree_infos(tree, args):
    """Summarise a tree: its entry count and the set of leaf names to compare.

    Parameters:
      tree -- object providing ``GetEntriesFast()`` and ``GetListOfLeaves()``;
              every leaf must provide ``GetBranch().GetName()``.
      args -- parsed options; reads ``args.ignore_leaves`` (names dropped by
              exact membership) and ``args.leaves_prefix`` (text removed from
              every kept name; a falsy value disables the removal).

    Returns:
      dict with two keys:
        'entries' -- the value of ``tree.GetEntriesFast()``
        'leaves'  -- set of the retained (and possibly prefix-stripped) names
    """
    nentries = tree.GetEntriesFast()

    leaves = []
    for leaf in tree.GetListOfLeaves():
        # Resolve the owning branch name once per leaf instead of once for
        # the membership test and a second time for the stored value.
        name = leaf.GetBranch().GetName()
        if name not in args.ignore_leaves:
            leaves.append(name)

    if args.leaves_prefix:
        # NOTE: str.replace removes *every* occurrence of the text, not only
        # a leading one.  Kept on purpose: reach_next() strips dumped names
        # with the same call, and both sides must produce identical names.
        leaves = [name.replace(args.leaves_prefix, '') for name in leaves]

    return {
        'entries': nentries,
        'leaves': set(leaves),
    }
204 def ordered_indices(tree, reverse_order = False):
205 from collections
import OrderedDict
209 nevts = tree.GetEntriesFast()
211 eiDict = {
'':[
'EventInfoAuxDyn.eventNumber'],
212 'eventNumber':[
'EventInfoAux.',
214 'xAOD::EventAuxInfo_v3_EventInfoAux.',
215 'xAOD::EventAuxInfo_v2_EventInfoAux.',
216 'xAOD::EventAuxInfo_v1_EventInfoAux.',
217 'xAOD::EventAuxInfo_v3_Bkg_EventInfoAux.',
218 'xAOD::EventAuxInfo_v2_Bkg_EventInfoAux.',
219 'xAOD::EventAuxInfo_v1_Bkg_EventInfoAux.'],
220 'm_event_ID m_event_number':[
'McEventInfo',
221 'ByteStreamEventInfo',
222 'EventInfo_p4_McEventInfo',
223 'EventInfo_p4_ByteStreamEventInfo']}
226 """Find the relevant attributes for reading the event number"""
227 for ii, jj
in eiDict.items():
229 if hasattr(tree, kk):
235 attr1, attr2 = find_attrs()
236 if attr1
is None or attr2
is None:
237 msg.error(
'Cannot read event info, will bail out.')
238 msg.error(f
"Tried attributes {attr1} and {attr2}")
240 attrs = [attr1] + attr2.split()
242 tree.SetBranchStatus (
'*', 0)
243 tree.SetBranchStatus (attr1, 1)
245 for idx
in range(0, nevts):
247 msg.debug(
'Read {} events from the input so far'.
format(idx))
249 event_number =
reduce(getattr, attrs, tree)
250 msg.debug(
'Idx : EvtNum {:10d} : {}'.
format(idx,event_number))
251 dict_in[idx] = event_number
253 tree.SetBranchStatus (
'*', 1)
256 dict_out = OrderedDict(
sorted(dict_in.items(), key=operator.itemgetter(1), reverse = reverse_order))
259 return [(idx, ival)
for idx, ival
in dict_out.items()]
261 def diff_tree(fold, fnew, args):
263 'old' : tree_infos(fold.tree, args),
264 'new' : tree_infos(fnew.tree, args),
267 nentries =
min(infos[
'old'][
'entries'],
268 infos[
'new'][
'entries'])
269 itr_entries = nentries
270 if args.entries
in (-1,
'',
'-1'):
272 itr_entries = nentries
273 if infos[
'old'][
'entries'] != infos[
'new'][
'entries']:
274 msg.info(
'different numbers of entries:')
275 msg.info(
' old: [%s]', infos[
'old'][
'entries'])
276 msg.info(
' new: [%s]', infos[
'new'][
'entries'])
277 msg.info(
'=> comparing [%s] first entries...', nentries)
279 itr_entries = args.entries
281 msg.info(
'comparing over [%s] entries...', itr_entries)
284 def skip_leaf(name_from_dump, skip_leaves):
285 """ Here decide if the current leaf should be skipped.
286 Previously the matching was done based on the full or partial
287 leaf name. E.g. foo.bar.zzz would be skipped if any of the
288 following were provided:
292 * Any of the foo, bar, or zzz
293 Now, we make a regex matching such that the user doesn't
294 need to provide full branch names.
296 for pattern
in skip_leaves:
298 m = re.match(pattern, name_from_dump)
def skip_leaf_entry(entry2, skip_leaves):
    """Tell whether the leaf named by a dumped entry should be skipped.

    ``entry2`` is the sequence of name components of a dumped entry.  Purely
    numeric components are left out; the remaining ones are joined with '.'
    to form the leaf name, which is then handed to ``skip_leaf`` together
    with ``skip_leaves``.  The result of ``skip_leaf`` is returned unchanged.
    """
    name_parts = (part for part in entry2 if not part.isdigit())
    return skip_leaf('.'.join(name_parts), skip_leaves)
def filter_branches(leaves):
    """Return the subset of ``leaves`` matched by a branch-of-interest regex.

    Each pattern in ``args.branches_of_interest`` (taken from the enclosing
    scope) is compiled and applied with ``match``, i.e. anchored at the start
    of the leaf name.  A leaf is kept as soon as one pattern matches it.
    """
    # NOTE(review): a set accumulator is assumed -- the callers combine the
    # result with other sets -- confirm against the complete file.
    selected = set()
    for pattern in args.branches_of_interest:
        matcher = re.compile(pattern).match
        selected |= {name for name in leaves if matcher(name)}
    return selected
318 skipset = frozenset(args.ignore_leaves)
319 removed_leaves = infos[
'old'][
'leaves'] - infos[
'new'][
'leaves']
320 added_leaves = infos[
'new'][
'leaves'] - infos[
'old'][
'leaves']
322 if args.branches_of_interest:
323 removed_leaves = filter_branches(removed_leaves)
324 added_leaves = filter_branches(added_leaves)
326 removed_leaves = {l
for l
in removed_leaves
if not skip_leaf(l, skipset)}
327 added_leaves = {l
for l
in added_leaves
if not skip_leaf(l, skipset)}
330 removed_leaves_list =
list(removed_leaves)
331 removed_leaves_list.sort()
332 if args.exact_branches:
333 msg.error(
'the following variables exist only in the old file !')
334 for l
in removed_leaves_list:
335 msg.error(
' - [%s]', l)
337 msg.warning(
'the following variables exist only in the old file !')
338 for l
in removed_leaves_list:
339 msg.warning(
' - [%s]', l)
341 added_leaves_list =
list(added_leaves)
342 added_leaves_list.sort()
343 if args.exact_branches:
344 msg.error(
'the following variables exist only in the new file !')
345 for l
in added_leaves_list:
346 msg.error(
' - [%s]', l)
348 msg.warning(
'the following variables exist only in the new file !')
349 for l
in added_leaves_list:
350 msg.warning(
' - [%s]', l)
353 skip_leaves = [ l.rstrip(
'.')
for l
in removed_leaves | added_leaves |
set(args.ignore_leaves) ]
354 for l
in skip_leaves:
355 msg.debug(
'skipping [%s]', l)
356 skip_leaves = frozenset (skip_leaves)
358 oldBranches =
set(b.GetName().rstrip(
'\0')
for b
in fold.tree.GetListOfBranches())
359 newBranches =
set(b.GetName().rstrip(
'\0')
for b
in fnew.tree.GetListOfBranches())
360 branches = oldBranches & newBranches
362 if args.branches_of_interest:
363 branches_of_interest = args.branches_of_interest
366 for regex
in branches_of_interest:
367 test = re.compile(regex)
368 if not {l
for l
in infos[
'new'][
'leaves']
if test.match(l)}:
369 msg.error(f
'no match in new file for branch of interest: {regex}')
372 for branch_of_interest
in branches_of_interest:
374 r = re.compile(branch_of_interest)
375 BOI_matches.update(
filter(r.match, branches))
379 if len(BOI_matches)<1:
380 msg.error(
'No matching branches found in both files for supplied branches of interest, quitting.')
382 msg.info(
'only the following branches of interest will be compared: ')
383 for l
in BOI_matches:
384 msg.info(
' - [%s]', l)
385 branches = BOI_matches
387 msg.info(
'comparing [%s] leaves over entries...', len(infos[
'old'][
'leaves'] & infos[
'new'][
'leaves']))
390 if args.exact_branches:
391 n_bad += len(removed_leaves) + len(added_leaves)
393 summary = collections.defaultdict(int)
395 def get_event_range(entry):
398 if isinstance(entry, str):
402 vals = entry.split(
':')
403 smin =
int(vals[0])
if len(vals) > 0
and vals[0].isdigit()
else 0
404 smax =
int(vals[1])
if len(vals) > 1
and vals[1].isdigit()
else None
406 elif entry.isdigit():
408 smax =
int(entry)
if int(entry) > 0
else None
410 elif isinstance(entry, int):
412 smax = entry
if entry > 0
else None
415 msg.warning(f
"Unknown entries argument {entry}, will compare all events...")
416 msg.debug(f
"Event slice is parsed as [{smin},{smax}]")
420 smin, smax = get_event_range(itr_entries)
421 idx_old = ordered_indices(fold.tree)[smin:smax]
422 idx_new = ordered_indices(fnew.tree)[smin:smax]
423 itr_entries_old, event_numbers_old =
list(map(list,zip(*idx_old)))
424 itr_entries_new, event_numbers_new =
list(map(list,zip(*idx_new)))
425 msg.debug(f
"List of old indices {itr_entries_old}")
426 msg.debug(f
"List of new indices {itr_entries_new}")
427 msg.debug(f
"List of old events {event_numbers_old}")
428 msg.debug(f
"List of new events {event_numbers_new}")
429 if event_numbers_old != event_numbers_new:
430 msg.error(
'Events differ, quitting!')
431 msg.error(f
"List of old events {event_numbers_old}")
432 msg.error(f
"List of new events {event_numbers_new}")
435 itr_entries_old = itr_entries
436 itr_entries_new = itr_entries
438 branches =
sorted(branches)
439 old_dump_iter = fold.dump(args.tree_name, itr_entries_old, branches,
True,
False)
440 new_dump_iter = fnew.dump(args.tree_name, itr_entries_new, branches,
True,
False)
def leafname_fromdump(entry):
    """Build the dotted leaf name of a dumped entry.

    ``entry`` is a dump record whose third item (``entry[2]``) is the list of
    name components, all strings.  Purely numeric components are dropped and
    the remaining ones are joined with '.'.

    Returns None when ``entry`` is None (no record available), otherwise the
    joined name (an empty string if every component is numeric).
    """
    if entry is None:
        # An exhausted dump yields no record; there is no name to build.
        return None
    return '.'.join(part for part in entry[2] if not part.isdigit())
def elindices_fromdump(entry):
    """Extract the numeric components of a dumped entry's name.

    ``entry`` is a dump record whose third item (``entry[2]``) is the list of
    name components, all strings.  Every purely numeric component is
    converted with ``int`` and returned, in order of appearance.

    Returns None when ``entry`` is None (no record available), otherwise a
    list of ints (empty if the name has no numeric component).
    """
    if entry is None:
        # An exhausted dump yields no record; there are no indices to read.
        return None
    return [int(part) for part in entry[2] if part.isdigit()]
456 def reach_next(dump_iter, skip_leaves, skip_dict, leaves_prefix=None):
460 entry =
next(dump_iter)
461 except StopIteration:
464 entry2_orig = entry[2][0]
465 entry[2][0] = entry[2][0].rstrip(
'.\0')
467 entry[2][0] = entry[2][0].
replace(leaves_prefix,
'')
477 skip = skip_dict.setdefault (entry2_orig, entry[1])
478 if skip > 0
and skip != entry[1]:
482 if not skip_leaf(leafname_fromdump(entry), skip_leaves):
484 skip_dict[entry2_orig] = -1
485 msg.debug(
'SKIP: {}'.
format(leafname_fromdump(entry)))
495 d_old = reach_next(old_dump_iter, skip_leaves, old_skip_dict, args.leaves_prefix)
497 d_new = reach_next(new_dump_iter, skip_leaves, new_skip_dict, args.leaves_prefix)
499 if not d_new
and not d_old:
505 if (args.order_trees
and d_old
and d_new
and d_old[2:] == d_new[2:])
or d_old == d_new:
510 tree_name, ientry, iname, iold = d_old
512 tree_name, jentry, jname, inew = d_new
514 idiff = _vecdiff (iold, inew, args.nan_equal)
521 iname = iname[:-1] + [idiff] + iname[-1:]
522 jname = jname[:-1] + [idiff] + jname[-1:]
526 if all([isinstance(x,Real)
and isnan(x)
for x
in [iold,inew]]):
531 if iname[-1]
in args.known_hacks
or jname[-1]
in args.known_hacks:
538 id_old = dict(idx_old)[ientry]
539 id_new = dict(idx_new)[jentry]
544 if not args.order_trees:
545 in_synch = d_old
and d_new
and d_old[:-1] == d_new[:-1]
547 in_synch = d_old
and d_new
and d_old[0] == d_new[0]
and d_old[2] == d_new[2]
and id_old == id_new
551 msg.info(
'::sync-old %s',
'.'.
join([
"%03i"%ientry]+
list(map(str, d_old[2]))))
553 msg.info(
'::sync-old ABSENT')
555 msg.info(
'::sync-new %s',
'.'.
join([
"%03i"%jentry]+
list(map(str, d_new[2]))))
557 msg.info(
'::sync-new ABSENT')
562 summary[leafname_fromdump(d_new)] += 1
565 summary[leafname_fromdump(d_old)] += 1
567 branch_old = f
"{id_old}.{d_old[2][0]}"
568 branch_new = f
"{id_new}.{d_new[2][0]}"
569 leaf_old = leafname_fromdump(d_old)
570 leaf_new = leafname_fromdump(d_new)
571 indices_old = elindices_fromdump(d_old)
572 indices_new = elindices_fromdump(d_new)
578 if branch_old > branch_new:
580 elif branch_old < branch_new:
583 if leaf_old > leaf_new:
585 elif leaf_old < leaf_new:
587 elif indices_old
and indices_new
and len(indices_old) == len(indices_new):
588 if indices_old > indices_new:
590 elif indices_old < indices_new:
594 if read_old
and not read_new:
596 msg.info(
'::sync-old skipping entry')
598 summary[leaf_old] += 1
599 elif read_new
and not read_old:
601 msg.info(
'::sync-new skipping entry')
603 summary[leaf_new] += 1
605 msg.error(
'::sync attempt failed, bailing out...')
606 msg.error(f
"::sync-old Leaf vs Index : {leaf_old} vs {indices_old}")
607 msg.error(f
"::sync-new Leaf vs Index : {leaf_new} vs {indices_new}")
610 summary[leaf_old] += 1
611 summary[leaf_new] += 1
615 msg.info(
'*** exit on first error ***')
619 if not args.order_trees:
620 n =
'.'.
join(
list(map(str, [
"%03i"%ientry]+iname)))
622 n =
'.'.
join(
list(map(str, [
"%03i"%ientry]+iname+[
"%03i"%jentry]+jname)))
625 diff_value = 50.*(iold-inew)/(iold+inew)
626 diff_value =
'%.8f%%' % (diff_value,)
630 msg.info(
'%s %r -> %r => diff= [%s]', n, iold, inew, diff_value)
632 summary[leafname_fromdump(d_old)] += 1
634 if iname[0]
in args.enforce_leaves
or jname[0]
in args.enforce_leaves:
635 msg.info(
"don't compare further")
639 msg.info(
'Found [%s] identical leaves', n_good)
640 msg.info(
'Found [%s] different leaves', n_bad)
643 keys =
sorted(summary.keys())
646 msg.info(
' [%s]: %i leaves differ', n, v)
650 if (
not fold.allgood)
or (
not fnew.allgood):
651 msg.error(
'NOTE: there were errors during the dump')
652 msg.info(
'fold.allgood: %s' , fold.allgood)
653 msg.info(
'fnew.allgood: %s' , fnew.allgood)
657 ndiff = diff_tree(fold, fnew, args)
659 msg.error(
'files differ!')
661 msg.info(
'all good.')