143 """diff two ROOT files (containers and sizes)"""
149 gc.set_threshold (100000)
151 import PyUtils.RootUtils
as ru
152 root = ru.import_root()
154 RNTupleReader = root.RNTupleReader
155 except AttributeError:
156 RNTupleReader = root.Experimental.RNTupleReader
159 if 'AtlasProject' in environ
and environ[
'AtlasProject'] ==
'Athena':
161 root.xAOD.ParticleContainer_v1
162 root.xAOD.DiTauJetContainer_v1
164 import PyUtils.Logging
as L
165 msg = L.logging.getLogger(
'diff-root')
167 msg.setLevel(L.logging.VERBOSE)
169 msg.setLevel(L.logging.INFO)
171 from PyUtils.Helpers
import ShutUp
173 if args.entries ==
'':
176 msg.info(
'comparing tree [%s] in files:', args.tree_name)
177 msg.info(
' old: [%s]', args.old)
178 msg.info(
' new: [%s]', args.new)
179 msg.info(
'branches of interest: %s', args.branches_of_interest)
180 msg.info(
'ignore leaves: %s', args.ignore_leaves)
181 msg.info(
'enforce leaves: %s', args.enforce_leaves)
182 msg.info(
'leaves prefix: %s', args.leaves_prefix)
183 msg.info(
'hacks: %s', args.known_hacks)
184 msg.info(
'entries: %s', args.entries)
185 msg.info(
'mode: %s', args.mode)
186 msg.info(
'error mode: %s', args.error_mode)
187 msg.info(
'order trees: %s', args.order_trees)
188 msg.info(
'exact branches: %s', args.exact_branches)
190 import PyUtils.Helpers
as H
192 fold = ru.RootFileDumper(args.old, args.tree_name)
193 fnew = ru.RootFileDumper(args.new, args.tree_name)
def obj_info(obj, args):
    """Return the summary information for a supported ROOT object.

    A ``root.TTree`` is handed to ``_tree_info`` and an ``RNTupleReader``
    to ``_reader_info``; ``args`` is forwarded unchanged in both cases.

    Raises:
        NotImplementedError: if ``obj`` is neither of the supported types.
    """
    if isinstance(obj, root.TTree):
        return _tree_info(obj, args)

    if not isinstance(obj, RNTupleReader):
        raise NotImplementedError(f"'obj_info' not implemented for object of {type(obj)=}")

    # The reader inspection runs inside H.ShutUp with a filter for the
    # "has no virtual destructor" RuntimeWarning (presumably to keep that
    # message out of the output -- see PyUtils.Helpers.ShutUp).
    destructor_warning = r'.+RuntimeWarning: class "[\w:]+" has no virtual destructor'
    with H.ShutUp(filters=[destructor_warning]):
        return _reader_info(obj, args)
205 def _tree_info(tree, args):
206 nentries = tree.GetEntriesFast()
208 leaves = [l.GetBranch().GetName()
for l
in tree.GetListOfLeaves()
209 if l.GetBranch().GetName()
not in args.ignore_leaves]
210 if args.leaves_prefix:
211 leaves = [l.replace(args.leaves_prefix,
'')
for l
in leaves]
214 'leaves':
set(leaves),
217 def _reader_info(reader, args):
218 nentries = reader.GetNEntries()
220 RFieldVisitor = root.Detail.RFieldVisitor
221 except AttributeError:
222 RFieldVisitor = root.Experimental.Detail.RFieldVisitor
223 class NameVisitor(RFieldVisitor):
227 def VisitField(self, field):
228 if field.GetFieldName()[0] ==
'_':
230 self.names.
append(field.GetQualifiedFieldName())
233 subFields = field.GetConstSubfields()
234 except AttributeError:
235 subFields = field.GetSubFields()
239 f.AcceptVisitor(self)
240 def VisitFieldZero(self, field):
243 subFields = field.GetConstSubfields()
244 except AttributeError:
245 subFields = field.GetSubFields()
249 f.AcceptVisitor(self)
251 fieldZero = reader.GetModel().GetConstFieldZero()
253 visitor = NameVisitor(names)
254 fieldZero.AcceptVisitor(visitor)
255 leaves = visitor.names
256 leaves = [l
for l
in leaves
if l
not in args.ignore_leaves]
257 if args.leaves_prefix:
258 leaves = [l.replace(args.leaves_prefix,
'')
for l
in leaves]
261 'leaves':
set(leaves),
def ordered_indices(obj, reverse_order=False):
    """Return the ordered (entry index, event number) pairs for ``obj``.

    The work is delegated to ``_tree_ordered_indices`` for a ``root.TTree``
    and to ``_reader_ordered_indices`` for an ``RNTupleReader``;
    ``reverse_order`` is passed through to the chosen implementation.

    Raises:
        NotImplementedError: if ``obj`` is neither of the supported types.
    """
    if isinstance(obj, root.TTree):
        return _tree_ordered_indices(obj, reverse_order)

    if isinstance(obj, RNTupleReader):
        return _reader_ordered_indices(obj, reverse_order)

    raise NotImplementedError(f"'ordered_indices' not implemented for object of {type(obj)=}")
272 def _reader_ordered_indices(reader, reverse_order=False):
276 nevts = reader.GetNEntries()
278 eiDict = {(): [
'EventInfoAuxDyn:eventNumber'],
279 (
'eventNumber',): [
'EventInfoAux:',
281 'xAOD::EventAuxInfo_v3_EventInfoAux:',
282 'xAOD::EventAuxInfo_v2_EventInfoAux:',
283 'xAOD::EventAuxInfo_v1_EventInfoAux:',
284 'xAOD::EventAuxInfo_v3_Bkg_EventInfoAux:',
285 'xAOD::EventAuxInfo_v2_Bkg_EventInfoAux:',
286 'xAOD::EventAuxInfo_v1_Bkg_EventInfoAux:'],
287 (
'm_event_ID',
'm_event_number'): [
'McEventInfo',
288 'ByteStreamEventInfo',
289 'EventInfo_p4_McEventInfo',
290 'EventInfo_p4_ByteStreamEventInfo']}
293 """Find the relevant attributes for reading the event number"""
295 kInvalidDescriptorId = root.kInvalidDescriptorId
296 except AttributeError:
297 kInvalidDescriptorId = root.Experimental.kInvalidDescriptorId
298 for path, names
in eiDict.items():
300 if (fieldId := reader.GetDescriptor().FindFieldId(name)) != kInvalidDescriptorId:
301 typeName = reader.GetDescriptor().GetFieldDescriptor(fieldId).GetTypeName()
302 return (name, typeName), path
306 name, attrs = find_attrs()
307 if name
is None or attrs
is None:
308 msg.error(
'Cannot read event info, will bail out.')
309 msg.error(f
"Tried {name=} and attributes {attrs=}")
312 view = reader.GetView[name[1]](name[0])
313 for idx
in range(nevts):
315 msg.debug(
'Read {} events from the input so far'.
format(idx))
317 event_number =
reduce(getattr, attrs, value)
318 msg.debug(
'Idx : EvtNum {:10d} : {}'.
format(idx, event_number))
319 dict_in[idx] = event_number
322 dict_out = dict(
sorted(dict_in.items(), key=operator.itemgetter(1), reverse=reverse_order))
325 return list(dict_out.items())
327 def _tree_ordered_indices(tree, reverse_order=False):
328 from collections
import OrderedDict
332 nevts = tree.GetEntriesFast()
334 eiDict = {
'':[
'EventInfoAuxDyn.eventNumber'],
335 'eventNumber':[
'EventInfoAux.',
337 'xAOD::EventAuxInfo_v3_EventInfoAux.',
338 'xAOD::EventAuxInfo_v2_EventInfoAux.',
339 'xAOD::EventAuxInfo_v1_EventInfoAux.',
340 'xAOD::EventAuxInfo_v3_Bkg_EventInfoAux.',
341 'xAOD::EventAuxInfo_v2_Bkg_EventInfoAux.',
342 'xAOD::EventAuxInfo_v1_Bkg_EventInfoAux.'],
343 'm_event_ID m_event_number':[
'McEventInfo',
344 'ByteStreamEventInfo',
345 'EventInfo_p4_McEventInfo',
346 'EventInfo_p4_ByteStreamEventInfo']}
349 """Find the relevant attributes for reading the event number"""
350 for ii, jj
in eiDict.items():
352 if hasattr(tree, kk):
358 attr1, attr2 = find_attrs()
359 if attr1
is None or attr2
is None:
360 msg.error(
'Cannot read event info, will bail out.')
361 msg.error(f
"Tried attributes {attr1} and {attr2}")
363 attrs = [attr1] + attr2.split()
365 tree.SetBranchStatus (
'*', 0)
366 tree.SetBranchStatus (attr1, 1)
368 for idx
in range(0, nevts):
370 msg.debug(
'Read {} events from the input so far'.
format(idx))
372 event_number =
reduce(getattr, attrs, tree)
373 msg.debug(
'Idx : EvtNum {:10d} : {}'.
format(idx,event_number))
374 dict_in[idx] = event_number
376 tree.SetBranchStatus (
'*', 1)
379 dict_out = OrderedDict(
sorted(dict_in.items(), key=operator.itemgetter(1), reverse = reverse_order))
382 return [(idx, ival)
for idx, ival
in dict_out.items()]
384 def diff_obj(fold, fnew, args):
387 'old' : obj_info(fold.obj, args),
388 'new' : obj_info(fnew.obj, args),
391 nentries =
min(infos[
'old'][
'entries'],
392 infos[
'new'][
'entries'])
393 itr_entries = nentries
394 if args.entries
in (-1,
'',
'-1'):
396 itr_entries = nentries
397 if infos[
'old'][
'entries'] != infos[
'new'][
'entries']:
398 msg.info(
'different numbers of entries:')
399 msg.info(
' old: [%s]', infos[
'old'][
'entries'])
400 msg.info(
' new: [%s]', infos[
'new'][
'entries'])
401 msg.info(
'=> comparing [%s] first entries...', nentries)
403 itr_entries = args.entries
405 msg.info(
'comparing over [%s] entries...', itr_entries)
408 def skip_leaf(name_from_dump, skip_leaves):
409 """ Here decide if the current leaf should be skipped.
410 Previously the matching was done based on the full or partial
411 leaf name. E.g. foo.bar.zzz would be skipped if any of the
412 following were provided:
416 * Any of the foo, bar, or zzz
417 Now, we make a regex matching such that the user doesn't
418 need to provide full branch names.
420 for pattern
in skip_leaves:
422 if re.match(pattern, name_from_dump):
424 except re.error
as e:
425 from traceback
import format_exception
426 msg.error(
"Exception '%s', pattern %r, line %s, column %s\n%s",
427 e, e.pattern, e.lineno, e.colno,
"".
join(format_exception(e)))
def skip_leaf_entry(entry2, skip_leaves):
    """Decide whether the leaf described by a dump path should be skipped.

    ``entry2`` is a sequence of string path tokens. Purely numeric tokens
    are dropped, the remaining ones are joined with '.' and the resulting
    leaf name is checked with ``skip_leaf`` against ``skip_leaves``.
    """
    name_tokens = [token for token in entry2 if not token.isdigit()]
    return skip_leaf('.'.join(name_tokens), skip_leaves)
def filter_branches(leaves):
    """Return the set of ``leaves`` matched by any branch-of-interest regex.

    The patterns come from ``args.branches_of_interest`` in the enclosing
    scope and are applied with ``match`` (anchored at the start of the name).
    """
    patterns = [re.compile(regex) for regex in args.branches_of_interest]
    return {
        leaf
        for leaf in leaves
        if any(pattern.match(leaf) for pattern in patterns)
    }
444 skipset = frozenset(args.ignore_leaves)
445 removed_leaves = infos[
'old'][
'leaves'] - infos[
'new'][
'leaves']
446 added_leaves = infos[
'new'][
'leaves'] - infos[
'old'][
'leaves']
448 if args.branches_of_interest:
449 removed_leaves = filter_branches(removed_leaves)
450 added_leaves = filter_branches(added_leaves)
452 removed_leaves = {l
for l
in removed_leaves
if not skip_leaf(l, skipset)}
453 added_leaves = {l
for l
in added_leaves
if not skip_leaf(l, skipset)}
456 removed_leaves_list =
list(removed_leaves)
457 removed_leaves_list.sort()
458 if args.exact_branches:
459 msg.error(
'the following variables exist only in the old file !')
460 for l
in removed_leaves_list:
461 msg.error(
' - [%s]', l)
463 msg.warning(
'the following variables exist only in the old file !')
464 for l
in removed_leaves_list:
465 msg.warning(
' - [%s]', l)
467 added_leaves_list =
list(added_leaves)
468 added_leaves_list.sort()
469 if args.exact_branches:
470 msg.error(
'the following variables exist only in the new file !')
471 for l
in added_leaves_list:
472 msg.error(
' - [%s]', l)
474 msg.warning(
'the following variables exist only in the new file !')
475 for l
in added_leaves_list:
476 msg.warning(
' - [%s]', l)
479 skip_leaves = [ l.rstrip(
'.')
for l
in removed_leaves | added_leaves |
set(args.ignore_leaves) ]
480 for l
in skip_leaves:
481 msg.debug(
'skipping [%s]', l)
482 skip_leaves = frozenset (skip_leaves)
484 if isinstance(fold.obj, root.TTree):
485 oldBranches =
set(b.GetName().rstrip(
'\0')
for b
in fold.tree.GetListOfBranches())
486 elif isinstance(fold.obj, RNTupleReader):
487 oldBranches = {f.GetFieldName()
for f
in fold.obj.GetDescriptor().GetTopLevelFields()}
488 if isinstance(fnew.obj, root.TTree):
489 newBranches =
set(b.GetName().rstrip(
'\0')
for b
in fnew.tree.GetListOfBranches())
490 elif isinstance(fnew.obj, root.RNTupleReader):
491 newBranches = {f.GetFieldName()
for f
in fnew.obj.GetDescriptor().GetTopLevelFields()}
492 branches = oldBranches & newBranches
494 if args.branches_of_interest:
495 branches_of_interest = args.branches_of_interest
498 for regex
in branches_of_interest:
499 test = re.compile(regex)
500 if not {l
for l
in infos[
'new'][
'leaves']
if test.match(l)}:
501 msg.error(f
'no match in new file for branch of interest: {regex}')
504 for branch_of_interest
in branches_of_interest:
506 r = re.compile(branch_of_interest)
507 BOI_matches.update(
filter(r.match, branches))
511 if len(BOI_matches)<1:
512 msg.error(
'No matching branches found in both files for supplied branches of interest, quitting.')
514 msg.info(
'only the following branches of interest will be compared: ')
515 for l
in BOI_matches:
516 msg.info(
' - [%s]', l)
517 branches = BOI_matches
519 msg.info(
'comparing [%s] leaves over entries...', len(infos[
'old'][
'leaves'] & infos[
'new'][
'leaves']))
522 if args.exact_branches:
523 n_bad += len(removed_leaves) + len(added_leaves)
525 summary = collections.defaultdict(int)
def get_event_range(entry):
    """Translate the requested entries into slice bounds ``(smin, smax)``.

    Supported inputs:
      * ``"min:max"``, ``"min:"`` or ``":max"`` -- a bound that is missing
        or not a decimal number is left open (0 for the start, ``None``
        for the end);
      * a string holding a decimal count -- the first ``count`` events;
      * an ``int`` count -- the first ``count`` events.

    A count that is not positive selects all events. Any other string is
    treated as "all events"; an input that is neither ``str`` nor ``int``
    additionally logs a warning.

    Returns:
        tuple ``(smin, smax)`` usable as ``sequence[smin:smax]``.
    """
    smin, smax = 0, None

    if isinstance(entry, str):
        if ':' in entry:
            vals = entry.split(':')
            # isdecimal() (unlike isdigit()) only accepts what int() can
            # parse, so inputs such as "²" no longer raise ValueError.
            if vals[0].isdecimal():
                smin = int(vals[0])
            if vals[1].isdecimal():
                smax = int(vals[1])
        elif entry.isdecimal():
            count = int(entry)
            smax = count if count > 0 else None
    elif isinstance(entry, int):
        smax = entry if entry > 0 else None
    else:
        msg.warning(f"Unknown entries argument {entry}, will compare all events...")

    msg.debug(f"Event slice is parsed as [{smin},{smax}]")
    return smin, smax
552 smin, smax = get_event_range(itr_entries)
553 msg.debug(
"Indices/Event Numbers of old events ...")
554 idx_old = ordered_indices(fold.obj)[smin:smax]
555 msg.debug(
"Indices/Event Numbers of new events ...")
556 idx_new = ordered_indices(fnew.obj)[smin:smax]
557 itr_entries_old, event_numbers_old =
list(map(list,zip(*idx_old)))
558 itr_entries_new, event_numbers_new =
list(map(list,zip(*idx_new)))
559 msg.debug(f
"List of old indices {itr_entries_old}")
560 msg.debug(f
"List of new indices {itr_entries_new}")
561 msg.debug(f
"List of old events {event_numbers_old}")
562 msg.debug(f
"List of new events {event_numbers_new}")
563 if event_numbers_old != event_numbers_new:
564 msg.error(
'Events differ, quitting!')
565 msg.error(f
"List of old events {event_numbers_old}")
566 msg.error(f
"List of new events {event_numbers_new}")
569 itr_entries_old = itr_entries
570 itr_entries_new = itr_entries
572 branches =
sorted(branches)
573 old_dump_iter = fold.dump(args.tree_name, itr_entries_old, branches,
True,
False)
574 new_dump_iter = fnew.dump(args.tree_name, itr_entries_new, branches,
True,
False)
def leafname_fromdump(entry):
    """Return the dotted leaf name of a dump entry.

    ``entry[2]`` holds the string path tokens of the entry; tokens that
    consist only of digits are left out and the rest are joined with '.'.
    """
    name_tokens = [token for token in entry[2] if not token.isdigit()]
    return '.'.join(name_tokens)
def elindices_fromdump(entry):
    """Return the numeric path tokens of a dump entry as integers.

    ``entry[2]`` holds the string path tokens of the entry; every token
    made up only of digits is converted with ``int`` and returned in its
    original order.
    """
    indices = []
    for token in entry[2]:
        if token.isdigit():
            indices.append(int(token))
    return indices
590 def reach_next(dump_iter, skip_leaves, skip_dict, leaves_prefix=None):
594 entry =
next(dump_iter)
595 except StopIteration:
598 entry2_orig = entry[2][0]
599 if isinstance(fold.obj, root.TTree):
600 entry[2][0] = entry[2][0].rstrip(
'.\0')
601 elif isinstance(fold.obj, RNTupleReader):
602 entry[2][0] = entry[2][0].rstrip(
':')
604 entry[2][0] = entry[2][0].
replace(leaves_prefix,
'')
614 skip = skip_dict.setdefault (entry2_orig, entry[1])
615 if skip > 0
and skip != entry[1]:
619 if not skip_leaf(leafname_fromdump(entry), skip_leaves):
621 skip_dict[entry2_orig] = -1
622 msg.debug(
'SKIP: {}'.
format(leafname_fromdump(entry)))
632 d_old = reach_next(old_dump_iter, skip_leaves, old_skip_dict, args.leaves_prefix)
634 d_new = reach_next(new_dump_iter, skip_leaves, new_skip_dict, args.leaves_prefix)
636 if not d_new
and not d_old:
642 if (args.order_trees
and d_old
and d_new
and d_old[2:] == d_new[2:])
or d_old == d_new:
647 tree_name, ientry, iname, iold = d_old
649 msg.debug(
"try to delete 'ientry', 'iname', 'iold'")
650 try: del ientry, iname, iold
651 except NameError:
pass
653 tree_name, jentry, jname, inew = d_new
655 msg.debug(
"try to delete 'jentry', 'jname', 'inew'")
656 try: del jentry, jname, inew
657 except NameError:
pass
661 if jname[-1]
in args.known_hacks:
664 summary[leafname_fromdump(d_new)] += 1
669 if iname[-1]
in args.known_hacks:
672 summary[leafname_fromdump(d_old)] += 1
676 idiff = _vecdiff (iold, inew, args.nan_equal)
683 iname.insert(-1,
str(idiff))
684 jname.insert(-1,
str(idiff))
688 if all([isinstance(x,Real)
and isnan(x)
for x
in [iold,inew]]):
693 if iname[-1]
in args.known_hacks
or jname[-1]
in args.known_hacks:
700 id_old = dict(idx_old)[ientry]
701 id_new = dict(idx_new)[jentry]
706 if not args.order_trees:
707 in_synch = d_old
and d_new
and d_old[:-1] == d_new[:-1]
709 in_synch = d_old
and d_new
and d_old[0] == d_new[0]
and d_old[2] == d_new[2]
and id_old == id_new
713 msg.info(
'::sync-old %s',
'.'.
join([
"%03i"%ientry]+d_old[2]))
715 msg.info(
'::sync-old ABSENT')
717 msg.info(
'::sync-new %s',
'.'.
join([
"%03i"%jentry]+d_new[2]))
719 msg.info(
'::sync-new ABSENT')
724 summary[leafname_fromdump(d_new)] += 1
727 summary[leafname_fromdump(d_old)] += 1
729 branch_old = f
"{id_old}.{d_old[2][0]}"
730 branch_new = f
"{id_new}.{d_new[2][0]}"
731 leaf_old = leafname_fromdump(d_old)
732 leaf_new = leafname_fromdump(d_new)
733 indices_old = elindices_fromdump(d_old)
734 indices_new = elindices_fromdump(d_new)
740 if branch_old > branch_new:
742 elif branch_old < branch_new:
745 if leaf_old > leaf_new:
747 elif leaf_old < leaf_new:
749 elif indices_old
and indices_new
and len(indices_old) == len(indices_new):
750 if indices_old > indices_new:
752 elif indices_old < indices_new:
756 if read_old
and not read_new:
758 msg.info(
'::sync-old skipping entry')
760 summary[leaf_old] += 1
761 elif read_new
and not read_old:
763 msg.info(
'::sync-new skipping entry')
765 summary[leaf_new] += 1
767 msg.error(
'::sync attempt failed, bailing out...')
768 msg.error(f
"::sync-old Leaf vs Index : {leaf_old} vs {indices_old}")
769 msg.error(f
"::sync-new Leaf vs Index : {leaf_new} vs {indices_new}")
772 summary[leaf_old] += 1
773 summary[leaf_new] += 1
777 msg.info(
'*** exit on first error ***')
781 if not args.order_trees:
782 n =
'.'.
join([
"%03i"%ientry]+iname)
784 n =
'.'.
join([
"%03i"%ientry]+iname+[
"%03i"%jentry]+jname)
787 diff_value = 50.*(iold-inew)/(iold+inew)
788 diff_value =
'%.8f%%' % (diff_value,)
792 msg.info(
'%s %r -> %r => diff= [%s]', n, iold, inew, diff_value)
794 summary[leafname_fromdump(d_old)] += 1
796 if iname[0]
in args.enforce_leaves
or jname[0]
in args.enforce_leaves:
797 msg.info(
"don't compare further")
801 msg.info(
'Found [%s] identical leaves', n_good)
802 msg.info(
'Found [%s] different leaves', n_bad)
805 keys =
sorted(summary.keys())
808 msg.info(
' [%s]: %i leaves differ', n, v)
812 if (
not fold.allgood)
or (
not fnew.allgood):
813 msg.error(
'NOTE: there were errors during the dump')
814 msg.info(
'fold.allgood: %s' , fold.allgood)
815 msg.info(
'fnew.allgood: %s' , fnew.allgood)
819 if (isinstance(fold.obj, root.TTree)
and isinstance(fnew.obj, root.TTree)
or
820 isinstance(fold.obj, RNTupleReader)
and isinstance(fnew.obj, RNTupleReader)):
821 ndiff = diff_obj(fold, fnew, args)
823 raise NotImplementedError(
"Cannot compare object of type=%s to object of type=%s" % (
type(fold.obj),
type(fnew.obj)))
825 msg.error(
'files differ!')
827 msg.info(
'all good.')