143 """diff two ROOT files (containers and sizes)"""
149 gc.set_threshold (100000)
151 import PyUtils.RootUtils
as ru
152 root = ru.import_root()
154 RNTupleReader = root.RNTupleReader
155 except AttributeError:
156 RNTupleReader = root.Experimental.RNTupleReader
159 if 'AtlasProject' in environ
and environ[
'AtlasProject'] ==
'Athena':
160 root.xAOD.Init().ignore()
161 root.xAOD.ParticleContainer_v1
162 root.xAOD.DiTauJetContainer_v1
164 import PyUtils.Logging
as L
165 msg = L.logging.getLogger(
'diff-root')
167 msg.setLevel(L.logging.VERBOSE)
169 msg.setLevel(L.logging.INFO)
171 from PyUtils.Helpers
import ShutUp
173 if args.entries ==
'':
176 msg.info(
'comparing tree [%s] in files:', args.tree_name)
177 msg.info(
' old: [%s]', args.old)
178 msg.info(
' new: [%s]', args.new)
179 msg.info(
'branches of interest: %s', args.branches_of_interest)
180 msg.info(
'ignore leaves: %s', args.ignore_leaves)
181 msg.info(
'enforce leaves: %s', args.enforce_leaves)
182 msg.info(
'leaves prefix: %s', args.leaves_prefix)
183 msg.info(
'hacks: %s', args.known_hacks)
184 msg.info(
'entries: %s', args.entries)
185 msg.info(
'mode: %s', args.mode)
186 msg.info(
'error mode: %s', args.error_mode)
187 msg.info(
'order trees: %s', args.order_trees)
188 msg.info(
'exact branches: %s', args.exact_branches)
190 import PyUtils.Helpers
as H
192 fold = ru.RootFileDumper(args.old, args.tree_name)
193 fnew = ru.RootFileDumper(args.new, args.tree_name)
def obj_info(obj, args):
    """Summarise a ROOT object by dispatching on its concrete type.

    A ``root.TTree`` is handed to ``_tree_info``.  An ``RNTupleReader`` is
    handed to ``_reader_info`` from inside an ``H.ShutUp`` context that is
    configured with a pattern for the "has no virtual destructor"
    RuntimeWarning text.

    Args:
        obj: the object to summarise (a TTree or an RNTupleReader).
        args: the parsed command options, forwarded unchanged to the helper.

    Returns:
        Whatever the selected helper returns.

    Raises:
        NotImplementedError: if ``obj`` is neither supported type.
    """
    if isinstance(obj, root.TTree):
        return _tree_info(obj, args)

    if isinstance(obj, RNTupleReader):
        warning_filters = [r'.+RuntimeWarning: class "[\w:]+" has no virtual destructor']
        with H.ShutUp(filters=warning_filters):
            return _reader_info(obj, args)

    # Anything else is an object kind this tool does not know how to inspect.
    raise NotImplementedError(f"'obj_info' not implemented for object of {type(obj)=}")
205 def _tree_info(tree, args):
206 nentries = tree.GetEntriesFast()
208 leaves = [l.GetBranch().GetName()
for l
in tree.GetListOfLeaves()
209 if l.GetBranch().GetName()
not in args.ignore_leaves]
210 if args.leaves_prefix:
211 leaves = [l.replace(args.leaves_prefix,
'')
for l
in leaves]
214 'leaves':
set(leaves),
217 def _reader_info(reader, args):
218 nentries = reader.GetNEntries()
220 RFieldVisitor = root.Detail.RFieldVisitor
221 except AttributeError:
222 RFieldVisitor = root.Experimental.Detail.RFieldVisitor
223 class NameVisitor(RFieldVisitor):
224 def __init__(self, names):
227 def VisitField(self, field):
228 if field.GetFieldName()[0] ==
'_':
230 self.names.append(field.GetQualifiedFieldName())
233 subFields = field.GetConstSubfields()
234 except AttributeError:
235 subFields = field.GetSubFields()
239 f.AcceptVisitor(self)
240 def VisitFieldZero(self, field):
243 subFields = field.GetConstSubfields()
244 except AttributeError:
245 subFields = field.GetSubFields()
249 f.AcceptVisitor(self)
251 fieldZero = reader.GetModel().GetConstFieldZero()
253 visitor = NameVisitor(names)
254 fieldZero.AcceptVisitor(visitor)
255 leaves = visitor.names
256 leaves = [l
for l
in leaves
if l
not in args.ignore_leaves]
257 if args.leaves_prefix:
258 leaves = [l.replace(args.leaves_prefix,
'')
for l
in leaves]
261 'leaves':
set(leaves),
def ordered_indices(obj, reverse_order=False):
    """Return the ordered entry indices of a ROOT object, dispatching on type.

    Args:
        obj: a ``root.TTree`` or an ``RNTupleReader``.
        reverse_order: forwarded unchanged to the type-specific helper.

    Returns:
        The result of ``_tree_ordered_indices`` for a TTree, or of
        ``_reader_ordered_indices`` for an RNTupleReader.

    Raises:
        NotImplementedError: if ``obj`` is neither supported type.
    """
    if isinstance(obj, root.TTree):
        return _tree_ordered_indices(obj, reverse_order)

    if isinstance(obj, RNTupleReader):
        return _reader_ordered_indices(obj, reverse_order)

    # No ordering strategy exists for any other kind of object.
    raise NotImplementedError(f"'ordered_indices' not implemented for object of {type(obj)=}")
272 def _reader_ordered_indices(reader, reverse_order=False):
276 nevts = reader.GetNEntries()
278 eiDict = {(): [
'EventInfoAuxDyn:eventNumber'],
279 (
'eventNumber',): [
'EventInfoAux:',
281 'xAOD::EventAuxInfo_v3_EventInfoAux:',
282 'xAOD::EventAuxInfo_v2_EventInfoAux:',
283 'xAOD::EventAuxInfo_v1_EventInfoAux:',
284 'xAOD::EventAuxInfo_v3_Bkg_EventInfoAux:',
285 'xAOD::EventAuxInfo_v2_Bkg_EventInfoAux:',
286 'xAOD::EventAuxInfo_v1_Bkg_EventInfoAux:'],
287 (
'm_event_ID',
'm_event_number'): [
'McEventInfo',
288 'ByteStreamEventInfo',
289 'EventInfo_p4_McEventInfo',
290 'EventInfo_p4_ByteStreamEventInfo']}
293 """Find the relevant attributes for reading the event number"""
295 kInvalidDescriptorId = root.kInvalidDescriptorId
296 except AttributeError:
297 kInvalidDescriptorId = root.Experimental.kInvalidDescriptorId
298 for path, names in eiDict.items():
300 if (fieldId := reader.GetDescriptor().FindFieldId(name)) != kInvalidDescriptorId:
301 typeName = reader.GetDescriptor().GetFieldDescriptor(fieldId).GetTypeName()
302 return (name, typeName), path
306 name, attrs = find_attrs()
307 if name is None or attrs is None:
308 msg.error('Cannot read event info, will bail out.')
309 msg.error(f"Tried {name=} and attributes {attrs=}")
312 view = reader.GetView[name[1]](name[0])
313 for idx in range(nevts):
315 msg.debug('Read {} events from the input so far'.format(idx))
317 event_number = reduce(getattr, attrs, value)
318 msg.debug('Idx : EvtNum {:10d} : {}'.format(idx, event_number))
319 dict_in[idx] = event_number
321 # Sort the dictionary by event numbers
322 dict_out = dict(sorted(dict_in.items(), key=operator.itemgetter(1), reverse=reverse_order))
324 # Write out the ordered index and event number pairs
325 return list(dict_out.items())
327 def _tree_ordered_indices(tree, reverse_order=False):
328 from collections import OrderedDict
332 nevts = tree.GetEntriesFast()
334 eiDict = {'':['EventInfoAuxDyn.eventNumber'],
335 'eventNumber':['EventInfoAux.',
337 'xAOD::EventAuxInfo_v3_EventInfoAux.',
338 'xAOD::EventAuxInfo_v2_EventInfoAux.',
339 'xAOD::EventAuxInfo_v1_EventInfoAux.',
340 'xAOD::EventAuxInfo_v3_Bkg_EventInfoAux.',
341 'xAOD::EventAuxInfo_v2_Bkg_EventInfoAux.',
342 'xAOD::EventAuxInfo_v1_Bkg_EventInfoAux.'],
343 'm_event_ID m_event_number':['McEventInfo',
344 'ByteStreamEventInfo',
345 'EventInfo_p4_McEventInfo',
346 'EventInfo_p4_ByteStreamEventInfo']}
349 """Find the relevant attributes for reading the event numbe
r"""
350 for ii, jj in eiDict.items():
352 if hasattr(tree, kk):
358 attr1, attr2 = find_attrs()
359 if attr1 is None or attr2 is None:
360 msg.error('Cannot read event info, will bail out.')
361 msg.error(f"Tried attributes {attr1} and {attr2}")
363 attrs = [attr1] + attr2.split()
365 tree.SetBranchStatus ('*', 0)
366 tree.SetBranchStatus (attr1, 1)
368 for idx in range(0, nevts):
370 msg.debug('Read {} events from the input so far'.format(idx))
372 event_number = reduce(getattr, attrs, tree)
373 msg.debug('Idx : EvtNum {:10d} : {}'.format(idx,event_number))
374 dict_in[idx] = event_number
376 tree.SetBranchStatus ('*', 1)
378 # Sort the dictionary by event numbers
379 dict_out = OrderedDict(sorted(dict_in.items(), key=operator.itemgetter(1), reverse = reverse_order))
381 # Write out the ordered index and event number pairs
382 return [(idx, ival) for idx, ival in dict_out.items()]
384 def diff_obj(fold, fnew, args):
387 'old' : obj_info(fold.obj, args),
388 'new' : obj_info(fnew.obj, args),
391 nentries = min(infos['old']['entries'],
392 infos['new']['entries'])
393 itr_entries = nentries
394 if args.entries in (-1,'','-1'):
395 #msg.info('comparing over [%s] entries...', nentries)
396 itr_entries = nentries
397 if infos['old']['entries'] != infos['new']['entries']:
398 msg.info('different numbers of entries:')
399 msg.info(' old: [%s]', infos['old']['entries'])
400 msg.info(' new: [%s]', infos['new']['entries'])
401 msg.info('=> comparing [%s] first entries...', nentries)
403 itr_entries = args.entries
405 msg.info('comparing over [%s] entries...', itr_entries)
408 def skip_leaf(name_from_dump, skip_leaves):
409 """ Here decide if the current leaf should be skipped.
410 Previously the matching was done based on the full or partial
411 leaf name. E.g. foo.bar.zzz would be skipped if any of the
412 following were provided:
416 * Any of the foo, bar, or zzz
417 Now, we make a regex matching such that the user doesn't
418 need to provide full branch names.
420 for pattern in skip_leaves:
422 if re.match(pattern, name_from_dump):
424 except re.error as e:
425 from traceback import format_exception
426 msg.error("Exception '%s', pattern %r, line %s, column %s\n%s",
427 e, e.pattern, e.lineno, e.colno, "".join(format_exception(e)))
def skip_leaf_entry(entry2, skip_leaves):
    """Decide whether a dumped leaf, given as its name components, is skipped.

    Args:
        entry2: iterable of string components of the dumped leaf name; the
            components made up only of digits are left out of the name.
        skip_leaves: the skip patterns, passed through to ``skip_leaf``.

    Returns:
        The result of ``skip_leaf`` for the dot-joined, digit-free leaf name.
    """
    name_parts = [part for part in entry2 if not part.isdigit()]
    return skip_leaf('.'.join(name_parts), skip_leaves)
437 def filter_branches(leaves):
439 for regex in args.branches_of_interest:
440 test = re.compile(regex)
441 matches.update({l for l in leaves if test.match(l)})
444 skipset = frozenset(args.ignore_leaves)
445 removed_leaves = infos['old']['leaves'] - infos['new']['leaves']
446 added_leaves = infos['new']['leaves'] - infos['old']['leaves']
448 if args.branches_of_interest:
449 removed_leaves = filter_branches(removed_leaves)
450 added_leaves = filter_branches(added_leaves)
452 removed_leaves = {l for l in removed_leaves if not skip_leaf(l, skipset)}
453 added_leaves = {l for l in added_leaves if not skip_leaf(l, skipset)}
456 removed_leaves_list = list(removed_leaves)
457 removed_leaves_list.sort()
458 if args.exact_branches:
459 msg.error('the following variables exist only in the old file !')
460 for l in removed_leaves_list:
461 msg.error(' - [%s]', l)
463 msg.warning('the following variables exist only in the old file !')
464 for l in removed_leaves_list:
465 msg.warning(' - [%s]', l)
467 added_leaves_list = list(added_leaves)
468 added_leaves_list.sort()
469 if args.exact_branches:
470 msg.error('the following variables exist only in the new file !')
471 for l in added_leaves_list:
472 msg.error(' - [%s]', l)
474 msg.warning('the following variables exist only in the new file !')
475 for l in added_leaves_list:
476 msg.warning(' - [%s]', l)
478 # need to remove trailing dots as they confuse reach_next()?
479 skip_leaves = [ l.rstrip('.') for l in removed_leaves | added_leaves | set(args.ignore_leaves) ]
480 for l in skip_leaves:
481 msg.debug('skipping [%s]', l)
482 skip_leaves = frozenset (skip_leaves)
# Collect the top-level branch (TTree) or field (RNTuple) names of each input
# and keep only those present in both files.
# TTree branch names have trailing NUL characters stripped before comparison.
if isinstance(fold.obj, root.TTree):
    oldBranches = set(b.GetName().rstrip('\0') for b in fold.tree.GetListOfBranches())
elif isinstance(fold.obj, RNTupleReader):
    oldBranches = {f.GetFieldName() for f in fold.obj.GetDescriptor().GetTopLevelFields()}
if isinstance(fnew.obj, root.TTree):
    newBranches = set(b.GetName().rstrip('\0') for b in fnew.tree.GetListOfBranches())
# Use the resolved RNTupleReader alias, exactly as for the old file: looking up
# root.RNTupleReader directly raises AttributeError on ROOT builds where the
# class is only available as root.Experimental.RNTupleReader.
elif isinstance(fnew.obj, RNTupleReader):
    newBranches = {f.GetFieldName() for f in fnew.obj.GetDescriptor().GetTopLevelFields()}
branches = oldBranches & newBranches
494 if args.branches_of_interest:
495 branches_of_interest = args.branches_of_interest
497 # check that all branches of interest exist in the new file
498 for regex in branches_of_interest:
499 test = re.compile(regex)
500 if not {l for l in infos['new']['leaves'] if test.match(l)}:
501 msg.error(f'no match in new file for branch of interest: {regex}')
504 for branch_of_interest in branches_of_interest:
506 r = re.compile(branch_of_interest)
507 BOI_matches.update(filter(r.match, branches))
511 if len(BOI_matches)<1:
512 msg.error('No matching branches found in both files for supplied branches of interest, quitting.')
514 msg.info('only the following branches of interest will be compared: ')
515 for l in BOI_matches:
516 msg.info(' - [%s]', l)
517 branches = BOI_matches
519 msg.info('comparing [%s] leaves over entries...', len(infos['old']['leaves'] & infos['new']['leaves']))
522 if args.exact_branches:
523 n_bad += len(removed_leaves) + len(added_leaves)
525 summary = collections.defaultdict(int)
527 def get_event_range(entry):
530 if isinstance(entry, str):
531 # We support three main cases in this format: 5:10 (5th to 10th),
532 # 5: (5th to the end), and :5 (from the start to 5th)
534 vals = entry.split(':')
535 smin = int(vals[0]) if len(vals) > 0 and vals[0].isdigit() else 0
536 smax = int(vals[1]) if len(vals) > 1 and vals[1].isdigit() else None
537 # This is the case where the user inputs the total number of events
538 elif entry.isdigit():
540 smax = int(entry) if int(entry) > 0 else None
541 # Handle the case where the input is a number (i.e. default)
542 elif isinstance(entry, int):
544 smax = entry if entry > 0 else None
545 # If we come across an unhandled case, bail out
547 msg.warning(f"Unknown entries argument {entry}, will compare all events...")
548 msg.debug(f"Event slice is parsed as [{smin},{smax}]")
552 smin, smax = get_event_range(itr_entries)
553 msg.debug("Indices/Event Numbers of old events ...")
554 idx_old = ordered_indices(fold.obj)[smin:smax]
555 msg.debug("Indices/Event Numbers of new events ...")
556 idx_new = ordered_indices(fnew.obj)[smin:smax]
557 itr_entries_old, event_numbers_old = list(map(list,zip(*idx_old)))
558 itr_entries_new, event_numbers_new = list(map(list,zip(*idx_new)))
559 msg.debug(f"List of old indices {itr_entries_old}")
560 msg.debug(f"List of new indices {itr_entries_new}")
561 msg.debug(f"List of old events {event_numbers_old}")
562 msg.debug(f"List of new events {event_numbers_new}")
563 if event_numbers_old != event_numbers_new:
564 msg.error('Events differ, quitting!')
565 msg.error(f"List of old events {event_numbers_old}")
566 msg.error(f"List of new events {event_numbers_new}")
569 itr_entries_old = itr_entries
570 itr_entries_new = itr_entries
572 branches = sorted(branches)
573 old_dump_iter = fold.dump(args.tree_name, itr_entries_old, branches, True, False)
574 new_dump_iter = fnew.dump(args.tree_name, itr_entries_new, branches, True, False)
578 def leafname_fromdump(entry):
582 return '.'.join(s for s in entry[2] if not s.isdigit())
584 def elindices_fromdump(entry):
588 return [int(s) for s in entry[2] if s.isdigit()]
590 def reach_next(dump_iter, skip_leaves, skip_dict, leaves_prefix=None):
594 entry = next(dump_iter)
595 except StopIteration:
598 # entry is a tuple with these contents:
599 # entry[0]: Name of the tree being read
600 # entry[1]: Index of the current event in the tree
601 # entry[2]: Leaf being dumped, split at periods.
602 # So for a variable of an xAOD object, entry[2][0]
603 # is the aux container name (ending in 'Aux.') and
604 # entry[2][1] is the variable name.
605 # entry[3]: The value of the variable.
606 # For an xAOD variable, this will typically be a std::vector.
608 #if entry[2][0].find('xAOD::TrackParticleAuxContainer_v5_HLT_xAOD__TrackParticleContainer_InDetTrigTrackingxAODCnv_Bphysics_FTFAux') >= 0:
609 # print ('aaa', entry)
611 entry2_orig = entry[2][0]
612 if isinstance(fold.obj, root.TTree):
613 entry[2][0] = entry[2][0].rstrip('.\0') # clean branch name
614 elif isinstance(fold.obj, RNTupleReader):
615 entry[2][0] = entry[2][0].rstrip(':') # clean branch name
617 entry[2][0] = entry[2][0].replace(leaves_prefix, '')
619 # Check whether we should skip this leaf, memoizing
621 # Earlier we tried to do this by looking at only the first
622 # element of entry[2]. On the first event we would record
623 # whether any leaves were skipped for this branch, so on
624 # subsequent events we can quickly test that no leaves
625 # were skipped. However, that turned out not to work
626 # because for xAOD variables that are vectors (and are
627 # thus stored as nested vectors), we don't see them at all
628 # if the container is empty.
629 # So instead just memoize the result of skip_leaf.
630 leafname = leafname_fromdump(entry)
631 skip = skip_dict.setdefault (entry2_orig, None)
633 skip = skip_leaf(leafname, skip_leaves)
636 msg.debug('SKIP: {}'.format(leafname))
646 d_old = reach_next(old_dump_iter, skip_leaves, old_skip_dict, args.leaves_prefix)
648 d_new = reach_next(new_dump_iter, skip_leaves, new_skip_dict, args.leaves_prefix)
650 if not d_new and not d_old:
656 if (args.order_trees and d_old and d_new and d_old[2:] == d_new[2:]) or d_old == d_new:
661 tree_name, ientry, iname, iold = d_old
663 msg.debug("try to delete 'ientry', 'iname', 'iold'")
664 try: del ientry, iname, iold
665 except NameError: pass
667 tree_name, jentry, jname, inew = d_new
669 msg.debug("try to delete 'jentry', 'jname', 'inew'")
670 try: del jentry, jname, inew
671 except NameError: pass
674 # FIXME: that's a plain (temporary?) hack
675 if jname[-1] in args.known_hacks:
678 summary[leafname_fromdump(d_new)] += 1
682 # FIXME: that's a plain (temporary?) hack
683 if iname[-1] in args.known_hacks:
686 summary[leafname_fromdump(d_old)] += 1
690 idiff = _vecdiff (iold, inew, args.nan_equal)
697 iname.insert(-1, str(idiff))
698 jname.insert(-1, str(idiff))
700 # for regression testing we should have NAN == NAN
702 if all([isinstance(x,Real) and isnan(x) for x in [iold,inew]]):
706 # FIXME: that's a plain (temporary?) hack
707 if iname[-1] in args.known_hacks or jname[-1] in args.known_hacks:
712 # Identifiers are event numbers if we're ordering the trees, otherwise tree indices
714 id_old = dict(idx_old)[ientry]
715 id_new = dict(idx_new)[jentry]
720 if not args.order_trees:
721 in_synch = d_old and d_new and d_old[:-1] == d_new[:-1]
723 in_synch = d_old and d_new and d_old[0] == d_new[0] and d_old[2] == d_new[2] and id_old == id_new
725 if _is_detailed(args):
727 msg.info('::sync-old %s','.'.join(["%03i"%ientry]+d_old[2]))
729 msg.info('::sync-old ABSENT')
731 msg.info('::sync-new %s','.'.join(["%03i"%jentry]+d_new[2]))
733 msg.info('::sync-new ABSENT')
738 summary[leafname_fromdump(d_new)] += 1
741 summary[leafname_fromdump(d_old)] += 1
743 branch_old = f"{id_old}.{d_old[2][0]}"
744 branch_new = f"{id_new}.{d_new[2][0]}"
745 leaf_old = leafname_fromdump(d_old)
746 leaf_new = leafname_fromdump(d_new)
747 indices_old = elindices_fromdump(d_old)
748 indices_new = elindices_fromdump(d_new)
749 # Branches/Leaves are alphabetically ordered
750 # If we're out-of-sync, we try to figure out
751 # if we should advance the old or the new branch
752 # For same branches, we look at the full leaf name
753 # If that fails we look at the indices
754 if branch_old > branch_new:
756 elif branch_old < branch_new:
759 if leaf_old > leaf_new:
761 elif leaf_old < leaf_new:
763 elif indices_old and indices_new and len(indices_old) == len(indices_new):
764 if indices_old > indices_new:
766 elif indices_old < indices_new:
768 # Let's see if we can reconcile
769 # If not, just bail out to avoid false positives
770 if read_old and not read_new:
771 if _is_detailed(args):
772 msg.info('::sync-old skipping entry')
774 summary[leaf_old] += 1
775 elif read_new and not read_old:
776 if _is_detailed(args):
777 msg.info('::sync-new skipping entry')
779 summary[leaf_new] += 1
781 msg.error('::sync attempt failed, bailing out...')
782 msg.error(f"::sync-old Leaf vs Index : {leaf_old} vs {indices_old}")
783 msg.error(f"::sync-new Leaf vs Index : {leaf_new} vs {indices_new}")
786 summary[leaf_old] += 1
787 summary[leaf_new] += 1
790 if _is_exit_early(args):
791 msg.info('*** exit on first error ***')
795 if not args.order_trees:
796 n = '.'.join(["%03i"%ientry]+iname)
798 n = '.'.join(["%03i"%ientry]+iname+["%03i"%jentry]+jname)
801 diff_value = 50.*(iold-inew)/(iold+inew)
802 diff_value = '%.8f%%' % (diff_value,)
805 if _is_detailed(args):
806 msg.info('%s %r -> %r => diff= [%s]', n, iold, inew, diff_value)
808 summary[leafname_fromdump(d_old)] += 1
810 if iname[0] in args.enforce_leaves or jname[0] in args.enforce_leaves:
811 msg.info("don't compare further")
813 pass # loop over events/branches
815 msg.info('Found [%s] identical leaves', n_good)
816 msg.info('Found [%s] different leaves', n_bad)
818 if not _is_summary(args):
819 keys = sorted(summary.keys())
822 msg.info(' [%s]: %i leaves differ', n, v)
826 if (not fold.allgood) or (not fnew.allgood):
827 msg.error('NOTE: there were errors during the dump')
828 msg.info('fold.allgood: %s' , fold.allgood)
829 msg.info('fnew.allgood: %s' , fnew.allgood)
833 if (isinstance(fold.obj, root.TTree) and isinstance(fnew.obj, root.TTree) or
834 isinstance(fold.obj, RNTupleReader) and isinstance(fnew.obj, RNTupleReader)):
835 ndiff = diff_obj(fold, fnew, args)
837 raise NotImplementedError("Cannot compare object of type=%s to object of type=%s" % (type(fold.obj), type(fnew.obj)))
839 msg.error('files differ!')
841 msg.info('all good.')