ATLAS Offline Software
Functions | Variables
python.scripts.diff_root_files Namespace Reference

Functions

def _is_detailed (args)
 classes ----------------------------------------------------------------— More...
 
def _is_summary (args)
 
def _is_exit_early (args)
 
def _vecdiff (v1, v2, nan_equal)
 
def main (args)
 

Variables

 __doc__
 
 __author__
 
 g_ALLOWED_MODES
 globals ----------------------------------------------------------------— More...
 
 g_ALLOWED_ERROR_MODES
 
 _vectypes
 
 names
 

Function Documentation

◆ _is_detailed()

def python.scripts.diff_root_files._is_detailed (   args)
private

classes ----------------------------------------------------------------—

functions --------------------------------------------------------------—

Definition at line 26 of file diff_root_files.py.

def _is_detailed(args):
    """Tell whether ``args.mode`` selects the 'detailed' mode (display everything)."""
    selected_mode = args.mode
    return selected_mode == 'detailed'
28 

◆ _is_exit_early()

def python.scripts.diff_root_files._is_exit_early (   args)
private

Definition at line 32 of file diff_root_files.py.

def _is_exit_early(args):
    """Tell whether ``args.error_mode`` selects 'bailout' (bail out on first error)."""
    selected_error_mode = args.error_mode
    return selected_error_mode == 'bailout'
34 
35 # Possibly compare two vectors. If nan_equal, then consider NaNs to be equal.
36 # Returns None if we have two matching vectors.
37 # If we have two vectors that differ at some element, return that index.
38 # Otherwise return -1 (inputs not vectors, etc).

◆ _is_summary()

def python.scripts.diff_root_files._is_summary (   args)
private

Definition at line 29 of file diff_root_files.py.

def _is_summary(args):
    """Tell whether ``args.mode`` selects the 'summary' mode (only report the number of differences)."""
    selected_mode = args.mode
    return selected_mode == 'summary'
31 

◆ _vecdiff()

def python.scripts.diff_root_files._vecdiff (   v1,
  v2,
  nan_equal 
)
private

Definition at line 52 of file diff_root_files.py.

def _vecdiff (v1, v2, nan_equal):
    """Locate the first element at which two vector objects differ.

    Returns:
        ``None`` when both vectors hold matching elements throughout,
        the index of the first mismatching element otherwise, and ``-1``
        when the inputs are not comparable as vectors: the
        ``__cpp_name__`` found on ``type(type(v1))`` is not listed in
        ``_vectypes``, the two objects are of different types, or their
        ``size()`` differs.

    When ``nan_equal`` is true, two unequal elements that are both real
    NaNs are treated as matching.
    """
    cpp_name = getattr(type(type(v1)), '__cpp_name__', None)
    if cpp_name not in _vectypes:
        return -1
    if type(v1) is not type(v2):
        return -1
    length = v1.size()
    if length != v2.size():
        return -1

    if not nan_equal:
        for pos in range(length):
            if v1[pos] != v2[pos]:
                return pos
        return None

    def both_nan(left, right):
        # Evaluate both operands before combining, as a list, so that
        # the NaN test is applied to each value of the pair.
        flags = [isinstance(val, Real) and isnan(val) for val in (left, right)]
        return all(flags)

    for pos in range(length):
        left = v1[pos]
        right = v2[pos]
        if left != right and not both_nan(left, right):
            return pos
    return None
71 
72 @acmdlib.command(name='diff-root')
73 @acmdlib.argument('old',
74  help='path to the reference ROOT file to analyze')
75 @acmdlib.argument('new',
76  help='path to the ROOT file to compare to the reference')
77 @acmdlib.argument('-t', '--tree-name',
78  default=None,
79  help='name of the TTree or RNTuple to compare')
80 @acmdlib.argument('--branches-of-interest',
81  nargs='+',
82  default=set(),
83  help='set of regex matching names of branches to compare; assumes all if none specified.')
84 @acmdlib.argument('--ignore-leaves',
85  nargs='+',
86  default=('Token', 'index_ref', r'(.*)_timings\.(.*)', r'(.*)_mems\.(.*)', r'(.*)TrigCostContainer(.*)'),
87  help='set of leaves names to ignore from comparison; can be a branch name or a partial leaf name (accepts regex)')
88 @acmdlib.argument('--enforce-leaves',
89  nargs='+',
90  default=('BCID',),
91  help='set of leaves names we make sure to compare')
92 @acmdlib.argument('--leaves-prefix',
93  default='',
94  help='Remove prefix value from all leaves')
95 @acmdlib.argument('--known-hacks',
96  nargs='+',
97  default=('m_athenabarcode', 'm_token',),
98  help='set of leaves which are known to fail (but should be fixed at some point) [default: %(default)s]')
99 @acmdlib.argument('--entries',
100  default='',
101  help='a list of entries (indices, not event numbers) or an expression (like range(3) or 0,2,1 or 0:3) leading to such a list, to compare.')
102 @acmdlib.argument('-v', '--verbose',
103  action='store_true',
104  default=False,
105  help="""Enable verbose printout""")
106 @acmdlib.argument('--order-trees',
107  action='store_true',
108  default=False,
109  help="""To order trees according to event numbers""")
110 @acmdlib.argument('--exact-branches',
111  action='store_true',
112  default=False,
113  help="""Only allow exact list of branches present""")
114 @acmdlib.argument('--mode',
115  choices=g_ALLOWED_MODES,
116  default='detailed',
117  help="""\
118 Enable a particular mode.
119  'summary': only report the number of differences.
120  'semi-detailed': report the number of differences and the leaves that differ.
121  'detailed': display everything.
122 default='%(default)s'.
123 allowed: %(choices)s
124 """
125  )
126 @acmdlib.argument('--error-mode',
127  choices=g_ALLOWED_ERROR_MODES,
128  default='bailout',
129  help="""\
130 Enable a particular error mode.
131  'bailout': bail out on first error.
132  'resilient': keep running.
133 default='%(default)s'.
134 allowed: %(choices)s
135 """
136  )
137 @acmdlib.argument('--nan-equal',
138  action='store_true',
139  default=False,
140  help="""Compare nan as equal to nan""")
141 

◆ main()

def python.scripts.diff_root_files.main (   args)
diff two ROOT files (containers and sizes)

Definition at line 142 of file diff_root_files.py.

142 def main(args):
143  """diff two ROOT files (containers and sizes)"""
144 
145  # We allocate many python objects at once.
146  # Running GC less often by jacking up the threshold speeds things up
147  # considerably.
148  import gc
149  gc.set_threshold (100000)
150 
151  import PyUtils.RootUtils as ru
152  root = ru.import_root() # noqa: F841
153  try:
154  RNTupleReader = root.RNTupleReader
155  except AttributeError:
156  RNTupleReader = root.Experimental.RNTupleReader
157 
158  # Force load some dictionaries to work around ATLASRECTS-6261/ROOT-10940/ATEAM-942
159  if 'AtlasProject' in environ and environ['AtlasProject'] == 'Athena':
160  root.xAOD.Init().ignore()
161  root.xAOD.ParticleContainer_v1
162  root.xAOD.DiTauJetContainer_v1
163 
164  import PyUtils.Logging as L
165  msg = L.logging.getLogger('diff-root')
166  if args.verbose:
167  msg.setLevel(L.logging.VERBOSE)
168  else:
169  msg.setLevel(L.logging.INFO)
170 
171  from PyUtils.Helpers import ShutUp # noqa: F401
172 
173  if args.entries == '':
174  args.entries = -1
175 
176  msg.info('comparing tree [%s] in files:', args.tree_name)
177  msg.info(' old: [%s]', args.old)
178  msg.info(' new: [%s]', args.new)
179  msg.info('branches of interest: %s', args.branches_of_interest)
180  msg.info('ignore leaves: %s', args.ignore_leaves)
181  msg.info('enforce leaves: %s', args.enforce_leaves)
182  msg.info('leaves prefix: %s', args.leaves_prefix)
183  msg.info('hacks: %s', args.known_hacks)
184  msg.info('entries: %s', args.entries)
185  msg.info('mode: %s', args.mode)
186  msg.info('error mode: %s', args.error_mode)
187  msg.info('order trees: %s', args.order_trees)
188  msg.info('exact branches: %s', args.exact_branches)
189 
190  import PyUtils.Helpers as H
191  with H.ShutUp() :
192  fold = ru.RootFileDumper(args.old, args.tree_name)
193  fnew = ru.RootFileDumper(args.new, args.tree_name)
194  pass
195 
def obj_info(obj, args):
    """Collect entry count and leaf names for ``obj``, dispatching on its kind.

    ``root.TTree`` objects are handled by ``_tree_info``. ``RNTupleReader``
    objects are handled by ``_reader_info``, run inside ``H.ShutUp`` with a
    filter for the "has no virtual destructor" RuntimeWarning text.

    Raises:
        NotImplementedError: ``obj`` is neither of the two kinds.
    """
    if isinstance(obj, root.TTree):
        return _tree_info(obj, args)
    if not isinstance(obj, RNTupleReader):
        raise NotImplementedError(f"'obj_info' not implemented for object of {type(obj)=}")
    warning_filters = [r'.+RuntimeWarning: class "[\w:]+" has no virtual destructor']
    with H.ShutUp(filters=warning_filters):
        return _reader_info(obj, args)
204 
def _tree_info(tree, args):
    """Summarise a TTree as ``{'entries': <count>, 'leaves': <set of names>}``.

    Leaves whose name is literally contained in ``args.ignore_leaves`` are
    left out. When ``args.leaves_prefix`` is non-empty, every occurrence of
    it is removed from each remaining name (``str.replace``).
    """
    nentries = tree.GetEntriesFast()

    kept_names = []
    for leaf in tree.GetListOfLeaves():
        # GetBranch().GetName() on a leaf gives the full leaf path name
        full_name = leaf.GetBranch().GetName()
        if full_name not in args.ignore_leaves:
            kept_names.append(full_name)

    prefix = args.leaves_prefix
    if prefix:
        kept_names = [name.replace(prefix, '') for name in kept_names]

    return {'entries': nentries, 'leaves': set(kept_names)}
216 
def _reader_info(reader, args):
    """Summarise an RNTuple as ``{'entries': <count>, 'leaves': <set of names>}``.

    The qualified names of all fields are gathered by walking the model's
    field tree with a visitor. Names literally contained in
    ``args.ignore_leaves`` are left out, and every occurrence of a non-empty
    ``args.leaves_prefix`` is removed from the remaining names.
    """
    nentries = reader.GetNEntries()

    # The visitor base class is looked up under root.Detail first and under
    # root.Experimental.Detail when that attribute does not exist.
    try:
        RFieldVisitor = root.Detail.RFieldVisitor
    except AttributeError:
        RFieldVisitor = root.Experimental.Detail.RFieldVisitor

    def visit_children(field, visitor):
        """Hand every sub-field of ``field`` to ``visitor``."""
        try:
            # ROOT Version: 6.35.01
            children = field.GetConstSubfields()
        except AttributeError:
            # Fall back to the other accessor name when the one above is absent.
            children = field.GetSubFields()
        for child in children:
            child.AcceptVisitor(visitor)

    class NameVisitor(RFieldVisitor):
        """Visitor appending qualified field names to the list it was given."""

        def __init__(self, names):
            super().__init__()
            self.names = names

        def VisitField(self, field):
            # A field whose name starts with '_' is dropped together with
            # everything below it (we return before descending).
            if field.GetFieldName()[0] == '_':
                return
            self.names.append(field.GetQualifiedFieldName())
            visit_children(field, self)

        def VisitFieldZero(self, field):
            # The zero field contributes no name of its own; only descend.
            visit_children(field, self)

    collected = []
    reader.GetModel().GetConstFieldZero().AcceptVisitor(NameVisitor(collected))

    kept_names = [name for name in collected if name not in args.ignore_leaves]
    if args.leaves_prefix:
        kept_names = [name.replace(args.leaves_prefix, '') for name in kept_names]
    return {'entries': nentries, 'leaves': set(kept_names)}
263 
def ordered_indices(obj, reverse_order=False):
    """Return the event-number-ordered index list for ``obj``, dispatching on its kind.

    ``root.TTree`` objects go to ``_tree_ordered_indices`` and
    ``RNTupleReader`` objects to ``_reader_ordered_indices``;
    ``reverse_order`` is passed through unchanged.

    Raises:
        NotImplementedError: ``obj`` is neither of the two kinds.
    """
    if isinstance(obj, root.TTree):
        handler = _tree_ordered_indices
    elif isinstance(obj, RNTupleReader):
        handler = _reader_ordered_indices
    else:
        raise NotImplementedError(f"'ordered_indices' not implemented for object of {type(obj)=}")
    return handler(obj, reverse_order)
271 
def _reader_ordered_indices(reader, reverse_order=False):
    """Return ``(entry index, event number)`` pairs of an RNTuple, sorted by event number.

    The event number is read through a view on the first field listed in
    ``eiDict`` that the reader's descriptor knows; the key under which that
    field is listed is the attribute path followed from the viewed value.
    When no listed field is found, errors are logged and ``[]`` is returned.
    ``reverse_order`` is forwarded to the sort as its ``reverse`` flag.
    """
    from operator import itemgetter

    nevts = reader.GetNEntries()

    # attribute path below the viewed value -> candidate field names
    eiDict = {
        (): ['EventInfoAuxDyn:eventNumber'],
        ('eventNumber',): [
            'EventInfoAux:',
            'Bkg_EventInfoAux:',
            'xAOD::EventAuxInfo_v3_EventInfoAux:',
            'xAOD::EventAuxInfo_v2_EventInfoAux:',
            'xAOD::EventAuxInfo_v1_EventInfoAux:',
            'xAOD::EventAuxInfo_v3_Bkg_EventInfoAux:',
            'xAOD::EventAuxInfo_v2_Bkg_EventInfoAux:',
            'xAOD::EventAuxInfo_v1_Bkg_EventInfoAux:',
        ],
        ('m_event_ID', 'm_event_number'): [
            'McEventInfo',
            'ByteStreamEventInfo',
            'EventInfo_p4_McEventInfo',
            'EventInfo_p4_ByteStreamEventInfo',
        ],
    }

    def find_attrs():
        """Find the relevant attributes for reading the event number"""
        try:
            invalid_id = root.kInvalidDescriptorId
        except AttributeError:
            invalid_id = root.Experimental.kInvalidDescriptorId
        for path, candidates in eiDict.items():
            for candidate in candidates:
                field_id = reader.GetDescriptor().FindFieldId(candidate)
                if field_id == invalid_id:
                    continue
                type_name = reader.GetDescriptor().GetFieldDescriptor(field_id).GetTypeName()
                return (candidate, type_name), path
        # Every candidate was tried without success.
        return None, None

    name, attrs = find_attrs()
    if name is None or attrs is None:
        msg.error('Cannot read event info, will bail out.')
        msg.error(f"Tried {name=} and attributes {attrs=}")
        return []

    field_name, type_name = name
    view = reader.GetView[type_name](field_name)

    pairs = []
    for idx in range(nevts):
        if idx % 100 == 0:
            msg.debug('Read {} events from the input so far'.format(idx))
        # Walk the attribute path starting from the value seen at this entry.
        event_number = reduce(getattr, attrs, view(idx))
        msg.debug('Idx : EvtNum {:10d} : {}'.format(idx, event_number))
        pairs.append((idx, event_number))

    # Order the (index, event number) pairs by event number.
    return sorted(pairs, key=itemgetter(1), reverse=reverse_order)
326 
def _tree_ordered_indices(tree, reverse_order=False):
    """Return ``(entry index, event number)`` pairs of a TTree, sorted by event number.

    The event number is read from the first branch listed in ``eiDict``
    that ``tree`` exposes as an attribute; the key under which that branch
    is listed holds the (space separated) attribute names followed below
    it. While reading, all branches except the selected one are disabled;
    they are re-enabled afterwards, also when reading raises.

    Args:
        tree: the TTree to read.
        reverse_order: forwarded to the sort as its ``reverse`` flag.

    Returns:
        A list of ``(entry index, event number)`` tuples, or ``[]`` (after
        logging errors) when no listed branch is found.
    """
    from operator import itemgetter

    nevts = tree.GetEntriesFast()

    # attribute names below the branch -> candidate branch names
    eiDict = {'':['EventInfoAuxDyn.eventNumber'],
              'eventNumber':['EventInfoAux.',
                             'Bkg_EventInfoAux.',
                             'xAOD::EventAuxInfo_v3_EventInfoAux.',
                             'xAOD::EventAuxInfo_v2_EventInfoAux.',
                             'xAOD::EventAuxInfo_v1_EventInfoAux.',
                             'xAOD::EventAuxInfo_v3_Bkg_EventInfoAux.',
                             'xAOD::EventAuxInfo_v2_Bkg_EventInfoAux.',
                             'xAOD::EventAuxInfo_v1_Bkg_EventInfoAux.'],
              'm_event_ID m_event_number':['McEventInfo',
                                           'ByteStreamEventInfo',
                                           'EventInfo_p4_McEventInfo',
                                           'EventInfo_p4_ByteStreamEventInfo']}

    def find_attrs():
        """Find the relevant attributes for reading the event number"""
        for attr_names, branch_names in eiDict.items():
            for branch_name in branch_names:
                if hasattr(tree, branch_name):
                    return branch_name, attr_names
        return None, None

    tree.GetEntry(0)
    attr1, attr2 = find_attrs()
    if attr1 is None or attr2 is None:
        msg.error('Cannot read event info, will bail out.')
        msg.error(f"Tried attributes {attr1} and {attr2}")
        return []
    attrs = [attr1] + attr2.split()

    pairs = []
    tree.SetBranchStatus ('*', 0)
    try:
        tree.SetBranchStatus (attr1, 1)
        for idx in range(nevts):
            if idx % 100 == 0:
                msg.debug('Read {} events from the input so far'.format(idx))
            tree.GetEntry(idx)
            event_number = reduce(getattr, attrs, tree)
            msg.debug('Idx : EvtNum {:10d} : {}'.format(idx,event_number))
            pairs.append((idx, event_number))
    finally:
        # Never hand the tree back with its branches switched off, even if
        # reading an entry raised.
        tree.SetBranchStatus ('*', 1)

    # Order the (index, event number) pairs by event number.
    return sorted(pairs, key=itemgetter(1), reverse=reverse_order)
383 
384  def diff_obj(fold, fnew, args):
385 
386  infos = {
387  'old' : obj_info(fold.obj, args),
388  'new' : obj_info(fnew.obj, args),
389  }
390 
391  nentries = min(infos['old']['entries'],
392  infos['new']['entries'])
393  itr_entries = nentries
394  if args.entries in (-1,'','-1'):
395  #msg.info('comparing over [%s] entries...', nentries)
396  itr_entries = nentries
397  if infos['old']['entries'] != infos['new']['entries']:
398  msg.info('different numbers of entries:')
399  msg.info(' old: [%s]', infos['old']['entries'])
400  msg.info(' new: [%s]', infos['new']['entries'])
401  msg.info('=> comparing [%s] first entries...', nentries)
402  else:
403  itr_entries = args.entries
404  pass
405  msg.info('comparing over [%s] entries...', itr_entries)
406 
@cache
def skip_leaf(name_from_dump, skip_leaves):
    """Decide whether the leaf ``name_from_dump`` should be skipped.

    Previously the matching was done based on the full or partial
    leaf name. E.g. foo.bar.zzz would be skipped if any of the
    following were provided:
        * foo
        * foo.bar
        * foo.bar.zzz
        * Any of the foo, bar, or zzz
    Now each entry of ``skip_leaves`` is used as a regex and applied with
    ``re.match`` (i.e. anchored at the start of the name), such that the
    user doesn't need to provide full branch names.

    Args:
        name_from_dump: leaf name to test.
        skip_leaves: iterable of regex patterns; it must be hashable
            (e.g. a frozenset) because results are memoised with ``cache``.

    Returns:
        True as soon as one pattern matches, False otherwise. A pattern
        that is not a valid regex is reported through ``msg.error`` and
        ignored.
    """
    from traceback import format_exception

    for pattern in skip_leaves:
        try:
            if re.match(pattern, name_from_dump):
                return True
        except re.error as e:
            # Three-argument form: the single-argument call is only
            # accepted from Python 3.10 on.
            msg.error("Exception '%s', pattern %r, line %s, column %s\n%s",
                      e, e.pattern, e.lineno, e.colno,
                      "".join(format_exception(type(e), e, e.__traceback__)))
    return False
431 
@cache
def skip_leaf_entry(entry2, skip_leaves):
    """Apply ``skip_leaf`` to the leaf name built from the name parts ``entry2``.

    The leaf name is the '.'-joined sequence of the parts that are not
    purely digits. Results are memoised with ``cache``, so both arguments
    must be hashable.
    """
    name_parts = [part for part in entry2 if not part.isdigit()]
    return skip_leaf('.'.join(name_parts), skip_leaves)
436 
def filter_branches(leaves):
    """Return the set of ``leaves`` matched by any regex in ``args.branches_of_interest``.

    Each regex is applied with ``match``, i.e. anchored at the start of
    the leaf name.
    """
    selected = set()
    for expr in args.branches_of_interest:
        matcher = re.compile(expr).match
        selected |= {leaf for leaf in leaves if matcher(leaf)}
    return selected
443 
444  skipset = frozenset(args.ignore_leaves)
445  removed_leaves = infos['old']['leaves'] - infos['new']['leaves']
446  added_leaves = infos['new']['leaves'] - infos['old']['leaves']
447 
448  if args.branches_of_interest:
449  removed_leaves = filter_branches(removed_leaves)
450  added_leaves = filter_branches(added_leaves)
451  else:
452  removed_leaves = {l for l in removed_leaves if not skip_leaf(l, skipset)}
453  added_leaves = {l for l in added_leaves if not skip_leaf(l, skipset)}
454 
455  if removed_leaves:
456  removed_leaves_list = list(removed_leaves)
457  removed_leaves_list.sort()
458  if args.exact_branches:
459  msg.error('the following variables exist only in the old file !')
460  for l in removed_leaves_list:
461  msg.error(' - [%s]', l)
462  else:
463  msg.warning('the following variables exist only in the old file !')
464  for l in removed_leaves_list:
465  msg.warning(' - [%s]', l)
466  if added_leaves:
467  added_leaves_list = list(added_leaves)
468  added_leaves_list.sort()
469  if args.exact_branches:
470  msg.error('the following variables exist only in the new file !')
471  for l in added_leaves_list:
472  msg.error(' - [%s]', l)
473  else:
474  msg.warning('the following variables exist only in the new file !')
475  for l in added_leaves_list:
476  msg.warning(' - [%s]', l)
477 
478  # need to remove trailing dots as they confuse reach_next()?
479  skip_leaves = [ l.rstrip('.') for l in removed_leaves | added_leaves | set(args.ignore_leaves) ]
480  for l in skip_leaves:
481  msg.debug('skipping [%s]', l)
482  skip_leaves = frozenset (skip_leaves)
483 
# Names of the top-level branches (TTree) or fields (RNTuple) of each file.
if isinstance(fold.obj, root.TTree):
    oldBranches = {b.GetName().rstrip('\0') for b in fold.tree.GetListOfBranches()}
elif isinstance(fold.obj, RNTupleReader):
    oldBranches = {f.GetFieldName() for f in fold.obj.GetDescriptor().GetTopLevelFields()}
if isinstance(fnew.obj, root.TTree):
    newBranches = {b.GetName().rstrip('\0') for b in fnew.tree.GetListOfBranches()}
# Test against the RNTupleReader alias resolved at the top of main(), as is
# done for the old file: root.RNTupleReader does not exist on ROOT builds
# where the class is only available as root.Experimental.RNTupleReader.
elif isinstance(fnew.obj, RNTupleReader):
    newBranches = {f.GetFieldName() for f in fnew.obj.GetDescriptor().GetTopLevelFields()}
# Only names present in both files are kept.
branches = oldBranches & newBranches
493 
494  if args.branches_of_interest:
495  branches_of_interest = args.branches_of_interest
496 
497  # check that all branches of interest exist in the new file
498  for regex in branches_of_interest:
499  test = re.compile(regex)
500  if not {l for l in infos['new']['leaves'] if test.match(l)}:
501  msg.error(f'no match in new file for branch of interest: {regex}')
502 
503  BOI_matches = set()
504  for branch_of_interest in branches_of_interest:
505  try:
506  r = re.compile(branch_of_interest)
507  BOI_matches.update(filter(r.match, branches))
508  except TypeError:
509  continue
510 
511  if len(BOI_matches)<1:
512  msg.error('No matching branches found in both files for supplied branches of interest, quitting.')
513  return 1
514  msg.info('only the following branches of interest will be compared: ')
515  for l in BOI_matches:
516  msg.info(' - [%s]', l)
517  branches = BOI_matches
518 
519  msg.info('comparing [%s] leaves over entries...', len(infos['old']['leaves'] & infos['new']['leaves']))
520  n_good = 0
521  n_bad = 0
522  if args.exact_branches:
523  n_bad += len(removed_leaves) + len(added_leaves)
524  import collections
525  summary = collections.defaultdict(int)
526 
def get_event_range(entry):
    """Translate the entries selection into slice bounds ``(smin, smax)``.

    Args:
        entry: either a string or an integer.
            * ``'a:b'``, ``'a:'`` or ``':b'``: bounds taken from the parts
              that consist of digits only; a part that does not leaves the
              corresponding bound at its default.
            * a string of digits, or an integer: number of events, i.e.
              the upper bound; values that are not positive mean no
              upper bound.

    Returns:
        ``(smin, smax)`` with defaults ``0`` and ``None``. Input that
        cannot be interpreted is reported with a warning and yields the
        defaults, i.e. all events.
    """
    smin, smax = 0, None
    understood = True
    # Parse user input
    if isinstance(entry, str):
        # We support three main cases in this format: 5:10 (5th to 10th),
        # 5: (5th to the end), and :5 (from the start to 5th)
        if ':' in entry:
            vals = entry.split(':')
            smin = int(vals[0]) if vals[0].isdigit() else 0
            smax = int(vals[1]) if vals[1].isdigit() else None
        # This is the case where the user inputs the total number of events
        elif entry.isdigit():
            smax = int(entry) if int(entry) > 0 else None
        # Any other string (e.g. 'range(3)' or '0,2,1') cannot be turned
        # into a slice here; say so instead of silently using everything.
        else:
            understood = False
    # Handle the case where the input is a number (i.e. default)
    elif isinstance(entry, int):
        smax = entry if entry > 0 else None
    # If we come across an unhandled case, fall back to all events
    else:
        understood = False
    if not understood:
        msg.warning(f"Unknown entries argument {entry}, will compare all events...")
    msg.debug(f"Event slice is parsed as [{smin},{smax}]")
    return smin, smax
550 
551  if args.order_trees:
552  smin, smax = get_event_range(itr_entries)
553  msg.debug("Indices/Event Numbers of old events ...")
554  idx_old = ordered_indices(fold.obj)[smin:smax]
555  msg.debug("Indices/Event Numbers of new events ...")
556  idx_new = ordered_indices(fnew.obj)[smin:smax]
557  itr_entries_old, event_numbers_old = list(map(list,zip(*idx_old)))
558  itr_entries_new, event_numbers_new = list(map(list,zip(*idx_new)))
559  msg.debug(f"List of old indices {itr_entries_old}")
560  msg.debug(f"List of new indices {itr_entries_new}")
561  msg.debug(f"List of old events {event_numbers_old}")
562  msg.debug(f"List of new events {event_numbers_new}")
563  if event_numbers_old != event_numbers_new:
564  msg.error('Events differ, quitting!')
565  msg.error(f"List of old events {event_numbers_old}")
566  msg.error(f"List of new events {event_numbers_new}")
567  return 1
568  else:
569  itr_entries_old = itr_entries
570  itr_entries_new = itr_entries
571 
572  branches = sorted(branches)
573  old_dump_iter = fold.dump(args.tree_name, itr_entries_old, branches, True, False)
574  new_dump_iter = fnew.dump(args.tree_name, itr_entries_new, branches, True, False)
575  old_skip_dict = {}
576  new_skip_dict = {}
577 
def leafname_fromdump(entry):
    """Build the leaf name of a dump entry, or return None for a None entry.

    The name is made of the parts in ``entry[2]`` that are not purely
    digits, joined with '.'.
    """
    if entry is None:
        return None
    name_parts = [part for part in entry[2] if not part.isdigit()]
    return '.'.join(name_parts)
583 
def elindices_fromdump(entry):
    """List the element indices of a dump entry, or return None for a None entry.

    The indices are the parts in ``entry[2]`` that are purely digits,
    converted to ``int`` and kept in their original order.
    """
    if entry is None:
        return None
    digit_parts = (part for part in entry[2] if part.isdigit())
    return [int(part) for part in digit_parts]
589 
def reach_next(dump_iter, skip_leaves, skip_dict, leaves_prefix=None):
    """Pull entries from ``dump_iter`` until one that is not skipped is found.

    The first name part of every entry read (``entry[2][0]``) is cleaned in
    place: trailing '.' and NUL characters are stripped when ``fold.obj``
    is a TTree, trailing ':' when it is an RNTupleReader, and every
    occurrence of ``leaves_prefix`` (if given) is removed.

    Args:
        dump_iter: iterator over dump entries.
        skip_leaves: patterns handed to ``skip_leaf``.
        skip_dict: per-iterator memo, updated in place. It maps the
            uncleaned first name part either to -1, once a leaf starting
            with it has been skipped, or to the entry index (``entry[1]``)
            at which it was first seen.
        leaves_prefix: text to remove from the first name part.

    Returns:
        The next entry that is kept, or None when ``dump_iter`` is exhausted.
    """
    # Which trailing characters to strip from the first name part.
    if isinstance(fold.obj, root.TTree):
        trailing = '.\0'
    elif isinstance(fold.obj, RNTupleReader):
        trailing = ':'
    else:
        trailing = None

    while True:
        try:
            entry = next(dump_iter)
        except StopIteration:
            return None

        name_parts = entry[2]
        first_part_orig = name_parts[0]
        if trailing is not None:
            name_parts[0] = name_parts[0].rstrip(trailing) # clean branch name
        if leaves_prefix:
            name_parts[0] = name_parts[0].replace(leaves_prefix, '')

        # Calling leafname_fromdump is expensive, so try to decide from
        # the first name part alone. If this part was first seen at
        # another entry index and nothing starting with it has been
        # skipped since, assume nothing starting with it is skipped.
        # NOTE(review): the test is `> 0`, so a part first seen at entry
        # index 0 never takes this shortcut — confirm this is intended.
        first_seen = skip_dict.setdefault (first_part_orig, entry[1])
        if first_seen > 0 and first_seen != entry[1]:
            return entry

        leaf_name = leafname_fromdump(entry)
        if not skip_leaf(leaf_name, skip_leaves):
            return entry

        # Remember that something under this first part is skipped, which
        # disables the shortcut above for it.
        skip_dict[first_part_orig] = -1
        msg.debug('SKIP: {}'.format(leaf_name))
624 
625  read_old = True
626  read_new = True
627  d_old = None
628  d_new = None
629 
630  while True:
631  if read_old:
632  d_old = reach_next(old_dump_iter, skip_leaves, old_skip_dict, args.leaves_prefix)
633  if read_new:
634  d_new = reach_next(new_dump_iter, skip_leaves, new_skip_dict, args.leaves_prefix)
635 
636  if not d_new and not d_old:
637  break
638 
639  read_old = True
640  read_new = True
641 
642  if (args.order_trees and d_old and d_new and d_old[2:] == d_new[2:]) or d_old == d_new:
643  n_good += 1
644  continue
645 
646  if d_old:
647  tree_name, ientry, iname, iold = d_old
648  else:
649  msg.debug("try to delete 'ientry', 'iname', 'iold'")
650  try: del ientry, iname, iold
651  except NameError: pass
652  if d_new:
653  tree_name, jentry, jname, inew = d_new
654  else:
655  msg.debug("try to delete 'jentry', 'jname', 'inew'")
656  try: del jentry, jname, inew
657  except NameError: pass
658 
659  if not d_old:
660  # FIXME: that's a plain (temporary?) hack
661  if jname[-1] in args.known_hacks:
662  continue
663  fold.allgood = False
664  summary[leafname_fromdump(d_new)] += 1
665  n_bad += 1
666  continue
667  elif not d_new:
668  # FIXME: that's a plain (temporary?) hack
669  if iname[-1] in args.known_hacks:
670  continue
671  fnew.allgood = False
672  summary[leafname_fromdump(d_old)] += 1
673  n_bad += 1
674  continue
675 
676  idiff = _vecdiff (iold, inew, args.nan_equal)
677  if idiff is None:
678  n_good += 1
679  continue
680  elif idiff >= 0:
681  iold = iold[idiff]
682  inew = inew[idiff]
683  iname.insert(-1, str(idiff))
684  jname.insert(-1, str(idiff))
685 
686  # for regression testing we should have NAN == NAN
687  if args.nan_equal:
688  if all([isinstance(x,Real) and isnan(x) for x in [iold,inew]]):
689  n_good += 1
690  continue
691 
692  # FIXME: that's a plain (temporary?) hack
693  if iname[-1] in args.known_hacks or jname[-1] in args.known_hacks:
694  continue
695 
696  n_bad += 1
697 
698  # Identifiers are event numbers if we're ordering the trees, otherwise tree indices
699  if args.order_trees:
700  id_old = dict(idx_old)[ientry]
701  id_new = dict(idx_new)[jentry]
702  else:
703  id_old = ientry
704  id_new = jentry
705 
706  if not args.order_trees:
707  in_synch = d_old and d_new and d_old[:-1] == d_new[:-1]
708  else:
709  in_synch = d_old and d_new and d_old[0] == d_new[0] and d_old[2] == d_new[2] and id_old == id_new
710  if not in_synch:
711  if _is_detailed(args):
712  if d_old:
713  msg.info('::sync-old %s','.'.join(["%03i"%ientry]+d_old[2]))
714  else:
715  msg.info('::sync-old ABSENT')
716  if d_new:
717  msg.info('::sync-new %s','.'.join(["%03i"%jentry]+d_new[2]))
718  else:
719  msg.info('::sync-new ABSENT')
720  pass
721  # remember for later
722  if not d_old:
723  fold.allgood = False
724  summary[leafname_fromdump(d_new)] += 1
725  elif not d_new:
726  fnew.allgood = False
727  summary[leafname_fromdump(d_old)] += 1
728  else:
729  branch_old = f"{id_old}.{d_old[2][0]}"
730  branch_new = f"{id_new}.{d_new[2][0]}"
731  leaf_old = leafname_fromdump(d_old)
732  leaf_new = leafname_fromdump(d_new)
733  indices_old = elindices_fromdump(d_old)
734  indices_new = elindices_fromdump(d_new)
735  # Branches/Leaves are alphabetically ordered
736  # If we're out-of-sync, we try to figure out
737  # if we should advance the old or the new branch
738  # For same branches, we look at the full leaf name
739  # If that fails we look at the indices
740  if branch_old > branch_new:
741  read_old = False
742  elif branch_old < branch_new:
743  read_new = False
744  else:
745  if leaf_old > leaf_new:
746  read_old = False
747  elif leaf_old < leaf_new:
748  read_new = False
749  elif indices_old and indices_new and len(indices_old) == len(indices_new):
750  if indices_old > indices_new:
751  read_old = False
752  elif indices_old < indices_new:
753  read_new = False
754  # Let's see if we can reconcile
755  # If not, just bail out to avoid false positives
756  if read_old and not read_new:
757  if _is_detailed(args):
758  msg.info('::sync-old skipping entry')
759  fold.allgood = False
760  summary[leaf_old] += 1
761  elif read_new and not read_old:
762  if _is_detailed(args):
763  msg.info('::sync-new skipping entry')
764  fnew.allgood = False
765  summary[leaf_new] += 1
766  else:
767  msg.error('::sync attempt failed, bailing out...')
768  msg.error(f"::sync-old Leaf vs Index : {leaf_old} vs {indices_old}")
769  msg.error(f"::sync-new Leaf vs Index : {leaf_new} vs {indices_new}")
770  fold.allgood = False
771  fnew.allgood = False
772  summary[leaf_old] += 1
773  summary[leaf_new] += 1
774  break
775 
776  if _is_exit_early(args):
777  msg.info('*** exit on first error ***')
778  break
779  continue
780 
781  if not args.order_trees:
782  n = '.'.join(["%03i"%ientry]+iname)
783  else:
784  n = '.'.join(["%03i"%ientry]+iname+["%03i"%jentry]+jname)
785  diff_value = 'N/A'
786  try:
787  diff_value = 50.*(iold-inew)/(iold+inew)
788  diff_value = '%.8f%%' % (diff_value,)
789  except Exception:
790  pass
791  if _is_detailed(args):
792  msg.info('%s %r -> %r => diff= [%s]', n, iold, inew, diff_value)
793  pass
794  summary[leafname_fromdump(d_old)] += 1
795 
796  if iname[0] in args.enforce_leaves or jname[0] in args.enforce_leaves:
797  msg.info("don't compare further")
798  break
799  pass # loop over events/branches
800 
801  msg.info('Found [%s] identical leaves', n_good)
802  msg.info('Found [%s] different leaves', n_bad)
803 
804  if not _is_summary(args):
805  keys = sorted(summary.keys())
806  for n in keys:
807  v = summary[n]
808  msg.info(' [%s]: %i leaves differ', n, v)
809  pass
810  pass
811 
812  if (not fold.allgood) or (not fnew.allgood):
813  msg.error('NOTE: there were errors during the dump')
814  msg.info('fold.allgood: %s' , fold.allgood)
815  msg.info('fnew.allgood: %s' , fnew.allgood)
816  n_bad += 0.5
817  return n_bad
818 
819  if (isinstance(fold.obj, root.TTree) and isinstance(fnew.obj, root.TTree) or
820  isinstance(fold.obj, RNTupleReader) and isinstance(fnew.obj, RNTupleReader)):
821  ndiff = diff_obj(fold, fnew, args)
822  else:
823  raise NotImplementedError("Cannot compare object of type=%s to object of type=%s" % (type(fold.obj), type(fnew.obj)))
824  if ndiff != 0:
825  msg.error('files differ!')
826  return 2
827  msg.info('all good.')
828  return 0

Variable Documentation

◆ __author__

python.scripts.diff_root_files.__author__
private

Definition at line 9 of file diff_root_files.py.

◆ __doc__

python.scripts.diff_root_files.__doc__
private

Definition at line 8 of file diff_root_files.py.

◆ _vectypes

python.scripts.diff_root_files._vectypes
private

Definition at line 39 of file diff_root_files.py.

◆ g_ALLOWED_ERROR_MODES

python.scripts.diff_root_files.g_ALLOWED_ERROR_MODES

Definition at line 21 of file diff_root_files.py.

◆ g_ALLOWED_MODES

python.scripts.diff_root_files.g_ALLOWED_MODES

globals ----------------------------------------------------------------—

Definition at line 20 of file diff_root_files.py.

◆ names

python.scripts.diff_root_files.names

Definition at line 226 of file diff_root_files.py.

DerivationFramework::TriggerMatchingUtils::sorted
std::vector< typename R::value_type > sorted(const R &r, PROJ proj={})
Helper function to create a sorted vector from an unsorted range.
replace
std::string replace(std::string s, const std::string &s2, const std::string &s3)
Definition: hcg.cxx:307
vtune_athena.format
format
Definition: vtune_athena.py:14
python.scripts.diff_root_files._is_exit_early
def _is_exit_early(args)
Definition: diff_root_files.py:32
min
constexpr double min()
Definition: ap_fixedTest.cxx:26
dumpHVPathFromNtuple.append
bool append
Definition: dumpHVPathFromNtuple.py:91
python.CaloAddPedShiftConfig.type
type
Definition: CaloAddPedShiftConfig.py:42
reduce
void reduce(HepMC::GenEvent *ge, std::vector< HepMC::GenParticlePtr > toremove)
Remove unwanted particles from the event, collapsing the graph structure consistently.
Definition: FixHepMC.cxx:84
python.scripts.diff_root_files._is_summary
def _is_summary(args)
Definition: diff_root_files.py:29
covarianceTool.filter
filter
Definition: covarianceTool.py:514
DiTauMassTools::ignore
void ignore(T &&)
Definition: PhysicsAnalysis/TauID/DiTauMassTools/DiTauMassTools/HelperFunctions.h:58
python.scripts.diff_root_files.main
def main(args)
Definition: diff_root_files.py:142
python.scripts.diff_root_files._is_detailed
def _is_detailed(args)
classes ----------------------------------------------------------------—
Definition: diff_root_files.py:26
fillPileUpNoiseLumi.next
next
Definition: fillPileUpNoiseLumi.py:52
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:194
histSizes.list
def list(name, path='/')
Definition: histSizes.py:38
python.scripts.diff_root_files._vecdiff
def _vecdiff(v1, v2, nan_equal)
Definition: diff_root_files.py:52
CxxUtils::set
constexpr std::enable_if_t< is_bitmask_v< E >, E & > set(E &lhs, E rhs)
Convenience function to set bits in a class enum bitmask.
Definition: bitmask.h:232
TCS::join
std::string join(const std::vector< std::string > &v, const char c=',')
Definition: Trigger/TrigT1/L1Topo/L1TopoCommon/Root/StringUtils.cxx:10
python.processes.powheg.ZZ.ZZ.__init__
def __init__(self, base_directory, **kwargs)
Constructor: all process options are set here.
Definition: ZZ.py:18
python.CaloAddPedShiftConfig.int
int
Definition: CaloAddPedShiftConfig.py:45
str
Definition: BTagTrackIpAccessor.cxx:11
Cut::all
@ all
Definition: SUSYToolsAlg.cxx:67
drawFromPickle.view
view
Definition: drawFromPickle.py:294