ATLAS Offline Software
Loading...
Searching...
No Matches
python.scripts.diff_root_files Namespace Reference

Functions

 _is_detailed (args)
 classes ----------------------------------------------------------------—
 _is_summary (args)
 _is_exit_early (args)
 _vecdiff (v1, v2, nan_equal)
 main (args)

Variables

str __doc__ = "diff two ROOT files (containers and sizes)"
str __author__ = "Sebastien Binet"
tuple g_ALLOWED_MODES = ('summary', 'semi-detailed', 'detailed')
 globals ----------------------------------------------------------------—
tuple g_ALLOWED_ERROR_MODES = ('bailout', 'resilient')
dict _vectypes
 names = names

Function Documentation

◆ _is_detailed()

python.scripts.diff_root_files._is_detailed ( args)
protected

classes ----------------------------------------------------------------—

functions --------------------------------------------------------------—

Definition at line 26 of file diff_root_files.py.

26def _is_detailed(args):
27 return args.mode == 'detailed'
28

◆ _is_exit_early()

python.scripts.diff_root_files._is_exit_early ( args)
protected

Definition at line 32 of file diff_root_files.py.

32def _is_exit_early(args):
33 return args.error_mode == 'bailout'
34
35# Possibly compare two vectors. If nan_equal, then consider NaNs to be equal.
36# Returns None if we have two matching vectors.
37# If we have two vectors that differ at some element, return that index.
38# Otherwise return -1 (inputs not vectors, etc).

◆ _is_summary()

python.scripts.diff_root_files._is_summary ( args)
protected

Definition at line 29 of file diff_root_files.py.

29def _is_summary(args):
30 return args.mode == 'summary'
31

◆ _vecdiff()

python.scripts.diff_root_files._vecdiff ( v1,
v2,
nan_equal )
protected

Definition at line 52 of file diff_root_files.py.

def _vecdiff (v1, v2, nan_equal):
    """Element-wise comparison of two vector-like objects.

    :param v1: first object; must expose ``size()`` and integer indexing.
    :param v2: second object; must be of exactly the same type as *v1*.
    :param nan_equal: when true, a pair of differing elements that are both
        real-number NaNs is treated as equal.
    :returns: ``None`` when every element matches, the index of the first
        differing element otherwise, or ``-1`` when the inputs are not
        comparable (type name not listed in ``_vectypes``, different types,
        or different sizes).
    """
    # NOTE(review): the C++ name is looked up on type(type(v1)), i.e. one
    # level above the object's class -- kept as in the original; confirm
    # that this is the intended level.
    cpp_name = getattr(type(type(v1)), '__cpp_name__', None)
    if cpp_name not in _vectypes or type(v1) is not type(v2):
        return -1

    length = v1.size()
    if length != v2.size():
        return -1

    if not nan_equal:
        for pos in range(length):
            if v1[pos] != v2[pos]:
                return pos
        return None

    def _is_nan(value):
        # Only real numbers can be NaN; anything else never counts as one.
        return isinstance(value, Real) and isnan(value)

    for pos in range(length):
        left, right = v1[pos], v2[pos]
        if left != right and not all([_is_nan(left), _is_nan(right)]):
            return pos
    return None
71
72@acmdlib.command(name='diff-root')
73@acmdlib.argument('old',
74 help='path to the reference ROOT file to analyze')
75@acmdlib.argument('new',
76 help='path to the ROOT file to compare to the reference')
77@acmdlib.argument('-t', '--tree-name',
78 default=None,
79 help='name of the TTree or RNTuple to compare')
80@acmdlib.argument('--branches-of-interest',
81 nargs='+',
82 default=set(),
83 help='set of regex matching names of branches to compare; assumes all if none specified.')
84@acmdlib.argument('--ignore-leaves',
85 nargs='+',
86 default=('Token', 'index_ref', r'(.*)_timings\.(.*)', r'(.*)_mems\.(.*)', r'(.*)TrigCostContainer(.*)'),
87 help='set of leaves names to ignore from comparison; can be a branch name or a partial leaf name (accepts regex)')
88@acmdlib.argument('--enforce-leaves',
89 nargs='+',
90 default=('BCID',),
91 help='set of leaves names we make sure to compare')
92@acmdlib.argument('--leaves-prefix',
93 default='',
94 help='Remove prefix value from all leaves')
95@acmdlib.argument('--known-hacks',
96 nargs='+',
97 default=('m_athenabarcode', 'm_token',),
98 help='set of leaves which are known to fail (but should be fixed at some point) [default: %(default)s]')
99@acmdlib.argument('--entries',
100 default='',
101 help='a list of entries (indices, not event numbers) or an expression (like range(3) or 0,2,1 or 0:3) leading to such a list, to compare.')
102@acmdlib.argument('-v', '--verbose',
103 action='store_true',
104 default=False,
105 help="""Enable verbose printout""")
106@acmdlib.argument('--order-trees',
107 action='store_true',
108 default=False,
109 help="""To order trees according to event numbers""")
110@acmdlib.argument('--exact-branches',
111 action='store_true',
112 default=False,
113 help="""Only allow exact list of branches present""")
114@acmdlib.argument('--mode',
115 choices=g_ALLOWED_MODES,
116 default='detailed',
117 help="""\
118Enable a particular mode.
119 'summary': only report the number of differences.
120 'semi-detailed': report the number of differences and the leaves that differ.
121 'detailed': display everything.
122default='%(default)s'.
123allowed: %(choices)s
124"""
125 )
126@acmdlib.argument('--error-mode',
127 choices=g_ALLOWED_ERROR_MODES,
128 default='bailout',
129 help="""\
130Enable a particular error mode.
131 'bailout': bail out on first error.
132 'resilient': keep running.
133default='%(default)s'.
134allowed: %(choices)s
135"""
136 )
137@acmdlib.argument('--nan-equal',
138 action='store_true',
139 default=False,
140 help="""Compare nan as equal to nan""")
141
STL class.

◆ main()

python.scripts.diff_root_files.main ( args)
diff two ROOT files (containers and sizes)

Definition at line 142 of file diff_root_files.py.

142def main(args):
143 """diff two ROOT files (containers and sizes)"""
144
145 # We allocate many python objects at once.
146 # Running GC less often by jacking up the threshold speeds things up
147 # considerably.
148 import gc
149 gc.set_threshold (100000)
150
151 import PyUtils.RootUtils as ru
152 root = ru.import_root() # noqa: F841
153 try:
154 RNTupleReader = root.RNTupleReader
155 except AttributeError:
156 RNTupleReader = root.Experimental.RNTupleReader
157
158 # Force load some dictionaries to work around ATLASRECTS-6261/ROOT-10940/ATEAM-942
159 if 'AtlasProject' in environ and environ['AtlasProject'] == 'Athena':
160 root.xAOD.Init().ignore()
161 root.xAOD.ParticleContainer_v1
162 root.xAOD.DiTauJetContainer_v1
163
164 import PyUtils.Logging as L
165 msg = L.logging.getLogger('diff-root')
166 if args.verbose:
167 msg.setLevel(L.logging.VERBOSE)
168 else:
169 msg.setLevel(L.logging.INFO)
170
171 from PyUtils.Helpers import ShutUp # noqa: F401
172
173 if args.entries == '':
174 args.entries = -1
175
176 msg.info('comparing tree [%s] in files:', args.tree_name)
177 msg.info(' old: [%s]', args.old)
178 msg.info(' new: [%s]', args.new)
179 msg.info('branches of interest: %s', args.branches_of_interest)
180 msg.info('ignore leaves: %s', args.ignore_leaves)
181 msg.info('enforce leaves: %s', args.enforce_leaves)
182 msg.info('leaves prefix: %s', args.leaves_prefix)
183 msg.info('hacks: %s', args.known_hacks)
184 msg.info('entries: %s', args.entries)
185 msg.info('mode: %s', args.mode)
186 msg.info('error mode: %s', args.error_mode)
187 msg.info('order trees: %s', args.order_trees)
188 msg.info('exact branches: %s', args.exact_branches)
189
190 import PyUtils.Helpers as H
191 with H.ShutUp() :
192 fold = ru.RootFileDumper(args.old, args.tree_name)
193 fnew = ru.RootFileDumper(args.new, args.tree_name)
194 pass
195
def obj_info(obj, args):
    """Return the entry count and leaf names of a TTree or an RNTupleReader.

    Dispatches to ``_tree_info`` for TTrees and to ``_reader_info`` for
    RNTuple readers. The latter runs inside ``H.ShutUp`` with a filter for
    the "has no virtual destructor" RuntimeWarning.

    :raises NotImplementedError: for any other object type.
    """
    if isinstance(obj, root.TTree):
        return _tree_info(obj, args)
    if not isinstance(obj, RNTupleReader):
        raise NotImplementedError(f"'obj_info' not implemented for object of {type(obj)=}")
    quiet = H.ShutUp(filters=[r'.+RuntimeWarning: class "[\w:]+" has no virtual destructor'])
    with quiet:
        return _reader_info(obj, args)
204
205 def _tree_info(tree, args):
206 nentries = tree.GetEntriesFast()
207 # l.GetBranch().GetName() gives the full leaf path name
208 leaves = [l.GetBranch().GetName() for l in tree.GetListOfLeaves()
209 if l.GetBranch().GetName() not in args.ignore_leaves]
210 if args.leaves_prefix:
211 leaves = [l.replace(args.leaves_prefix, '') for l in leaves]
212 return {
213 'entries': nentries,
214 'leaves': set(leaves),
215 }
216
def _reader_info(reader, args):
    """Summarise an RNTuple as ``{'entries': <count>, 'leaves': <set of names>}``.

    The field tree is walked with a visitor that records the qualified name
    of every field whose own name does not start with an underscore (such a
    field is skipped together with its sub-fields). Names listed verbatim in
    ``args.ignore_leaves`` are dropped, and every occurrence of
    ``args.leaves_prefix`` (when non-empty) is removed from the rest.
    """
    entry_count = reader.GetNEntries()
    try:
        RFieldVisitor = root.Detail.RFieldVisitor
    except AttributeError:
        RFieldVisitor = root.Experimental.Detail.RFieldVisitor

    def _descend(field, visitor):
        # The accessor name differs between ROOT releases (the original code
        # tags GetConstSubfields with "ROOT Version: 6.35.01"); fall back to
        # GetSubFields when it is missing.
        try:
            children = field.GetConstSubfields()
        except AttributeError:
            children = field.GetSubFields()
        for child in children:
            child.AcceptVisitor(visitor)

    class NameVisitor(RFieldVisitor):
        def __init__(self, names):
            super().__init__()
            self.names = names

        def VisitField(self, field):
            if field.GetFieldName()[0] == '_':
                return
            self.names.append(field.GetQualifiedFieldName())
            _descend(field, self)

        def VisitFieldZero(self, field):
            # The root field contributes no name of its own.
            _descend(field, self)

    fieldZero = reader.GetModel().GetConstFieldZero()
    collected = []
    visitor = NameVisitor(collected)
    fieldZero.AcceptVisitor(visitor)

    kept = [name for name in visitor.names if name not in args.ignore_leaves]
    if args.leaves_prefix:
        kept = [name.replace(args.leaves_prefix, '') for name in kept]
    return {
        'entries': entry_count,
        'leaves': set(kept),
    }
263
def ordered_indices(obj, reverse_order=False):
    """Return the entries of *obj* ordered by event number.

    Dispatches to ``_tree_ordered_indices`` for TTrees and to
    ``_reader_ordered_indices`` for RNTuple readers.

    :raises NotImplementedError: for any other object type.
    """
    if isinstance(obj, root.TTree):
        return _tree_ordered_indices(obj, reverse_order)
    if isinstance(obj, RNTupleReader):
        return _reader_ordered_indices(obj, reverse_order)
    raise NotImplementedError(f"'ordered_indices' not implemented for object of {type(obj)=}")
271
def _reader_ordered_indices(reader, reverse_order=False):
    """Return ``(entry index, event number)`` pairs of an RNTuple, sorted by event number.

    The event number is read through a view on the first known event-info
    field found in the RNTuple descriptor. An empty list is returned (after
    logging an error) when none of the known fields exists.

    :param reader: the RNTupleReader to scan.
    :param reverse_order: sort by descending event number when true.
    """
    import operator

    nevts = reader.GetNEntries()

    # attribute path to follow on the view value -> candidate field names
    event_info_fields = {(): ['EventInfoAuxDyn:eventNumber'],
                         ('eventNumber',): ['EventInfoAux:',
                                            'Bkg_EventInfoAux:',
                                            'xAOD::EventAuxInfo_v3_EventInfoAux:',
                                            'xAOD::EventAuxInfo_v2_EventInfoAux:',
                                            'xAOD::EventAuxInfo_v1_EventInfoAux:',
                                            'xAOD::EventAuxInfo_v3_Bkg_EventInfoAux:',
                                            'xAOD::EventAuxInfo_v2_Bkg_EventInfoAux:',
                                            'xAOD::EventAuxInfo_v1_Bkg_EventInfoAux:'],
                         ('m_event_ID', 'm_event_number'): ['McEventInfo',
                                                            'ByteStreamEventInfo',
                                                            'EventInfo_p4_McEventInfo',
                                                            'EventInfo_p4_ByteStreamEventInfo']}

    def find_attrs():
        """Find the relevant attributes for reading the event number"""
        try:
            invalid_id = root.kInvalidDescriptorId
        except AttributeError:
            invalid_id = root.Experimental.kInvalidDescriptorId
        for attr_path, field_names in event_info_fields.items():
            for field_name in field_names:
                field_id = reader.GetDescriptor().FindFieldId(field_name)
                if field_id == invalid_id:
                    continue
                type_name = reader.GetDescriptor().GetFieldDescriptor(field_id).GetTypeName()
                return (field_name, type_name), attr_path
        return None, None

    name, attrs = find_attrs()
    if name is None or attrs is None:
        msg.error('Cannot read event info, will bail out.')
        msg.error(f"Tried {name=} and attributes {attrs=}")
        return []

    field_name, type_name = name
    view = reader.GetView[type_name](field_name)
    pairs = []
    for idx in range(nevts):
        if idx % 100 == 0:
            msg.debug('Read {} events from the input so far'.format(idx))
        # Follow the attribute path from the view value down to the number.
        event_number = reduce(getattr, attrs, view(idx))
        msg.debug('Idx : EvtNum {:10d} : {}'.format(idx, event_number))
        pairs.append((idx, event_number))

    # Order the (index, event number) pairs by event number.
    return sorted(pairs, key=operator.itemgetter(1), reverse=reverse_order)
326
def _tree_ordered_indices(tree, reverse_order=False):
    """Return ``(entry index, event number)`` pairs of a TTree, sorted by event number.

    The event number is read from the first known event-info branch that is
    available as an attribute of *tree*. While scanning, every other branch
    is switched off; all branches are switched back on afterwards, even if
    reading an entry raises.

    :param tree: the TTree to scan.
    :param reverse_order: sort by descending event number when true.
    :returns: list of ``(index, event_number)`` tuples, or an empty list
        (after logging an error) when no known event-info branch is found.
    """
    import operator

    nevts = tree.GetEntriesFast()

    # whitespace-separated attribute chain -> candidate branch names
    eiDict = {'':['EventInfoAuxDyn.eventNumber'],
              'eventNumber':['EventInfoAux.',
                             'Bkg_EventInfoAux.',
                             'xAOD::EventAuxInfo_v3_EventInfoAux.',
                             'xAOD::EventAuxInfo_v2_EventInfoAux.',
                             'xAOD::EventAuxInfo_v1_EventInfoAux.',
                             'xAOD::EventAuxInfo_v3_Bkg_EventInfoAux.',
                             'xAOD::EventAuxInfo_v2_Bkg_EventInfoAux.',
                             'xAOD::EventAuxInfo_v1_Bkg_EventInfoAux.'],
              'm_event_ID m_event_number':['McEventInfo',
                                           'ByteStreamEventInfo',
                                           'EventInfo_p4_McEventInfo',
                                           'EventInfo_p4_ByteStreamEventInfo']}

    def find_attrs():
        """Find the relevant attributes for reading the event number"""
        for attr_chain, branch_names in eiDict.items():
            for branch_name in branch_names:
                if hasattr(tree, branch_name):
                    return branch_name, attr_chain
        return None, None

    tree.GetEntry(0)
    attr1, attr2 = find_attrs()
    if attr1 is None or attr2 is None:
        msg.error('Cannot read event info, will bail out.')
        msg.error(f"Tried attributes {attr1} and {attr2}")
        return []
    attrs = [attr1] + attr2.split()

    pairs = []
    # Only the event-info branch is needed while scanning.
    tree.SetBranchStatus ('*', 0)
    try:
        tree.SetBranchStatus (attr1, 1)
        for idx in range(nevts):
            if idx % 100 == 0:
                msg.debug('Read {} events from the input so far'.format(idx))
            tree.GetEntry(idx)
            event_number = reduce(getattr, attrs, tree)
            msg.debug('Idx : EvtNum {:10d} : {}'.format(idx,event_number))
            pairs.append((idx, event_number))
    finally:
        # Always re-enable every branch so that a failure here does not
        # leave the tree with its branches switched off.
        tree.SetBranchStatus ('*', 1)

    # Order the (index, event number) pairs by event number.
    return sorted(pairs, key=operator.itemgetter(1), reverse=reverse_order)
383
384 def diff_obj(fold, fnew, args):
385
386 infos = {
387 'old' : obj_info(fold.obj, args),
388 'new' : obj_info(fnew.obj, args),
389 }
390
391 nentries = min(infos['old']['entries'],
392 infos['new']['entries'])
393 itr_entries = nentries
394 if args.entries in (-1,'','-1'):
395 #msg.info('comparing over [%s] entries...', nentries)
396 itr_entries = nentries
397 if infos['old']['entries'] != infos['new']['entries']:
398 msg.info('different numbers of entries:')
399 msg.info(' old: [%s]', infos['old']['entries'])
400 msg.info(' new: [%s]', infos['new']['entries'])
401 msg.info('=> comparing [%s] first entries...', nentries)
402 else:
403 itr_entries = args.entries
404 pass
405 msg.info('comparing over [%s] entries...', itr_entries)
406
@cache
def skip_leaf(name_from_dump, skip_leaves):
    """Decide whether a dumped leaf must be left out of the comparison.

    Every item of *skip_leaves* is used as a regular expression and tried
    with ``re.match`` against *name_from_dump*; the first hit makes the leaf
    skipped. A pattern that is not a valid regular expression is reported
    through the logger and otherwise ignored.

    Results are memoised, so both arguments must be hashable (pass the
    patterns as a ``frozenset`` or a tuple).

    :returns: ``True`` if any pattern matches, ``False`` otherwise.
    """
    from traceback import format_exception

    for candidate in skip_leaves:
        try:
            matched = re.match(candidate, name_from_dump)
        except re.error as err:
            msg.error("Exception '%s', pattern %r, line %s, column %s\n%s",
                      err, err.pattern, err.lineno, err.colno, "".join(format_exception(err)))
            continue
        if matched:
            return True
    return False
431
@cache
def skip_leaf_entry(entry2, skip_leaves):
    """Memoised ``skip_leaf`` lookup for a sequence of dump name parts.

    The leaf name is built by joining with dots the parts of *entry2* that
    are not purely digits; both arguments must be hashable.
    """
    name_parts = [part for part in entry2 if not part.isdigit()]
    return skip_leaf('.'.join(name_parts), skip_leaves)
436
def filter_branches(leaves):
    """Return the subset of *leaves* matched by any branch-of-interest regex.

    Each entry of ``args.branches_of_interest`` is compiled and applied with
    ``match`` to every leaf name.
    """
    compiled = [re.compile(expr) for expr in args.branches_of_interest]
    return {leaf for leaf in leaves if any(pattern.match(leaf) for pattern in compiled)}
443
444 skipset = frozenset(args.ignore_leaves)
445 removed_leaves = infos['old']['leaves'] - infos['new']['leaves']
446 added_leaves = infos['new']['leaves'] - infos['old']['leaves']
447
448 if args.branches_of_interest:
449 removed_leaves = filter_branches(removed_leaves)
450 added_leaves = filter_branches(added_leaves)
451 else:
452 removed_leaves = {l for l in removed_leaves if not skip_leaf(l, skipset)}
453 added_leaves = {l for l in added_leaves if not skip_leaf(l, skipset)}
454
455 if removed_leaves:
456 removed_leaves_list = list(removed_leaves)
457 removed_leaves_list.sort()
458 if args.exact_branches:
459 msg.error('the following variables exist only in the old file !')
460 for l in removed_leaves_list:
461 msg.error(' - [%s]', l)
462 else:
463 msg.warning('the following variables exist only in the old file !')
464 for l in removed_leaves_list:
465 msg.warning(' - [%s]', l)
466 if added_leaves:
467 added_leaves_list = list(added_leaves)
468 added_leaves_list.sort()
469 if args.exact_branches:
470 msg.error('the following variables exist only in the new file !')
471 for l in added_leaves_list:
472 msg.error(' - [%s]', l)
473 else:
474 msg.warning('the following variables exist only in the new file !')
475 for l in added_leaves_list:
476 msg.warning(' - [%s]', l)
477
478 # need to remove trailing dots as they confuse reach_next()?
479 skip_leaves = [ l.rstrip('.') for l in removed_leaves | added_leaves | set(args.ignore_leaves) ]
480 for l in skip_leaves:
481 msg.debug('skipping [%s]', l)
482 skip_leaves = frozenset (skip_leaves)
483
# Collect the top-level branch (TTree) or field (RNTuple) names of each
# input; only names present in both files are compared.
if isinstance(fold.obj, root.TTree):
    oldBranches = {b.GetName().rstrip('\0') for b in fold.tree.GetListOfBranches()}
elif isinstance(fold.obj, RNTupleReader):
    oldBranches = {f.GetFieldName() for f in fold.obj.GetDescriptor().GetTopLevelFields()}
if isinstance(fnew.obj, root.TTree):
    newBranches = {b.GetName().rstrip('\0') for b in fnew.tree.GetListOfBranches()}
# Use the RNTupleReader alias resolved at the top of main() (it falls back
# to root.Experimental.RNTupleReader) rather than root.RNTupleReader, which
# does not exist on ROOT builds that only provide the Experimental name.
elif isinstance(fnew.obj, RNTupleReader):
    newBranches = {f.GetFieldName() for f in fnew.obj.GetDescriptor().GetTopLevelFields()}
branches = oldBranches & newBranches
493
494 if args.branches_of_interest:
495 branches_of_interest = args.branches_of_interest
496
497 # check that all branches of interest exist in the new file
498 for regex in branches_of_interest:
499 test = re.compile(regex)
500 if not {l for l in infos['new']['leaves'] if test.match(l)}:
501 msg.error(f'no match in new file for branch of interest: {regex}')
502
503 BOI_matches = set()
504 for branch_of_interest in branches_of_interest:
505 try:
506 r = re.compile(branch_of_interest)
507 BOI_matches.update(filter(r.match, branches))
508 except TypeError:
509 continue
510
511 if len(BOI_matches)<1:
512 msg.error('No matching branches found in both files for supplied branches of interest, quitting.')
513 return 1
514 msg.info('only the following branches of interest will be compared: ')
515 for l in BOI_matches:
516 msg.info(' - [%s]', l)
517 branches = BOI_matches
518
519 msg.info('comparing [%s] leaves over entries...', len(infos['old']['leaves'] & infos['new']['leaves']))
520 n_good = 0
521 n_bad = 0
522 if args.exact_branches:
523 n_bad += len(removed_leaves) + len(added_leaves)
524 import collections
525 summary = collections.defaultdict(int)
526
def get_event_range(entry):
    """Translate the ``--entries`` value into a ``(start, stop)`` slice pair.

    Supported inputs:

    * ``'5:10'`` -> ``(5, 10)``, ``'5:'`` -> ``(5, None)``, ``':5'`` -> ``(0, 5)``
    * ``'7'`` or ``7`` -> ``(0, 7)``; zero or a negative integer -> ``(0, None)``

    Anything else (a string in another format, or a value that is neither a
    string nor an integer) is reported with a warning and mapped to
    ``(0, None)``, i.e. all events.

    :returns: ``(smin, smax)`` where ``smax`` is ``None`` for "until the end".
    """
    smin, smax = 0, None
    if isinstance(entry, str):
        if ':' in entry:
            # start:stop, where either side may be left empty
            vals = entry.split(':')
            smin = int(vals[0]) if vals[0].isdigit() else 0
            smax = int(vals[1]) if len(vals) > 1 and vals[1].isdigit() else None
        elif entry.isdigit():
            # a plain number is the total number of events to compare
            smax = int(entry) if int(entry) > 0 else None
        else:
            # e.g. 'range(3)' or '0,2,1': cannot be expressed as a slice here,
            # so tell the user instead of silently comparing everything
            msg.warning(f"Unknown entries argument {entry}, will compare all events...")
    elif isinstance(entry, int):
        # numeric input (i.e. the default)
        smax = entry if entry > 0 else None
    else:
        msg.warning(f"Unknown entries argument {entry}, will compare all events...")
    msg.debug(f"Event slice is parsed as [{smin},{smax}]")
    return smin, smax
550
551 if args.order_trees:
552 smin, smax = get_event_range(itr_entries)
553 msg.debug("Indices/Event Numbers of old events ...")
554 idx_old = ordered_indices(fold.obj)[smin:smax]
555 msg.debug("Indices/Event Numbers of new events ...")
556 idx_new = ordered_indices(fnew.obj)[smin:smax]
557 itr_entries_old, event_numbers_old = list(map(list,zip(*idx_old)))
558 itr_entries_new, event_numbers_new = list(map(list,zip(*idx_new)))
559 msg.debug(f"List of old indices {itr_entries_old}")
560 msg.debug(f"List of new indices {itr_entries_new}")
561 msg.debug(f"List of old events {event_numbers_old}")
562 msg.debug(f"List of new events {event_numbers_new}")
563 if event_numbers_old != event_numbers_new:
564 msg.error('Events differ, quitting!')
565 msg.error(f"List of old events {event_numbers_old}")
566 msg.error(f"List of new events {event_numbers_new}")
567 return 1
568 else:
569 itr_entries_old = itr_entries
570 itr_entries_new = itr_entries
571
572 branches = sorted(branches)
573 old_dump_iter = fold.dump(args.tree_name, itr_entries_old, branches, True, False)
574 new_dump_iter = fnew.dump(args.tree_name, itr_entries_new, branches, True, False)
575 old_skip_dict = {}
576 new_skip_dict = {}
577
def leafname_fromdump(entry):
    """Build the dotted leaf name of a dump entry, leaving out element indices.

    :param entry: a dump record whose third item is a sequence of name
        parts (strings), or ``None``.
    :returns: the non-numeric parts joined with ``'.'``, or ``None`` when
        *entry* is ``None``.
    """
    if entry is None:
        return None
    name_parts = [part for part in entry[2] if not part.isdigit()]
    return '.'.join(name_parts)
583
def elindices_fromdump(entry):
    """Extract the element indices embedded in the name of a dump entry.

    :param entry: a dump record whose third item is a sequence of name
        parts (strings), or ``None``.
    :returns: the purely numeric parts converted to ``int``, in order, or
        ``None`` when *entry* is ``None``.
    """
    if entry is None:
        return None
    numeric_parts = (part for part in entry[2] if part.isdigit())
    return [int(part) for part in numeric_parts]
589
def reach_next(dump_iter, skip_leaves, skip_dict, leaves_prefix=None):
    """Advance *dump_iter* to the next entry that is not skipped.

    The first name part of each entry is cleaned in place: trailing ``'.'``
    and NUL characters are stripped for TTree inputs, trailing ``':'`` for
    RNTuple inputs, and every occurrence of *leaves_prefix* (when given) is
    removed.

    :param dump_iter: iterator over dump entries.
    :param skip_leaves: hashable collection of regex patterns for ``skip_leaf``.
    :param skip_dict: per-iterator cache, updated in place (see below).
    :param leaves_prefix: optional text to remove from the first name part.
    :returns: the next entry to compare, or ``None`` once the iterator is
        exhausted.
    """
    for entry in dump_iter:
        name_parts = entry[2]
        raw_head = name_parts[0]

        # clean branch name
        head = raw_head
        if isinstance(fold.obj, root.TTree):
            head = head.rstrip('.\0')
        elif isinstance(fold.obj, RNTupleReader):
            head = head.rstrip(':')
        if leaves_prefix:
            head = head.replace(leaves_prefix, '')
        name_parts[0] = head

        # Building the full leaf name is expensive, so the skip decision is
        # made from the first name part whenever possible. skip_dict maps
        # that (uncleaned) part either to -1, meaning some leaf with this
        # prefix is being skipped, or to the entry index at which the prefix
        # was first seen. Meeting the prefix again at a different index with
        # nothing skipped so far means none of its leaves are skipped.
        first_seen = skip_dict.setdefault (raw_head, entry[1])
        if first_seen > 0 and first_seen != entry[1]:
            return entry

        leaf_name = leafname_fromdump(entry)
        if not skip_leaf(leaf_name, skip_leaves):
            return entry
        skip_dict[raw_head] = -1
        msg.debug('SKIP: {}'.format(leaf_name))
    return None
624
625 read_old = True
626 read_new = True
627 d_old = None
628 d_new = None
629
630 while True:
631 if read_old:
632 d_old = reach_next(old_dump_iter, skip_leaves, old_skip_dict, args.leaves_prefix)
633 if read_new:
634 d_new = reach_next(new_dump_iter, skip_leaves, new_skip_dict, args.leaves_prefix)
635
636 if not d_new and not d_old:
637 break
638
639 read_old = True
640 read_new = True
641
642 if (args.order_trees and d_old and d_new and d_old[2:] == d_new[2:]) or d_old == d_new:
643 n_good += 1
644 continue
645
646 if d_old:
647 tree_name, ientry, iname, iold = d_old
648 else:
649 msg.debug("try to delete 'ientry', 'iname', 'iold'")
650 try: del ientry, iname, iold
651 except NameError: pass
652 if d_new:
653 tree_name, jentry, jname, inew = d_new
654 else:
655 msg.debug("try to delete 'jentry', 'jname', 'inew'")
656 try: del jentry, jname, inew
657 except NameError: pass
658
659 if not d_old:
660 # FIXME: that's a plain (temporary?) hack
661 if jname[-1] in args.known_hacks:
662 continue
663 fold.allgood = False
664 summary[leafname_fromdump(d_new)] += 1
665 n_bad += 1
666 continue
667 elif not d_new:
668 # FIXME: that's a plain (temporary?) hack
669 if iname[-1] in args.known_hacks:
670 continue
671 fnew.allgood = False
672 summary[leafname_fromdump(d_old)] += 1
673 n_bad += 1
674 continue
675
676 idiff = _vecdiff (iold, inew, args.nan_equal)
677 if idiff is None:
678 n_good += 1
679 continue
680 elif idiff >= 0:
681 iold = iold[idiff]
682 inew = inew[idiff]
683 iname.insert(-1, str(idiff))
684 jname.insert(-1, str(idiff))
685
686 # for regression testing we should have NAN == NAN
687 if args.nan_equal:
688 if all([isinstance(x,Real) and isnan(x) for x in [iold,inew]]):
689 n_good += 1
690 continue
691
692 # FIXME: that's a plain (temporary?) hack
693 if iname[-1] in args.known_hacks or jname[-1] in args.known_hacks:
694 continue
695
696 n_bad += 1
697
698 # Identifiers are event numbers if we're ordering the trees, otherwise tree indices
699 if args.order_trees:
700 id_old = dict(idx_old)[ientry]
701 id_new = dict(idx_new)[jentry]
702 else:
703 id_old = ientry
704 id_new = jentry
705
706 if not args.order_trees:
707 in_synch = d_old and d_new and d_old[:-1] == d_new[:-1]
708 else:
709 in_synch = d_old and d_new and d_old[0] == d_new[0] and d_old[2] == d_new[2] and id_old == id_new
710 if not in_synch:
711 if _is_detailed(args):
712 if d_old:
713 msg.info('::sync-old %s','.'.join(["%03i"%ientry]+d_old[2]))
714 else:
715 msg.info('::sync-old ABSENT')
716 if d_new:
717 msg.info('::sync-new %s','.'.join(["%03i"%jentry]+d_new[2]))
718 else:
719 msg.info('::sync-new ABSENT')
720 pass
721 # remember for later
722 if not d_old:
723 fold.allgood = False
724 summary[leafname_fromdump(d_new)] += 1
725 elif not d_new:
726 fnew.allgood = False
727 summary[leafname_fromdump(d_old)] += 1
728 else:
729 branch_old = f"{id_old}.{d_old[2][0]}"
730 branch_new = f"{id_new}.{d_new[2][0]}"
731 leaf_old = leafname_fromdump(d_old)
732 leaf_new = leafname_fromdump(d_new)
733 indices_old = elindices_fromdump(d_old)
734 indices_new = elindices_fromdump(d_new)
735 # Branches/Leaves are alphabetically ordered
736 # If we're out-of-sync, we try to figure out
737 # if we should advance the old or the new branch
738 # For same branches, we look at the full leaf name
739 # If that fails we look at the indices
740 if branch_old > branch_new:
741 read_old = False
742 elif branch_old < branch_new:
743 read_new = False
744 else:
745 if leaf_old > leaf_new:
746 read_old = False
747 elif leaf_old < leaf_new:
748 read_new = False
749 elif indices_old and indices_new and len(indices_old) == len(indices_new):
750 if indices_old > indices_new:
751 read_old = False
752 elif indices_old < indices_new:
753 read_new = False
754 # Let's see if we can reconcile
755 # If not, just bail out to avoid false positives
756 if read_old and not read_new:
757 if _is_detailed(args):
758 msg.info('::sync-old skipping entry')
759 fold.allgood = False
760 summary[leaf_old] += 1
761 elif read_new and not read_old:
762 if _is_detailed(args):
763 msg.info('::sync-new skipping entry')
764 fnew.allgood = False
765 summary[leaf_new] += 1
766 else:
767 msg.error('::sync attempt failed, bailing out...')
768 msg.error(f"::sync-old Leaf vs Index : {leaf_old} vs {indices_old}")
769 msg.error(f"::sync-new Leaf vs Index : {leaf_new} vs {indices_new}")
770 fold.allgood = False
771 fnew.allgood = False
772 summary[leaf_old] += 1
773 summary[leaf_new] += 1
774 break
775
776 if _is_exit_early(args):
777 msg.info('*** exit on first error ***')
778 break
779 continue
780
781 if not args.order_trees:
782 n = '.'.join(["%03i"%ientry]+iname)
783 else:
784 n = '.'.join(["%03i"%ientry]+iname+["%03i"%jentry]+jname)
785 diff_value = 'N/A'
786 try:
787 diff_value = 50.*(iold-inew)/(iold+inew)
788 diff_value = '%.8f%%' % (diff_value,)
789 except Exception:
790 pass
791 if _is_detailed(args):
792 msg.info('%s %r -> %r => diff= [%s]', n, iold, inew, diff_value)
793 pass
794 summary[leafname_fromdump(d_old)] += 1
795
796 if iname[0] in args.enforce_leaves or jname[0] in args.enforce_leaves:
797 msg.info("don't compare further")
798 break
799 pass # loop over events/branches
800
801 msg.info('Found [%s] identical leaves', n_good)
802 msg.info('Found [%s] different leaves', n_bad)
803
804 if not _is_summary(args):
805 keys = sorted(summary.keys())
806 for n in keys:
807 v = summary[n]
808 msg.info(' [%s]: %i leaves differ', n, v)
809 pass
810 pass
811
812 if (not fold.allgood) or (not fnew.allgood):
813 msg.error('NOTE: there were errors during the dump')
814 msg.info('fold.allgood: %s' , fold.allgood)
815 msg.info('fnew.allgood: %s' , fnew.allgood)
816 n_bad += 0.5
817 return n_bad
818
819 if (isinstance(fold.obj, root.TTree) and isinstance(fnew.obj, root.TTree) or
820 isinstance(fold.obj, RNTupleReader) and isinstance(fnew.obj, RNTupleReader)):
821 ndiff = diff_obj(fold, fnew, args)
822 else:
823 raise NotImplementedError("Cannot compare object of type=%s to object of type=%s" % (type(fold.obj), type(fnew.obj)))
824 if ndiff != 0:
825 msg.error('files differ!')
826 return 2
827 msg.info('all good.')
828 return 0
int main()
Definition hello.cxx:18

Variable Documentation

◆ __author__

str python.scripts.diff_root_files.__author__ = "Sebastien Binet"
private

Definition at line 9 of file diff_root_files.py.

◆ __doc__

str python.scripts.diff_root_files.__doc__ = "diff two ROOT files (containers and sizes)"
private

Definition at line 8 of file diff_root_files.py.

◆ _vectypes

dict python.scripts.diff_root_files._vectypes
protected
Initial value:
1= {'std::vector<float>',
2 'std::vector<double>',
3 'std::vector<int>',
4 'std::vector<unsigned int>',
5 'std::vector<long>',
6 'std::vector<unsigned long>',
7 'std::vector<short>',
8 'std::vector<unsigned short>',
9 'std::vector<char>',
10 'std::vector<unsigned char>',
11 'std::vector<long long>',
12 'std::vector<unsigned long long>'}

Definition at line 39 of file diff_root_files.py.

◆ g_ALLOWED_ERROR_MODES

tuple python.scripts.diff_root_files.g_ALLOWED_ERROR_MODES = ('bailout', 'resilient')

Definition at line 21 of file diff_root_files.py.

◆ g_ALLOWED_MODES

tuple python.scripts.diff_root_files.g_ALLOWED_MODES = ('summary', 'semi-detailed', 'detailed')

globals ----------------------------------------------------------------—

Definition at line 20 of file diff_root_files.py.

◆ names

python.scripts.diff_root_files.names = names

Definition at line 226 of file diff_root_files.py.