ATLAS Offline Software
chainDump.py
#!/usr/bin/env python
#
# Copyright (C) 2002-2022 CERN for the benefit of the ATLAS collaboration
#

'''Script to dump trigger counts to a text file'''

import sys
import argparse
import logging
import json
import yaml
import ROOT
from collections import OrderedDict, defaultdict

total_events_key = 'TotalEventsProcessed'
column_width = 10  # width of the count columns for print out
name_width = 50  # width of the item name column for print out

# Store defaultdict as dict in yaml files
from yaml.representer import Representer
yaml.add_representer(defaultdict, Representer.represent_dict)

def get_parser():
    parser = argparse.ArgumentParser(usage='%(prog)s [options]',
                                     description=__doc__)
    parser.add_argument('-f', '--inputFile',
                        metavar='PATH',
                        default='expert-monitoring.root',
                        help='Name of input root file')
    parser.add_argument('-r', '--referenceFile',
                        metavar='PATH',
                        help='Name of reference root file')
    parser.add_argument('-v', '--verbose',
                        action='store_true',
                        help='Increase output verbosity')
    parser.add_argument('-p', '--printOnly',
                        action='store_true',
                        default=False,
                        help='Print counts instead of saving to file')
    parser.add_argument('-d', '--diffOnly',
                        action='store_true',
                        default=False,
                        help='Only store out of tolerance results (does not change JSON)')
    parser.add_argument('--json',
                        metavar='PATH',
                        nargs='?',
                        const='chainDump.json',
                        help='Save outputs also to a json file with the given name or %(const)s if no name is given')
    parser.add_argument('--yaml',
                        metavar='PATH',
                        nargs='?',
                        const='chainDump.yml',
                        help='Produce a small yaml file including condensed counts information for test file only '
                             '(no ref) with the given name or %(const)s if no name is given')
    parser.add_argument('--yamlL1',
                        action='store_true',
                        help='Include the L1 count information in the yaml file')
    parser.add_argument('--fracTolerance',
                        metavar='FRAC',
                        type=float,
                        default=0.001,
                        help='Tolerance as a fraction, default = %(default)s. '
                             'Flagged diffs must exceed all tolerances')
    parser.add_argument('--intTolerance',
                        metavar='NUM',
                        type=int,
                        default=2,
                        help='Tolerance as a number of counts, default = %(default)s. '
                             'Flagged diffs must exceed all tolerances')
    parser.add_argument('--countHists',
                        metavar='HISTS',
                        nargs='+',
                        default=[
                            'HLTFramework/TrigSignatureMoni/SignatureAcceptance',
                            'HLTFramework/TrigSignatureMoni/../TrigSignatureMoni/SignatureAcceptance',
                            'HLTFramework/../HLTFramework/TrigSignatureMoni/SignatureAcceptance',
                            'TrigSteer_HLT/ChainAcceptance',
                            'TrigSteer_HLT/NumberOfActiveTEs',
                            'HLTFramework/TrigSignatureMoni/DecisionCount',
                            'CTPSimulation/L1ItemsAV',
                            'L1/CTPSimulation/output/tavById'],
                        help='Histograms to use for counts dump. All existing '
                             'histograms from the list are used, default = %(default)s')
    parser.add_argument('--totalHists',
                        metavar='HISTS',
                        nargs='+',
                        default=[
                            'TrigSteer_HLT/NInitialRoIsPerEvent',
                            'HLTFramework/HLTSeeding/RoIs_EM/count'],
                        help='Histograms to use for total events. First existing '
                             'histogram from the list is used, default = %(default)s')
    parser.add_argument('--histDict',
                        metavar='DICT',
                        nargs='+',
                        default=[
                            'HLTFramework/TrigSignatureMoni/SignatureAcceptance:HLTChain',
                            'HLTFramework/TrigSignatureMoni/../TrigSignatureMoni/SignatureAcceptance:HLTExpress',
                            'HLTFramework/../HLTFramework/TrigSignatureMoni/SignatureAcceptance:HLTStep',
                            'TrigSteer_HLT/ChainAcceptance:HLTChain',
                            'TrigSteer_HLT/NumberOfActiveTEs:HLTTE',
                            'HLTFramework/TrigSignatureMoni/DecisionCount:HLTDecision',
                            'CTPSimulation/L1ItemsAV:L1AV',
                            'L1/CTPSimulation/output/tavById:L1AV'],
                        help='Dictionary defining names of output text files for each '
                             'histogram, default = %(default)s')
    parser.add_argument('--printHeader',
                        action='store_true',
                        default=False,
                        help='Add title of columns to the output txt (just for readability)')
    return parser

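# Example use of the options defined in get_parser() above (the reference file name is
# just a placeholder): running
#
#   chainDump.py -f expert-monitoring.root -r ref-expert-monitoring.root --json
#
# compares the counts in expert-monitoring.root against the reference and, besides the
# per-histogram text files, also writes a chainDump.json summary.
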
def open_root_file(file_path):
    f = ROOT.TFile(file_path)
    if f.IsOpen() and not f.IsZombie():
        return f
    else:
        return None

def load_histograms(root_file, hist_paths):
    hist_dict = {}
    for hist_path in hist_paths:
        h = root_file.Get(hist_path)
        if not isinstance(h, ROOT.TH1):
            logging.debug('Cannot open histogram %s, skipping', hist_path)
            continue
        logging.debug('Loaded histogram %s', hist_path)
        hist_dict[hist_path] = h
    return hist_dict

def get_counts(hist, rowLabel='Output'):
    '''
    Extract {xlabel: value} dictionary from a histogram. Values are stored as integers.
    If the histogram is 2D, the y-bin labelled rowLabel is used to extract the value.
    '''

    nbinsx = hist.GetNbinsX()
    nbinsy = hist.GetNbinsY()
    outputRow = None  # Default to last row if rowLabel is not found
    for bin in range(1, nbinsy + 1):
        if hist.GetYaxis().GetBinLabel(bin) == rowLabel:
            outputRow = bin
            break

    counts = {}
    for b in range(1, nbinsx + 1):
        label = hist.GetXaxis().GetBinLabel(b)
        if not label:
            logging.debug('Bin %d in histogram %s has no label, skipping', b, hist.GetName())
            continue

        value = hist.GetBinContent(b) if hist.GetDimension() == 1 else hist.GetBinContent(b, outputRow or nbinsy)
        counts[label] = int(value)

    return counts

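# get_counts above returns a plain {bin label: integer count} mapping, e.g. for a 1D
# L1 histogram roughly {'L1_ItemName': 42, ...} (names and numbers are placeholders).
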
def get_2D_counts(hist):
    '''
    Extract {xlabel_ylabel: value} dictionary from a histogram. Values are stored as integers.
    '''
    nbinsx = hist.GetNbinsX()
    nbinsy = hist.GetNbinsY()
    counts = {}
    for x in range(1, nbinsx + 1):
        label = hist.GetXaxis().GetBinLabel(x)
        if not label:
            logging.debug('Bin %d in histogram %s has no label, skipping', x, hist.GetName())
            continue

        for y in range(3, nbinsy):
            rowName = hist.GetYaxis().GetBinLabel(y)
            # Get only steps and skip the base rows
            if rowName in ['Input', 'AfterPS', 'Output', 'Express']:
                continue
            name = label + '_' + rowName
            name = name.replace(' ', '')
            value = hist.GetBinContent(x, y)
            counts[name] = int(value)

    return counts

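# get_2D_counts above flattens a 2D signature histogram into one entry per (chain, step)
# cell, keyed as '<x label>_<y label>' with spaces removed. Assuming the step rows are
# labelled like 'Step 1', 'Step 2', ..., a key looks roughly like 'HLT_chainName_Step1';
# make_light_dict below relies on this when splitting names on '_Step'.
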
def make_counts_json_dict(in_counts, ref_counts):
    counts = OrderedDict()
    all_keys = set(in_counts.keys())
    all_keys.update(ref_counts.keys())
    keys_sorted = sorted(all_keys)
    for k in keys_sorted:
        v = in_counts[k] if k in in_counts else 'n/a'
        ref_v = ref_counts[k] if k in ref_counts else 'n/a'
        counts[k] = {
            'count': v,
            'ref_count': ref_v,
            'ref_diff': 'n/a'  # Filled in compare_ref
        }
    return counts

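# Each item produced by make_counts_json_dict above is a small dict of the form
#   {'count': <int or 'n/a'>, 'ref_count': <int or 'n/a'>, 'ref_diff': 'n/a'}
# where 'ref_diff' is overwritten with True/False by compare_ref when a reference
# file is given.
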
def parse_name_dict(name_dict_as_list):
    name_dict = {}
    for kv in name_dict_as_list:
        kv_split = kv.split(':')
        if len(kv_split) < 2:
            continue
        name_dict[kv_split[0]] = kv_split[1]
    return name_dict

def get_text_name(hist_name, name_dict):
    if hist_name in name_dict.keys():
        return name_dict[hist_name]
    else:
        return hist_name.replace('/', '_')

def count_diff(count_in, count_ref, total_in, total_ref, thr_frac, thr_num):
    # normalise input counts to total events in reference
    count_in_norm = (count_in / float(total_in)) * total_ref
    frac = count_in_norm / float(count_ref) if count_ref != 0 else None

    num_diff = abs(count_in_norm - count_ref) > thr_num
    frac_diff = abs(frac - 1.0) > thr_frac if frac else True

    return num_diff and frac_diff

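# Worked example for count_diff above with the default tolerances (thr_frac=0.001,
# thr_num=2) and equal event totals; the numbers are made up for illustration:
#   count_in=100,    count_ref=101    -> |diff| = 1  <= 2        -> not flagged
#   count_in=100,    count_ref=103    -> |diff| = 3  and ~2.9 %  -> flagged
#   count_in=100000, count_ref=100050 -> |diff| = 50 but ~0.05 % -> not flagged
# i.e. a difference is only reported when it exceeds both the absolute and the
# fractional tolerance.
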
def compare_ref(json_dict, thr_frac, thr_num):
    results = []
    in_total = json_dict[total_events_key]['count']
    ref_total = json_dict[total_events_key]['ref_count']
    for text_name in sorted(json_dict.keys()):
        if text_name == total_events_key:
            continue
        diff_val = []     # different counts in input and reference
        missing_ref = []  # input count exists but reference is n/a
        missing_val = []  # input count is n/a but reference exists
        for item_name, item_counts in json_dict[text_name]['counts'].items():
            v = item_counts['count']
            ref_v = item_counts['ref_count']
            if v == 'n/a':
                missing_val.append([item_name, v, ref_v])
                item_counts['ref_diff'] = True
            elif ref_v == 'n/a':
                missing_ref.append([item_name, v, ref_v])
                item_counts['ref_diff'] = True
            elif count_diff(v, ref_v, in_total, ref_total, thr_frac, thr_num):
                diff_val.append([item_name, v, ref_v])
                item_counts['ref_diff'] = True
            else:
                item_counts['ref_diff'] = False
        good = True
        if len(diff_val) > 0:
            good = False
            dump = '\n'.join(
                [' {e[0]:{nw}s} {e[1]:>{w}d} {e[2]:>{w}d}'.format(
                    e=element, nw=name_width, w=column_width) for element in diff_val])
            logging.info('%s has %d item(s) out of tolerance:\n%s',
                         text_name, len(diff_val), dump)
        if len(missing_ref) > 0:
            good = False
            dump = '\n'.join([' {e[0]:s}'.format(e=element) for element in missing_ref])
            logging.info('%s has %d item(s) missing in the reference:\n%s',
                         text_name, len(missing_ref), dump)
        if len(missing_val) > 0:
            good = False
            dump = '\n'.join([' {e[0]:s}'.format(e=element) for element in missing_val])
            logging.info('%s has %d item(s) missing with respect to the reference:\n%s',
                         text_name, len(missing_val), dump)
        if good:
            logging.info('%s is matching the reference', text_name)
            results.append(0)
        else:
            results.append(1)
    return max(results)

def print_counts(json_dict):
    for text_name in json_dict.keys():
        if text_name == total_events_key:
            logging.info('%s: %d', text_name, json_dict[text_name]['count'])
            continue
        hist_name = json_dict[text_name]['hist_name']
        counts = json_dict[text_name]['counts']
        no_ref = True
        for item_counts in counts.values():
            if item_counts['ref_count'] != 'n/a':
                no_ref = False
                break
        dump_lines = []
        for item_name, item_counts in counts.items():
            v = item_counts['count']
            line = ' {name:{nw}s} {val:>{w}s}'.format(name=item_name, val=str(v), nw=name_width, w=column_width)
            if not no_ref:
                ref_v = item_counts['ref_count']
                diff = item_counts['ref_diff']
                line += ' {val:>{w}s}'.format(val=str(ref_v), w=column_width)
                if diff:
                    line += ' <<<<<<<<<<'
            dump_lines.append(line)
        logging.info('Writing %s counts from histogram %s:\n%s', text_name, hist_name, '\n'.join(dump_lines))

def format_txt_count(count):
    if type(count) is int:
        return '{val:>{w}d}'.format(val=count, w=column_width)
    elif type(count) is not str:
        logging.error('Unexpected count type %s', type(count))
        count = 'ERROR'
    if count == 'n/a':
        count = '-'
    return '{val:>{w}s}'.format(val=count, w=column_width)

def write_txt_output(json_dict, diff_only=False, printHeader=False):
    for text_name in sorted(json_dict.keys()):
        if text_name == total_events_key:
            logging.info('Writing total event count to file %s.txt', text_name)
            with open('{:s}.txt'.format(text_name), 'w') as outfile:
                outfile.write('{:d}\n'.format(json_dict[text_name]['count']))
            continue
        hist_name = json_dict[text_name]['hist_name']
        logging.info('Writing counts from histogram %s to file %s.txt', hist_name, text_name)
        counts = json_dict[text_name]['counts']
        no_ref = True
        for item_counts in counts.values():
            if item_counts['ref_count'] != 'n/a':
                no_ref = False
                break
        with open('{:s}.txt'.format(text_name), 'w') as outfile:
            if printHeader:
                line = '{name:{nw}s}'.format(name='chain', nw=name_width)
                if not no_ref:
                    line += '{name:{cw}s}'.format(name='test', cw=column_width) + 'ref \n'
                else:
                    line += 'test \n'
                outfile.write(line)
            for item_name, item_counts in counts.items():
                v = item_counts['count']
                line = '{name:{nw}s} '.format(name=item_name, nw=name_width) + format_txt_count(v)
                if not no_ref:
                    ref_v = item_counts['ref_count']
                    diff = item_counts['ref_diff']
                    line += ' ' + format_txt_count(ref_v)
                    if diff:
                        line += ' <<<<<<<<<<'
                    elif diff_only:
                        line = None
                if line:
                    outfile.write(line + '\n')

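# Each <text_name>.txt file written above contains one fixed-width line per item: the
# item name padded to name_width characters, the test count right-aligned in
# column_width characters and, when a reference is present, the reference count plus a
# ' <<<<<<<<<<' marker on out-of-tolerance lines ('-' stands for a missing count).
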
def make_light_dict(full_dict, includeL1Counts):
    # 3 nested dictionaries of int
    light_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

    def extract_steps(in_name, out_name):
        for name, c in full_dict[in_name]['counts'].items():
            if c['count'] == 0:
                continue
            chain_name, chain_step = name.split('_Step')
            light_dict[chain_name][out_name][int(chain_step)] = c['count']

        # Change step dictionary to consecutive list of steps
        for chain_name in light_dict.keys():
            steps = light_dict[chain_name][out_name]
            light_dict[chain_name][out_name] = {i: steps[k] for i, k in enumerate(sorted(steps.keys()))}

    extract_steps('HLTStep', 'stepCounts')
    extract_steps('HLTDecision', 'stepFeatures')

    # Add total chain count and skip total / groups / streams
    for chain_name, c in full_dict['HLTChain']['counts'].items():
        light_dict[chain_name]['eventCount'] = c['count']

        if any(chain_name.startswith(s) for s in ['All', 'grp_', 'str_']):
            del light_dict[chain_name]

    if includeL1Counts and 'L1AV' in full_dict:
        light_dict.update(
            {name: {"eventCount": counts["count"]} for name, counts in full_dict["L1AV"]["counts"].items()}
        )

    return light_dict

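# Sketch of the condensed per-chain structure produced by make_light_dict above and
# written to the yaml file (chain names and numbers are placeholders):
#
#   HLT_chainName:
#     eventCount: 10
#     stepCounts: {0: 12, 1: 10}
#     stepFeatures: {0: 25, 1: 14}
#
# With --yamlL1, L1 items are added with an eventCount entry only.
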
def main():
    args = get_parser().parse_args()
    logging.basicConfig(stream=sys.stdout,
                        format='%(levelname)-8s %(message)s',
                        level=logging.DEBUG if args.verbose else logging.INFO)

    name_dict = parse_name_dict(args.histDict)

    in_file = open_root_file(args.inputFile)
    if not in_file:
        logging.error('Failed to open input file %s', args.inputFile)
        return 1
    logging.debug('Opened input file %s', args.inputFile)

    if args.referenceFile:
        ref_file = open_root_file(args.referenceFile)
        if not ref_file:
            logging.error('Failed to open reference file %s', args.referenceFile)
            return 1
        logging.debug('Opened reference file %s', args.referenceFile)

    in_hists = load_histograms(in_file, args.countHists)
    if len(in_hists) == 0:
        logging.error('No count histograms could be loaded.')
        return 1
    logging.info('Loaded count histograms: %s', sorted(in_hists.keys()))

    in_total_hists = load_histograms(in_file, args.totalHists)
    if len(in_total_hists) == 0:
        logging.error('No total-events histogram could be loaded')
        return 1
    items = list(in_total_hists.items())
    in_total = items[0][1].GetEntries()
    logging.info('Loaded total-events histogram %s, number of events: %d',
                 items[0][0], in_total)

    ref_hists = None
    ref_total_hists = None
    ref_total = None
    if args.referenceFile:
        ref_hists = load_histograms(ref_file, args.countHists)
        logging.info('Loaded reference count histograms: %s', sorted(ref_hists.keys()))
        missing_refs = [k for k in in_hists.keys() if k not in ref_hists.keys()]
        if len(missing_refs) > 0:
            logging.error('Count histogram(s) %s missing in the reference', missing_refs)
            return 1
        ref_total_hists = load_histograms(ref_file, args.totalHists)
        if len(ref_total_hists) == 0:
            logging.error('No total-events reference histogram could be loaded')
            return 1
        ref_total = list(ref_total_hists.values())[0].GetEntries()
        logging.info('Loaded total-events reference histogram %s, number of events: %d',
                     list(ref_total_hists.keys())[0], ref_total)

    json_dict = OrderedDict()
    json_dict[total_events_key] = OrderedDict()
    json_dict[total_events_key]['hist_name'] = list(in_total_hists.keys())[0]
    json_dict[total_events_key]['count'] = int(in_total)
    json_dict[total_events_key]['ref_count'] = int(ref_total) if ref_total else 'n/a'

    for hist_name, hist in in_hists.items():
        text_name = get_text_name(hist_name, name_dict)
        if text_name in json_dict.keys():
            logging.error(
                'Name "%s" assigned to more than one histogram, '
                'results would be overwritten. Use --countHists and '
                '--histDict options to avoid duplicates. Exiting.', text_name)
            return 1

        rowLabel = 'Express' if 'Express' in text_name else 'Output'
        counts = get_2D_counts(hist) if text_name in ['HLTStep', 'HLTDecision'] else get_counts(hist, rowLabel)
        ref_counts = {}
        if ref_hists:
            ref_hist = ref_hists[hist_name]
            ref_counts = get_2D_counts(ref_hist) if text_name in ['HLTStep', 'HLTDecision'] else get_counts(ref_hist, rowLabel)
        d = make_counts_json_dict(counts, ref_counts)

        json_dict[text_name] = OrderedDict()
        json_dict[text_name]['hist_name'] = hist_name
        json_dict[text_name]['counts'] = d

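    # At this point json_dict holds 'TotalEventsProcessed' plus one entry per text name
    # (HLTChain, HLTStep, L1AV, ... as configured via --histDict), each of the form
    # {'hist_name': <histogram path>, 'counts': {<item>: {count, ref_count, ref_diff}}}.
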
    retcode = 0
    if args.referenceFile:
        logging.info('Comparing counts to reference')
        retcode = compare_ref(json_dict, args.fracTolerance, args.intTolerance)

    if args.printOnly and not args.diffOnly:
        logging.info('Printing counts instead of dumping to files because of --printOnly option')
        print_counts(json_dict)

    if not args.printOnly:
        write_txt_output(json_dict, args.diffOnly, args.printHeader)

    if args.json:
        logging.info('Writing results to %s', args.json)
        with open(args.json, 'w') as outfile:
            json.dump(json_dict, outfile, sort_keys=True)

    if args.yaml:
        logging.info('Writing results extract to %s', args.yaml)
        light_dict = make_light_dict(json_dict, includeL1Counts=args.yamlL1)
        with open(args.yaml, 'w') as outfile:
            yaml.dump(light_dict, outfile, sort_keys=True)

    return retcode

if __name__ == '__main__':
    sys.exit(main())