ATLAS Offline Software
covarianceTool.py
1 #!/usr/bin/env python3
2 # Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3 
4 """
5 The `covarianceTool.py` executable is a helper which uses the functions provided by `covarianceToolsLibrary.py` to perform
6 comparison between MC predictions and HEPData records.
7 
8 The `local/bin/covarianceToolsLibrary.py` module can also be used directly to manipulate covariance info and evaluate goodness of fit.
9 Examples of how to do this are provided in the `examples` subdirectory:
10 - `examples/covarianceTool-examples/README.md`
11 
12 The first thing to do is to check that you can access the `covarianceTool.py` executable and see the available options:
13 ```
14 covarianceTool.py -h
15 ```
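A typical invocation compares a HEPData reference file against one or more MC predictions.
The file names below are purely illustrative; the option syntax follows the `--help` text:
```
covarianceTool.py --data data_I1514251.yoda \
                  --mc prediction1.yoda:'MC1',prediction2.yoda:'MC2' \
                  --get_corr_from_web I1514251 --slides
```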
16 
17 Author: Louie D. Corpe (CERN)
18 Email: l.corpe@cern.ch
19 """
20 import os
21 import yaml
22 import yoda
23 import rivet
24 import optparse
25 import urllib2
26 import tarfile
27 import StringIO
28 import systematicsTool as st
29 import covarianceToolsLibrary as ct
30 
31 parser = optparse.OptionParser(usage="%prog [options]")
32 parser.add_option("--data", help="Input yoda file for data [%default]", dest="data", default="")
33 parser.add_option("--mc", help="Input yoda files for mc (comma separarated list), specify mc label with ':', eg mc1path:'MC1', mc2path:'MC2'. If no label is specified, the name of the yoda file is used as a label [%default]", dest="mc", default="")
34 parser.add_option("--use_toys", help="Force the covariance matrix to be produced with toys. By default the cov matrix is produced by direct propagation is errors are symmetric and with toys if errors are asymmetric. [%default]", dest="use_toys", default=False, action='store_true')
35 parser.add_option("--ntoys", help="number of toys to throw if cov matrix produced from toys [%default]", dest="ntoys", default=1000)
36 parser.add_option("--use_direct", help="Force the covariance matrix to be produced via direct propagation (the errors are symmetrized if needed). By default the cov matrix is produced by direct propagation if errors are symmetric and with toys if errors are asymmetric. [%default]", dest="use_direct", default=False, action='store_true')
37 parser.add_option("--corr_data", help="Input file/dir to build the covariance matrices (comma separarated list) [%default]", dest="corr_data", default=None)
38 parser.add_option("--corr_mc", help="Input file/dir to build the covariance matrices (comma separarated list) [%default]", dest="corr_mc", default=None)
39 parser.add_option("--ignore_corrs", help="ignore correlations in both data and mc and just assume total error is uncorrelated between bins [%default]", dest="ignore_corrs", default=False, action='store_true')
40 parser.add_option("--ignore_corrs_mc", help="Ignore correlations in the MC [%default]", dest="ignore_corrs_mc", default=False, action='store_true')
41 parser.add_option("--ignore_corrs_data", help="Ignore correlations in theData [%default]", dest="ignore_corrs_data", default=False, action='store_true')
42 parser.add_option("--ignore_errs_data", help="Ignore all errors in the data, and use only MC cov matrix [%default]", dest="ignore_errs_data", default=False, action='store_true')
43 parser.add_option("--ignore_errs_mc", help="Ignore all erros in the MC, and use only the data cov matrix [%default]", dest="ignore_errs_mc", default=False, action='store_true')
44 parser.add_option("--get_corr_from_web", help="Try to get correlation info from HEPMC record. Specify the inspire ID eg I1514251 [%default]", dest="get_corr_from_web", default=None)
45 parser.add_option("--filter", help="Only process a subset of histograms (comma separated) [%default]", dest="filter", default="all")
46 parser.add_option("-v", "--verbosity", help="Verbosity level (0-9) [%default]", dest="verbosity", default=0)
47 parser.add_option("--make_super_aos", help="Make super ao of all measuremnts passing --filter, and calculate the chi2 for all measurements using a super-cov-matrix [%default]", dest="make_super_aos", default=False, action='store_true')
48 parser.add_option("--slides", help="Make summary slides [%default]", dest="slides", default=False, action='store_true')
49 (opts, args) = parser.parse_args()
50 
51 os.environ['TERM'] = 'linux'
52 
53 histograms = {
54  'data': None,
55  'covariance': {},
56  'models': {},
57 }
58 
59 
60 def isFloat(x):
61  # check if the object can be cast as a float
62  try:
63  float(x)
64  return True
65  except ValueError:
66  return False
67 
68 
69 def convertHistToScatter(ao):
70  print("convertHistToScatter: This is a placeholder, need to implement")
71  exit(1)
72 
73 
74 def renameAOs(fin_name, aoMap):
75  # make a new YODA file where the AOs are renamed as per the reference file
76  fout_name = fin_name.replace(".yoda", ".renamed.yoda")
77  fout = open(fout_name, 'w')
78  aos = yoda.read(fin_name, unpatterns=".*RAW.*")
79  for aopath, ao in aos.items():
80  foundMatch = False
81  for ao1, ao2 in aoMap.items():
82  if ao1.name() == ao.name():
83  ao.setAnnotation("Path", ao2.path().replace("REF/", ""))
84  foundMatch = True
85  if ao2.name() == ao.name():
86  ao.setAnnotation("Path", ao1.path().replace("REF/", ""))
87  foundMatch = True
88  if foundMatch: yoda.writeYODA(ao, fout)
89  fout.close()
90  return fout_name
91 
92 
93 def makeBeamer(plotDictionary, outdir):
94  # produce summary slides using beamer
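 # The slides are assembled from LaTeX templates shipped in $SYSTTOOLSPATH/data/ (title.tex,
 # section.tex, errorAnalysis.tex, dataMCComparison.tex, sectionSummary.tex, overallSummary.tex,
 # end.tex). Placeholders such as !TITLE!, !AOREF!, !SAMPLE!, !ERRORBREAKDOWN!, !COVMATRIX! and
 # !CHI2CONTRIBS! are substituted with sed, and the frames are concatenated into slides_<data>.tex.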
95  frames = []
96  dataDir = os.environ["SYSTTOOLSPATH"] + "/data/"
97  os.system("mkdir -p %s " % outdir)
98  samples = plotDictionary['chi2-value'].keys()
99  histos = plotDictionary['chi2-value'][samples[0]].keys()
100  os.system('cp %s/title.tex %s/title.tex' % (dataDir, outdir))
101  os.system('cp %s/title.tex %s/title.tex' % (dataDir, outdir))
102  os.system("sed -i -e 's|!TITLE!|%s|g' %s/title.tex" % (opts.data.replace('.yoda', '').replace('_', ' '), outdir))
103  frames.append("%s/title.tex" % outdir)
104  for histo in histos:
105  if 'superAO' in histo: continue
106  os.system('cp %s/section.tex %s/%s_section.tex' % (dataDir, outdir, histo))
107  os.system("sed -i -e 's|!AOREF!|%s|g' %s/%s_section.tex" % (histo, outdir, histo))
108  frames.append('%s/%s_section.tex' % (outdir, histo))
109  os.system('cp %s/errorAnalysis.tex %s/%s_%s_errorAnalysis.tex' % (dataDir, outdir, 'data', histo))
110  os.system("sed -i -e 's|!AOREF!|%s|g' %s/%s_%s_errorAnalysis.tex" % (histo, outdir, 'data', histo))
111  os.system("sed -i -e 's|!SAMPLE!|%s|g' %s/%s_%s_errorAnalysis.tex" % ('data', outdir, 'data', histo))
112  os.system("sed -i -e 's|!ERRORBREAKDOWN!|%s|g' %s/%s_%s_errorAnalysis.tex" % (plotDictionary['error-analysis']['data'][histo], outdir, 'data', histo))
113  os.system("sed -i -e 's|!COVMATRIX!|%s|g' %s/%s_%s_errorAnalysis.tex" % (plotDictionary['correlation-matrix']['data'][histo], outdir, 'data', histo))
114  os.system("sed -i -e 's|!COVDETAILS!|%s|g' %s/%s_%s_errorAnalysis.tex" % (plotDictionary['covDetails']['data'][histo], outdir, 'data', histo))
115  frames.append("%s/%s_%s_errorAnalysis.tex" % (outdir, 'data', histo))
116  for sample in samples:
117  sampleNameClean = sample.replace("_", " ").replace("-", " ")
118  os.system('cp %s/dataMCComparison.tex %s/%s_%s_dataMCComparison.tex' % (dataDir, outdir, sample, histo))
119  os.system("sed -i -e 's|!AOREF!|%s|g' %s/%s_%s_dataMCComparison.tex" % (histo, outdir, sample, histo))
120  os.system("sed -i -e 's|!SAMPLE!|%s|g' %s/%s_%s_dataMCComparison.tex" % (sampleNameClean, outdir, sample, histo))
121  os.system("sed -i -e 's|!DATAVSMC!|%s|g' %s/%s_%s_dataMCComparison.tex" % (plotDictionary['data-vs-mc'][sample][histo], outdir, sample, histo))
122  os.system("sed -i -e 's|!CHI2CONTRIBS!|%s|g' %s/%s_%s_dataMCComparison.tex" % (plotDictionary['chi2-contribs'][sample][histo], outdir, sample, histo))
123  frames.append("%s/%s_%s_dataMCComparison.tex" % (outdir, sample, histo))
124  # summary
125  os.system('cp %s/sectionSummary.tex %s/%s_sectionSummary.tex' % (dataDir, outdir, histo))
126  os.system("sed -i -e 's|!AOREF!|%s|g' %s/%s_sectionSummary.tex" % (histo, outdir, histo))
127  os.system("sed -i -e 's|!SUMMARYPLOT!|%s|g' %s/%s_sectionSummary.tex" % (plotDictionary['summary-plot'][histo], outdir, histo))
128  os.system("sed -i -e 's|!SUMMARYTABLE!|%s|g' %s/%s_sectionSummary.tex" % (plotDictionary['summary-table'][histo], outdir, histo))
129  frames.append("%s/%s_sectionSummary.tex" % (outdir, histo))
130  os.system('cp %s/section.tex %s/%s_section.tex' % (dataDir, outdir, 'overall'))
131  os.system("sed -i -e 's|!AOREF!|%s|g' %s/%s_section.tex" % ('overall summary', outdir, 'overall'))
132  frames.append('%s/%s_section.tex' % (outdir, 'overall'))
133  os.system('cp %s/overallSummary.tex %s/%s_overallSummary.tex' % (dataDir, outdir, 'overall'))
134  os.system("sed -i -e 's|!AOREF!|%s|g' %s/%s_overallSummary.tex" % ('overall summary', outdir, 'overall'))
135  os.system("sed -i -e 's|!SUMMARYTABLE!|%s|g' %s/%s_overallSummary.tex" % (plotDictionary['summary-table']['all'], outdir, 'overall'))
136  frames.append("%s/%s_overallSummary.tex" % (outdir, 'overall'))
137  frames.append("%s/end.tex" % dataDir)
138  catLine = " ".join(frames)
139  os.system(" cat %s > %s/slides_%s.tex" % (catLine, outdir, opts.data.replace('.yoda', '')))
140  return " %s/slides_%s.tex" % (outdir, opts.data.replace('.yoda', ''))
141 
142 
143 def makeSummaryTable(plotDictionary, outfile, hdataName=None):
144  # make a summary latex table of the chi2 values for each AO
145  outf = open(outfile, 'w')
146  samples = plotDictionary['chi2-value'].keys()
147  if (hdataName):
148  histos = [hdataName]
149  else:
150  histos = sorted(plotDictionary['chi2-value'][samples[0]].keys())
151 
152  line = "\\begin{tabular}{|l|"
153  for hist in histos:
154  line += "c|"
155  line += "}"
156  outf.write(line + '\n')
157  outf.write("\\hline " + '\n')
158  line = " $\\chi^2$ / ndof "
159  for hist in histos:
160  if 'superAO' in hist: hist = 'global agreement'
161  line += " & %s " % hist
162  line += "\\\\"
163  outf.write(line + '\n')
164  outf.write("\\hline" + '\n')
165  for sample in samples:
166  line = " %s " % sample.replace("-", " ").replace("_", " ")
167  for hist in histos:
168  line += " & %s" % plotDictionary['chi2-value'][sample][hist]
169  line += "\\\\"
170  outf.write(line + '\n')
171  outf.write("\\hline" + '\n')
172  outf.write("\\end{tabular}" + '\n')
173  outf.close()
174 
175 
176 def matchAOs(aoList1, aoList2, strategy=0, recursive=True):
177  # try to establish a 1-1 mapping between AOs in two YODA files
178  # eg in case of different naming conventions
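 # Matching proceeds through increasingly loose strategies:
 #   strategy 0: overlapping names, same number of points and same bin edges
 #   strategy 1: same number of points and same bin edges (names ignored)
 #   strategy 2: same number of points and bin edges, plus at least two shared name tokens
 # If a strategy does not yield a unique 1-1 mapping, the next one is tried recursively.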
179  aoMap = {}
180  for ao1 in aoList1:
181  aoMap.setdefault(ao1, [])
182  for ao2 in aoList2:
183  nBinsMatch = False
184  namesMatch = False
185  namesPartialMatch = False
186  binWidthsMatch = False
187  if (ao2.name() in ao1.name()): namesMatch = True
188  if (ao1.name() in ao2.name()): namesMatch = True
189  ao1NameTokens = ao1.name().split("/")[-1].split("-")
190  ao2NameTokens = ao2.name().split("/")[-1].split("-")
191  if len(list(set(ao1NameTokens).intersection(ao2NameTokens))) >= 2: namesPartialMatch = True
192  if (ao1.numPoints() == ao2.numPoints()):
193  nBinsMatch = True
194  for ipt in range(ao1.numPoints()):
195  ao1BinLabels = "%.2f - %.2f" % (ao1.points()[ipt].xMin(), ao1.points()[ipt].xMax())
196  ao2BinLabels = "%.2f - %.2f" % (ao2.points()[ipt].xMin(), ao2.points()[ipt].xMax())
197  if (ao1BinLabels == ao2BinLabels): binWidthsMatch = True
198  if strategy == 0:
199  if (namesMatch and nBinsMatch and binWidthsMatch):
200  aoMap.setdefault(ao1, []).append(ao2)
201  elif strategy == 1:
202  if (nBinsMatch and binWidthsMatch):
203  aoMap.setdefault(ao1, []).append(ao2)
204  elif strategy == 2:
205  if (nBinsMatch and binWidthsMatch and namesPartialMatch):
206  aoMap.setdefault(ao1, []).append(ao2)
207 
208  if (len(aoMap.keys()) == len(aoList1)) and ([len(aoMap[ao]) for ao in aoList1]) == ([1 for ao in aoList1]):
209  if (opts.verbosity > 0): print("[INFO] found 1-1 mapping between aoLists using strategy %d:" % strategy)
210  for ao1 in aoList1:
211  aoMap[ao1] = aoMap[ao1][0]
212  if (opts.verbosity > 1): print("%s --> %s" % (ao1.name(), aoMap[ao1].name()))
213  elif (recursive and strategy < 3):
214  if (opts.verbosity > 0):
215  print("[WARNING] Could not establish 1-1 mapping between aoLists using strategy", strategy, ", try strategy ", strategy + 1)
216  for ao1 in aoList1:
217  if (opts.verbosity > 1): print("%s --> %s" % (ao1.name(), aoMap[ao1]))
218  strategy += 1
219  aoMap = matchAOs(aoList1, aoList2, strategy)
220  else:
221  print("[ERROR] could not match AOs in the Data and MC files. Please make sure the AOs have the same binning / number of bins (and names if possible!)")
222  print("[ERROR] aos from list 1", aoList1)
223  print("[ERROR] aos from list 2", aoList2)
224  exit(1)
225  return aoMap
226 
227 
228 def markupYODAwithCorrelationsFromYODAs(reference, correlationsDir):
229  # produce a new YODA file where the AOs have been marked up with
230  # covariance info from an uncertainty breakdown provided as
231  # a separate YODA file for each systematic variation
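 # The per-bin uncertainties are collected into a nested dict of the form
 #   {bin index: {systematic name: {'up': ..., 'dn': ...}}}
 # which is YAML-dumped and attached to each analysis object as the "ErrorBreakdown"
 # annotation consumed by covarianceToolsLibrary.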
232  reference_out = reference.replace(".yoda", "_corrs.yoda")
233  reference_outf = open(reference_out, 'w')
234  infiles = {}
235  if correlationsDir == 'self':
236  infiles.setdefault(reference, [])
237  else:
238  for f in os.listdir(correlationsDir):
239  if "all" in f: continue
240  infiles.setdefault(correlationsDir + "/" + f, [])
241  nBins = -1
242  binLabels = {}
243  binValues = {}
244  binErrors = {}
245 
246  histos = [name for name in yoda.read(reference, unpatterns=".*RAW.*")]
247  for histname in histos:
248  hAll = yoda.read(reference, unpatterns=".*RAW.*")[histname]
249  if type(hAll) is yoda.core.Histo1D: hAll = hAll.mkScatter()
250  for f in infiles.keys():
251  systname = f.split("/")[-1].replace(".yoda", "")
252  h = yoda.read(f, unpatterns=".*RAW.*")[histname]
253  if type(h) is yoda.core.Scatter2D:
254  nBins = h.numPoints()
255  for ipt in range(nBins):
256  binValues[ipt] = h.points()[ipt].y()
257  binLabels[ipt] = "%f - %f" % (h.points()[ipt].xMin(), h.points()[ipt].xMax())
258  errs = h.points()[ipt].yErrs()
259  errAv = (abs(errs[1]) + abs(errs[0])) * 0.5
260  binErrors.setdefault(ipt, {}).setdefault(systname, {})['up'] = errs[1]
261  binErrors.setdefault(ipt, {}).setdefault(systname, {})['dn'] = -1 * errs[0]
262  elif type(h) is yoda.core.Histo1D:
263  nBins = h.numBins
264  for ipt in range(nBins):
265  binValues[ipt] = h.bins()[ipt].sumW()
266  binLabels[ipt] = "%f - %f" % (h.bins()[ipt].xMin(), h.bins()[ipt].xMax())
267  errAv = h.bins()[ipt].sumW2()
268  binErrors.setdefault(ipt, {}).setdefault(systname, {})['up'] = errAv
269  binErrors.setdefault(ipt, {}).setdefault(systname, {})['dn'] = -1 * errAv
270  else: continue
271  if (reference != ""):
272  smallestDiff = 999.
273  for f in reference.split(","):
274  hists = yoda.read(f, unpatterns=".*RAW.*")
275  for name in hists:
276  if not type(hists[name]) is yoda.core.Scatter2D: continue
277  nominal = [(hists[name].points()[i].y()) for i in range(hists[name].numPoints())]
278  if (len(nominal) != nBins): continue
279  totalDiff = 0
280  for i in range(nBins):
281  totalDiff += abs(binValues[i] - nominal[i])
282  if (totalDiff < smallestDiff):
283  smallestDiff = totalDiff
284 
285  corrs = yaml.dump(binErrors, default_flow_style=True, default_style='', width=1e6)
286  hAll.setAnnotation("ErrorBreakdown", corrs)
287  yoda.writeYODA(hAll, reference_outf)
288  reference_outf.close()
289  return reference_out
290 
291 
292 def markupYODAwithCorrelationsFromYAML(reference, correlationsDir):
293  # produce a new YODA file where the AOs have been marked up with
294  # covariance info from an uncertainty breakdown provided as
295  # one or more YAML files from HEPData
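 # Each HEPData YAML table is expected to provide "independent_variables" (bin edges or labels)
 # and "dependent_variables" (central values with 'errors' given as 'symerror' or 'asymerror',
 # either absolute or in percent). If a companion 'syst' table with a granular breakdown exists
 # for an 'xsect' table, it replaces the coarse uncertainties parsed here.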
296  reference_out = reference.replace(".yoda", "_corrs.yoda")
297  reference_outf = open(reference_out, 'w')
298  infiles = {}
299  for f in os.listdir(correlationsDir):
300  if 'submission' in f: continue
301  if 'comb' in f: continue
302  if "syst" in f:
303  infiles.setdefault(correlationsDir + "/" + f.replace('syst', 'xsect'), [])
304  else:
305  infiles.setdefault(correlationsDir + "/" + f, [])
306 
307  for f in infiles.keys():
308  stream = open(f, "r")
309  doc = yaml.load(stream)
310  binLabels = {}
311  binValues = {}
312  binErrors = {}
313  for itemtype, itemarray in doc.items():
314  if itemtype == "independent_variables": # bin labels
315  for line in itemarray:
316  binLabels['name'] = itemarray[0]['header']['name']
317  counter = 0
318  for entry in itemarray[0]['values']:
319  if ('low' in entry.keys() and 'high' in entry.keys()):
320  binLabels[counter] = "%f - %f" % (entry['low'], entry['high'])
321  else:
322  binLabels[counter] = "%f" % (entry['value'])
323  counter += 1
324 
325  if itemtype == "dependent_variables": # values
326  for line in itemarray:
327  binValues['name'] = "%s [%s]" % (line['header']['name'], '')
328  binErrors['name'] = "uncertainties"
329  counter = 0
330  for entry in line['values']:
331  binValues[counter] = entry['value']
332  binErrors[counter] = {}
333  if 'errors' in entry.keys():
334  for error in entry['errors']:
335  binErrors[counter][error['label']] = {}
336  if 'symerror' in error.keys():
337  if not isFloat(error['symerror']) and '%' in (error['symerror']):
338  binErrors[counter][error['label']]['up'] = binValues[counter] * 0.01 * float(error['symerror'].replace('%', ''))
339  binErrors[counter][error['label']]['dn'] = binValues[counter] * -0.01 * float(error['symerror'].replace('%', ''))
340  else:
341  binErrors[counter][error['label']]['up'] = float(error['symerror'])
342  binErrors[counter][error['label']]['dn'] = -1 * float(error['symerror'])
343  elif 'asymerror' in error.keys():
344  if not isFloat(error['asymerror']['plus']) and '%' in error['asymerror']['plus']:
345  binErrors[counter][error['label']]['up'] = binValues[counter] * 0.01 * float(error['asymerror']['plus'].replace('%', ''))
346  binErrors[counter][error['label']]['dn'] = binValues[counter] * 0.01 * float(error['asymerror']['minus'].replace('%', ''))
347  else:
348  binErrors[counter][error['label']]['up'] = float(error['asymerror']['plus'])
349  binErrors[counter][error['label']]['dn'] = 1 * float(error['asymerror']['minus'])
350  else:
351  print('[ERROR] errors are neither symmetric nor asymmetric... exit!')
352  exit(1)
353  counter += 1
354 
355  if 'xsect' in f:
356  systNames = binErrors[0].keys()
357  binErrorsGranular = {}
358  systFile = f.replace("xsect", "syst")
359  streamsyst = open(systFile, "r")
360  docsyst = yaml.load(streamsyst)
361  binErrorsGranular['name'] = "uncertainties"
362  for itemtype, itemarray in docsyst.items():
363  if "variables" not in itemtype: continue
364  if (itemtype == "independent_variables"):
365  systNames = [v['value'] for v in itemarray[0]['values']]
366  else:
367  binCounter = -1
368  for line in itemarray:
369  binCounter += 1
370  binErrorsGranular[binCounter] = {}
371  if 'values' not in line.keys(): continue
372  systCounter = -1
373  for error in line['values']:
374  systCounter += 1
375  binErrorsGranular[binCounter][systNames[systCounter]] = {}
376  binErrorsGranular[binCounter][systNames[systCounter]]['up'] = float(error['value']) * binValues[binCounter] * 0.01
377  binErrorsGranular[binCounter][systNames[systCounter]]['dn'] = -1 * float(error['value']) * binValues[binCounter] * 0.01
378  for i in range(len(binErrorsGranular.keys()) - 1):
379  binErrorsGranular[i]['stat'] = {}
380  binErrorsGranular[i]['stat']['up'] = binErrors[i]['stat']['up']
381  binErrorsGranular[i]['stat']['dn'] = binErrors[i]['stat']['dn']
382  binErrors = binErrorsGranular
383  nBins = len(binLabels) - 1
384  infiles[f] = [binValues, binErrors]
385 
386  hists = yoda.read(reference, unpatterns=".*RAW.*")
387  for name in hists:
388  nearestMatch = ""
389  smallestDiff = 999.
390  nearestMatchErrors = ""
391  nominal = [(hists[name].points()[i].y) for i in range(hists[name].numPoints())]
392  for f, corrs in infiles.items():
393  binValues = corrs[0]
394  binErrors = corrs[1]
395  nBins = len(binValues) - 1
396  if (len(nominal) != nBins): continue
397  totalDiff = 0
398  for i in range(nBins):
399  totalDiff += abs(binValues[i] - nominal[i])
400  if (totalDiff < smallestDiff):
401  nearestMatch = f
402  smallestDiff = totalDiff
403  nearestMatchErrors = binErrors
404  if (opts.verbosity > 1): print("[DEBUG] candidate", nominal, " nearest match is ", nearestMatch)
405  if smallestDiff < 1.0:
406  if (opts.verbosity > 1): print("[INFO] mapping", name, " ---> ", nearestMatch, (smallestDiff))
407  corrs = yaml.dump(nearestMatchErrors, default_flow_style=True, default_style='', width=1e6)
408  hists[name].setAnnotation("ErrorBreakdown", corrs)
409  yoda.writeYODA(hists[name], reference_outf)
410  else:
411  print("[WARNING] Warning, no match found for", name)
412  print("[INFO] %s has been marked up with correlation info from %s in this file:" % (reference, correlationsDir))
413  print("[INFO] ---> %s " % (reference_out))
414  print("[INFO] next time, you can use this file as an input instead of ", reference)
415  reference_outf.close()
416  return reference_out
417 
418 
419 def markupYODAwithCorrelations(ref, corrDir, ignoreCorrelations):
420  # produce a new YODA file where the AOs have been marked up with
421  # covariance info from an uncertainty breakdown provided as
422  # either YAML or YODA files
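 # If correlations are to be ignored, the reference file itself is used ('self'), so only its
 # total uncertainties are propagated; otherwise the format of the files in corrDir
 # (YODA vs YAML) decides which markup routine is called.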
423  isYoda = False
424  if ignoreCorrelations:
425  return markupYODAwithCorrelationsFromYODAs(ref, 'self')
426  else:
427  if (corrDir is None): return ref
428  for f in os.listdir(corrDir):
429  if '.yoda' in f: isYoda = True
430  if isYoda:
431  return markupYODAwithCorrelationsFromYODAs(ref, corrDir)
432  else:
433  return markupYODAwithCorrelationsFromYAML(ref, corrDir)
434 
435 
436 def returnFileContents(filename, inputType='data'):
437  result = []
438  aos = yoda.read(filename, unpatterns=".*RAW.*")
439  for aopath, ao in aos.items():
440  aoType = type(ao)
441  if aoType in [yoda.Histo1D, yoda.Histo2D]: ao = ao.mkScatter()
442  if aoType in [yoda.Counter]: continue
443  if 'all' not in opts.filter and inputType == 'data':
444  for f in opts.filter:
445  if f in aopath:
446  result.append(ao)
447  else:
448  result.append(ao)
449 
450  covInfoMissing = []
451  for ao in result:
452  corr = ao.annotation("ErrorBreakdown")
453  if (corr is None):
454  covInfoMissing.append(ao)
455  if (len(covInfoMissing) and inputType == 'data' and not opts.ignore_corrs and not opts.corr_data):
456  print("[WARNING] These DATA analysis objects are missing the correlation/covariance information:", covInfoMissing)
457  print(" --> To add correlation information, you can use e.g. the options --get_corr_from_web or --corr_data ")
458  print(" --> You can also proceed by adding the --ignore_corrs_data in which case the chi2 is calculated")
459  print(" --> from the total uncertainties and assumed to be bin-bin uncorrelated.")
460  inspireID = [t for t in opts.data.split("_") if "I" in t]
461  inspireID = inspireID[0].replace("I", "")
462  if os.path.isdir("corrInfo/ins%s" % inspireID):
463  print("[INFO] Attempting to use the information in corrInfo/ins%s to build correlation info " % inspireID)
464  opts.corr_data = "corrInfo/ins%s" % inspireID
465  else:
467 
468  if (len(covInfoMissing) and inputType == 'mc' and not opts.ignore_corrs and not opts.corr_mc):
469  print("[WARNING] These MC analysis objects are missing the correlation/covariance information:", covInfoMissing)
470  print(" --> To add correlation information, you can use e.g. the option --corr_mc ")
471  print(" --> You can also proceed by adding the --ignore_corrs_mc in which case the chi2 is calculated")
472  print(" --> from the total uncertainties and assumed to be bin-bin uncorrelated.")
473  if os.path.isdir("corrInfoMC/%s" % filename.replace(".yoda", "")):
474  print("[INFO] Attempting to use the information in corrInfoMC/%s to build correlation info " % filename.replace(".yoda", ""))
475  opts.corr_mc = "corrInfoMC/%s" % filename.replace(".yoda", "")
476  return result
477 
478 
479 def getCorrelationInfoFromWeb(name=None, dlformat="yoda"):
480  # try to get the YAML files for the corresponding HEPData record
481  # download and untar them...
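 # The Inspire ID is parsed from the file name (the token containing 'I'), and the record is
 # fetched from hepdata.net as a gzipped tarball in the requested format ('yoda' or 'yaml');
 # the extracted files are placed under corrInfo/ins<ID>/ for later use via --corr_data.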
482  if (name is None): name = opts.data
483  inspireID = [t for t in name.split("_") if "I" in t]
484  inspireID = inspireID[0].replace("I", "")
485  hdurl = "http://www.hepdata.net/record/ins%s?format=%s" % (inspireID, dlformat)
486  print("[INFO] Trying to download information from %s" % hdurl)
487  if hdurl:
488  response = urllib2.urlopen(hdurl)
489  download = response.read()
490  if not download or "<html" in download:
491  print(("Problem encountered when getting data from HepData (%s). No reference data file written." % hdurl))
492  else:
493  tar = tarfile.open(mode="r:gz", fileobj=StringIO.StringIO(download))
494  fnames = tar.getnames()
495  if len(fnames) > 0 and dlformat == 'yaml':
496  tar.extractall()
497  os.system("mkdir -p corrInfo/ins%s" % (inspireID))
498  os.system("mv %s/* corrInfo/ins%s/." % (fnames[0], inspireID))
499  os.system("rm -r %s" % (fnames[0]))
500  print("[INFO] successfully downloaded YAML files from HEPData, will see if they can be used for correlations")
501  if len(fnames) == 1 and dlformat == 'yoda':
502  tar.extractall()
503  os.system("mkdir -p corrInfo/ins%s" % (inspireID))
504  os.system("mv %s corrInfo/ins%s/." % (fnames[0], inspireID))
505  print("[INFO] successfully downloaded YODA files from HEPData, will see if they can be used for correlations")
506  opts.corr_data = "corrInfo/ins%s" % inspireID
507  response.close()
508 
509 
510 if __name__ == "__main__":
511 
512  opts.ntoys = int(opts.ntoys)
513  opts.verbosity = int(opts.verbosity)
514  opts.filter = opts.filter.split(',')
515  if opts.ignore_corrs:
516  opts.ignore_corrs_data = True
517  opts.ignore_corrs_mc = True
518 
519  mcNames = {}
520  plotDictionary = {'error-analysis': {}, 'covariance-matrix': {}, 'correlation-matrix': {}, 'data-vs-mc': {}, 'chi2-contribs': {}, 'chi2-value': {}, 'covDetails': {}, 'summary-plot': {}, 'summary-table': {}}
521  # access the data and mc histograms
522  if (opts.corr_data is None and opts.get_corr_from_web):
523  getCorrelationInfoFromWeb(opts.get_corr_from_web)
524  if opts.data == "":
525  pass
526  else:
527  histograms['data'] = returnFileContents(opts.data, 'data')
528  if opts.corr_data is not None:
529  opts.data = markupYODAwithCorrelations(opts.data, opts.corr_data, opts.ignore_corrs_data)
530  # if there are correlations to add, do so and then get the aos from the new file
531  histograms['data'] = returnFileContents(opts.data, 'data')
532  outdir = "outputs/%s/data/plots" % (opts.data.replace(".yoda", ""))
533  for hdata in histograms['data']:
534  if not ct.hasValidErrorBreakdown(hdata): continue
535  os.system("mkdir -p %s" % outdir)
536  plotparser = rivet.mkStdPlotParser([], [])
537  headers = plotparser.getHeaders(hdata.path())
538  if 'XLabel' in headers.keys(): XLabel = headers['XLabel']
539  else: XLabel = None
540  if 'Title' in headers.keys(): Title = headers['Title']
541  else: Title = None
542  ct.plotVariations(hdata, outdir, 'data-%s' % hdata.name(), 0.05, xLabel=XLabel, title=Title)
543  plotDictionary['error-analysis'].setdefault('data', {})
544  plotDictionary['error-analysis']['data'][hdata.name()] = outdir + '/data-%s-variations.pdf' % hdata.name()
545  print("[INFO] See plots of data error analysis here:", outdir)
546  if 'data' in plotDictionary['error-analysis'].keys():
547  for k, v in plotDictionary['error-analysis']['data'].items():
548  print(v)
549 
550  for mc in opts.mc.split(","):
551  if mc == "": continue
552  if ":" in mc:
553  mcName = mc.split(":")[1]
554  mc = mc.split(":")[0]
555  else:
556  mcName = mc.split("/")[-1].replace(".yoda", "")
557  histograms['models'][mc] = returnFileContents(mc, 'mc')
558  mcnew = markupYODAwithCorrelations(mc, opts.corr_mc, opts.ignore_corrs_mc)
559  opts.mc = opts.mc.replace(mc, mcnew)
560  # if there are correlations to add, do so and then get the aos from the new file
561  histograms['models'].pop(mc, None)
562  histograms['models'][mcnew] = returnFileContents(mcnew, 'mc')
563  if (opts.data != ''):
564  aoMap = matchAOs(histograms['data'], histograms['models'][mcnew])
565  newmc = renameAOs(mcnew, aoMap)
566  else: newmc = mc
567  opts.mc = opts.mc.replace(mc, newmc)
568  histograms['models'].pop(mcnew, None)
569  histograms['models'][newmc] = returnFileContents(newmc, 'mc')
570  mcNames[newmc] = mcName
571  for hmc in histograms['models'][newmc]:
572  outdir = "outputs/%s/%s/plots" % (opts.data.replace(".yoda", ""), mcName)
573  os.system("mkdir -p %s" % outdir)
574  if not (opts.ignore_errs_mc):
575  ct.plotVariations(hmc, outdir, '%s-%s' % (mcName, hmc.name()), 0.01)
576  plotDictionary['error-analysis'].setdefault(mcName, {})
577  plotDictionary['error-analysis'][mcName][hmc.name()] = outdir + '/%s-%s-variations.pdf' % (mcName, hmc.name())
578  if not (opts.ignore_errs_mc):
579  print("[INFO] See plots of %s variations here" % mcName, outdir + "/validation-plots/")
580  for k, v in plotDictionary['error-analysis'][mcName].items():
581  print(v)
582 
583  # make the super AOs
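 # With --make_super_aos, all selected measurements are concatenated into a single "super" AO
 # (for data and for each MC sample) so that a global chi2 can be computed from a combined
 # covariance matrix spanning every bin of every distribution.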
584  if (opts.make_super_aos):
585  dataSuperAO = ct.makeSuperAO(histograms['data'])
586  histograms['data'].append(dataSuperAO)
587  yoda.writeYODA(dataSuperAO, opts.data.replace('.yoda', '.super-ao.yoda'))
588  for mc in histograms['models'].keys():
589  mcSuperAO = ct.makeSuperAO(histograms['models'][mc])
590  histograms['models'][mc].append(mcSuperAO)
591  yoda.writeYODA(mcSuperAO, mc.replace('.yoda', '.super-ao.yoda'))
592 
593  # Check that both data and MC histograms are available before proceeding
594  if (len(opts.data) == 0):
595  print("[INFO] No data histograms to be processed. Cannot make comparisons, so exiting.")
596  exit(1)
597  if (len(opts.mc) == 0):
598  print("[INFO] No MC histograms to be processed. Cannot make comparisons, so exiting.")
599  exit(1)
600  for hdata in sorted(histograms['data']):
601  if not ct.hasValidErrorBreakdown(hdata): continue
602  mcResults = {}
603  if not ("all" in opts.filter):
604  passFilter = False
605  for fil in opts.filter:
606  if fil in hdata.name(): passFilter = True
607  if not (passFilter or 'superAO' in hdata.name()): continue
608  for model in histograms['models']:
609  aoMap = matchAOs(histograms['data'], histograms['models'][model])
610  hmc = aoMap[hdata]
611  mcName = mcNames[model]
612  outdir = "outputs/%s" % (opts.data.replace(".yoda", ""))
613  os.system("mkdir -p %s/data-vs-%s/plots" % (outdir, mcName))
614 
615 
616  # produce covariance matrix for data
617  # covDetailsData = "Data Covariance Matrix"
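 # The covariance matrix is built from the ErrorBreakdown annotation: by default it comes from
 # direct propagation when the uncertainties are symmetric and from pseudo-experiments (toys)
 # when they are asymmetric; --use_toys / --use_direct force one method or the other.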
618  covDetailsData = "Size of uncertainties across range, and Data Correlation Matrix"
619  if ((opts.use_toys or ct.hasAsymmetricErrors(hdata)) and not opts.use_direct):
620  print("[WARNING] Data has asymmetric errors, producing covariance matrix from %d toys" % opts.ntoys)
621  covData = ct.makeCovarianceMatrixFromToys(hdata, opts.ntoys, opts.ignore_corrs_data)
622  covDetailsData += " produced from %d toys%s" % (opts.ntoys, ", ignoring correlations" if opts.ignore_corrs_data else "")
623  else:
624  covData = ct.makeCovarianceMatrix(hdata, opts.ignore_corrs_data)
625  covDetailsData += " produced from direct propagation%s" % (", ignoring correlations" if opts.ignore_corrs_data else "")
626  plotparser = rivet.mkStdPlotParser([], [])
627  headers = plotparser.getHeaders(hdata.path())
628  if 'XLabel' in headers.keys(): XLabel = headers['XLabel']
629  else: XLabel = None
630  ct.drawMatrix(covData, outdir + "/data/plots/data-%s-covariance-matrix" % hdata.name(), xLabel=XLabel)
631  plotDictionary['covariance-matrix'].setdefault('data', {})
632  plotDictionary['covariance-matrix']['data'][hdata.name()] = outdir + "/data/plots/data-%s-covariance-matrix.pdf" % hdata.name()
633  ct.drawMatrix(ct.correlationMatrix(covData), outdir + "/data/plots/data-%s-correlation-matrix" % hdata.name())
634  plotDictionary['correlation-matrix'].setdefault('data', {})
635  plotDictionary['correlation-matrix']['data'][hdata.name()] = outdir + "/data/plots/data-%s-correlation-matrix.pdf" % hdata.name()
636 
637  # produce covariance matrix for MC
638  covDetailsMC = "MC Covariance Matrix"
639  if ((opts.use_toys or ct.hasAsymmetricErrors(hmc)) and not opts.use_direct):
640  print("[WARNING] MC has asymmetric errors, producing covariance matrix from %d toys" % opts.ntoys)
641  covMC = ct.makeCovarianceMatrixFromToys(hmc, opts.ntoys, opts.ignore_corrs_mc)
642  covDetailsMC += " produced from %d toys%s" % (opts.ntoys, ", ignoring correlations" if opts.ignore_corrs_mc else "")
643  else:
644  covMC = ct.makeCovarianceMatrix(hmc, opts.ignore_corrs_mc)
645  covDetailsMC += " produced from direct propagation%s" % (", ignoring correlations" if opts.ignore_corrs_mc else "")
646  ct.drawMatrix(covMC, outdir + "/%s/plots/%s-%s-covariance-matrix" % (mcName, mcName, hdata.name()))
647  plotDictionary['covariance-matrix'].setdefault(mcName, {})
648  plotDictionary['covariance-matrix'][mcName][hmc.name()] = outdir + "/%s/plots/%s-%s-covariance-matrix.pdf" % (mcName, mcName, hmc.name())
649  ct.drawMatrix(ct.correlationMatrix(covMC), outdir + "/%s/plots/%s-%s-correlation-matrix" % (mcName, mcName, hmc.name()))
650  plotDictionary['correlation-matrix'].setdefault(mcName, {})
651  plotDictionary['correlation-matrix'][mcName][hmc.name()] = outdir + "/%s/plots/%s-%s-correlation-matrix.pdf" % (mcName, mcName, hmc.name())
652 
653  # sum covariance matrices
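 # The data and MC covariance matrices are added before the chi2 is evaluated; inside
 # ct.calculateChi2 this presumably corresponds to chi2 = d^T (C_data + C_MC)^{-1} d, with d the
 # vector of data-minus-MC differences. --ignore_errs_data / --ignore_errs_mc drop one contribution.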
654  covTotal = covData + covMC
655  if opts.ignore_errs_data and opts.ignore_errs_mc:
656  print("[ERROR] cannot ignore both data errors and MC errors at the same time!")
657  exit(1)
658  elif opts.ignore_errs_data:
659  covTotal = covMC
660  covDetailsData = "Ignoring Data uncertainties (MC cov matrix only)"
661  elif opts.ignore_errs_mc:
662  covTotal = covData
663  covDetailsMC = "Ignoring MC uncertainties (Data cov matrix only)"
664  covDetailsData += ". Ignoring MC uncertainties"
665  plotDictionary['covDetails'].setdefault("%s" % mcName, {})
666  plotDictionary['covDetails'].setdefault("data", {})
667  plotDictionary['covDetails']["data"][hdata.name()] = covDetailsData
668  plotDictionary['covDetails'][mcName][hmc.name()] = covDetailsMC
669 
670  ct.drawMatrix(covMC, outdir + "/data-vs-%s/plots/data-vs-%s-%s-covariance-matrix" % (mcName, mcName, hdata.name()))
671  plotDictionary['covariance-matrix'].setdefault("data-vs-%s" % mcName, {})
672  plotDictionary['covariance-matrix']["data-vs-%s" % mcName][hmc.name()] = outdir + "/data-vs-%s/plots/data-vs-%s-%s-covariance-matrix.pdf" % (mcName, mcName, hdata.name())
673  ct.drawMatrix(ct.correlationMatrix(covMC), outdir + "/data-vs-%s/plots/data-vs-%s-%s-correlation-matrix" % (mcName, mcName, hdata.name()))
674  plotDictionary['correlation-matrix'].setdefault("data-vs-%s" % mcName, {})
675  plotDictionary['correlation-matrix']["data-vs-%s" % mcName][hmc.name()] = outdir + "/data-vs-%s/plots/data-vs-%s-%s-correlation-matrix.pdf" % (mcName, mcName, hdata.name())
676 
677 
678  chi2, ndf, prob, chi2contribs = ct.calculateChi2(hdata, hmc, covTotal)
679  ct.drawMatrix(chi2contribs, outdir + "/data-vs-%s/plots/data-vs-%s-%s-chi2-contribs" % (mcName, mcName, hdata.name()))
680  plotDictionary['chi2-contribs'].setdefault("data-vs-%s" % mcName, {})
681  plotDictionary['chi2-contribs']["data-vs-%s" % mcName][hmc.name()] = outdir + "/data-vs-%s/plots/data-vs-%s-%s-chi2-contribs.pdf" % (mcName, mcName, hdata.name())
682 
683  chi2ContribsByRow = ct.chi2ContribsByRow(chi2contribs)
684  chi2ContribsByRowYAML = yaml.dump(chi2ContribsByRow, default_flow_style=True, default_style='', width=1e6)
685  hmc.setAnnotation("Chi2Contribs", chi2ContribsByRowYAML)
686  ct.updateAOinYODA(hmc, model)
687  plotDictionary['chi2-value'].setdefault("data-vs-%s" % mcName, {})
688  plotDictionary['chi2-value']["data-vs-%s" % mcName][hmc.name()] = "$%.2f / %d$" % (chi2, ndf)
689 
690 
691  mcResults[model] = [chi2, ndf]
692  print("[INFO] %s:%s vs %s:%s --> chi2=%.2f/%d (prob=%.2f) " % (opts.data, hdata.name(), model, hmc.name(), chi2, ndf, prob))
693 
694  if "superAO" in hdata.name(): continue
695 
696  res = {'%s' % opts.data: '[Data]', '%s' % model: '[%s (# chi^2=%.2f/%d)]' % (mcName, chi2, ndf)}
697  outdirplots = outdir + "/data-vs-%s/plots/" % mcName
698  plots = st.makeSystematicsPlotsWithROOT(res, outdirplots, nominalName='Data', ratioZoom=None, regexFilter=".*%s.*" % hmc.name(), regexVeto=None)
699 
700  plotDictionary['data-vs-mc'].setdefault("data-vs-%s" % mcName, {})
701  plotDictionary['data-vs-mc']["data-vs-%s" % mcName][hmc.name()] = plots[0]
702 
703  outdir = "outputs/%s/summary" % (opts.data.replace(".yoda", ""))
704  pathName = hdata.path().replace("/REF", "")
705 
706  res = {'%s' % opts.data: '[Data]'}
707  for model in histograms['models']:
708  mcName = mcNames[model]
709  mcR = mcResults[model]
710  res[model] = '[%s (# chi^2=%.2f/%d)]' % (mcName, mcR[0], mcR[1])
711  if "superAO" in hdata.name(): continue
712 
713  if "superAO" in hdata.name(): continue
714  plots = st.makeSystematicsPlotsWithROOT(res, outdir, nominalName='Data', ratioZoom=None, regexFilter=".*%s.*" % hmc.name(), regexVeto=None)
715  plotDictionary['summary-plot'][hmc.name()] = plots[0]
716  makeSummaryTable(plotDictionary, outdir + "/summary_%s.tex" % hdata.name(), hdata.name())
717  plotDictionary['summary-table'][hmc.name()] = outdir + "/summary_%s.tex" % hdata.name()
718 
719  print("[INFO] output rivet files with Chi2: %s" % outdir)
720  os.system('ls %s/* | grep pdf' % outdir)
721  if opts.slides:
722  # having looped through all data:
723  outdir = "outputs/%s/summary" % (opts.data.replace(".yoda", ""))
724  if len(plotDictionary['chi2-value'].keys()) == 0:
725  print("[WARNING] no histograms were processed, could not make summary slides! Exit")
726  exit(1)
727  else:
728  makeSummaryTable(plotDictionary, outdir + "/summary_all.tex")
729  plotDictionary['summary-table']['all'] = outdir + "/summary_all.tex"
730  beamerPath = makeBeamer(plotDictionary, outdir)
731  os.system('rm slides_*')
732  os.system('pdflatex %s > /dev/null' % beamerPath)
733  os.system('pdflatex %s > /dev/null' % beamerPath)
734  beamerPath = beamerPath.replace("tex", "pdf").split("/")[-1]
735  print("[INFO] your summary slides are here: \n", beamerPath)