ATLAS Offline Software
covarianceTool.py
1#!/usr/bin/env python3
2# Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3
4"""
5The `covarianceTool.py` executable is a helper which uses the functions provided by `covarianceToolsLibrary.py` to perform the
6comparison between MC predictions and HEPData records.
7
8However, the library `local/bin/covarianceToolsLibrary.py` can also be used directly to manipulate covariance info and evaluate goodness of fit.
9Examples of how to do this are provided in the `examples` subdirectory:
10- `examples/covarianceTool-examples/README.md`
11
12The first thing to do is to check that you can access the `covarianceTool.py` executable and see the available options:
13```
14covarianceTool.py -h
15```
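
A typical comparison run might then look like the following (the file names and the Inspire ID here are purely illustrative; `--mc` takes a comma-separated list with optional `path:'Label'` labels, as described in the options below):
```
covarianceTool.py --data ATLAS_2017_I1514251.yoda --mc myPrediction.yoda:'MyGenerator' --get_corr_from_web I1514251
```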
16
17Author: Louie D. Corpe (CERN)
18Email: l.corpe@cern.ch
19"""
20import os
21import yaml
22import yoda
23import rivet
24import optparse
25import urllib.request
26import tarfile
27import io
28import systematicsTool as st
29import covarianceToolsLibrary as ct
30
31parser = optparse.OptionParser(usage="%prog [options]")
32parser.add_option("--data", help="Input yoda file for data [%default]", dest="data", default="")
33parser.add_option("--mc", help="Input yoda files for mc (comma separarated list), specify mc label with ':', eg mc1path:'MC1', mc2path:'MC2'. If no label is specified, the name of the yoda file is used as a label [%default]", dest="mc", default="")
34parser.add_option("--use_toys", help="Force the covariance matrix to be produced with toys. By default the cov matrix is produced by direct propagation is errors are symmetric and with toys if errors are asymmetric. [%default]", dest="use_toys", default=False, action='store_true')
35parser.add_option("--ntoys", help="number of toys to throw if cov matrix produced from toys [%default]", dest="ntoys", default=1000)
36parser.add_option("--use_direct", help="Force the covariance matrix to be produced via direct propagation (the errors are symmetrized if needed). By default the cov matrix is produced by direct propagation if errors are symmetric and with toys if errors are asymmetric. [%default]", dest="use_direct", default=False, action='store_true')
37parser.add_option("--corr_data", help="Input file/dir to build the covariance matrices (comma separarated list) [%default]", dest="corr_data", default=None)
38parser.add_option("--corr_mc", help="Input file/dir to build the covariance matrices (comma separarated list) [%default]", dest="corr_mc", default=None)
39parser.add_option("--ignore_corrs", help="ignore correlations in both data and mc and just assume total error is uncorrelated between bins [%default]", dest="ignore_corrs", default=False, action='store_true')
40parser.add_option("--ignore_corrs_mc", help="Ignore correlations in the MC [%default]", dest="ignore_corrs_mc", default=False, action='store_true')
41parser.add_option("--ignore_corrs_data", help="Ignore correlations in theData [%default]", dest="ignore_corrs_data", default=False, action='store_true')
42parser.add_option("--ignore_errs_data", help="Ignore all errors in the data, and use only MC cov matrix [%default]", dest="ignore_errs_data", default=False, action='store_true')
43parser.add_option("--ignore_errs_mc", help="Ignore all erros in the MC, and use only the data cov matrix [%default]", dest="ignore_errs_mc", default=False, action='store_true')
44parser.add_option("--get_corr_from_web", help="Try to get correlation info from HEPMC record. Specify the inspire ID eg I1514251 [%default]", dest="get_corr_from_web", default=None)
45parser.add_option("--filter", help="Only process a subset of histograms (comma separated) [%default]", dest="filter", default="all")
46parser.add_option("-v", "--verbosity", help="Verbosity level (0-9) [%default]", dest="verbosity", default=0)
47parser.add_option("--make_super_aos", help="Make super ao of all measuremnts passing --filter, and calculate the chi2 for all measurements using a super-cov-matrix [%default]", dest="make_super_aos", default=False, action='store_true')
48parser.add_option("--slides", help="Make summary slides [%default]", dest="slides", default=False, action='store_true')
49(opts, args) = parser.parse_args()
50
51os.environ['TERM'] = 'linux'
52
53histograms = {
54 'data': None,
55 'covariance': {},
56 'models': {},
57}
58
59
60def isFloat(x):
61 # check if the object can be cast as a float
62 try:
63 float(x)
64 return True
65 except ValueError:
66 return False
67
68
69def convertHistToScatter(hist):  # placeholder, not yet implemented; argument name assumed
70 print("convertHistToScatter: This is a placeholder, need to implement")
71 exit(1)
72
73
74def renameAOs(fin_name, aoMap):
75 # make a new YODA file where the AOs are renamed as per the reference file
76 fout_name = fin_name.replace(".yoda", ".renamed.yoda")
77 fout = open(fout_name, 'w')
78 aos = yoda.read(fin_name, unpatterns=".*RAW.*")
79 for aopath, ao in aos.items():
80 foundMatch = False
81 for ao1, ao2 in aoMap.items():
82 if ao1.name() == ao.name():
83 ao.setAnnotation("Path", ao2.path().replace("REF/", ""))
84 foundMatch = True
85 if ao2.name() == ao.name():
86 ao.setAnnotation("Path", ao1.path().replace("REF/", ""))
87 foundMatch = True
88 if foundMatch: yoda.writeYODA(ao, fout)
89 fout.close()
90 return fout_name
91
92
93def makeBeamer(plotDictionary, outdir):
94 # produce summary slides using beamer
95 frames = []
96 dataDir = os.environ["SYSTTOOLSPATH"] + "/data/"
97 os.system("mkdir -p %s " % outdir)
98 samples = list(plotDictionary['chi2-value'].keys())
99 histos = list(plotDictionary['chi2-value'][samples[0]].keys())
100 os.system('cp %s/title.tex %s/title.tex' % (dataDir, outdir))
101 os.system('cp %s/title.tex %s/title.tex' % (dataDir, outdir))
102 os.system("sed -i -e 's|!TITLE!|%s|g' %s/title.tex" % (opts.data.replace('.yoda', '').replace('_', ' '), outdir))
103 frames.append("%s/title.tex" % outdir)
104 for histo in histos:
105 if 'superAO' in histo: continue
106 os.system('cp %s/section.tex %s/%s_section.tex' % (dataDir, outdir, histo))
107 os.system("sed -i -e 's|!AOREF!|%s|g' %s/%s_section.tex" % (histo, outdir, histo))
108 frames.append('%s/%s_section.tex' % (outdir, histo))
109 os.system('cp %s/errorAnalysis.tex %s/%s_%s_errorAnalysis.tex' % (dataDir, outdir, 'data', histo))
110 os.system("sed -i -e 's|!AOREF!|%s|g' %s/%s_%s_errorAnalysis.tex" % (histo, outdir, 'data', histo))
111 os.system("sed -i -e 's|!SAMPLE!|%s|g' %s/%s_%s_errorAnalysis.tex" % ('data', outdir, 'data', histo))
112 os.system("sed -i -e 's|!ERRORBREAKDOWN!|%s|g' %s/%s_%s_errorAnalysis.tex" % (plotDictionary['error-analysis']['data'][histo], outdir, 'data', histo))
113 os.system("sed -i -e 's|!COVMATRIX!|%s|g' %s/%s_%s_errorAnalysis.tex" % (plotDictionary['correlation-matrix']['data'][histo], outdir, 'data', histo))
114 os.system("sed -i -e 's|!COVDETAILS!|%s|g' %s/%s_%s_errorAnalysis.tex" % (plotDictionary['covDetails']['data'][histo], outdir, 'data', histo))
115 frames.append("%s/%s_%s_errorAnalysis.tex" % (outdir, 'data', histo))
116 for sample in samples:
117 sampleNameClean = sample.replace("_", " ").replace("-", " ")
118 os.system('cp %s/dataMCComparison.tex %s/%s_%s_dataMCComparison.tex' % (dataDir, outdir, sample, histo))
119 os.system("sed -i -e 's|!AOREF!|%s|g' %s/%s_%s_dataMCComparison.tex" % (histo, outdir, sample, histo))
120 os.system("sed -i -e 's|!SAMPLE!|%s|g' %s/%s_%s_dataMCComparison.tex" % (sampleNameClean, outdir, sample, histo))
121 os.system("sed -i -e 's|!DATAVSMC!|%s|g' %s/%s_%s_dataMCComparison.tex" % (plotDictionary['data-vs-mc'][sample][histo], outdir, sample, histo))
122 os.system("sed -i -e 's|!CHI2CONTRIBS!|%s|g' %s/%s_%s_dataMCComparison.tex" % (plotDictionary['chi2-contribs'][sample][histo], outdir, sample, histo))
123 frames.append("%s/%s_%s_dataMCComparison.tex" % (outdir, sample, histo))
124 # summary
125 os.system('cp %s/sectionSummary.tex %s/%s_sectionSummary.tex' % (dataDir, outdir, histo))
126 os.system("sed -i -e 's|!AOREF!|%s|g' %s/%s_sectionSummary.tex" % (histo, outdir, histo))
127 os.system("sed -i -e 's|!SUMMARYPLOT!|%s|g' %s/%s_sectionSummary.tex" % (plotDictionary['summary-plot'][histo], outdir, histo))
128 os.system("sed -i -e 's|!SUMMARYTABLE!|%s|g' %s/%s_sectionSummary.tex" % (plotDictionary['summary-table'][histo], outdir, histo))
129 frames.append("%s/%s_sectionSummary.tex" % (outdir, histo))
130 os.system('cp %s/section.tex %s/%s_section.tex' % (dataDir, outdir, 'overall'))
131 os.system("sed -i -e 's|!AOREF!|%s|g' %s/%s_section.tex" % ('overall summary', outdir, 'overall'))
132 frames.append('%s/%s_section.tex' % (outdir, 'overall'))
133 os.system('cp %s/overallSummary.tex %s/%s_overallSummary.tex' % (dataDir, outdir, 'overall'))
134 os.system("sed -i -e 's|!AOREF!|%s|g' %s/%s_overallSummary.tex" % ('overall summary', outdir, 'overall'))
135 os.system("sed -i -e 's|!SUMMARYTABLE!|%s|g' %s/%s_overallSummary.tex" % (plotDictionary['summary-table']['all'], outdir, 'overall'))
136 frames.append("%s/%s_overallSummary.tex" % (outdir, 'overall'))
137 frames.append("%s/end.tex" % dataDir)
138 catLine = " ".join(frames)
139 os.system(" cat %s > %s/slides_%s.tex" % (catLine, outdir, opts.data.replace('.yoda', '')))
140 return " %s/slides_%s.tex" % (outdir, opts.data.replace('.yoda', ''))
141
142
143def makeSummaryTable(plotDictionary, outfile, hdataName=None):
144 # make a summary latex table of the chi2 values for each AO
145 outf = open(outfile, 'w')
146 samples = list(plotDictionary['chi2-value'].keys())
147 if (hdataName):
148 histos = [hdataName]
149 else:
150 histos = sorted(plotDictionary['chi2-value'][samples[0]].keys())
151
152 line = "\\begin{tabular}{|l|"
153 for hist in histos:
154 line += "c|"
155 line += "}"
156 outf.write(line + '\n')
157 outf.write("\\hline " + '\n')
158 line = " $\\chi^2$ / ndof "
159 for hist in histos:
160 if 'superAO' in hist: hist = 'global agreement'
161 line += " & %s " % hist
162 line += "\\\\"
163 outf.write(line + '\n')
164 outf.write("\\hline" + '\n')
165 for sample in samples:
166 line = " %s " % sample.replace("-", " ").replace("_", " ")
167 for hist in histos:
168 line += " & %s" % plotDictionary['chi2-value'][sample][hist]
169 line += "\\\\"
170 outf.write(line + '\n')
171 outf.write("\\hline" + '\n')
172 outf.write("\\end{tabular}" + '\n')
173 outf.close()
174
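
As a quick orientation for `makeSummaryTable`, here is a minimal sketch of the dictionary it reads (the sample and histogram names are invented): only the `chi2-value` block is used, mapping a sample label to a histogram name to the preformatted chi2/ndof string that is filled in the main block further down.
```python
plotDictionary = {
    'chi2-value': {
        'data-vs-MC1': {'d01-x01-y01': '$12.30 / 10$'},
        'data-vs-MC2': {'d01-x01-y01': '$8.70 / 10$'},
    }
}
# Writes a small LaTeX tabular with one column per histogram and one row per sample
makeSummaryTable(plotDictionary, "summary_all.tex")
```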
175
176def matchAOs(aoList1, aoList2, strategy=0, recursive=True):
177 # try to establish a 1-1 mapping between AOs in two YODA files
178 # eg in case of different naming conventions
179 aoMap = {}
180 for ao1 in aoList1:
181 aoMap.setdefault(ao1, [])
182 for ao2 in aoList2:
183 nBinsMatch = False
184 namesMatch = False
185 namesPartialMatch = False
186 binWidthsMatch = False
187 if (ao2.name() in ao1.name()): namesMatch = True
188 if (ao1.name() in ao2.name()): namesMatch = True
189 ao1NameTokens = ao1.name().split("/")[-1].split("-")
190 ao2NameTokens = ao2.name().split("/")[-1].split("-")
191 if len(list(set(ao1NameTokens).intersection(ao2NameTokens))) >= 2: namesPartialMatch = True
192 if (ao1.numPoints() == ao2.numPoints()):
193 nBinsMatch = True
194 for ipt in range(ao1.numPoints()):
195 ao1BinLabels = "%.2f - %.2f" % (ao1.points()[ipt].xMin(), ao1.points()[ipt].xMax())
196 ao2BinLabels = "%.2f - %.2f" % (ao2.points()[ipt].xMin(), ao2.points()[ipt].xMax())
197 if (ao1BinLabels == ao2BinLabels): binWidthsMatch = True
198 if strategy == 0:
199 if (namesMatch and nBinsMatch and binWidthsMatch):
200 aoMap.setdefault(ao1, []).append(ao2)
201 elif strategy == 1:
202 if (nBinsMatch and binWidthsMatch):
203 aoMap.setdefault(ao1, []).append(ao2)
204 elif strategy == 2:
205 if (nBinsMatch and binWidthsMatch and namesPartialMatch):
206 aoMap.setdefault(ao1, []).append(ao2)
207
208 if (len(aoMap.keys()) == len(aoList1)) and ([len(aoMap[ao]) for ao in aoList1]) == ([1 for ao in aoList1]):
209 if (opts.verbosity > 0): print("[INFO] found 1-1 mapping between aoLists using strategy %d:" % strategy)
210 for ao1 in aoList1:
211 aoMap[ao1] = aoMap[ao1][0]
212 if (opts.verbosity > 1): print("%s --> %s" % (ao1.name(), aoMap[ao1].name()))
213 elif (recursive and strategy < 3):
214 if (opts.verbosity > 0):
215 print("[WARNING] Could not establish 1-1 mapping between aoLists using strategy", strategy, ", try strategy ", strategy + 1)
216 for ao1 in aoList1:
217 if (opts.verbosity > 1): print("%s --> %s" % (ao1.name(), aoMap[ao1]))
218 strategy += 1
219 aoMap = matchAOs(aoList1, aoList2, strategy)
220 else:
221 print("[ERROR] could not match AOS in the Data and MC files. Please make sure the AOs have the same binning/ number of bins (and name if possible!)")
222 print("[ERROR] aos from list 1", aoList1)
223 print("[ERROR] aos from list 2", aoList2)
224 exit(1)
225 return aoMap
226
227
228def markupYODAwithCorrelationsFromYODAs(reference, correlationsDir):
229 # produce a new YODA file where the AOs have been marked up with
230 # covariance info from an uncertainty breakdown provided as
231 # a separate YODA file for each systematic variation
232 reference_out = reference.replace(".yoda", "_corrs.yoda")
233 reference_outf = open(reference_out, 'w')
234 infiles = {}
235 if correlationsDir == 'self':
236 infiles.setdefault(reference, [])
237 else:
238 for f in os.listdir(correlationsDir):
239 if "all" in f: continue
240 infiles.setdefault(correlationsDir + "/" + f, [])
241 nBins = -1
242 binLabels = {}
243 binValues = {}
244 binErrors = {}
245
246 histos = [name for name in yoda.read(reference, unpatterns=".*RAW.*")]
247 for histname in histos:
248 hAll = yoda.read(reference, unpatterns=".*RAW.*")[histname]
249 if type(hAll) is yoda.core.Histo1D: hAll = hAll.mkScatter()
250 for f in infiles.keys():
251 systname = f.split("/")[-1].replace(".yoda", "")
252 h = yoda.read(f, unpatterns=".*RAW.*")[histname]
253 if type(h) is yoda.core.Scatter2D:
254 nBins = h.numPoints()
255 for ipt in range(nBins):
256 binValues[ipt] = h.points()[ipt].y()
257 binLabels[ipt] = "%f - %f" % (h.points()[ipt].xMin(), h.points()[ipt].xMax())
258 errs = h.points()[ipt].yErrs()
259 errAv = (abs(errs[1]) + abs(errs[0])) * 0.5
260 binErrors.setdefault(ipt, {}).setdefault(systname, {})['up'] = errs[1]
261 binErrors.setdefault(ipt, {}).setdefault(systname, {})['dn'] = -1 * errs[0]
262 elif type(h) is yoda.core.Histo1D:
263 nBins = h.numBins
264 for ipt in range(nBins):
265 binValues[ipt] = h.bins()[ipt].sumW()
266 binLabels[ipt] = "%f - %f" % (h.bins()[ipt].xMin(), h.bins()[ipt].xMax())
267 errAv = h.bins()[ipt].sumW2()
268 binErrors.setdefault(ipt, {}).setdefault(systname, {})['up'] = errAv
269 binErrors.setdefault(ipt, {}).setdefault(systname, {})['dn'] = -1 * errAv
270 else: continue
271 if (reference != ""):
272 smallestDiff = 999.
273 for f in reference.split(","):
274 hists = yoda.read(f, unpatterns=".*RAW.*")
275 for name in hists:
276 if not type(hists[name]) is yoda.core.Scatter2D: continue
277 nominal = [(hists[name].points()[i].y()) for i in range(hists[name].numPoints())]
278 if (len(nominal) != nBins): continue
279 totalDiff = 0
280 for i in range(nBins):
281 totalDiff += abs(binValues[i] - nominal[i])
282 if (totalDiff < smallestDiff):
283 smallestDiff = totalDiff
284
285 corrs = yaml.dump(binErrors, default_flow_style=True, default_style='', width=1e6)
286 hAll.setAnnotation("ErrorBreakdown", corrs)
287 yoda.writeYODA(hAll, reference_outf)
288 reference_outf.close()
289 return reference_out
290
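
For reference, the `ErrorBreakdown` annotation written above is a YAML dump of a nested dictionary: bin index -> systematic name -> signed up/down shifts. A minimal sketch of that structure (the systematic names and numbers here are invented):
```python
import yaml

binErrors = {
    0: {'lumi': {'up': 0.12, 'dn': -0.10}, 'stat': {'up': 0.05, 'dn': -0.05}},
    1: {'lumi': {'up': 0.08, 'dn': -0.09}, 'stat': {'up': 0.04, 'dn': -0.04}},
}
# Same dump settings as used above when annotating the analysis object
print(yaml.dump(binErrors, default_flow_style=True, default_style='', width=1e6))
```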
291
292def markupYODAwithCorrelationsFromYAML(reference, correlationsDir):
293 # produce a new YODA file where the AOs have been marked up with
294 # covariance info from an uncertainty breakdown provided as
295 # one or more YAML files from HEPData
296 reference_out = reference.replace(".yoda", "_corrs.yoda")
297 reference_outf = open(reference_out, 'w')
298 infiles = {}
299 for f in os.listdir(correlationsDir):
300 if 'submission' in f: continue
301 if 'comb' in f: continue
302 if "syst" in f:
303 infiles.setdefault(correlationsDir + "/" + f.replace('syst', 'xsect'), [])
304 else:
305 infiles.setdefault(correlationsDir + "/" + f, [])
306
307 for f in infiles.keys():
308 stream = open(f, "r")
309 doc = yaml.safe_load(stream)
310 binLabels = {}
311 binValues = {}
312 binErrors = {}
313 for itemtype, itemarray in doc.items():
314 if itemtype == "independent_variables": # bin labels
315 for line in itemarray:
316 binLabels['name'] = itemarray[0]['header']['name']
317 counter = 0
318 for entry in itemarray[0]['values']:
319 if ('low' in entry.keys() and 'high' in entry.keys()):
320 binLabels[counter] = "%f - %f" % (entry['low'], entry['high'])
321 else:
322 binLabels[counter] = "%f" % (entry['value'])
323 counter += 1
324
325 if itemtype == "dependent_variables": # values
326 for line in itemarray:
327 binValues['name'] = "%s [%s]" % (line['header']['name'], '')
328 binErrors['name'] = "uncertainties"
329 counter = 0
330 for entry in line['values']:
331 binValues[counter] = entry['value']
332 binErrors[counter] = {}
333 if 'errors' in entry.keys():
334 for error in entry['errors']:
335 binErrors[counter][error['label']] = {}
336 if 'symerror' in error.keys():
337 if not isFloat(error['symerror']) and '%' in (error['symerror']):
338 binErrors[counter][error['label']]['up'] = binValues[counter] * 0.01 * float(error['symerror'].replace('%', ''))
339 binErrors[counter][error['label']]['dn'] = binValues[counter] * -0.01 * float(error['symerror'].replace('%', ''))
340 else:
341 binErrors[counter][error['label']]['up'] = float(error['symerror'])
342 binErrors[counter][error['label']]['dn'] = -1 * float(error['symerror'])
343 elif 'asymerror' in error.keys():
344 if not isFloat(error['asymerror']['plus']) and '%' in error['asymerror']['plus']:
345 binErrors[counter][error['label']]['up'] = binValues[counter] * 0.01 * float(error['asymerror']['plus'].replace('%', ''))
346 binErrors[counter][error['label']]['dn'] = binValues[counter] * 0.01 * float(error['asymerror']['minus'].replace('%', ''))
347 else:
348 binErrors[counter][error['label']]['up'] = float(error['asymerror']['plus'])
349 binErrors[counter][error['label']]['dn'] = 1 * float(error['asymerror']['minus'])
350 else:
351 print('[ERROR] errors are neither symmetric nor asymmetric... exit!')
352 exit(1)
353 counter += 1
354
355 if 'xsect' in f:
356 systNames = binErrors[0].keys()
357 binErrorsGranular = {}
358 systFile = f.replace("xsect", "syst")
359 streamsyst = open(systFile, "r")
360 docsyst = yaml.safe_load(streamsyst)
361 binErrorsGranular['name'] = "uncertainties"
362 for itemtype, itemarray in docsyst.items():
363 if "variables" not in itemtype: continue
364 if (itemtype == "independent_variables"):
365 systNames = [v['value'] for v in itemarray[0]['values']]
366 else:
367 binCounter = -1
368 for line in itemarray:
369 binCounter += 1
370 binErrorsGranular[binCounter] = {}
371 if 'values' not in line.keys(): continue
372 systCounter = -1
373 for error in line['values']:
374 systCounter += 1
375 binErrorsGranular[binCounter][systNames[systCounter]] = {}
376 binErrorsGranular[binCounter][systNames[systCounter]]['up'] = float(error['value']) * binValues[binCounter] * 0.01
377 binErrorsGranular[binCounter][systNames[systCounter]]['dn'] = -1 * float(error['value']) * binValues[binCounter] * 0.01
378 for i in range(len(binErrorsGranular.keys()) - 1):
379 binErrorsGranular[i]['stat'] = {}
380 binErrorsGranular[i]['stat']['up'] = binErrors[i]['stat']['up']
381 binErrorsGranular[i]['stat']['dn'] = binErrors[i]['stat']['dn']
382 binErrors = binErrorsGranular
383 nBins = len(binLabels) - 1
384 infiles[f] = [binValues, binErrors]
385
386 hists = yoda.read(reference, unpatterns=".*RAW.*")
387 for name in hists:
388 nearestMatch = ""
389 smallestDiff = 999.
390 nearestMatchErrors = ""
391 nominal = [(hists[name].points()[i].y()) for i in range(hists[name].numPoints())]
392 for f, corrs in infiles.items():
393 binValues = corrs[0]
394 binErrors = corrs[1]
395 nBins = len(binValues) - 1
396 if (len(nominal) != nBins): continue
397 totalDiff = 0
398 for i in range(nBins):
399 totalDiff += abs(binValues[i] - nominal[i])
400 if (totalDiff < smallestDiff):
401 nearestMatch = f
402 smallestDiff = totalDiff
403 nearestMatchErrors = binErrors
404 if (opts.verbosity > 1): print("[DEBUG] candidate", nominal, " nearest match is ", nearestMatch)
405 if smallestDiff < 1.0:
406 if (opts.verbosity > 1): print("[INFO] mapping", name, " ---> ", nearestMatch, (smallestDiff))
407 corrs = yaml.dump(nearestMatchErrors, default_flow_style=True, default_style='', width=1e6)
408 hists[name].setAnnotation("ErrorBreakdown", corrs)
409 yoda.writeYODA(hists[name], reference_outf)
410 else:
411 print("[WARNING] Warning, no match found for", name)
412 print("[INFO] %s has been marked up with correlation info from %s in this file:" % (reference, correlationsDir))
413 print("[INFO] ---> %s " % (reference_out))
414 print("[INFO] next time, you can use this file as an input instead of ", reference)
415 reference_outf.close()
416 return reference_out
417
418
419def markupYODAwithCorrelations(ref, corrDir, ignoreCorrelations):
420 # produce a new YODA file where the AOs have been marked up with
421 # covariance info from an uncertainty breakdown provided as
422 # either YAML or YODA files
423 isYoda = False
424 if ignoreCorrelations:
425 return markupYODAwithCorrelationsFromYODAs(ref, 'self')
426 else:
427 if (corrDir is None): return ref
428 for f in os.listdir(corrDir):
429 if '.yoda' in f: isYoda = True
430 if isYoda:
431 return markupYODAwithCorrelationsFromYODAs(ref, corrDir)
432 else:
433 return markupYODAwithCorrelationsFromYAML(ref, corrDir)
434
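
A short usage sketch for this dispatcher, mirroring how it is called from the main block below (the file and directory names are illustrative):
```python
# Returns the path of a new *_corrs.yoda file with ErrorBreakdown annotations,
# or the original path if no correlation directory is given.
marked_up = markupYODAwithCorrelations("data.yoda", "corrInfo/ins1514251", False)
```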
435
436def returnFileContents(filename, inputType='data'):
437 result = []
438 aos = yoda.read(filename, unpatterns=".*RAW.*")
439 for aopath, ao in aos.items():
440 aoType = type(ao)
441 if aoType in [yoda.Histo1D, yoda.Histo2D]: ao = ao.mkScatter()
442 if aoType in [yoda.Counter]: continue
443 if 'all' not in opts.filter and inputType == 'data':
444 for f in opts.filter:
445 if f in aopath:
446 result.append(ao)
447 else:
448 result.append(ao)
449
450 covInfoMissing = []
451 for ao in result:
452 corr = ao.annotation("ErrorBreakdown")
453 if (corr is None):
454 covInfoMissing.append(ao)
455 if (len(covInfoMissing) and inputType == 'data' and not opts.ignore_corrs and not opts.corr_data):
456 print("[WARNING] These DATA analysis objects are missing the correlation/covariance information:", covInfoMissing)
457 print(" --> To add correlation information, you can use the eg options: --get_corr_from_web, --corr_data ")
458 print(" --> You can also proceed by adding the --ignore_corrs_data in which case the chi2 is calculated")
459 print(" --> from the total uncertainties and assumed to be bin-bin uncorrelated.")
460 inspireID = [t for t in opts.data.split("_") if "I" in t]
461 inspireID = inspireID[0].replace("I", "")
462 if os.path.isdir("corrInfo/ins%s" % inspireID):
463 print("[INFO] Attempting to use the information in corrInfo/ins%s to build correlation info " % inspireID)
464 opts.corr_data = "corrInfo/ins%s" % inspireID
465 else:
466 getCorrelationInfoFromWeb()  # assumed fallback: try to fetch the correlation info from HEPData
467
468 if (len(covInfoMissing) and inputType == 'mc' and not opts.ignore_corrs and not opts.corr_mc):
469 print("[WARNING] These MC analysis objects are missing the correlation/covariance information:", covInfoMissing)
470 print(" --> To add correlation information, you can use the eg options: --corr_mc ")
471 print(" --> You can also proceed by adding the --ignore_corrs_mc in which case the chi2 is calculated")
472 print(" --> from the total uncertainties and assumed to be bin-bin uncorrelated.")
473 if os.path.isdir("corrInfoMC/%s" % filename.replace(".yoda", "")):
474 print("[INFO] Attempting to use the information in corrInfoMC/%s to build correlation info " % filename.replace(".yoda", ""))
475 opts.corr_mc = "corrInfoMC/%s" % filename.replace(".yoda", "")
476 return result
477
478
479def getCorrelationInfoFromWeb(name=None, dlformat="yoda"):
480 # try to get the YAML files for the corresponding HEPData record
481 # download and untar them...
482 if (name is None): name = opts.data
483 inspireID = [t for t in name.split("_") if "I" in t]
484 inspireID = inspireID[0].replace("I", "")
485 hdurl = "http://www.hepdata.net/record/ins%s?format=%s" % (inspireID, dlformat)
486 print("[INFO] Trying to download information from %s", hdurl)
487 if hdurl:
488 response = urllib.request.urlopen(hdurl)
489 download = response.read()
490 if not download or b"<html" in download:
491 print(("Problem encountered when getting data from HepData (%s). No reference data file written." % hdurl))
492 else:
493 tar = tarfile.open(mode="r:gz", fileobj=io.BytesIO(download))
494 fnames = tar.getnames()
495 if len(fnames) > 0 and dlformat == 'yaml':
496 tar.extractall()
497 os.system("mkdir -p corrInfo/ins%s" % (inspireID))
498 os.system("mv %s/* corrInfo/ins%s/." % (fnames[0], inspireID))
499 os.system("rm -r %s" % (fnames[0]))
500 print("[INFO] successfully downloaded YAML files from HEPMC, will see if they can be used for correlations")
501 if len(fnames) == 1 and dlformat == 'yoda':
502 tar.extractall()
503 os.system("mkdir -p corrInfo/ins%s" % (inspireID))
504 os.system("mv %s corrInfo/ins%s/." % (fnames[0], inspireID))
505 print("[INFO] successfully downloaded YODA files from HEPMC, will see if they can be used for correlations")
506 opts.corr_data = "corrInfo/ins%s" % inspireID
507 response.close()
508
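
To illustrate the URL this builds: when called with the Inspire ID passed via `--get_corr_from_web` (the ID below is the example from the option help), the record URL comes out as follows.
```python
name = "I1514251"                                      # e.g. opts.get_corr_from_web
inspireID = [t for t in name.split("_") if "I" in t][0].replace("I", "")
hdurl = "http://www.hepdata.net/record/ins%s?format=yoda" % (inspireID)
# hdurl == "http://www.hepdata.net/record/ins1514251?format=yoda"
```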
509
510if __name__ == "__main__":
511
512 opts.ntoys = int(opts.ntoys)
513 opts.verbosity = int(opts.verbosity)
514 opts.filter = opts.filter.split(',')
515 if opts.ignore_corrs:
516 opts.ignore_corrs_data = True
517 opts.ignore_corrs_mc = True
518
519 mcNames = {}
520 plotDictionary = {'error-analysis': {}, 'covariance-matrix': {}, 'correlation-matrix': {}, 'data-vs-mc': {}, 'chi2-contribs': {}, 'chi2-value': {}, 'covDetails': {}, 'summary-plot': {}, 'summary-table': {}}
521 # access the data and mc histograms
522 if (opts.corr_data is None and opts.get_corr_from_web):
523 getCorrelationInfoFromWeb(opts.get_corr_from_web)
524 if opts.data == "":
525 pass
526 else:
527 histograms['data'] = returnFileContents(opts.data, 'data')
528 if opts.corr_data is not None:
529 opts.data = markupYODAwithCorrelations(opts.data, opts.corr_data, opts.ignore_corrs_data)
530 # if there are correlations to add, do so and then get the aos from the new file
531 histograms['data'] = returnFileContents(opts.data, 'data')
532 outdir = "outputs/%s/data/plots" % (opts.data.replace(".yoda", ""))
533 for hdata in histograms['data']:
534 if not ct.hasValidErrorBreakdown(hdata): continue
535 os.system("mkdir -p %s" % outdir)
536 plotparser = rivet.mkStdPlotParser([], [])
537 headers = plotparser.getHeaders(hdata.path())
538 if 'XLabel' in headers.keys(): XLabel = headers['XLabel']
539 else: XLabel = None
540 if 'Title' in headers.keys(): Title = headers['Title']
541 else: Title = None
542 ct.plotVariations(hdata, outdir, 'data-%s' % hdata.name(), 0.05, xLabel=XLabel, title=Title)
543 plotDictionary['error-analysis'].setdefault('data', {})
544 plotDictionary['error-analysis']['data'][hdata.name()] = outdir + '/data-%s-variations.pdf' % hdata.name()
545 print("[INFO] See plots of data error analysis here:", outdir)
546 if 'data' in plotDictionary['error-analysis'].keys():
547 for k, v in plotDictionary['error-analysis']['data'].items():
548 print(v)
549
550 for mc in opts.mc.split(","):
551 if mc == "": continue
552 if ":" in mc:
553 mcName = mc.split(":")[1]
554 mc = mc.split(":")[0]
555 else:
556 mcName = mc.split("/")[-1].replace(".yoda", "")
557 histograms['models'][mc] = returnFileContents(mc, 'mc')
558 mcnew = markupYODAwithCorrelations(mc, opts.corr_mc, opts.ignore_corrs_mc)
559 opts.mc = opts.mc.replace(mc, mcnew)
560 # if there are correlations to add, do so and then get the aos from the new file
561 histograms['models'].pop(mc, None)
562 histograms['models'][mcnew] = returnFileContents(mcnew, 'mc')
563 if (opts.data != ''):
564 aoMap = matchAOs(histograms['data'], histograms['models'][mcnew])
565 newmc = renameAOs(mcnew, aoMap)
566 else: newmc = mc
567 opts.mc = opts.mc.replace(mc, newmc)
568 histograms['models'].pop(mcnew, None)
569 histograms['models'][newmc] = returnFileContents(newmc, 'mc')
570 mcNames[newmc] = mcName
571 for hmc in histograms['models'][newmc]:
572 outdir = "outputs/%s/%s/plots" % (opts.data.replace(".yoda", ""), mcName)
573 os.system("mkdir -p %s" % outdir)
574 if not (opts.ignore_errs_mc):
575 ct.plotVariations(hmc, outdir, '%s-%s' % (mcName, hmc.name()), 0.01)
576 plotDictionary['error-analysis'].setdefault(mcName, {})
577 plotDictionary['error-analysis'][mcName][hmc.name()] = outdir + '/%s-%s-variations.pdf' % (mcName, hmc.name())
578 if not (opts.ignore_errs_mc):
579 print("[INFO] See plots of %s variations here" % mcName, outdir + "/validation-plots/")
580 for k, v in plotDictionary['error-analysis'][mcName].items():
581 print(v)
582
583 # make the super AOs
584 if (opts.make_super_aos):
585 dataSuperAO = ct.makeSuperAO(histograms['data'])
586 histograms['data'].append(dataSuperAO)
587 yoda.writeYODA(dataSuperAO, opts.data.replace('.yoda', '.super-ao.yoda'))
588 for mc in histograms['models'].keys():
589 mcSuperAO = ct.makeSuperAO(histograms['models'][mc])
590 histograms['models'][mc].append(mcSuperAO)
591 yoda.writeYODA(mcSuperAO, mc.replace('.yoda', '.super-ao.yoda'))
592
593 # Check that both data and MC histograms are available before proceeding
594 if (len(opts.data) == 0):
595 print("[INFO] No data histograms to be processed. Cannot make comparisons, so exiting.")
596 exit(1)
597 if (len(opts.mc) == 0):
598 print("[INFO] No MC histograms to be processed. Cannot make comparisons, so exiting.")
599 exit(1)
600 for hdata in sorted(histograms['data'], key=lambda ao: ao.name()):  # sort by AO name
601 if not ct.hasValidErrorBreakdown(hdata): continue
602 mcResults = {}
603 if not ("all" in opts.filter):
604 passFilter = False
605 for fil in opts.filter:
606 if fil in hdata.name(): passFilter = True
607 if not (passFilter or 'superAO' in hdata.name()): continue
608 for model in histograms['models']:
609 aoMap = matchAOs(histograms['data'], histograms['models'][model])
610 hmc = aoMap[hdata]
611 mcName = mcNames[model]
612 outdir = "outputs/%s" % (opts.data.replace(".yoda", ""))
613 os.system("mkdir -p %s/data-vs-%s/plots" % (outdir, mcName))
614
615
616 # produce covariance matrix for data
617 # covDetailsData = "Data Covariance Matrix"
618 covDetailsData = "Size of uncertainties across range, and Data Correlation Matrix"
619 if ((opts.use_toys or ct.hasAsymmetricErrors(hdata)) and not opts.use_direct):
620 print("[WARNING] Data has asymmetric errors, producing covariance matrix from %d toys" % opts.ntoys)
621 covData = ct.makeCovarianceMatrixFromToys(hdata, opts.ntoys, opts.ignore_corrs_data)
622 covDetailsData += " produced from %d toys%s" % (opts.ntoys, ", ignoring correlations" if opts.ignore_corrs_data else "")
623 else:
624 covData = ct.makeCovarianceMatrix(hdata, opts.ignore_corrs_data)
625 covDetailsData += " produced from direct propagation%s" % (", ignoring correlations" if opts.ignore_corrs_data else "")
626 plotparser = rivet.mkStdPlotParser([], [])
627 headers = plotparser.getHeaders(hdata.path())
628 if 'XLabel' in headers.keys(): XLabel = headers['XLabel']
629 else: XLabel = None
630 ct.drawMatrix(covData, outdir + "/data/plots/data-%s-covariance-matrix" % hdata.name(), xLabel=XLabel)
631 plotDictionary['covariance-matrix'].setdefault('data', {})
632 plotDictionary['covariance-matrix']['data'][hdata.name()] = outdir + "/data/plots/data-%s-covariance-matrix.pdf" % hdata.name()
633 ct.drawMatrix(ct.correlationMatrix(covData), outdir + "/data/plots/data-%s-correlation-matrix" % hdata.name())
634 plotDictionary['correlation-matrix'].setdefault('data', {})
635 plotDictionary['correlation-matrix']['data'][hdata.name()] = outdir + "/data/plots/data-%s-correlation-matrix.pdf" % hdata.name()
636
637 # produce covariance matrix for MC
638 covDetailsMC = "MC Covariance Matrix"
639 if ((opts.use_toys or ct.hasAsymmetricErrors(hmc)) and not opts.use_direct):
640 print("[WARNING] MC has asymmetric errors, producing covariance matrix from %d toys" % opts.ntoys)
641 covMC = ct.makeCovarianceMatrixFromToys(hmc, opts.ntoys, opts.ignore_corrs_mc)
642 covDetailsMC += " produced from %d toys%s" % (opts.ntoys, ", ignoring correlations" if opts.ignore_corrs_mc else "")
643 else:
644 covMC = ct.makeCovarianceMatrix(hmc, opts.ignore_corrs_mc)
645 covDetailsMC += " produced from direct propagation%s" % (", ignoring correlations" if opts.ignore_corrs_mc else "")
646 ct.drawMatrix(covMC, outdir + "/%s/plots/%s-%s-covariance-matrix" % (mcName, mcName, hdata.name()))
647 plotDictionary['covariance-matrix'].setdefault(mcName, {})
648 plotDictionary['covariance-matrix'][mcName][hmc.name()] = outdir + "/%s/plots/%s-%s-covariance-matrix.pdf" % (mcName, mcName, hmc.name())
649 ct.drawMatrix(ct.correlationMatrix(covMC), outdir + "/%s/plots/%s-%s-correlation-matrix" % (mcName, mcName, hmc.name()))
650 plotDictionary['correlation-matrix'].setdefault(mcName, {})
651 plotDictionary['correlation-matrix'][mcName][hmc.name()] = outdir + "/%s/plots/%s-%s-correlation-matrix.pdf" % (mcName, mcName, hmc.name())
652
653 # sum covariance matrices
654 covTotal = covData + covMC
655 if opts.ignore_errs_data and opts.ignore_errs_mc:
656 print("[ERROR] cannot ignore both data errros and MC errors at the same time!")
657 exit(1)
658 elif opts.ignore_errs_data:
659 covTotal = covMC
660 covDetailsData = "Ignoring Data uncertainties (MC cov matrix only)"
661 elif opts.ignore_errs_mc:
662 covTotal = covData
663 covDetailsMC = "Ignoring MC uncertainties (Data cov matrix only)"
664 covDetailsData += ". Ignoring MC uncertainties"
665 plotDictionary['covDetails'].setdefault("%s" % mcName, {})
666 plotDictionary['covDetails'].setdefault("data", {})
667 plotDictionary['covDetails']["data"][hdata.name()] = covDetailsData
668 plotDictionary['covDetails'][mcName][hmc.name()] = covDetailsMC
669
670 ct.drawMatrix(covMC, outdir + "/data-vs-%s/plots/data-vs-%s-%s-covariance-matrix" % (mcName, mcName, hdata.name()))
671 plotDictionary['covariance-matrix'].setdefault("data-vs-%s" % mcName, {})
672 plotDictionary['covariance-matrix']["data-vs-%s" % mcName][hmc.name()] = outdir + "/data-vs-%s/plots/data-vs-%s-%s-covariance-matrix.pdf" % (mcName, mcName, hdata.name())
673 ct.drawMatrix(ct.correlationMatrix(covMC), outdir + "/data-vs-%s/plots/data-vs-%s-%s-correlation-matrix" % (mcName, mcName, hdata.name()))
674 plotDictionary['correlation-matrix'].setdefault("data-vs-%s" % mcName, {})
675 plotDictionary['correlation-matrix']["data-vs-%s" % mcName][hmc.name()] = outdir + "/data-vs-%s/plots/data-vs-%s-%s-correlation-matrix.pdf" % (mcName, mcName, hdata.name())
676
677
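# The chi2 below is computed in covarianceToolsLibrary from the data-MC difference
# and the combined covariance covTotal (roughly d^T * covTotal^-1 * d, with d the
# per-bin difference); chi2contribs holds the per-bin contributions drawn just after.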
678 chi2, ndf, prob, chi2contribs = ct.calculateChi2(hdata, hmc, covTotal)
679 ct.drawMatrix(chi2contribs, outdir + "/data-vs-%s/plots/data-vs-%s-%s-chi2-contribs" % (mcName, mcName, hdata.name()))
680 plotDictionary['chi2-contribs'].setdefault("data-vs-%s" % mcName, {})
681 plotDictionary['chi2-contribs']["data-vs-%s" % mcName][hmc.name()] = outdir + "/data-vs-%s/plots/data-vs-%s-%s-chi2-contribs.pdf" % (mcName, mcName, hdata.name())
682
683 chi2ContribsByRow = ct.chi2ContribsByRow(chi2contribs)
684 chi2ContribsByRowYAML = yaml.dump(chi2ContribsByRow, default_flow_style=True, default_style='', width=1e6)
685 hmc.setAnnotation("Chi2Contribs", chi2ContribsByRowYAML)
686 ct.updateAOinYODA(hmc, model)
687 plotDictionary['chi2-value'].setdefault("data-vs-%s" % mcName, {})
688 plotDictionary['chi2-value']["data-vs-%s" % mcName][hmc.name()] = "$%.2f / %d$" % (chi2, ndf)
689
690
691 mcResults[model] = [chi2, ndf]
692 print("[INFO] %s:%s vs %s:%s --> chi2=%.2f/%d (prob=%.2f) " % (opts.data, hdata.name(), model, hmc.name(), chi2, ndf, prob))
693
694 if "superAO" in hdata.name(): continue
695
696 res = {'%s' % opts.data: '[Data]', '%s' % model: '[%s (# chi^2=%.2f/%d)]' % (mcName, chi2, ndf)}
697 outdirplots = outdir + "/data-vs-%s/plots/" % mcName
698 plots = st.makeSystematicsPlotsWithROOT(res, outdirplots, nominalName='Data', ratioZoom=None, regexFilter=".*%s.*" % hmc.name(), regexVeto=None)
699
700 plotDictionary['data-vs-mc'].setdefault("data-vs-%s" % mcName, {})
701 plotDictionary['data-vs-mc']["data-vs-%s" % mcName][hmc.name()] = plots[0]
702
703 outdir = "outputs/%s/summary" % (opts.data.replace(".yoda", ""))
704 pathName = hdata.path().replace("/REF", "")
705
706 res = {'%s' % opts.data: '[Data]'}
707 for model in histograms['models']:
708 mcName = mcNames[model]
709 mcR = mcResults[model]
710 res[model] = '[%s (# chi^2=%.2f/%d)]' % (mcName, mcR[0], mcR[1])
711 if "superAO" in hdata.name(): continue
712
713 if "superAO" in hdata.name(): continue
714 plots = st.makeSystematicsPlotsWithROOT(res, outdir, nominalName='Data', ratioZoom=None, regexFilter=".*%s.*" % hmc.name(), regexVeto=None)
715 plotDictionary['summary-plot'][hmc.name()] = plots[0]
716 makeSummaryTable(plotDictionary, outdir + "/summary_%s.tex" % hdata.name(), hdata.name())
717 plotDictionary['summary-table'][hmc.name()] = outdir + "/summary_%s.tex" % hdata.name()
718
719 print("[INFO] output rivet files with Chi2: %s" % outdir)
720 os.system('ls %s/* | grep pdf' % outdir)
721 if opts.slides:
722 # having looped through all data:
723 outdir = "outputs/%s/summary" % (opts.data.replace(".yoda", ""))
724 if len(plotDictionary['chi2-value'].keys()) == 0:
725 print("[WARNING] no histograms were processed, could not make summary slides! Exit")
726 exit(1)
727 else:
728 makeSummaryTable(plotDictionary, outdir + "/summary_all.tex")
729 plotDictionary['summary-table']['all'] = outdir + "/summary_all.tex"
730 beamerPath = makeBeamer(plotDictionary, outdir)
731 os.system('rm slides_*')
732 os.system('pdflatex %s > /dev/null' % beamerPath)
733 os.system('pdflatex %s > /dev/null' % beamerPath)
734 beamerPath = beamerPath.replace("tex", "pdf").split("/")[-1]
735 print("[INFO] your summary slides are here: \n", beamerPath)