5 The `covarianceTool.py` executable dis a helper which uses the functions provided by `covarianceToolsLibrary.py` to perform the
6 comparison between MC predictions and HEPData records.
8 However, the `local/bin/covarianceToolsLibrary.py` can also be used directly to manipulate covariance info and evaluate goodness of fit.
9 Examples of how to do this are provided in the `examples` subdirectory:
10 - `examples/covarianceTool-examples/README.md`
12 The first thing to do is test you can access the `covarianceTool.py` executable and see available options:
17 Author: Louie D. Corpe (CERN)
18 Email: l.corpe@cern.ch
28 import systematicsTool
as st
29 import covarianceToolsLibrary
as ct
31 parser = optparse.OptionParser(usage=
"%prog [options]")
32 parser.add_option(
"--data", help=
"Input yoda file for data [%default]", dest=
"data", default=
"")
33 parser.add_option(
"--mc", help=
"Input yoda files for mc (comma separarated list), specify mc label with ':', eg mc1path:'MC1', mc2path:'MC2'. If no label is specified, the name of the yoda file is used as a label [%default]", dest=
"mc", default=
"")
34 parser.add_option(
"--use_toys", help=
"Force the covariance matrix to be produced with toys. By default the cov matrix is produced by direct propagation is errors are symmetric and with toys if errors are asymmetric. [%default]", dest=
"use_toys", default=
False, action=
'store_true')
35 parser.add_option(
"--ntoys", help=
"number of toys to throw if cov matrix produced from toys [%default]", dest=
"ntoys", default=1000)
36 parser.add_option(
"--use_direct", help=
"Force the covariance matrix to be produced via direct propagation (the errors are symmetrized if needed). By default the cov matrix is produced by direct propagation if errors are symmetric and with toys if errors are asymmetric. [%default]", dest=
"use_direct", default=
False, action=
'store_true')
37 parser.add_option(
"--corr_data", help=
"Input file/dir to build the covariance matrices (comma separarated list) [%default]", dest=
"corr_data", default=
None)
38 parser.add_option(
"--corr_mc", help=
"Input file/dir to build the covariance matrices (comma separarated list) [%default]", dest=
"corr_mc", default=
None)
39 parser.add_option(
"--ignore_corrs", help=
"ignore correlations in both data and mc and just assume total error is uncorrelated between bins [%default]", dest=
"ignore_corrs", default=
False, action=
'store_true')
40 parser.add_option(
"--ignore_corrs_mc", help=
"Ignore correlations in the MC [%default]", dest=
"ignore_corrs_mc", default=
False, action=
'store_true')
41 parser.add_option(
"--ignore_corrs_data", help=
"Ignore correlations in theData [%default]", dest=
"ignore_corrs_data", default=
False, action=
'store_true')
42 parser.add_option(
"--ignore_errs_data", help=
"Ignore all errors in the data, and use only MC cov matrix [%default]", dest=
"ignore_errs_data", default=
False, action=
'store_true')
43 parser.add_option(
"--ignore_errs_mc", help=
"Ignore all erros in the MC, and use only the data cov matrix [%default]", dest=
"ignore_errs_mc", default=
False, action=
'store_true')
44 parser.add_option(
"--get_corr_from_web", help=
"Try to get correlation info from HEPMC record. Specify the inspire ID eg I1514251 [%default]", dest=
"get_corr_from_web", default=
None)
45 parser.add_option(
"--filter", help=
"Only process a subset of histograms (comma separated) [%default]", dest=
"filter", default=
"all")
46 parser.add_option(
"-v",
"--verbosity", help=
"Verbosity level (0-9) [%default]", dest=
"verbosity", default=0)
47 parser.add_option(
"--make_super_aos", help=
"Make super ao of all measuremnts passing --filter, and calculate the chi2 for all measurements using a super-cov-matrix [%default]", dest=
"make_super_aos", default=
False, action=
'store_true')
48 parser.add_option(
"--slides", help=
"Make summary slides [%default]", dest=
"slides", default=
False, action=
'store_true')
49 (opts, args) = parser.parse_args()
51 os.environ[
'TERM'] =
'linux'
70 print(
"convertHistToScatter: This is a placeholder, need to implement")
76 fout_name = fin_name.replace(
".yoda",
".renamed.yoda")
77 fout =
open(fout_name,
'w')
78 aos = yoda.read(fin_name, unpatterns=
".*RAW.*")
79 for aopath, ao
in aos.items():
81 for ao1, ao2
in aoMap.items():
82 if ao1.name() == ao.name():
83 ao.setAnnotation(
"Path", ao2.path().
replace(
"REF/",
""))
85 if ao2.name() == ao.name():
86 ao.setAnnotation(
"Path", ao1.path().
replace(
"REF/",
""))
88 if foundMatch: yoda.writeYODA(ao, fout)
96 dataDir = os.environ[
"SYSTTOOLSPATH"] +
"/data/"
97 os.system(
"mkdir -p %s " % outdir)
98 samples = plotDictionary[
'chi2-value'].
keys()
99 histos = plotDictionary[
'chi2-value'][samples[0]].
keys()
100 os.system(
'cp %s/title.tex %s/title.tex' % (dataDir, outdir))
101 os.system(
'cp %s/title.tex %s/title.tex' % (dataDir, outdir))
102 os.system(
"sed -i -e 's|!TITLE!|%s|g' %s/title.tex" % (opts.data.replace(
'.yoda',
'').
replace(
'_',
' '), outdir))
103 frames.append(
"%s/title.tex" % outdir)
105 if 'superAO' in histo:
continue
106 os.system(
'cp %s/section.tex %s/%s_section.tex' % (dataDir, outdir, histo))
107 os.system(
"sed -i -e 's|!AOREF!|%s|g' %s/%s_section.tex" % (histo, outdir, histo))
108 frames.append(
'%s/%s_section.tex' % (outdir, histo))
109 os.system(
'cp %s/errorAnalysis.tex %s/%s_%s_errorAnalysis.tex' % (dataDir, outdir,
'data', histo))
110 os.system(
"sed -i -e 's|!AOREF!|%s|g' %s/%s_%s_errorAnalysis.tex" % (histo, outdir,
'data', histo))
111 os.system(
"sed -i -e 's|!SAMPLE!|%s|g' %s/%s_%s_errorAnalysis.tex" % (
'data', outdir,
'data', histo))
112 os.system(
"sed -i -e 's|!ERRORBREAKDOWN!|%s|g' %s/%s_%s_errorAnalysis.tex" % (plotDictionary[
'error-analysis'][
'data'][histo], outdir,
'data', histo))
113 os.system(
"sed -i -e 's|!COVMATRIX!|%s|g' %s/%s_%s_errorAnalysis.tex" % (plotDictionary[
'correlation-matrix'][
'data'][histo], outdir,
'data', histo))
114 os.system(
"sed -i -e 's|!COVDETAILS!|%s|g' %s/%s_%s_errorAnalysis.tex" % (plotDictionary[
'covDetails'][
'data'][histo], outdir,
'data', histo))
115 frames.append(
"%s/%s_%s_errorAnalysis.tex" % (outdir,
'data', histo))
116 for sample
in samples:
117 sampleNameClean = sample.replace(
"_",
" ").
replace(
"-",
" ")
118 os.system(
'cp %s/dataMCComparison.tex %s/%s_%s_dataMCComparison.tex' % (dataDir, outdir, sample, histo))
119 os.system(
"sed -i -e 's|!AOREF!|%s|g' %s/%s_%s_dataMCComparison.tex" % (histo, outdir, sample, histo))
120 os.system(
"sed -i -e 's|!SAMPLE!|%s|g' %s/%s_%s_dataMCComparison.tex" % (sampleNameClean, outdir, sample, histo))
121 os.system(
"sed -i -e 's|!DATAVSMC!|%s|g' %s/%s_%s_dataMCComparison.tex" % (plotDictionary[
'data-vs-mc'][sample][histo], outdir, sample, histo))
122 os.system(
"sed -i -e 's|!CHI2CONTRIBS!|%s|g' %s/%s_%s_dataMCComparison.tex" % (plotDictionary[
'chi2-contribs'][sample][histo], outdir, sample, histo))
123 frames.append(
"%s/%s_%s_dataMCComparison.tex" % (outdir, sample, histo))
125 os.system(
'cp %s/sectionSummary.tex %s/%s_sectionSummary.tex' % (dataDir, outdir, histo))
126 os.system(
"sed -i -e 's|!AOREF!|%s|g' %s/%s_sectionSummary.tex" % (histo, outdir, histo))
127 os.system(
"sed -i -e 's|!SUMMARYPLOT!|%s|g' %s/%s_sectionSummary.tex" % (plotDictionary[
'summary-plot'][histo], outdir, histo))
128 os.system(
"sed -i -e 's|!SUMMARYTABLE!|%s|g' %s/%s_sectionSummary.tex" % (plotDictionary[
'summary-table'][histo], outdir, histo))
129 frames.append(
"%s/%s_sectionSummary.tex" % (outdir, histo))
130 os.system(
'cp %s/section.tex %s/%s_section.tex' % (dataDir, outdir,
'overall'))
131 os.system(
"sed -i -e 's|!AOREF!|%s|g' %s/%s_section.tex" % (
'overall summary', outdir,
'overall'))
132 frames.append(
'%s/%s_section.tex' % (outdir,
'overall'))
133 os.system(
'cp %s/overallSummary.tex %s/%s_overallSummary.tex' % (dataDir, outdir,
'overall'))
134 os.system(
"sed -i -e 's|!AOREF!|%s|g' %s/%s_overallSummary.tex" % (
'overall summary', outdir,
'overall'))
135 os.system(
"sed -i -e 's|!SUMMARYTABLE!|%s|g' %s/%s_overallSummary.tex" % (plotDictionary[
'summary-table'][
'all'], outdir,
'overall'))
136 frames.append(
"%s/%s_overallSummary.tex" % (outdir,
'overall'))
137 frames.append(
"%s/end.tex" % dataDir)
138 catLine =
" ".
join(frames)
139 os.system(
" cat %s > %s/slides_%s.tex" % (catLine, outdir, opts.data.replace(
'.yoda',
'')))
140 return " %s/slides_%s.tex" % (outdir, opts.data.replace(
'.yoda',
''))
145 outf =
open(outfile,
'w')
146 samples = plotDictionary[
'chi2-value'].
keys()
150 histos =
sorted(plotDictionary[
'chi2-value'][samples[0]].
keys())
152 line =
"\\begin{tabular}{|l|"
156 outf.write(line +
'\n')
157 outf.write(
"\\hline " +
'\n')
158 line =
" $\\chi^2$ / ndof "
160 if 'superAO' in hist: hist =
'global agreement'
161 line +=
" & %s " % hist
163 outf.write(line +
'\n')
164 outf.write(
"\\hline" +
'\n')
165 for sample
in samples:
166 line =
" %s " % sample.replace(
"-",
" ").
replace(
"_",
" ")
168 line +=
" & %s" % plotDictionary[
'chi2-value'][sample][hist]
170 outf.write(line +
'\n')
171 outf.write(
"\\hline" +
'\n')
172 outf.write(
"\\end{tabular}" +
'\n')
176 def matchAOs(aoList1, aoList2, strategy=0, recursive=True):
181 aoMap.setdefault(ao1, [])
185 namesPartialMatch =
False
186 binWidthsMatch =
False
187 if (ao2.name()
in ao1.name()): namesMatch =
True
188 if (ao1.name()
in ao2.name()): namesMatch =
True
189 ao1NameTokens = ao1.name().
split(
"/")[-1].
split(
"-")
190 ao2NameTokens = ao2.name().
split(
"/")[-1].
split(
"-")
191 if len(
list(
set(ao1NameTokens).
intersection(ao2NameTokens))) >= 2: namesPartialMatch =
True
192 if (ao1.numPoints() == ao2.numPoints()):
194 for ipt
in range(ao1.numPoints()):
195 ao1BinLabels =
"%.2f - %.2f" % (ao1.points()[ipt].xMin(), ao1.points()[ipt].xMax())
196 ao2BinLabels =
"%.2f - %.2f" % (ao2.points()[ipt].xMin(), ao2.points()[ipt].xMax())
197 if (ao1BinLabels == ao2BinLabels): binWidthsMatch =
True
199 if (namesMatch
and nBinsMatch
and binWidthsMatch):
200 aoMap.setdefault(ao1, []).
append(ao2)
202 if (nBinsMatch
and binWidthsMatch):
203 aoMap.setdefault(ao1, []).
append(ao2)
205 if (nBinsMatch
and binWidthsMatch
and namesPartialMatch):
206 aoMap.setdefault(ao1, []).
append(ao2)
208 if (len(aoMap.keys()) == len(aoList1))
and ([len(aoMap[ao])
for ao
in aoList1]) == ([1
for ao
in aoList1]):
209 if (opts.verbosity > 0):
print(
"[INFO] found 1-1 mapping between aoLists using strategy %d:" % strategy)
211 aoMap[ao1] = aoMap[ao1][0]
212 if (opts.verbosity > 1):
print(
"%s --> %s" % (ao1.name(), aoMap[ao1].
name()))
213 elif (recursive
and strategy < 3):
214 if (opts.verbosity > 0):
215 print(
"[WARNING] Could not establish 1-1 mapping between aoLists using strategy", strategy,
", try strategy ", strategy + 1)
217 if (opts.verbosity > 1):
print(
"%s --> %s" % (ao1.name(), aoMap[ao1]))
219 aoMap =
matchAOs(aoList1, aoList2, strategy)
221 print(
"[ERROR] could not match AOS in the Data and MC files. Please make sure the AOs have the same binning/ number of bins (and name if possible!)")
222 print(
"[ERROR] aos from list 1", aoList1)
223 print(
"[ERROR] aos from list 2", aoList2)
232 reference_out = reference.replace(
".yoda",
"_corrs.yoda")
233 reference_outf =
open(reference_out,
'w')
235 if correlationsDir ==
'self':
236 infiles.setdefault(reference, [])
238 for f
in os.listdir(correlationsDir):
239 if "all" in f:
continue
240 infiles.setdefault(correlationsDir +
"/" + f, [])
246 histos = [name
for name
in yoda.read(reference, unpatterns=
".*RAW.*")]
247 for histname
in histos:
248 hAll = yoda.read(reference, unpatterns=
".*RAW.*")[histname]
249 if type(hAll)
is yoda.core.Histo1D: hAll = hAll.mkScatter()
250 for f
in infiles.keys():
251 systname = f.split(
"/")[-1].
replace(
".yoda",
"")
252 h = yoda.read(f, unpatterns=
".*RAW.*")[histname]
253 if type(h)
is yoda.core.Scatter2D:
254 nBins = h.numPoints()
255 for ipt
in range(nBins):
256 binValues[ipt] = h.points()[ipt].y()
257 binLabels[ipt] =
"%f - %f" % (h.points()[ipt].xMin(), h.points()[ipt].xMax())
258 errs = h.points()[ipt].yErrs()
259 errAv = (abs(errs[1]) + abs(errs[0])) * 0.5
260 binErrors.setdefault(ipt, {}).setdefault(systname, {})[
'up'] = errs[1]
261 binErrors.setdefault(ipt, {}).setdefault(systname, {})[
'dn'] = -1 * errs[0]
262 elif type(h)
is yoda.core.Histo1D:
264 for ipt
in range(nBins):
265 binValues[ipt] = h.bins()[ipt].sumW()
266 binLabels[ipt] =
"%f - %f" % (h.bins()[ipt].xMin(), h.bins()[ipt].xMax())
267 errAv = h.bins()[ipt].sumW2()
268 binErrors.setdefault(ipt, {}).setdefault(systname, {})[
'up'] = errAv
269 binErrors.setdefault(ipt, {}).setdefault(systname, {})[
'dn'] = -1 * errAv
271 if (reference !=
""):
273 for f
in reference.split(
","):
274 hists = yoda.read(f, unpatterns=
".*RAW.*")
276 if not type(hists[name])
is yoda.core.Scatter2D:
continue
277 nominal = [(hists[name].
points()[i].y())
for i
in range(hists[name].numPoints())]
278 if (len(nominal) != nBins):
continue
280 for i
in range(nBins):
281 totalDiff += abs(binValues[i] - nominal[i])
282 if (totalDiff < smallestDiff):
283 smallestDiff = totalDiff
285 corrs = yaml.dump(binErrors, default_flow_style=
True, default_style=
'', width=1e6)
286 hAll.setAnnotation(
"ErrorBreakdown", corrs)
287 yoda.writeYODA(hAll, reference_outf)
288 reference_outf.close()
296 reference_out = reference.replace(
".yoda",
"_corrs.yoda")
297 reference_outf =
open(reference_out,
'w')
299 for f
in os.listdir(correlationsDir):
300 if 'submission' in f:
continue
301 if 'comb' in f:
continue
303 infiles.setdefault(correlationsDir +
"/" + f.replace(
'syst',
'xsect'), [])
305 infiles.setdefault(correlationsDir +
"/" + f, [])
307 for f
in infiles.keys():
308 stream =
open(f,
"r")
309 doc = yaml.load(stream)
313 for itemtype, itemarray
in doc.items():
314 if itemtype ==
"independent_variables":
315 for line
in itemarray:
316 binLabels[
'name'] = itemarray[0][
'header'][
'name']
318 for entry
in itemarray[0][
'values']:
319 if (
'low' in entry.keys()
and 'high' in entry.keys()):
320 binLabels[counter] =
"%f - %f" % (entry[
'low'], entry[
'high'])
322 binLabels[counter] =
"%f" % (entry[
'value'])
325 if itemtype ==
"dependent_variables":
326 for line
in itemarray:
327 binValues[
'name'] =
"%s [%s]" % (line[
'header'][
'name'],
'')
328 binErrors[
'name'] =
"uncertainties"
330 for entry
in line[
'values']:
331 binValues[counter] = entry[
'value']
332 binErrors[counter] = {}
333 if 'errors' in entry.keys():
334 for error
in entry[
'errors']:
335 binErrors[counter][error[
'label']] = {}
336 if 'symerror' in error.keys():
337 if not isFloat(error[
'symerror'])
and '%' in (error[
'symerror']):
338 binErrors[counter][error[
'label']][
'up'] = binValues[counter] * 0.01 *
float(error[
'symerror'].
replace(
'%',
''))
339 binErrors[counter][error[
'label']][
'dn'] = binValues[counter] * -0.01 *
float(error[
'symerror'].
replace(
'%',
''))
341 binErrors[counter][error[
'label']][
'up'] =
float(error[
'symerror'])
342 binErrors[counter][error[
'label']][
'dn'] = -1 *
float(error[
'symerror'])
343 elif 'asymerror' in error.keys():
344 if not isFloat(error[
'asymerror'][
'plus'])
and '%' in error[
'asymerror'][
'plus']:
345 binErrors[counter][error[
'label']][
'up'] = binValues[counter] * 0.01 *
float(error[
'asymerror'][
'plus'].
replace(
'%',
''))
346 binErrors[counter][error[
'label']][
'dn'] = binValues[counter] * 0.01 *
float(error[
'asymerror'][
'minus'].
replace(
'%',
''))
348 binErrors[counter][error[
'label']][
'up'] =
float(error[
'asymerror'][
'plus'])
349 binErrors[counter][error[
'label']][
'dn'] = 1 *
float(error[
'asymerror'][
'minus'])
351 print(
'[ERROR] errors are neither symmetric or asymmetric... exit!')
356 systNames = binErrors[0].
keys()
357 binErrorsGranular = {}
358 systFile = f.replace(
"xsect",
"syst")
359 streamsyst =
open(systFile,
"r")
360 docsyst = yaml.load(streamsyst)
361 binErrorsGranular[
'name'] =
"uncertainties"
362 for itemtype, itemarray
in docsyst.items():
363 if "variables" not in itemtype:
continue
364 if (itemtype ==
"independent_variables"):
365 systNames = [v[
'value']
for v
in itemarray[0][
'values']]
368 for line
in itemarray:
370 binErrorsGranular[binCounter] = {}
371 if 'values' not in line.keys():
continue
373 for error
in line[
'values']:
375 binErrorsGranular[binCounter][systNames[systCounter]] = {}
376 binErrorsGranular[binCounter][systNames[systCounter]][
'up'] =
float(error[
'value']) * binValues[binCounter] * 0.01
377 binErrorsGranular[binCounter][systNames[systCounter]][
'dn'] = -1 *
float(error[
'value']) * binValues[binCounter] * 0.01
378 for i
in range(len(binErrorsGranular.keys()) - 1):
379 binErrorsGranular[i][
'stat'] = {}
380 binErrorsGranular[i][
'stat'][
'up'] = binErrors[i][
'stat'][
'up']
381 binErrorsGranular[i][
'stat'][
'dn'] = binErrors[i][
'stat'][
'dn']
382 binErrors = binErrorsGranular
383 nBins = len(binLabels) - 1
384 infiles[f] = [binValues, binErrors]
386 hists = yoda.read(reference, unpatterns=
".*RAW.*")
390 nearestMatchErrors =
""
391 nominal = [(hists[name].
points()[i].y)
for i
in range(hists[name].numPoints())]
392 for f, corrs
in infiles.items():
395 nBins = len(binValues) - 1
396 if (len(nominal) != nBins):
continue
398 for i
in range(nBins):
399 totalDiff += abs(binValues[i] - nominal[i])
400 if (totalDiff < smallestDiff):
402 smallestDiff = totalDiff
403 nearestMatchErrors = binErrors
404 if (opts.verbosity > 1):
print(
"[DEBUG] candidate", nominal,
" nearest match is ", nearestMatch)
405 if smallestDiff < 1.0:
406 if (opts.verbosity > 1):
print(
"[INFO] mapping", name,
" ---> ", nearestMatch, (smallestDiff))
407 corrs = yaml.dump(nearestMatchErrors, default_flow_style=
True, default_style=
'', width=1e6)
408 hists[name].setAnnotation(
"ErrorBreakdown", corrs)
409 yoda.writeYODA(hists[name], reference_outf)
411 print(
"[WARNING] Warning, no match found for", name)
412 print(
"[INFO] %s has been marked up with correlation info from %s in this file:" % (reference, correlationsDir))
413 print(
"[INFO] ---> %s " % (reference_out))
414 print(
"[INFO] next time, you can use this file as an input instead of ", reference)
415 reference_outf.close()
424 if ignoreCorrelations:
427 if (corrDir
is None):
return ref
428 for f
in os.listdir(corrDir):
429 if '.yoda' in f: isYoda =
True
438 aos = yoda.read(filename, unpatterns=
".*RAW.*")
439 for aopath, ao
in aos.items():
441 if aoType
in [yoda.Histo1D, yoda.Histo2D]: ao = ao.mkScatter()
442 if aoType
in [yoda.Counter]:
continue
443 if 'all' not in opts.filter
and inputType ==
'data':
444 for f
in opts.filter:
452 corr = ao.annotation(
"ErrorBreakdown")
454 covInfoMissing.append(ao)
455 if (len(covInfoMissing)
and inputType ==
'data' and not opts.ignore_corrs
and not opts.corr_data):
456 print(
"[WARNING] These DATA analysis objects are missing the correlation/covariance information:", covInfoMissing)
457 print(
" --> To add correlation information, you can use the eg options: --get_corr_from_web, --corr_data ")
458 print(
" --> You can also proceed by adding the --ignore_corrs_data in which case the chi2 is calculated")
459 print(
" --> from the total uncertainties and assumed to be bin-bin uncorrelated.")
460 inspireID = [t
for t
in opts.data.split(
"_")
if "I" in t]
461 inspireID = inspireID[0].
replace(
"I",
"")
462 if os.path().isdir(
"corrInfo/ins%s" % inspireID):
463 print(
"[INFO] Attempting to use the information in corrInfo/ins%s to build correlation info " % inspireID)
464 opts.corr_data =
"corrInfo/ins%s" % inspireID
468 if (len(covInfoMissing)
and inputType ==
'mc' and not opts.ignore_corrs
and not opts.corr_mc):
469 print(
"[WARNING] These MC analysis objects are missing the correlation/covariance information:", covInfoMissing)
470 print(
" --> To add correlation information, you can use the eg options: --corr_mc ")
471 print(
" --> You can also proceed by adding the --ignore_corrs_mc in which case the chi2 is calculated")
472 print(
" --> from the total uncertainties and assumed to be bin-bin uncorrelated.")
473 if os.path.isdir(
"corrInfoMC/%s" % filename.replace(
".yoda",
"")):
474 print(
"[INFO] Attempting to use the information in corrInfoMC/%s to build correlation info " % filename.replace(
".yoda",
""))
475 opts.corr_mc =
"corrInfoMC/%s" % filename.replace(
".yoda",
"")
482 if (name
is None): name = opts.data
483 inspireID = [t
for t
in name.split(
"_")
if "I" in t]
484 inspireID = inspireID[0].
replace(
"I",
"")
485 hdurl =
"http://www.hepdata.net/record/ins%s?format=%s" % (inspireID, dlformat)
486 print(
"[INFO] Trying to download information from %s", hdurl)
488 response = urllib2.urlopen(hdurl)
489 download = response.read()
490 if not download
or "<html" in download:
491 print((
"Problem encountered when getting data from HepData (%s). No reference data file written." % hdurl))
493 tar = tarfile.open(mode=
"r:gz", fileobj=StringIO.StringIO(download))
494 fnames = tar.getnames()
495 if len(fnames) > 0
and dlformat ==
'yaml':
497 os.system(
"mkdir -p corrInfo/ins%s" % (inspireID))
498 os.system(
"mv %s/* corrInfo/ins%s/." % (fnames[0], inspireID))
499 os.system(
"rm -r %s" % (fnames[0]))
500 print(
"[INFO] successfully downloaded YAML files from HEPMC, will see if they can be used for correlations")
501 if len(fnames) == 1
and dlformat ==
'yoda':
503 os.system(
"mkdir -p corrInfo/ins%s" % (inspireID))
504 os.system(
"mv %s corrInfo/ins%s/." % (fnames[0], inspireID))
505 print(
"[INFO] successfully downloaded YODA files from HEPMC, will see if they can be used for correlations")
506 opts.corr_data =
"corrInfo/ins%s" % inspireID
510 if __name__ ==
"__main__":
512 opts.ntoys =
int(opts.ntoys)
513 opts.verbosity =
int(opts.verbosity)
514 opts.filter = opts.filter.split(
',')
515 if opts.ignore_corrs:
516 opts.ignore_corrs_data =
True
517 opts.ignore_corrs_mc =
True
520 plotDictionary = {
'error-analysis': {},
'covariance-matrix': {},
'correlation-matrix': {},
'data-vs-mc': {},
'chi2-contribs': {},
'chi2-value': {},
'covDetails': {},
'summary-plot': {},
'summary-table': {}}
522 if (opts.corr_data
is None and opts.get_corr_from_web):
528 if opts.corr_data
is not None:
532 outdir =
"outputs/%s/data/plots" % (opts.data.replace(
".yoda",
""))
533 for hdata
in histograms[
'data']:
534 if not ct.hasValidErrorBreakdown(hdata):
continue
535 os.system(
"mkdir -p %s" % outdir)
536 plotparser = rivet.mkStdPlotParser([], [])
537 headers = plotparser.getHeaders(hdata.path())
538 if 'XLabel' in headers.keys(): XLabel = headers[
'XLabel']
540 if 'Title' in headers.keys(): Title = headers[
'Title']
542 ct.plotVariations(hdata, outdir,
'data-%s' % hdata.name(), 0.05, xLabel=XLabel, title=Title)
543 plotDictionary[
'error-analysis'].setdefault(
'data', {})
544 plotDictionary[
'error-analysis'][
'data'][hdata.name()] = outdir +
'/data-%s-variations.pdf' % hdata.name()
545 print(
"[INFO] See plots of data error analysis here:", outdir)
546 if 'data' in plotDictionary[
'error-analysis'].
keys():
547 for k, v
in plotDictionary[
'error-analysis'][
'data'].
items():
550 for mc
in opts.mc.split(
","):
551 if mc ==
"":
continue
553 mcName = mc.split(
":")[1]
554 mc = mc.split(
":")[0]
556 mcName = mc.split(
"/")[-1].
replace(
".yoda",
"")
559 opts.mc = opts.mc.replace(mc, mcnew)
561 histograms[
'models'].pop(mc,
None)
563 if (opts.data !=
''):
564 aoMap =
matchAOs(histograms[
'data'], histograms[
'models'][mcnew])
567 opts.mc = opts.mc.replace(mc, newmc)
568 histograms[
'models'].pop(mcnew,
None)
570 mcNames[newmc] = mcName
571 for hmc
in histograms[
'models'][newmc]:
572 outdir =
"outputs/%s/%s/plots" % (opts.data.replace(
".yoda",
""), mcName)
573 os.system(
"mkdir -p %s" % outdir)
574 if not (opts.ignore_errs_mc):
575 ct.plotVariations(hmc, outdir,
'%s-%s' % (mcName, hmc.name()), 0.01)
576 plotDictionary[
'error-analysis'].setdefault(mcName, {})
577 plotDictionary[
'error-analysis'][mcName][hmc.name()] = outdir +
'/%s-%s-variations.pdf' % (mcName, hmc.name())
578 if not (opts.ignore_errs_mc):
579 print(
"[INFO] See plots of %s variations here" % mcName, outdir +
"/validation-plots/")
580 for k, v
in plotDictionary[
'error-analysis'][mcName].
items():
584 if (opts.make_super_aos):
585 dataSuperAO = ct.makeSuperAO(histograms[
'data'])
586 histograms[
'data'].
append(dataSuperAO)
587 yoda.writeYODA(dataSuperAO, opts.data.replace(
'.yoda',
'.super-ao.yoda'))
588 for mc
in histograms[
'models'].
keys():
589 mcSuperAO = ct.makeSuperAO(histograms[
'models'][mc])
590 histograms[
'models'][mc].
append(mcSuperAO)
591 yoda.writeYODA(mcSuperAO, mc.replace(
'.yoda',
'super-ao.yoda'))
594 if (len(opts.data) == 0):
595 print(
"[INFO] No data histograms to be processed. Cannot make comparisons, so exiting.")
597 if (len(opts.mc) == 0):
598 print(
"[INFO] No MC histograms to be processed. Cannot make comparisons, so exiting.")
600 for hdata
in sorted(histograms[
'data']):
601 if not ct.hasValidErrorBreakdown(hdata):
continue
603 if not (
"all" in opts.filter):
605 for fil
in opts.filter:
606 if fil
in hdata.name(): passFilter =
True
607 if not (passFilter
or 'superAO' in hdata.name()):
continue
608 for model
in histograms[
'models']:
609 aoMap =
matchAOs(histograms[
'data'], histograms[
'models'][model])
611 mcName = mcNames[model]
612 outdir =
"outputs/%s" % (opts.data.replace(
".yoda",
""))
613 os.system(
"mkdir -p %s/data-vs-%s/plots" % (outdir, mcName))
618 covDetailsData =
"Size of uncertainties across range, and Data Correlation Matrix"
619 if ((opts.use_toys
or ct.hasAsymmetricErrors(hdata))
and not opts.use_direct):
620 print(
"[WARNING] Data has asymmetric errors, producing covariance matrix from %d toys" % opts.ntoys)
621 covData = ct.makeCovarianceMatrixFromToys(hdata, opts.ntoys, opts.ignore_corrs_data)
622 covDetailsData +=
" produced from %d toys%s" % (opts.ntoys,
", ignoring correlations" if opts.ignore_corrs_data
else "")
624 covData = ct.makeCovarianceMatrix(hdata, opts.ignore_corrs_data)
625 covDetailsData +=
" produced from direct propagation%s" % (
", ignoring correlations" if opts.ignore_corrs_data
else "")
626 plotparser = rivet.mkStdPlotParser([], [])
627 headers = plotparser.getHeaders(hdata.path())
628 if 'XLabel' in headers.keys(): XLabel = headers[
'XLabel']
630 ct.drawMatrix(covData, outdir +
"/data/plots/data-%s-covariance-matrix" % hdata.name(), xLabel=XLabel)
631 plotDictionary[
'covariance-matrix'].setdefault(
'data', {})
632 plotDictionary[
'covariance-matrix'][
'data'][hdata.name()] = outdir +
"/data/plots/data-%s-covariance-matrix.pdf" % hdata.name()
633 ct.drawMatrix(ct.correlationMatrix(covData), outdir +
"/data/plots/data-%s-correlation-matrix" % hdata.name())
634 plotDictionary[
'correlation-matrix'].setdefault(
'data', {})
635 plotDictionary[
'correlation-matrix'][
'data'][hdata.name()] = outdir +
"/data/plots/data-%s-correlation-matrix.pdf" % hdata.name()
638 covDetailsMC =
"MC Covariance Matrix"
639 if ((opts.use_toys
or ct.hasAsymmetricErrors(hmc))
and not opts.use_direct):
640 print(
"[WARNING] MC has asymmetric errors, producing covariance matrix from %d toys" % opts.ntoys)
641 covMC = ct.makeCovarianceMatrixFromToys(hmc, opts.ntoys, opts.ignore_corrs_mc)
642 covDetailsMC +=
" produced from %d toys%s" % (opts.ntoys,
", ignoring correlations" if opts.ignore_corrs_mc
else "")
644 covMC = ct.makeCovarianceMatrix(hmc, opts.ignore_corrs_mc)
645 covDetailsMC +=
" produced from direct propagation%s" % (
", ignoring correlations" if opts.ignore_corrs_mc
else "")
646 ct.drawMatrix(covMC, outdir +
"/%s/plots/%s-%s-covariance-matrix" % (mcName, mcName, hdata.name()))
647 plotDictionary[
'covariance-matrix'].setdefault(mcName, {})
648 plotDictionary[
'covariance-matrix'][mcName][hmc.name()] = outdir +
"/%s/plots/%s-%s-covariance-matrix.pdf" % (mcName, mcName, hmc.name())
649 ct.drawMatrix(ct.correlationMatrix(covMC), outdir +
"/%s/plots/%s-%s-correlation-matrix" % (mcName, mcName, hmc.name()))
650 plotDictionary[
'correlation-matrix'].setdefault(mcName, {})
651 plotDictionary[
'correlation-matrix'][mcName][hmc.name()] = outdir +
"/%s/plots/%s-%s-correlation-matrix.pdf" % (mcName, mcName, hmc.name())
654 covTotal = covData + covMC
655 if opts.ignore_errs_data
and opts.ignore_errs_mc:
656 print(
"[ERROR] cannot ignore both data errros and MC errors at the same time!")
658 elif opts.ignore_errs_data:
660 covDetailsData =
"Ignoring Data uncertainties (MC cov matrix only)"
661 elif opts.ignore_errs_mc:
663 covDetailsMC =
"Ignoring MC uncertainties (Data cov matrix only)"
664 covDetailsData +=
". Ignoring MC uncertainties"
665 plotDictionary[
'covDetails'].setdefault(
"%s" % mcName, {})
666 plotDictionary[
'covDetails'].setdefault(
"data", {})
667 plotDictionary[
'covDetails'][
"data"][hdata.name()] = covDetailsData
668 plotDictionary[
'covDetails'][mcName][hmc.name()] = covDetailsMC
670 ct.drawMatrix(covMC, outdir +
"/data-vs-%s/plots/data-vs-%s-%s-covariance-matrix" % (mcName, mcName, hdata.name()))
671 plotDictionary[
'covariance-matrix'].setdefault(
"data-vs-%s" % mcName, {})
672 plotDictionary[
'covariance-matrix'][
"data-vs-%s" % mcName][hmc.name()] = outdir +
"/data-vs-%s/plots/data-vs-%s-%s-covariance-matrix.pdf" % (mcName, mcName, hdata.name())
673 ct.drawMatrix(ct.correlationMatrix(covMC), outdir +
"/data-vs-%s/plots/data-vs-%s-%s-correlation-matrix" % (mcName, mcName, hdata.name()))
674 plotDictionary[
'correlation-matrix'].setdefault(
"data-vs-%s" % mcName, {})
675 plotDictionary[
'correlation-matrix'][
"data-vs-%s" % mcName][hmc.name()] = outdir +
"/data-vs-%s/plots/data-vs-%s-%s-correlation-matrix.pdf" % (mcName, mcName, hdata.name())
678 chi2, ndf, prob, chi2contribs = ct.calculateChi2(hdata, hmc, covTotal)
679 ct.drawMatrix(chi2contribs, outdir +
"/data-vs-%s/plots/data-vs-%s-%s-chi2-contribs" % (mcName, mcName, hdata.name()))
680 plotDictionary[
'chi2-contribs'].setdefault(
"data-vs-%s" % mcName, {})
681 plotDictionary[
'chi2-contribs'][
"data-vs-%s" % mcName][hmc.name()] = outdir +
"/data-vs-%s/plots/data-vs-%s-%s-chi2-contribs.pdf" % (mcName, mcName, hdata.name())
683 chi2ContribsByRow = ct.chi2ContribsByRow(chi2contribs)
684 chi2ContribsByRowYAML = yaml.dump(chi2ContribsByRow, default_flow_style=
True, default_style=
'', width=1e6)
685 hmc.setAnnotation(
"Chi2Contribs", chi2ContribsByRowYAML)
686 ct.updateAOinYODA(hmc, model)
687 plotDictionary[
'chi2-value'].setdefault(
"data-vs-%s" % mcName, {})
688 plotDictionary[
'chi2-value'][
"data-vs-%s" % mcName][hmc.name()] =
"$%.2f / %d$" % (chi2, ndf)
691 mcResults[model] = [chi2, ndf]
692 print(
"[INFO] %s:%s vs %s:%s --> chi2=%.2f/%d (prob=%.2f) " % (opts.data, hdata.name(), model, hmc.name(), chi2, ndf, prob))
694 if "superAO" in hdata.name():
continue
696 res = {
'%s' % opts.data:
'[Data]',
'%s' % model:
'[%s (# chi^2=%.2f/%d)]' % (mcName, chi2, ndf)}
697 outdirplots = outdir +
"/data-vs-%s/plots/" % mcName
698 plots = st.makeSystematicsPlotsWithROOT(res, outdirplots, nominalName=
'Data', ratioZoom=
None, regexFilter=
".*%s.*" % hmc.name(), regexVeto=
None)
700 plotDictionary[
'data-vs-mc'].setdefault(
"data-vs-%s" % mcName, {})
701 plotDictionary[
'data-vs-mc'][
"data-vs-%s" % mcName][hmc.name()] = plots[0]
703 outdir =
"outputs/%s/summary" % (opts.data.replace(
".yoda",
""))
706 res = {
'%s' % opts.data:
'[Data]'}
707 for model
in histograms[
'models']:
708 mcName = mcNames[model]
709 mcR = mcResults[model]
710 res[model] =
'[%s (# chi^2=%.2f/%d)]' % (mcName, mcR[0], mcR[1])
711 if "superAO" in hdata.name():
continue
713 if "superAO" in hdata.name():
continue
714 plots = st.makeSystematicsPlotsWithROOT(res, outdir, nominalName=
'Data', ratioZoom=
None, regexFilter=
".*%s.*" % hmc.name(), regexVeto=
None)
715 plotDictionary[
'summary-plot'][hmc.name()] = plots[0]
716 makeSummaryTable(plotDictionary, outdir +
"/summary_%s.tex" % hdata.name(), hdata.name())
717 plotDictionary[
'summary-table'][hmc.name()] = outdir +
"/summary_%s.tex" % hdata.name()
719 print(
"[INFO] output rivet files with Chi2: %s" % outdir)
720 os.system(
'ls %s/* | grep pdf' % outdir)
723 outdir =
"outputs/%s/summary" % (opts.data.replace(
".yoda",
""))
724 if len(plotDictionary[
'chi2-value'].
keys()) == 0:
725 print(
"[WARNING] no histograms were processed, could not make summary slides! Exit")
729 plotDictionary[
'summary-table'][
'all'] = outdir +
"/summary_all.tex"
731 os.system(
'rm slides_*')
732 os.system(
'pdflatex %s > /dev/null' % beamerPath)
733 os.system(
'pdflatex %s > /dev/null' % beamerPath)
734 beamerPath = beamerPath.replace(
"tex",
"pdf").
split(
"/")[-1]
735 print(
"[INFO] your summary slides are here: \n", beamerPath)