The `covarianceToolsLibrary.py` can be imported into your own Python script and used directly to manipulate covariance information and evaluate goodness of fit.
Examples of how to do this are provided in the `examples/covarianceTool-examples/` directory.

Author: Louie D. Corpe (UCL)
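For example, here is a minimal sketch of the kind of workflow these routines support
(the input file name and analysis-object path below are placeholders, and the direct use
of `yoda`/`yaml` mirrors what the library does internally rather than a specific public API):

    import yoda
    import yaml

    # Hypothetical input file and analysis-object path, for illustration only
    aos = yoda.read("myMeasurement.yoda")
    ao = aos["/MY_ANALYSIS/d01-x01-y01"]

    # The covariance tools store the per-bin uncertainty breakdown as a YAML string
    # in the 'ErrorBreakdown' annotation: {bin index: {syst name: {'up': ..., 'dn': ...}}}
    if ao.hasAnnotation("ErrorBreakdown"):
        binErrors = yaml.load(ao.annotation("ErrorBreakdown"))
        for ibin, systs in binErrors.items():
            for sname, err in systs.items():
                print(ibin, sname, err['up'], err['dn'])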
from array import array
from numpy import linalg as LA
from scipy import stats

# Modules used throughout the routines below
import numpy as np
import yaml
import yoda
import ROOT as r

# Silence ROOT info/warning chatter; only report errors
r.gErrorIgnoreLevel = r.kError
`h` AO (the Scatter you want to set the correlation info for)

corrIn = yaml.dump(binErrors, default_flow_style=True, default_style='', width=1e6)
h.setAnnotation("ErrorBreakdown", corrIn)
corr = h.annotation('ErrorBreakdown')
binErrors = yaml.load(corr)
if binErrorsToAdd.keys() != binErrors.keys():
    print("[ERROR], could not add ", systName, " to ", h, " because the bins are inconsistent")
    print(binErrors.keys(), " vs ", binErrorsToAdd.keys())

for k, v in binErrorsToAdd.items():
    binErrors[k][systName] = v
if not h.hasAnnotation('ErrorBreakdown'):
    print("[ERROR] analysis object", h, " has no stored correlation info!")

binErrors = yaml.load(h.annotation('ErrorBreakdown'))
binErrorsFiltered = {}
for k, v in binErrors.items():
    for k2, v2 in v.items():
        binErrorsFiltered.setdefault(k, {})[k2] = v2

print("[WARNING] cannot find systematic with name ", name)
binErrors = binErrorsFiltered
nbins = ao.numPoints()
corr = yaml.load(ao.annotation('ErrorBreakdown'))
if len(corr) == 0:
    return True
systList = corr[0].keys()

for sname in systList:
    for ibin in range(nbins):
        up = corr[ibin][sname]['up']
        dn = corr[ibin][sname]['dn']
nbins = ao.numPoints()
corr = ao.annotation('ErrorBreakdown') if '1.7' in yoda.version() else yaml.load(ao.annotation('ErrorBreakdown'))

covM = np.zeros(dummyM.shape)

for ibin in range(nbins):
    covM[ibin][ibin] = ((ao.points()[ibin].yErrs()[0] + ao.points()[ibin].yErrs()[1]) / 2) ** 2
    if covM[ibin][ibin] == 0:
        covM[ibin][ibin] = 1
        print("[WARNING], ao ", ao.path, " has no errors. Setting cov matrix to unit... but consider excluding it!")

systList = corr[0].keys()
for sname in systList:
    systErrs = np.zeros(nbins)
    for ibin in range(nbins):
        systErrs[ibin] = (abs(corr[ibin][sname]['up']) + abs(corr[ibin][sname]['dn'])) * 0.5
    if ignore_corrs or 'stat' in sname:
        covM += np.diag(systErrs * systErrs)
    else:
        covM += np.outer(systErrs, systErrs)

if LA.det(covM) == 0:
    print("[ERROR], the above cov matrix is singular for ", ao.path(), " ! exit")
    print("(if this is a cov matrix for a super-AO, perhaps you have several highly correlated distributions in your set. Try vetoing some.)")
if not ao.hasAnnotation("ErrorBreakdown"):
    return False
if len(ao.annotation("ErrorBreakdown")) == 0:
    return False

for iBin, binErrs in corrs.items():
    for syst, err in binErrs.items():
        binTotal[0] = (binTotal[0] ** 2 + err['up'] ** 2) ** 0.5
        binTotal[1] = (binTotal[1] ** 2 + err['dn'] ** 2) ** 0.5
    if binTotal[0] == 0 and binTotal[1] == 0:
        return False
nbins = ao.numPoints()
corr = yaml.load(ao.annotation('ErrorBreakdown'))
systList = corr[0].keys()
toys = np.zeros([ntoys, nbins])

for itoy in range(ntoys):
    shift = np.zeros(len(systList))
    delta = np.zeros(nbins)

    for sname in systList:
        if ignore_corrs or 'stat' in sname:
            for n in range(nbins):
                lrand = np.random.normal(0, 1)
                s = corr[n][sname]['up'] if lrand >= 0. else corr[n][sname]['dn']
                delta[n] += abs(lrand) * s
        else:
            lrand = np.random.normal(0, 1)
            for n in range(nbins):
                s = corr[n][sname]['up'] if lrand >= 0. else corr[n][sname]['dn']
                delta[n] += abs(lrand) * s

    for n in range(nbins):
        toys[itoy][n] = 1. + delta[n]

cov = np.cov(toys, rowvar=0)
if LA.det(cov) == 0:
    print("[ERROR], the above cov matrix is singular for ", ao.path, " ! exit")
    print("(if this is a cov matrix for a super-AO, perhaps you have several highly correlated distributions in your set. Try vetoing some.)")
if ao.dim() != 2:
    return

tgraphs['all'] = [r.TGraphAsymmErrors(), r.TGraphAsymmErrors(), r.TGraphAsymmErrors(), r.TGraphAsymmErrors()]

nbins = ao.numPoints()
corr = ao.annotation('ErrorBreakdown') if '1.7' in yoda.version() else yaml.load(ao.annotation('ErrorBreakdown'))

if len(corr) == 0:
    return
systList = corr[0].keys()

all_up = [0 for i in range(nbins)]
all_dn = [0 for i in range(nbins)]
for sname in systList:
    tgu = r.TGraphAsymmErrors()
    tgu_line = r.TGraphAsymmErrors()
    tgd = r.TGraphAsymmErrors()
    tgd_line = r.TGraphAsymmErrors()

    for i in range(nbins):
        nominalx = ao.points()[i].x()
        nominaly = ao.points()[i].y()
        xerr = abs(nominalx - ao.points()[i].xMin())
        tgu.SetPoint(pt, nominalx, 1)
        tgd.SetPoint(pt, nominalx, 1)
        if nominaly == 0:
            continue
        eup = 1 - (corr[i][sname]['up'] + nominaly) / nominaly
        edn = 1 - (corr[i][sname]['dn'] + nominaly) / nominaly
        all_up[i] = (all_up[i] ** 2 + eup ** 2) ** 0.5
        all_dn[i] = (all_dn[i] ** 2 + edn ** 2) ** 0.5
        if abs(eup) > threshold: draw = 1
        if abs(edn) > threshold: draw = 1
        if abs(all_up[i]) > maxErr: maxErr = abs(all_up[i])
        tgu.SetPointError(pt, xerr, xerr, 0, eup)
        tgd.SetPointError(pt, xerr, xerr, 0, edn)
        tgu_line.SetPoint(pt, nominalx, 1 + eup)
        tgd_line.SetPoint(pt, nominalx, 1 + edn)
        tgu_line.SetPointError(pt, xerr, xerr, 0, 0)
        tgd_line.SetPointError(pt, xerr, xerr, 0, 0)
        tgraphs['all'][0].SetPoint(pt, nominalx, nominaly / nominaly)
        tgraphs['all'][1].SetPoint(pt, nominalx, nominaly / nominaly)
        tgraphs['all'][2].SetPoint(pt, nominalx, 1 - all_dn[i])
        tgraphs['all'][3].SetPoint(pt, nominalx, 1 + all_up[i])
        tgraphs['all'][0].SetPointError(pt, xerr, xerr, 0, -1 * all_dn[i])
        tgraphs['all'][1].SetPointError(pt, xerr, xerr, 0, all_up[i])
        tgraphs['all'][2].SetPointError(pt, xerr, xerr, 0, 0)
        tgraphs['all'][3].SetPointError(pt, xerr, xerr, 0, 0)

    if draw: tgraphs[sname] = [tgd, tgu, tgd_line, tgu_line]

tc = r.TCanvas("c", "c", 500, 500)

leg = r.TLegend(.13, .6, .9, .8)
leg.SetTextSize(0.025)

for tgname, tg in sorted(tgraphs.items(), key=lambda x: abs(x[1][1].GetErrorYhigh(0)), reverse=True):
    isCorr = not ('stat' in tgname or 'all' in tgname)
    if 'nominal' == tgname:
        continue
    if 'mc' == tgname:
        continue
    tg[0].SetFillColorAlpha(0 + itg, 0.3)
    tg[1].SetFillColorAlpha(0 + itg, 0.3)
    tg[0].SetMarkerColor(1)
    tg[2].SetMarkerStyle(23)
    tg[2].SetMarkerColor(0 + itg)
    tg[2].SetLineColor(0 + itg)
    tg[3].SetMarkerStyle(22)
    tg[3].SetMarkerColor(0 + itg)
    tg[3].SetLineColor(0 + itg)
    leg.AddEntry(tg[0], tgname, "f")

    tg[0].Draw("p2 same")
    tg[1].Draw("p2 same")
    if isCorr: tg[2].Draw("P same")
    if isCorr: tg[3].Draw("P same")

    tg[0].SetMaximum((1. + maxErr) * 1.33)
    tg[0].SetMinimum(1. - maxErr)

    tg[1].Draw("P2 same")
    if isCorr: tg[2].Draw("P same")
    if isCorr: tg[3].Draw("P same")

lat.SetTextSize(0.05)
lat.DrawLatex(0.1, 0.91, "#it{ATLAS} #bf{Internal}")
lat.SetTextSize(0.035)
lat.DrawLatex(0.13, 0.83, "#bf{All variations >%1.1f%% anywhere in range:}" % (threshold * 100))

lat.SetTextSize(0.04)
lat.DrawLatex(0.9, 0.05, "#bf{%s}" % xLabel.replace("$", "").replace("\\text", ""))

lat.DrawLatex(0.02, 0.9, "#bf{Relative Uncertainty}")

outfile = "%s/%s-variations" % (outdir, label)
tc.SaveAs(outfile + ".pdf")
tc.SaveAs(outfile + ".png")
npts = scat.numPoints()
cov_up = np.zeros([npts, npts])
cov_dn = np.zeros([npts, npts])
for i in range(npts):
    yed_, yeu_ = scat.points()[i].yErrs()
    cov_up[i][i] = yeu_ * yeu_
    cov_dn[i][i] = yed_ * yed_
return [cov_dn, cov_up]

cov_up = np.zeros([npts, npts])
cov_dn = np.zeros([npts, npts])
for i in range(npts):
    err = scat.bins[i].xStdErr()
    cov_up[i][i] = err * err
    cov_dn[i][i] = err * err
return [cov_dn, cov_up]
mo = np.zeros([npts, npts])
for i in range(npts):
    for j in range(npts):
        mo[0][i] = h.points()[i].val(dim)
        width = abs(h.points()[i].errs(dim)[0])
        width += abs(h.points()[i].errs(dim)[1])
        mo[0][i] *= 1. / width

mo = np.zeros([npts, npts])
for i in range(npts):
    for j in range(npts):
        mo[0][i] = h.bins[i].sumW()
        width = h.bins[i].xWidth()
        mo[0][i] *= 1. / width
if isinstance(outfile, str): outfile = open("%s.txt" % outfile, 'w')
nCols = matrix.shape[0]
nRows = matrix.shape[1]

for i in range(nCols):
    colNames += "%d " % i
if outfile is not None: outfile.write(colNames + "\n")
else: print(colNames + "\n")

for j in range(nRows):
    rowString = "%d| " % j
    for i in range(nCols):
        rowString += "%.6f " % matrix[i][j]
    if outfile is not None: outfile.write(rowString + "\n")
    else: print(rowString + "\n")

if outfile is not None: outfile.write("Eigenvalues %s\n" % " ".join(["%.3f" % val for val in LA.eigvals(matrix)]))
else: print("Eigenvalues %s\n" % " ".join(["%.6f" % val for val in LA.eigvals(matrix)]))
nCols = matrix.shape[0]
nRows = matrix.shape[1]
assert nCols == nRows
lat.SetTextSize(0.02)

Red = array('d', [1.0, 1.0, 0.0])
Grn = array('d', [0.0, 1.0, 0.0])
Blu = array('d', [0.0, 1.0, 1.0])
Length = array('d', [0.0, 0.5, 1.0])
r.TColor.CreateGradientColorTable(len(Length), Length, Red, Grn, Blu, 50)

if 'chi2' in outfile: nRows += 1
th2 = r.TH2D(outfile.split("/")[-1], outfile.split("/")[-1], nRows, 0, nRows, nBins, 0, nBins)

for j in range(nBins):
    th2.GetYaxis().SetBinLabel(j + 1, "%d" % (nBins - j))
    for i in range(nBins):
        th2.GetXaxis().SetBinLabel(i + 1, "%d" % (i + 1))
        th2.Fill(i, nBins - j - 1, matrix[i][j])
        total += matrix[i][j]
    totalOverall += total
    if 'chi2' in outfile: th2.Fill(nBins, nBins - j - 1, total)
    if 'chi2' in outfile: th2.GetXaxis().SetBinLabel(nBins + 1, "TOTAL =%.2f" % totalOverall)
    totalsPerRow.append(total)

tc = r.TCanvas("c", "c", 500, 500)
tc.SetRightMargin(0.2)
if 'covariance' in outfile: tc.SetLogz()

for j in range(nBins):
    for i in range(nBins):
        lat.DrawLatex(i + 0.5, nBins - j - 0.5, "%.2f" % matrix[i][j])
    if 'chi2' in outfile: lat.DrawLatex(nBins + 0.5, nBins - j - 0.5, "%.2f" % totalsPerRow[j])

th2.SetMaximum(max(abs(th2.GetMaximum()), abs(th2.GetMinimum())))
th2.SetMinimum(-1 * th2.GetMaximum())

lat.SetTextSize(0.05)
lat.DrawLatex(0.1, 0.91, "#it{ATLAS} #bf{Internal}")
lat.SetTextSize(0.035)

lat.SetTextSize(0.04)
lat.DrawLatex(0.9, 0.05, "#bf{%s}" % xLabel.replace("$", "").replace("\\text", ""))

lat.DrawLatex(0.02, 0.9, "#bf{%s}" % xLabel.replace("$", "").replace("\\text", ""))

tc.SaveAs(outfile + ".pdf")
tc.SaveAs(outfile + ".png")
print("[INFO] Manual Chi2 Calculation, (verbosity)= ", (verbosity > 0))

for i in range(Nbins):
    sigma = sigmaSq ** 0.5
    residual = nEvData - nEvMC
    print("[INFO] Manual Chi2 Bin i=%d: data=%f, mc=%f, res=%f vs sigma=%f res/sigma=%.2f, (r/s)**2=%.2f" % (i, nEvData, nEvMC, residual, sigma, float(residual) / sigma, (float(residual) / sigma) ** 2))
    chi2 += (residual / sigma) ** 2

print("[INFO] FINAL MANUAL CHI2= ", chi2)
ndf = data.numPoints()

for name, obj in {'data': data, 'mc': mc}.items():
    if type(obj) in (yoda.Scatter1D, yoda.Scatter2D, yoda.Scatter3D):

v = values['data'].copy()
v -= values['mc'].copy()

cov = covData + covMC

if cov.shape[0] != cov.shape[1]:
    print("[ERROR] the specified covariance matrix is not square!", cov)

if cov.shape[0] != v.shape[0]:
    print("[ERROR] the specified covariance matrix is incompatible with the vector of data-MC", cov, v)

precision_matrix = LA.inv(cov.copy())

print("[INFO]: precision matrix:", precision_matrix)

vT = v.copy().transpose()
chi2tmp = v.dot(precision_matrix).dot(vT)

pvalue = 1 - stats.chi2.cdf(chi2, ndf)
nbins = covMatrix.shape[0]
corr = np.zeros([nbins, nbins])
for i in range(nbins):
    sigma_i = covMatrix[i][i]
    for j in range(nbins):
        sigma_j = covMatrix[j][j]
        corr[i][j] = covMatrix[i][j] / np.sqrt(sigma_i * sigma_j)
aos = yoda.read(infile)

yoda.write(sorted(aos.values()), infile)
nbins = prec.shape[0]
diffT = diff.copy().transpose()
mo = np.zeros([nbins, nbins])
for j in range(nbins):
    for i in range(nbins):
        mo[i][j] = diff[0][j] * prec[i][j] * diffT[i][0]
nbins = chi2contribs.shape[0]

for j in range(nbins):
    for i in range(nbins):
        mo[j] += float(chi2contribs[i][j])
aoOut = yoda.core.Scatter2D()

corrIn = aoIn.annotation('ErrorBreakdown') if '1.7' in yoda.version() else yaml.load(aoIn.annotation('ErrorBreakdown'))
if len(corrIn) == 0:
    continue
for ipt in range(aoIn.numPoints()):
    for syst in corrIn[ipt].keys():
        if syst not in allSystematics: allSystematics.append(syst)

for aoIn in sorted(aoList, key=lambda x: x.name):
    corrIn = aoIn.annotation('ErrorBreakdown') if '1.7' in yoda.version() else yaml.load(aoIn.annotation('ErrorBreakdown'))
    if len(corrIn) == 0:
        continue
    for ipt in range(aoIn.numPoints()):
        aoOut.addPoint(yoda.core.Point2D(pointCounter, aoIn.points()[ipt].y()))

        for syst in allSystematics:
            if syst in corrIn[ipt].keys():
                corrOut.setdefault(pointCounter, {})[syst] = corrIn[ipt][syst]
                totalErrorUp += corrIn[ipt][syst]['up'] * corrIn[ipt][syst]['up']
            else:
                corrOut.setdefault(pointCounter, {})[syst] = {'up': 0.0, 'dn': 0.0}
        pointsOut.append(aoIn.points()[ipt])

for i in range(aoOut.numPoints()):
    if verbosity > 1:
        print("[INFO] Super AO points ", aoOut.points()[i])

corrs = yaml.dump(corrOut, default_flow_style=True, default_style='', width=1e6)
aoOut.setAnnotation("ErrorBreakdown", corrs)
name = aoList[0].path.replace(aoList[0].name, "")

for ao in sorted(aoList, key=lambda x: x.name):
    name += "_%s" % ao.name
aoOut.setAnnotation("Path", name)