// ATLAS Offline Software — CalibrationDataEigenVariations.cxx
// (Doxygen HTML page residue removed; the original source listing follows.)
1/*
2 Copyright (C) 2002-2026 CERN for the benefit of the ATLAS collaboration
3*/
4
6// CalibrationDataEigenVariations.cxx, (c) ATLAS Detector software
8
12
#include <algorithm>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <limits>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "TH1.h"
#include "TH2.h"
#include "TVectorT.h"
#include "TDecompSVD.h"
#include "TMatrixDSymEigen.h"
#include "TMatrixT.h"
#include "TROOT.h"
#include "TFile.h"

#include <memory.h>
32
36using std::string;
37using std::map;
38using std::vector;
39using std::set;
40using std::pair;
41using std::setw;
42using std::setprecision;
43
44namespace {
45
46 // Utility functions...
47 void validate_reduction(int matrixsize, TMatrixDSym& corr, std::vector<std::pair<TH1*, TH1*>>& eigenvars, TH1* result, const std::string& scheme, const std::string& tagger, const std::string& wp, const std::string& jetauthor){
48 // Validating the reduction schemes - you can compare the correlation matrix constructed before and after eigenvector decomposition
49 // using whatever eigenvectors are available at either stage. The less difference, the better.
50
51 // This function simply constructs the relevant matrix relative percentage difference comparison, and saves to file of your choosing.
52 // Saving to EOS is recommended - this can then be accessed, downloaded, and plotted with whatever plotting script you prefer.
53
54 // Each systematic strategy scheme has a different way to construct the approximate covariance matrix
55 TMatrixDSym approx_cov(matrixsize); // matrix with zeros
56
57 if (scheme == "SFEigen"){
58 for (const std::pair<TH1*,TH1*>& var : eigenvars){
59 // we want to make the tensor product of (purely) the variation with itself, and store it in a TMatrix
60 // ---> get the pure variations by subtracting the result from the up_variation combined eigenvariation histogram
61 TH1* pure_var = static_cast<TH1*>(var.first->Clone()); pure_var->SetDirectory(0);
62 pure_var->Add(result, -1.0); // do the subtraction, get variation value histogram
63 // ---> convert the pure variation into a vector of doubles
64 std::vector<double> pure_var_vec;
65 for (int binz = 1 ; binz <= pure_var->GetNbinsZ() ; ++binz){
66 for (int biny = 1 ; biny <= pure_var->GetNbinsY() ; ++biny){
67 for (int binx = 1 ; binx <= pure_var->GetNbinsX() ; ++binx){
68 pure_var_vec.push_back(pure_var->GetBinContent(pure_var->GetBin(binx,biny,binz)));
69 }
70 }
71 }
72 // then compute the covariance matrix, and sum into the total (approximate) covariance matrix
73 for(int i = 0 ; i < corr.GetNrows() ; i++){
74 for(int j = 0 ; j < corr.GetNcols() ; j++){
75 approx_cov(i,j) += (pure_var_vec[i])*(pure_var_vec[j]); // do the tensor product
76 }
77 }
78 } // end of (reduced set of) eigenvariations loop
79
80 } else if (scheme == "SFGlobalEigen"){
81 TH1* comb_result = result; // renaming to match the intended usage...
82
83 for (const std::pair<TH1*,TH1*>& var : eigenvars){
84 // we want to make the tensor product of (purely) the variation with itself, and store it in a TMatrix
85 // get the pure variations by subtracting the comb_result from the up_variation combined eigenvariation histogram
86 TH1* pure_var = static_cast<TH1*>(var.first->Clone()); pure_var->SetDirectory(0);
87 pure_var->Add(comb_result, -1.0); // do the subtraction, get variation value histogram
88 for(int i = 0 ; i < approx_cov.GetNrows() ; i++){
89 for(int j = 0 ; j < approx_cov.GetNcols() ; j++){
90 approx_cov(i,j) += (pure_var->GetBinContent(i))*(pure_var->GetBinContent(j)); // do the tensor product
91 }
92 }
93 } // end of (reduced set of) eigenvariations loop
94
95 } else {
96 std::cout << " Error: The scheme " << scheme << " didn't match SFEigen or SFGlobalEigen." << std::endl;
97 return;
98 }
99
100 // make the total correlation matrix from the (approximate) total covariance matrix
101 TMatrixDSym approx_corr(approx_cov);
102 for(int row = 0 ; row < approx_cov.GetNrows() ; row++){
103 for(int col = 0 ; col < approx_cov.GetNcols() ; col++){
104 double rowvar = sqrt(approx_cov(row,row));
105 double colvar = sqrt(approx_cov(col,col));
106 approx_corr(row,col) = approx_cov(row,col)/(rowvar * colvar); // divide each element by the variance
107 }
108 }
109 // subtract the approximate correlation matrix from the true correlation matrix, and save to file
110 TMatrixDSym numerator = (approx_corr - corr);
111 TMatrixDSym denominator = corr;
112 TMatrixDSym correlation_comparison(matrixsize);
113 for(int row = 0 ; row < approx_corr.GetNrows() ; row++){
114 for(int col = 0 ; col < approx_corr.GetNcols() ; col++){
115 if (denominator(row,col)==0.0){
116 correlation_comparison(row,col) = 0.0;
117 } else {
118 correlation_comparison(row,col) = 100*std::abs(numerator(row,col)/denominator(row,col)); // store relative percent errors
119 }
120 }
121 }
122
123 string outputname = "Validate_" + scheme + "_" + tagger + "_" + wp + "_" + jetauthor + "_" + std::to_string(eigenvars.size()) + "vars" ;
124 string eospath = "/your/path/here/plotting/"; // <------ Place you want to store plots. Obviously, change this
125 string outputfilename = eospath + outputname + ".root";
126 std::cout << "ATTEMPTING TO WRITE " << outputfilename << " TO FILE..." << std::endl;
127 TFile fout(outputfilename.c_str(), "recreate");
128 fout.cd();
129 std::unique_ptr<TH2D> hout(new TH2D(correlation_comparison));
130
131 // Note: you can also look at the other matrices involved - save them to file and plot them if you wish
132 hout->SetTitle("Correlation Matrix Comparison");
133 hout->Write();
134 fout.Close();
135
136 }
137
138
139
140
141 // The covariance matrices constructed by the following two functions adhere to the TH1 binning conventions.
142 // But in order to avoid unnecessary overhead, the actual dimensionality of the histograms is accounted for.
143
144 // Construct the (diagonal) covariance matrix for the statistical uncertainties on the "ref" results
145 // Note that when statistical uncertainties are stored as variations, they are already accounted for and hence should not be duplicated; hence they are dummy here
146 // (this is not very nice, in that the result of this function will technically be wrong,
147 // but the statistical uncertainty covariance matrix is anyway not separately visible to the outside world)
148 TMatrixDSym getStatCovarianceMatrix(const TH1* hist, bool zero) {
149 Int_t nbinx = hist->GetNbinsX()+2, nbiny = hist->GetNbinsY()+2, nbinz = hist->GetNbinsZ()+2;
150 Int_t rows = nbinx;
151 if (hist->GetDimension() > 1) rows *= nbiny;
152 if (hist->GetDimension() > 2) rows *= nbinz;
153
154 // // the "2" below doesn't actually imply that two bins are used...
155 // // this is just to make the loops below work
156 // if (ref->GetDimension() <= 1) nbiny = 2;
157 // if (ref->GetDimension() <= 2) nbinz = 2;
158
159 TMatrixDSym stat(rows);
160 for (Int_t binx = 1; binx < nbinx-1; ++binx)
161 for (Int_t biny = 1; biny < nbiny-1; ++biny)
162 for (Int_t binz = 1; binz < nbinz-1; ++binz) {
163 Int_t bin = hist->GetBin(binx, biny, binz);
164 double err = zero ? 0 : hist->GetBinError(bin);
165 stat(bin, bin) = err*err;
166 }
167 return stat;
168 }
169
170 // Construct the covariance matrix assuming that histogram "unc" contains systematic uncertainties
171 // pertaining to the "ref" results, and that the uncertainties are fully correlated from bin to bin
172 // (unless option "doCorrelated" is false, in which bins are assumed uncorrelated).
173 // One exception is made for the uncorrelated case: if we are dealing with a "continuous" calibration
174 // object, then a full correlation is still assumed between different tag weight bins.
175 TMatrixDSym getSystCovarianceMatrix(const TH1* ref, const TH1* unc, bool doCorrelated, int tagWeightAxis) {
176 Int_t nbinx = ref->GetNbinsX()+2, nbiny = ref->GetNbinsY()+2, nbinz = ref->GetNbinsZ()+2;
177 Int_t rows = nbinx;
178 if (ref->GetDimension() > 1) rows *= nbiny;
179 if (ref->GetDimension() > 2) rows *= nbinz;
180
181 TMatrixDSym cov(rows);
182
183 // Carry out a minimal consistency check
184 if (unc->GetNbinsX()+2 != nbinx || unc->GetNbinsY()+2 != nbiny || unc->GetNbinsZ()+2 != nbinz || unc->GetDimension() != ref->GetDimension()) {
185 std::cout << "getSystCovarianceMatrix: inconsistency found in histograms " << ref->GetName() << " and " << unc->GetName() << std::endl;
186 return cov;
187 }
188
189 // // the "2" below doesn't actually imply that two bins are used...
190 // // this is just to make the loops below work
191 // if (ref->GetDimension() <= 1) nbiny = 2;
192 // if (ref->GetDimension() <= 2) nbinz = 2;
193
194 // Special case: uncertainties not correlated from bin to bin.
195 // The exception here is for tag weight bins, which are always assumed to be fully correlated.
196 if (! doCorrelated) {
197 if (tagWeightAxis < 0) {
198 // truly uncorrelated uncertainties
199 for (Int_t binx = 1; binx < nbinx-1; ++binx)
200 for (Int_t biny = 1; biny < nbiny-1; ++biny)
201 for (Int_t binz = 1; binz < nbinz-1; ++binz) {
202 Int_t bin = ref->GetBin(binx, biny, binz);
203 double err = unc->GetBinContent(bin);
204 cov(bin,bin) = err*err;
205 }
206 return cov;
207 } else if (tagWeightAxis == 0) {
208 // continuous histogram with tag weight X axis
209 for (Int_t biny = 1; biny < nbiny-1; ++biny)
210 for (Int_t binz = 1; binz < nbinz-1; ++binz)
211 for (Int_t binx = 1; binx < nbinx-1; ++binx) {
212 Int_t bin = ref->GetBin(binx, biny, binz);
213 double err = unc->GetBinContent(bin);
214 for (Int_t binx2 = 1; binx2 < nbinx-1; ++binx2) {
215 Int_t bin2 = ref->GetBin(binx2, biny, binz);
216 double err2 = unc->GetBinContent(bin2);
217 cov(bin,bin2) = err*err2;
218 }
219 }
220 return cov;
221 } else if (tagWeightAxis == 1) {
222 // continuous histogram with tag weight Y axis
223 for (Int_t binx = 1; binx < nbinx-1; ++binx)
224 for (Int_t binz = 1; binz < nbinz-1; ++binz)
225 for (Int_t biny = 1; biny < nbiny-1; ++biny) {
226 Int_t bin = ref->GetBin(binx, biny, binz);
227 double err = unc->GetBinContent(bin);
228 for (Int_t biny2 = 1; biny2 < nbiny-1; ++biny2) {
229 Int_t bin2 = ref->GetBin(binx, biny2, binz);
230 double err2 = unc->GetBinContent(bin2);
231 cov(bin,bin2) = err*err2;
232 }
233 }
234 return cov;
235 } else if (tagWeightAxis == 2) {
236 // continuous histogram with tag weight Z axis
237 for (Int_t binx = 1; binx < nbinx-1; ++binx)
238 for (Int_t biny = 1; biny < nbiny-1; ++biny)
239 for (Int_t binz = 1; binz < nbinz-1; ++binz) {
240 Int_t bin = ref->GetBin(binx, biny, binz);
241 double err = unc->GetBinContent(bin);
242 for (Int_t binz2 = 1; binz2 < nbinz-1; ++binz2) {
243 Int_t bin2 = ref->GetBin(binx, biny, binz2);
244 double err2 = unc->GetBinContent(bin2);
245 cov(bin,bin2) = err*err2;
246 }
247 }
248 return cov;
249 }
250 }
251
252 for (Int_t binx = 1; binx < nbinx-1; ++binx)
253 for (Int_t biny = 1; biny < nbiny-1; ++biny)
254 for (Int_t binz = 1; binz < nbinz-1; ++binz) {
255 Int_t bin = ref->GetBin(binx, biny, binz);
256 double err = unc->GetBinContent(bin); // <------------- For every bin in the "ref" ("result") TH1*, GetBinContents of the corresponding uncertainty bin
257 for (Int_t binx2 = 1; binx2 < nbinx-1; ++binx2)
258 for (Int_t biny2 = 1; biny2 < nbiny-1; ++biny2)
259 for (Int_t binz2 = 1; binz2 < nbinz-1; ++binz2) {
260 Int_t bin2 = ref->GetBin(binx2, biny2, binz2);
261 double err2 = unc->GetBinContent(bin2); // <------- Grab the unc contents of every bin, and compute the covariance matrix element
262 cov(bin, bin2) = err*err2; // <------- err1 and err2 are the uncertainty content of the bins, so "cov" is real, symmetric
263 } // <------- "cov" would imply that the "hunc" histogram stores "x - E[x]" differences from the mean. So in the end, it computes the covariance (as a sum of these)
264 }
265 return cov;
266 }
267
268
269 // <---------------- Custom covariance matrix method - W.L.
270 TMatrixDSym getSystCovarianceMatrix(const vector<double>& unc){
271 int rows = unc.size();
272 TMatrixDSym cov(rows);
273
274 for(int i = 0 ; i < rows ; i++){
275 for(int j = 0 ; j < rows ; j++){
276 double err1 = unc[i];
277 double err2 = unc[j];
278 cov(i,j) = err1*err2;
279 }
280 }
281 return cov;
282 }
283
284 // <---------------- Custom covariance matrix method - W.L.
285 TMatrixDSym getStatCovarianceMatrix(const std::vector<double>& unc, bool zero) {
286 // This is only a stop-gap solution, need to better consider how to do this for 1D,2D,3D binning...
287 int rows = unc.size();
288 TMatrixDSym stat(rows);
289 for (int i = 0; i < rows; i++) {
290 double err = zero ? 0 : unc[i];
291 stat(i,i) = err*err;
292 }
293 return stat;
294 }
295
296
297}
298
300// //
301// CalibrationDataEigenVariations //
302// //
303// This class is intended to provide a more proper way to deal with correlations between //
304// calibration bins than would be possible using directly the calibration containers' //
305// methods (the general issue is with the fact that the interface methods are intended to //
306// be use on a jet by jet basis; in this context it is not straightforward to account for //
307// correlations). //
308// // <-------- Many CDIHistogramContainers in one CDI File
309// A CalibrationDataEigenVariations object is associated with a specific // <-------- one CDEigenVariations object per CDHistogramContainer
310// CalibrationDataHistogramContainer. It starts by constructing the covariance matrix from //
311// the information provided by the container. Subsequently it diagonalises this covariance // <-------- Each CDHistogramContainer stores hists of central values + systematic variations
312// matrix (this is a standard eigenvalue problem, hence the name of the class), and stores //
313// the result as 'variation' histograms (representing the eigenvectors multiplied by the //
314// square root of the corresponding eigenvalues). //
315// Since it is possible for systematic uncertainty contributions to be correlated with //
316// corresponding uncertainties in physics analyses, it is possible to exclude such sources //
317// of uncertainty from being used in the construction of the covariance matrix (this is //
318// since effects from the original sources of uncertainty cannot be traced anymore after //
319// application of the eigenvalue decomposition). It is still possible to evaluate correctly //
320// these uncertainties in the form of so-called 'named variations' (see class //
321// CalibrationDataInterfaceROOT); however this will always treat uncertainties as being //
322// fully correlated (or anti-correlated) between calibration bins, so it is recommended not //
323// to exclude uncertainties that are not correlated between bins from the eigenvector //
324// decomposition. //
326
327#ifndef __CINT__
329#endif
330//________________________________________________________________________________
331CalibrationDataEigenVariations::CalibrationDataEigenVariations(const std::string& cdipath, const std::string& tagger, const std::string& wp, const std::string& jetcollection, CalibrationDataHistogramContainer* cnt, bool excludeRecommendedUncertaintySet, bool base) :
332m_cnt(cnt), m_initialized(false), m_validate(false), m_namedExtrapolation(-1), m_statVariations(false), m_cdipath(cdipath), m_taggername(tagger), m_wp(wp), m_jetauthor(jetcollection), m_totalvariance(0), m_capturedvariance(0), m_verbose(false)
333{
334
335 if (m_verbose) std::cout << " CDEV Constructor : info " << cdipath << " " << tagger << " " << wp << " " << jetcollection << std::endl;
336 // if specified, add items recommended for exclusion from EV decomposition by the calibration group to the 'named uncertainties' list
337 if (excludeRecommendedUncertaintySet && base) {
338 std::vector<std::string> to_exclude = split(m_cnt->getExcludedUncertainties());
339 for (const auto& name : to_exclude) {
340 excludeNamedUncertainty(name, const_cast<CalibrationDataHistogramContainer*>(cnt));
341 }
342 if (to_exclude.size() == 0) {
343 std::cerr << "CalibrationDataEigenVariations warning: exclusion of pre-set uncertainties list requested but no (or empty) list found" <<std::endl;
344 }
345 }
346 // also flag if statistical uncertainties stored as variations (this typically happens as a result of smoothing / pruning of SF results)
347 vector<string> uncs = m_cnt->listUncertainties(); // <-------- The "listUncertainties" method retrieves the list of "information" in the CDIHistogramContainer, basically the "list" output from "showCalibration"
348 for (const auto& name : uncs) {
349 if (name.find("stat_np") != string::npos) m_statVariations = true;
350 }
351
352}
353
//________________________________________________________________________________
// NOTE(review): the signature line was lost in extraction; from the body this is
// presumably the class destructor, releasing all histograms owned by this object.
{
  // delete all variation histograms owned by us
  for (vector<pair<TH1*, TH1*> >::iterator it = m_eigen.begin(); it != m_eigen.end(); ++it) {
    delete it->first;   // up-variation histogram
    delete it->second;  // down-variation histogram
  }

  // the named-variation histograms are owned by this object as well
  for (vector<pair<TH1*, TH1*> >::iterator it = m_named.begin(); it != m_named.end(); ++it) {
    delete it->first;
    delete it->second;
  }
}
368
//________________________________________________________________________________
void
// NOTE(review): the signature line was lost in extraction; from the error messages
// below this is CalibrationDataEigenVariations::excludeNamedUncertainty(name, cnt),
// taking the uncertainty name and the histogram container to check it against.
{
  // Exclude the source of uncertainty identified by the given name from being used
  // in the construction of the covariance matrix to be diagonalised.
  // Notes:
  // - Some names returned by CalibrationDataContainer::listUncertainties() are not
  //   meaningful in this context, and specifying them is not allowed.
  // - Once the eigenvector diagonalisation has been carried out, this method may
  //   not be used anymore.

  if (m_initialized) {
    std::cerr << "CalibrationDataEigenVariations::excludeNamedUncertainty error:" << " initialization already done" << std::endl;
  } else if (name == "comment" || name == "result" || name == "systematics" ||
             name == "statistics" || name == "combined" || name == "extrapolation" ||
             name == "MCreference" || name == "MChadronisation" || name == "ReducedSets" ||
             name == "excluded_set" || name == "" || name == " ") // <--- these last two handle some cases that may turn up
  {
    std::cerr << "CalibrationDataEigenVariations::excludeNamedUncertainty error:" << " name " << name << " not allowed" << std::endl;
  }
  // in case multiple uncertainties should be discarded
  // (name.back() is safe here: the empty string was rejected by the branch above)
  else if (name.back() == '*'){
    std::string temp_name = name.substr(0, name.size()-1); //remove "*"
    std::vector<std::string> uncs = m_cnt->listUncertainties();
    std::vector<std::string> unc_subgroup;
    // keep only the uncertainties whose names start with the wildcard prefix
    std::copy_if(uncs.begin(), uncs.end(), back_inserter(unc_subgroup),
        [&temp_name](const std::string& el) {
          return el.compare(0, temp_name.size(), temp_name) == 0;
        });
    if (m_verbose) std::cout <<"Found a group of uncertainties to exclude: " <<name <<" found " <<unc_subgroup.size() <<" uncertainties corresponding to the query" <<std::endl;
    for (const auto& single_name : unc_subgroup){
      // only really add if the entry is not yet in the list
      if (m_namedIndices.find(single_name) == m_namedIndices.end()) {
        if (m_verbose) std::cout << "Name : " << single_name << std::endl;
        // the actual (up, down) histograms are filled later, in initialize()
        m_named.push_back(std::pair<TH1*, TH1*>(0, 0));
        m_namedIndices[single_name] = m_named.size()-1;
      }
    }
  }
  else if (! cnt->GetValue(name.c_str())){
    std::cerr << "CalibrationDataEigenVariations::excludeNamedUncertainty error:" << " uncertainty named " << name << " not found" << std::endl;
  }
  // NOTE(review): a valid, non-wildcard name that does exist in the container falls
  // through every branch above without being added to m_named / m_namedIndices —
  // verify against the upstream version whether a final registration branch was lost.
}
413
//________________________________________________________________________________
TMatrixDSym
// NOTE(review): the signature line was lost in extraction; from the error messages
// below this is CalibrationDataEigenVariations::getEigenCovarianceMatrix().
{
  // Construct the covariance matrix that is to be diagonalised.
  // Note that extrapolation uncertainties (identified by the keyword "extrapolation,
  // this will pertain mostly to the extrapolation to high jet pt) are always excluded
  // since by definition they will not apply to the normal calibration bins. Instead
  // this uncertainty has to be dealt with as a named variation. In addition there are
  // other items ("combined", "systematics") that will not be dealt with correctly
  // either and hence are excluded as well).
  //
  // Note that if an explicit covariance matrix is supplied (at present this may be
  // the case only for statistical uncertainties: in the case of "continuous tagging",
  // multinomial statistics applies so bin-to-bin correlations exist) this will be
  // used instead of constructing the statistical uncertainties' covariance matrix on
  // the fly.

  // retrieve the central calibration
  TH1* result = dynamic_cast<TH1*>(m_cnt->GetValue("result"));
  if (not result){
    std::cerr<<"CalibrationDataEigenVariations::getEigenCovarianceMatrix(): failed dynamic cast to TH1*\n";
    return TMatrixDSym();
  }

  // loop over the uncertainties to construct the covariance matrix for all uncertainties
  // to be addressed using the eigenvector method.

  // First, treat the statistics separately.
  // Account for the possibility that this is handled as a (non-trivial) covariance matrix
  TMatrixDSym* sCov = dynamic_cast<TMatrixDSym*>(m_cnt->GetValue("statistics"));

  // Alternatively, statistical uncertainties may be accounted for as variations (i.e., much like systematic uncertainties).
  // In that case, we create a null matrix here and add the statistical contributions along with the systematic ones.
  // (m_statVariations == true makes getStatCovarianceMatrix() return an all-zero matrix.)
  TMatrixDSym cov = (sCov) ? *sCov : getStatCovarianceMatrix(result, m_statVariations);

  // Then loop through the list of (other) uncertainties
  std::vector<string> uncs = m_cnt->listUncertainties();
  for (unsigned int t = 0; t < uncs.size(); ++t) {
    // entries that should never be included
    if (uncs[t] == "comment" || uncs[t] == "result" || uncs[t] == "combined" ||
        uncs[t] == "statistics" || uncs[t] == "systematics" || uncs[t] == "MCreference" ||
        uncs[t] == "MChadronisation" || uncs[t] == "extrapolation" || uncs[t] == "ReducedSets" ||
        uncs[t] == "excluded_set") continue;
    // entries that can be excluded if desired (previously registered 'named' uncertainties)
    if (m_namedIndices.find(uncs[t]) != m_namedIndices.end()) continue;
    TH1* hunc = dynamic_cast<TH1*>(m_cnt->GetValue(uncs[t].c_str()));
    if (not hunc){
      std::cerr<< "CalibrationDataEigenVariations::getEigenCovarianceMatrix(): dynamic cast failed\n";
      continue;
    }
    cov += getSystCovarianceMatrix(result, hunc, m_cnt->isBinCorrelated(uncs[t]), m_cnt->getTagWeightAxis()); // <-------- This is where we add the covariance matrices together to form the "total covariance"
  }

  return cov;
}
470
//________________________________________________________________________________
TMatrixDSym
// NOTE(review): the signature line was lost in extraction; from the error messages
// below this is CalibrationDataEigenVariations::getEigenCovarianceMatrixFromVariations().
{
  // Construct the (Eigen-variation part of the) covariance matrix from the individual variations.
  // This must be called _after_ initialize()!

  // retrieve the central calibration
  TH1 *result = dynamic_cast<TH1*>(m_cnt->GetValue("result"));
  if (!result){
    std::cerr<<"CalibrationDataEigenVariations::getEigenCovarianceMatrixFromVariations(): dynamic cast failed\n";
    return TMatrixDSym();
  }
  TMatrixD jac = getJacobianReductionMatrix();
  // the Jacobian has one column per (uncompressed) global histogram bin
  int nbins = jac.GetNcols();
  TMatrixDSym cov(nbins);
  auto variation = std::make_unique<double[]>(nbins);

  // Rebuild the covariance as the sum of outer products of the up-variations:
  // cov += v_k * v_k^T for each retained eigenvariation k.
  for (const std::pair<TH1*, TH1*>& it : m_eigen){ // m_eigen is vector of pairs of TH1* which point to TH1's representing the upVariation and downVariation respectively
    TH1* resultVariedUp = it.first; // <--------------- This is the "result" but "varied" upwards - i.e. how the result would look like if the systematic "it" was imposed
    // pure variation per global bin index (difference w.r.t. the central result)
    for (unsigned int u = 0; u < (unsigned int) nbins; ++u) variation[u] = resultVariedUp->GetBinContent(u) - result->GetBinContent(u);
    for (int u = 0; u < nbins; ++u){
      for (int v = 0; v < nbins; ++v){
        cov(u, v) += variation[u]*variation[v];
      }
    }
  }

  return cov;
}
501
//________________________________________________________________________________
TMatrixD
// NOTE(review): the signature line was lost in extraction; from the error messages
// below this is CalibrationDataEigenVariations::getJacobianReductionMatrix().
{
  // Construct the matrix that removes the rows and columns that fail to influence
  // the eigen-variations. To reduce the covariance matrix, do the following:
  //
  // TMatrixDSym cov = getEigenCovarianceMatrix();
  // TMatrixDSym jac = getJacobianReductionMatrix(); // <------------ This is an upper triangular matrix of 1's. Similarity transformation will do...
  // TMatrixDSym redSystematicCovMatrix = cov.Similarity(jac);

  // retrieve the central calibration
  TH1* result = dynamic_cast<TH1*>(m_cnt->GetValue("result"));
  if (not result){
    std::cerr<<"CalibrationDataEigenVariations::getJacobianReductionMatrix(): dynamic cast failed\n";
    return TMatrixD();
  }

  // loop over the uncertainties to construct the covariance matrix for all uncertainties
  // to be addressed using the eigenvector method.

  // Retrieve the un-compressed Eigenvector variation covariance matrix
  // (only needed to check for unexpected singularities)
  TMatrixDSym cov = getEigenCovarianceMatrix();

  // Compute the original number of bins (TH1 convention: in-range bins plus under-/overflow)
  int nbins = result->GetNbinsX()+2;
  int ndim = result->GetDimension();
  if (ndim > 1) nbins*= (result->GetNbinsY()+2);
  if (ndim > 2) nbins*= (result->GetNbinsZ()+2);

  // Start by "compressing" the covariance matrix (removing columns/rows containing zeros only)
  int nZeros=0;
  std::vector<int> zeroComponents;
  if (cov.GetNrows() != nbins) {
    std::cerr << " error: covariance matrix size (" << cov.GetNrows() << ") doesn't match histogram size (" << nbins << ")" << std::endl;
    // NB error paths return a default-constructed TMatrixDSym although the return
    // type is TMatrixD — presumably relying on ROOT's TMatrixDSym -> TMatrixD
    // conversion; TODO confirm this is intentional.
    return TMatrixDSym();
  }

  // First flag all the zeros
  for (int i = 0; i < nbins; ++i) {
    // Directly identify the under- and overflow bins
    Int_t binx, biny, binz;
    result->GetBinXYZ(i, binx, biny, binz);
    if ((binx == 0 || binx == result->GetNbinsX()+1) ||
        (ndim > 1 && (biny == 0 || biny == result->GetNbinsY()+1)) ||
        (ndim > 2 && (binz == 0 || binz == result->GetNbinsZ()+1)))
    {
      ++nZeros;
      zeroComponents.push_back(i);
    }
    // Try a first (quick) identification of rows/columns of zeros by the first element in each row
    // If "successful", check the whole row in more detail
    else if (fabs(cov(i,0)) < 1e-10) {
      bool isThereANonZero(false);
      for (int j = 0; j < nbins; ++j) {
        if (fabs(cov(i,j)) > 1e-10) {
          isThereANonZero=true; break;
        }
      }
      if (! isThereANonZero) {
        ++nZeros;
        zeroComponents.push_back(i);
      }
    }
  }

  // **** COMMENTED OUT FOR NOW.
  // Leave it here in case the calibration method will change again in the future.
  // No need to reweight the SF by the efficiency of that bin (MCreference always = 0)

  // Determine whether the container is for "continuous" calibration.
  // This is important since the number of independent scale factors (for each pt or eta bin)
  // is reduced by 1 compared to the number of tag weight bins (related to the fact that the fractions
  // of events in tag weight bins have to sum up to unity).
  // int axis = m_cnt->getTagWeightAxis();
  // bool doContinuous = false; unsigned int weightAxis = 0;

  // if (axis >= 0) {
  //   doContinuous = true;
  //   // weightAxis = (unsigned int) axis;
  //   // In this case, verify that the special "uncertainty" entry that is in fact the reference MC tag
  //   // weight fractions is present. These tag weight fractions are needed in order to carry out the
  //   // diagonalisation successfully.
  //   if (! dynamic_cast<TH1*>(m_cnt->GetValue("MCreference"))) {
  //     std::cerr << " Problem: continuous calibration object found without MC reference tag weight histogram " << std::endl;
  //     return TMatrixDSym();
  //   }
  // }

  // Only relevant for continuous calibration containers, but in order to void re-computation we
  // retrieve them here
  // Int_t nbinx = result->GetNbinsX()+2, nbiny = result->GetNbinsY()+2, nbinz = result->GetNbinsZ()+2;

  // // If we are indeed dealing with a "continuous" calibration container, ignore one tag weight row
  // const int skipTagWeightBin = 1; // NB this follows the histogram's bin numbering
  // if (doContinuous) {
  //   for (Int_t binx = 1; binx < nbinx-1; ++binx)
  //     for (Int_t biny = 1; biny < nbiny-1; ++biny)
  //       for (Int_t binz = 1; binz < nbinz-1; ++binz) {
  //         if ((weightAxis == 0 && binx == skipTagWeightBin) ||
  //             (weightAxis == 1 && biny == skipTagWeightBin) ||
  //             (weightAxis == 2 && binz == skipTagWeightBin)) {
  //           // At this point we simply add these to the 'null' elements
  //           ++nZeros;
  //           zeroComponents.push_back(result->GetBin(binx, biny, binz));
  //         }
  //       }
  // }

  // sanity check: at least one bin must survive the compression
  if (nZeros >= nbins) {
    std::cerr << " Problem. Found n. " << nZeros << " while size of matrix is " << nbins << std::endl;
    return TMatrixDSym();
  }

  int size = nbins - nZeros; // <--- This is the size of the matrix after removing zeros from the covariance matrix

  TMatrixT<double> matrixVariationJacobian(size,nbins);
  int nMissed=0;
  for (int i = 0; i < nbins; ++i) { //full size
    bool missed=false;
    for (unsigned int s = 0; s < zeroComponents.size(); ++s) { // <-------- Basically what this does is it flags "missed" for a given "i" of the full bin size
      if (zeroComponents.at(s) == i) { // <-------- if "i" is in "zeroComponents". Breaks (because it found that it's to be missed)
        missed = true;
        break;
      }
    }
    if (missed) { // <-------- Finally, if "i" is to be missed, increase "nMissed" by one, and....
      ++nMissed;
      continue;
    }
    matrixVariationJacobian(i-nMissed,i)=1; // <-------- ... this ALWAYS adds a one. If zero "nMissed", add to diagonal. otherwise off-diagonal
  } // <-------- This matrix only add 1's on/off diagonal, upper triangular matrix

  return matrixVariationJacobian;
}
638
639//________________________________________________________________________________
640void
642{
643 // This is this class's most important method, in the sense that it does all the
644 // math and constructs all "variation" histograms (for both eigenvector and
645 // named variations). This constitutes the full initialisation of the class.
646 // This method is meant to be called only after all calls (if any) to the
647 // CalibrationDataEigenVariations::excludeNamedUncertainty() method.
648
649 // retrieve the central calibration (nominal scale-factor histogram)
650 TH1* result = dynamic_cast<TH1*>(m_cnt->GetValue("result"));
651 if (not result){
652 std::cerr<<"CalibrationDataEigenVariations::initialize: dynamic cast failed\n";
653 return;
654 }
655 // First step: construct the original covariance matrix
656 TMatrixDSym cov = getEigenCovarianceMatrix();
657 TMatrixDSym corr(result->GetNbinsX()*result->GetNbinsY()*result->GetNbinsZ()); // We want to construct the correlation matrix in order to compare the final eigenvariations correlation matrix to it
658
659 int rowc = 0; // "compressed" row index: rows whose entries were all skipped by the guard below do not advance it
660 for (int row = 0 ; row < cov.GetNrows() ; row++){
661 int colc = 0; // "compressed" column index, reset per row
662 bool colb = false; // did this row contribute at least one accepted entry?
663 for (int col = 0 ; col < cov.GetNcols() ; col++){
664 if(!cov(row,col) || !cov(row,row) || !cov(col,col) || cov(row,row)<1.0e-6 || cov(col,col)<1.0e-6 ) continue; // really don't want to divide by zero...
665 colb=true;
666 long double rowvar = sqrt(cov(row,row));
667 long double colvar = sqrt(cov(col,col));
668 corr(rowc,colc) = cov(row,col)/(rowvar * colvar); // divide each element by the product of the standard deviations
669 colc++;
670 }
671 if (colb){
672 rowc++;
673 }
674 }
675
676 // Second step: create the variations for the named sources of uncertainty (easy...)
677 for (map<string, unsigned int>::iterator it = m_namedIndices.begin(); it != m_namedIndices.end(); ++it) {
678 pair<TH1*, TH1*>& p = m_named[it->second];
679 TH1* hunc = (TH1*) m_cnt->GetValue(it->first.c_str());
680 TString namedvar("namedVar");
681 namedvar += it->first.c_str();
682 TString namedvarUp(namedvar); namedvarUp += "_up";
683 TString namedvarDown(namedvar); namedvarDown += "_down";
684 TH1* resultVariedUp = (TH1*)result->Clone(namedvarUp); resultVariedUp->Add(hunc, 1.0); resultVariedUp->SetDirectory(0);
685 TH1* resultVariedDown = (TH1*)result->Clone(namedvarDown); resultVariedDown->Add(hunc, -1.0); resultVariedDown->SetDirectory(0);
686 p.first = resultVariedUp;
687 p.second = resultVariedDown;
688 }
689 // Refinement: add the "extrapolation" uncertainty as a named uncertainty, if the histogram is provided
690 // This is a bit special, since the extrapolation uncertainty histogram has a different size than other histograms
691 if (TH1* hunc = (TH1*) m_cnt->GetValue("extrapolation")) { // this is just saying "if it exists"...
692 TH1* resultVariedUp = (TH1*) hunc->Clone("extrapolation_up"); resultVariedUp->SetDirectory(0);
693 TH1* resultVariedDown = (TH1*) hunc->Clone("extrapolation_down"); resultVariedDown->SetDirectory(0);
694 Int_t nbinx = hunc->GetNbinsX()+2, nbiny = hunc->GetNbinsY()+2, nbinz = hunc->GetNbinsZ()+2;
695 Int_t firstbinx = hunc->GetXaxis()->FindFixBin(result->GetXaxis()->GetBinCenter(1));
696 Int_t firstbiny = result->GetDimension() > 1 ? hunc->GetYaxis()->FindFixBin(result->GetYaxis()->GetBinCenter(1)) : 1;
697 Int_t firstbinz = result->GetDimension() > 2 ? hunc->GetZaxis()->FindFixBin(result->GetZaxis()->GetBinCenter(1)) : 1;
698 for (Int_t binx = 1; binx < nbinx-1; ++binx) {
699 Int_t binxResult = binx - firstbinx + 1;
700 if (binxResult < 1) binxResult = 1;
701 if (binxResult > result->GetNbinsX()) binxResult = result->GetNbinsX();
702 for (Int_t biny = 1; biny < nbiny-1; ++biny) {
703 Int_t binyResult = biny - firstbiny + 1; // NOTE(review): unlike binxResult above there is no lower clamp to 1 here (nor for binzResult) -- confirm whether intended
704 if (binyResult > result->GetNbinsY()) binyResult = result->GetNbinsY();
705 for (Int_t binz = 1; binz < nbinz-1; ++binz) {
706 Int_t binzResult = binz - firstbinz + 1;
707 if (binzResult > result->GetNbinsZ()) binzResult = result->GetNbinsZ();
708 Int_t bin = hunc->GetBin(binx, biny, binz);
709 double contResult = result->GetBinContent(binxResult, binyResult, binzResult);
710 resultVariedUp->SetBinContent(bin, contResult + hunc->GetBinContent(bin));
711 resultVariedDown->SetBinContent(bin, contResult - hunc->GetBinError(bin)); // NOTE(review): asymmetric with the "up" variation, which adds GetBinContent(bin); GetBinError here looks unintended -- confirm
712 }
713 }
714 }
715 m_named.push_back(std::make_pair(resultVariedUp, resultVariedDown));
716 m_namedExtrapolation = m_namedIndices["extrapolation"] = m_named.size()-1;
717 }
718
719 // Third step: compute the eigenvector variations corresponding to the remaining sources of uncertainty
720 int nbins = result->GetNbinsX()+2; // +2: include under- and overflow bins in the global bin count
721 int ndim = result->GetDimension();
722 if (ndim > 1) nbins*= (result->GetNbinsY()+2);
723 if (ndim > 2) nbins*= (result->GetNbinsZ()+2);
724
725
726 // // Determine whether the container is for "continuous" calibration.
727 // // This is important since the number of independent scale factors (for each pt or eta bin)
728 // // is reduced by 1 compared to the number of tag weight bins (related to the fact that the fractions
729 // // of events in tag weight bins have to sum up to unity).
730 // int axis = m_cnt->getTagWeightAxis();
731 // bool doContinuous = false; unsigned int weightAxis = 0;
732
733 //if (axis >= 0) {
734 // doContinuous = true;
735 // weightAxis = (unsigned int) axis;
736 // // In this case, verify that the special "uncertainty" entry that is in fact the reference MC tag
737 // // weight fractions is present. These tag weight fractions are needed in order to carry out the
738 // // diagonalisation successfully.
739 // NOTE: MCreference is not used at the moment (always 0 in the CDI). Comment it out.
740 // if (! dynamic_cast<TH1*>(m_cnt->GetValue("MCreference"))) {
741 // std::cerr << " Problem: continuous calibration object found without MC reference tag weight histogram " << std::endl;
742 // return;
743 // }
744 //}
745
746 // Only relevant for continuous calibration containers, but in order to avoid re-computation we
747 // retrieve them here
748 // Int_t nbinx = result->GetNbinsX()+2, nbiny = result->GetNbinsY()+2, nbinz = result->GetNbinsZ()+2;
749
750 TMatrixT<double> matrixVariationJacobian = getJacobianReductionMatrix();
751 int size = matrixVariationJacobian.GetNrows();
752
753 // Reduce the matrix to one without the zeros, using a "similarity" transformation
754
755 const TMatrixDSym matrixCovariance = cov.Similarity(matrixVariationJacobian); // note: per ROOT docs, Similarity() also replaces 'cov' itself with J*cov*J^T
756
757 // Carry out the Eigenvector decomposition on this matrix
758 TMatrixDSymEigen eigenValueMaker (matrixCovariance);
759 TVectorT<double> eigenValues = eigenValueMaker.GetEigenValues();
760 TMatrixT<double> eigenVectors = eigenValueMaker.GetEigenVectors();
761 TMatrixT<double> matrixVariations (size,size);
762
763 //compute the total variance by summing the eigenvalues
764 m_totalvariance = eigenValues.Sum(); //
765
766 for (int i = 0; i < size; ++i) {
767 //now go back
768 for (int r = 0; r < size; ++r)
769 //first index is the variation number, second corresponds to the pT bin //<--------- The "eigenvariations" matrix is the "C" matrix which has CKC^T = I with "K" being the o.g. covariance matrix, and "I" is the identity
770 matrixVariations(i,r) = -1.0*eigenVectors[r][i]*sqrt(fabs(eigenValues[i])); // <--------- So the result is a matrix (eigenvariation) which is the eigenvector scaled by the sqrt(eigenvalue)
771 } // <------- matrixVariations: each row is one variation, each column is the pT bin.
772
773 TMatrixT<double> matrixVariationsWithZeros = matrixVariations * matrixVariationJacobian;
774
775 // Construct the initial set of variations from this
776 for (int i = 0; i < matrixVariationsWithZeros.GetNrows(); ++i) {
777 // TString superstring(result->GetName());
778 // superstring += "_eigenVar";
779 TString superstring("eigenVar");
780 superstring+=i;
781
782 TString nameUp(superstring); nameUp += "_up";
783 TString nameDown(superstring); nameDown += "_down";
784 // TString nameUnc(superstring); nameUnc+= "_unc";
785
786 TH1* resultVariedUp = (TH1*)result->Clone(nameUp); resultVariedUp->SetDirectory(0); // <------- clone "result" hist, call it "up", and SetDirectory(0) means histogram doesn't belong to any directory.
787 TH1* resultVariedDown = (TH1*)result->Clone(nameDown); resultVariedDown->SetDirectory(0);
788
789 for (int u = 0; u < nbins; ++u) {
790 resultVariedUp->SetBinContent(u,result->GetBinContent(u) + matrixVariationsWithZeros(i,u)); // <----- This actually constructs the up-variation histogram!
791 resultVariedDown->SetBinContent(u,result->GetBinContent(u) - matrixVariationsWithZeros(i,u)); // <--- This likewise constructs the down-var., bin-by-bin.
792 }
793
794 m_eigen.push_back(std::make_pair(resultVariedUp, resultVariedDown));
795
796 } //end eigenvector size
797
798 // Remove variations that are below the given tolerance (effectively meaning that they don't have any effect) // <--------- Here we prune systematics
799 IndexSet final_set; // <-------- IndexSet is a typedef of std::set<size_t>
800 size_t current_set = 0;
801 for (size_t index = 0; index < m_eigen.size(); ++index) {
802 bool keep_variation = false;
803 TH1 *up = static_cast<TH1*>(m_eigen[index].first->Clone()); up->SetDirectory(0);
804
805 up->Add(result, -1.0); // subtract the nominal so 'up' holds only the pure variation
806
807 for (int bin = 1; bin <= nbins; ++bin) {
808 if (fabs(up->GetBinContent(bin)) > min_variance) { // <----- If you find even ONE bin with big enough variance, we keep the whole systematic. (min_variance: tolerance declared outside this view)
809 keep_variation = true;
810 break;
811 }
812 }
813 delete up;
814 if (!keep_variation){ // <------ At this stage, if we find no bins in the systematic with large enough variation, we insert it to "final_set" for removal/pruning
815 final_set.insert(current_set);
816 } else {
817 m_capturedvariance += eigenValues[index]; // let's add up the variance that the eigenvariation contributes to the total
818 }
819 ++current_set;
820 }
821 if (final_set.size() > 0)
822 if (m_verbose) std::cout << "CalibrationDataEigenVariations: Removing " << final_set.size() << " eigenvector variations leading to sub-tolerance effects, retaining " << m_eigen.size()-final_set.size() << " variations" << std::endl;
823
824 removeVariations(final_set); // <----------------- This method actually performs the reduction. The above logic simply flags which variations to GET RID OF, inserting them into "final_set"
825
826 if (m_verbose) std::cout << " The total variance is " << m_totalvariance << " and the reduction captured " << 100*(m_capturedvariance/m_totalvariance) << "% of this." << std::endl;
830 if (m_validate){ // <---- this flag can be set manually in a local checkout of this package (temporary fix)
831 validate_reduction(cov.GetNrows(), corr, m_eigen, result, "SFEigen", m_taggername, m_wp, m_jetauthor); // <------ This method is simply here to report the matrix comparison, for reduction scheme validation
832 }
833
834 m_initialized = true;
835}
836
837//________________________________________________________________________________
838void
840{
841 // Remove the eigenvector variations whose (0-based) indices appear in 'set'.
842 // The up/down histograms of removed variations are deleted; the survivors
843 // keep their relative order and are re-indexed contiguously.
841 if (set.size() == 0) return;
842
843 std::vector<std::pair<TH1*, TH1*> > new_eigen;
844 for (size_t index = 0; index < m_eigen.size(); ++index){
845 if (set.count(index) == 0) new_eigen.push_back(m_eigen[index]);
846 else { delete m_eigen[index].first; delete m_eigen[index].second; }
847 }
848 m_eigen = std::move(new_eigen);
849}
850
851//________________________________________________________________________________
852void
854{
855 IndexSet simple_set; // flatten the superset into a single set of indices, then delegate
856
857 for (IndexSuperSet::iterator set_it = set.begin(); set_it != set.end(); ++set_it) {
858 for (IndexSet::iterator subset_it = set_it->begin(); subset_it != set_it->end(); ++subset_it)
859 simple_set.insert(*subset_it);
860 }
861
862 removeVariations(simple_set); // the IndexSet overload does the actual deletion
863}
864
865//________________________________________________________________________________
866void
868{
869 // Merge all systematic variations starting from the given index (inclusive)
870 // up to the end of the list. The resulting merged variation replaces the
871 // first entry in the list (i.e., the entry specified by the index).
872 IndexSet simple_set;
873
874 for (size_t it = index; it < m_eigen.size(); ++it)
875 simple_set.insert(it);
876 mergeVariations(simple_set); // delegate to the IndexSet overload
877}
878
879//________________________________________________________________________________
880void
882{
883 // Convenience overload: wrap the single IndexSet in an IndexSuperSet and
884 // delegate to the superset overload, which performs the actual merge.
883 IndexSuperSet sset;
884 sset.insert(set);
885 mergeVariations(sset);
886}
887
888//________________________________________________________________________________
889void
891{
892 // Merge each IndexSet of eigenvariation indices into a single variation
892 // (stored at the smallest index of the subset); the other entries are removed.
892 // check for overlap between the subsets before doing any work
893 IndexSet checker;
894 for (IndexSuperSet::iterator set_it = set.begin(); set_it != set.end(); ++set_it) {
895 for (IndexSet::iterator subset_it = set_it->begin(); subset_it != set_it->end(); ++subset_it){
896 if (checker.count(*subset_it) == 0 && *subset_it <= m_eigen.size()) // NOTE(review): indices are 0-based, so '<= m_eigen.size()' admits index == size(), which would overrun m_eigen below; also the message text below claims a 1-based range -- confirm
897 checker.insert(*subset_it);
898 else {
899 std::cerr << "Error in CalibrationDataEigenVariations::mergeVariations: \
900 IndexSets must not overlap and must lie between 1 and " << m_eigen.size() << ". Aborting!" << std::endl;
901 return;
902 }
903 }
904 }
905
906 // retrieve the central calibration
907 TH1 *result = static_cast<TH1*>(m_cnt->GetValue("result"));
908 IndexSet toDelete;
909 int nbins = result->GetNbinsX()+2; // global bin count including under/overflow
910 int ndim = result->GetDimension();
911 if (ndim > 1) nbins *= (result->GetNbinsY()+2);
912 if (ndim > 2) nbins *= (result->GetNbinsZ()+2);
913
914 // TH1 *var_up_final = static_cast<TH1*>(result->Clone()),
915 // *var_down_final = static_cast<TH1*>(result->Clone());
916
917 // var_up_final->Reset();
918 // var_down_final->Reset();
919
920 // complex sum
921 for (IndexSuperSet::iterator set_it = set.begin(); set_it != set.end(); ++set_it) {
922 if (set_it->empty()) continue;
923
924 double sum_H_up = 0.0, sum_H_down = 0.0;
925 size_t lowest_index = *set_it->lower_bound(0); // smallest index in the subset: the merged variation will live there
926 TH1 *total_var_up = static_cast<TH1*>(m_eigen[lowest_index].first->Clone()),
927 *total_var_down = static_cast<TH1*>(m_eigen[lowest_index].second->Clone());
928 total_var_up->SetDirectory(0);
929 total_var_down->SetDirectory(0);
930
931 total_var_up->Reset();
932 total_var_down->Reset();
933
934 // sum all other variations
935 for (IndexSet::iterator subset_it = set_it->begin();
936 subset_it != set_it->end(); ++subset_it) {
937 size_t actual_index = *subset_it;
938
939 if (actual_index != lowest_index) toDelete.insert(*subset_it);
940
941 TH1 *partial_var_up = static_cast<TH1*>(m_eigen[actual_index].first->Clone()),
942 *partial_var_down = static_cast<TH1*>(m_eigen[actual_index].second->Clone());
943 partial_var_up->SetDirectory(0);
944 partial_var_down->SetDirectory(0);
945
946 partial_var_up->Add(result, -1.0); // subtract the nominal: leaves the pure (signed) variation, so -1 is the right sign here
947 partial_var_down->Add(result, -1.0);
948 for (int i = 0; i < nbins; ++i) {
949 partial_var_down->SetBinContent(i, -1.0*partial_var_down->GetBinContent(i)); // flip the down-variation sign so both partials point "up"
950 }
951
952 for (int u = 0; u < nbins; ++u) {
953 double sum_up = total_var_up->GetBinContent(u),
954 sum_down = total_var_down->GetBinContent(u);
955 for (int v = 0; v < nbins; ++v) {
956 sum_up += partial_var_up->GetBinContent(u)*partial_var_up->GetBinContent(v);
957 sum_H_up += partial_var_up->GetBinContent(u)*partial_var_up->GetBinContent(v);
958 sum_down += partial_var_down->GetBinContent(u)*partial_var_down->GetBinContent(v);
959 sum_H_down += partial_var_down->GetBinContent(u)*partial_var_down->GetBinContent(v);
960 }
961 total_var_up->SetBinContent(u, sum_up);
962 total_var_down->SetBinContent(u, sum_down);
963 }
964 delete partial_var_up;
965 delete partial_var_down;
966 }
967
968 // final part of complex summing: normalise by the accumulated norm sqrt(sum_H)
969 for (int i = 0; i < nbins; ++i) {
970 if (sum_H_up != 0.0)
971 total_var_up->SetBinContent(i, total_var_up->GetBinContent(i)/sqrt(sum_H_up));
972 else
973 total_var_up->SetBinContent(i, 0.0);
974 if (sum_H_down != 0.0)
975 total_var_down->SetBinContent(i, -1.0*total_var_down->GetBinContent(i)/sqrt(sum_H_down));
976 else
977 total_var_down->SetBinContent(i, 0.0);
978 }
979
980 total_var_up->Add(result); // re-add the nominal to turn pure variations back into varied results
981 total_var_down->Add(result);
982
983 m_eigen[lowest_index].first = total_var_up; // NOTE(review): the histograms previously stored at lowest_index are overwritten without being deleted -- possible leak, confirm ownership
984 m_eigen[lowest_index].second = total_var_down;
985 }
986
987 removeVariations(toDelete);
988}
989
990//________________________________________________________________________________
991unsigned int
992CalibrationDataEigenVariations::getNumberOfNamedVariations() const // <---- I'll have to either report all flavours or something for SFGLobalEigen
993{
994 // Returns the number of named variations
995
996 return m_namedIndices.size();
997}
998
999//________________________________________________________________________________
1000vector<string>
1002{
1003 // Provides the list of named variations (the keys of the name -> index map,
1003 // returned in the map's sorted order).
1004
1005 vector<string> names;
1006 for (map<string, unsigned int>::const_iterator it = m_namedIndices.begin(); it != m_namedIndices.end(); ++it){
1007 names.push_back(it->first);
1008 }
1009 return names;
1010}
1011
1012//________________________________________________________________________________
1013unsigned int
1019
1020//________________________________________________________________________________
1021bool
1023 TH1*& up, TH1*& down)
1024{
1025 // Return the pointers to the up- and downward variation histogram for the specified
1026 // eigenvector variation. In case of an invalid variation number, null pointers will
1027 // be returned and the return value will be false.
1028 //
1029 // variation: eigenvector variation number
1030 // up: (reference to) pointer to upward variation histogram
1031 // down: (reference to) pointer to downward variation histogram
1032
1033 if (! m_initialized) initialize(); // lazy initialisation: build the variations on first use
1034
1035 if (variation < m_eigen.size()) {
1036 up = m_eigen[variation].first;
1037 down = m_eigen[variation].second;
1038 return true;
1039 }
1040
1041 up = down = 0; // invalid index: null both pointers so the caller cannot use stale values
1042 return false;
1043}
1044
1045//________________________________________________________________________________
1046bool
1048 TH1*& up, TH1*& down)
1049{
1050 // Return the pointers to the up- and downward variation histogram for the specified
1051 // named variation. In case of an invalid named variation, null pointers will
1052 // be returned and the return value will be false.
1053 //
1054 // name: named variation
1055 // up: (reference to) pointer to upward variation histogram
1056 // down: (reference to) pointer to downward variation histogram
1057
1059 if (it != m_namedIndices.end()) return getNamedVariation(it->second, up, down); // delegate to the index-based overload
1060
1061 up = down = 0; // unknown name: null both pointers
1062 return false;
1063}
1064
1065//________________________________________________________________________________
1066bool
1068 TH1*& up, TH1*& down)
1069{
1070 // Return the pointers to the up- and downward variation histogram for the specified
1071 // named variation. In case of an invalid named variation number, null pointers will
1072 // be returned and the return value will be false.
1073 //
1074 // nameIndex: named variation number
1075 // up: (reference to) pointer to upward variation histogram
1076 // down: (reference to) pointer to downward variation histogram
1077
1078 if (! m_initialized) initialize(); // lazy initialisation: build the variations on first use
1079
1080 if (nameIndex < m_named.size()) {
1081 up = m_named[nameIndex].first;
1082 down = m_named[nameIndex].second;
1083 return true;
1084 }
1085
1086 up = down = 0; // invalid index: null both pointers
1087 return false;
1088}
1089
1090//________________________________________________________________________________
1091unsigned int
1093{
1094 // Return the integer index corresponding to the named variation.
1095 // Note that no checks are made on the validity of the name provided:
1095 // looking up a name that was never registered dereferences the map's
1095 // end iterator, which is undefined behaviour -- callers must ensure validity.
1096
1098 return it->second;
1099}
1100
1101//________________________________________________________________________________
1102bool
1104{
1105 // Verifies whether the given named variation index corresponds to the extrapolation
1106 // uncertainty. m_namedExtrapolation is set when the "extrapolation" histogram is
1106 // registered in initialize(); presumably it holds a sentinel (<0) when absent, so
1106 // this then returns false for every index -- TODO confirm the member's default.
1107
1108 return (m_namedExtrapolation == int(nameIndex));
1109}
1110
1111//________________________________________________________________________________
1112bool
1114 std::map<std::string, std::map<std::string, float>> &coefficientMap)
1115{
1116 // Calculate the eigenvector recomposition coefficient map and pass it to the
1117 // caller by reference. Returns true on success; returns false (leaving
1118 // coefficientMap unmodified) on failure.
1119 //
1120 // label: flavour label
1121 // coefficientMap: (reference to) coefficient map which will be used as return value.
1122
1123 if (! m_initialized) initialize();
1124
1125 std::vector<TH1*> originSF_hvec; // original (per-source) uncertainty histograms
1126 std::vector<TH1*> eigenSF_hvec; // eigenvariation histograms (nominal-subtracted)
1127
1128 // Retrieving information for calculation
1129 std::vector<string>fullUncList = m_cnt->listUncertainties();
1130 std::vector<string> uncList;
1131 for (unsigned int t = 0; t < fullUncList.size(); ++t) {
1132 // entries that should never be included
1133 if (fullUncList[t] == "comment" || fullUncList[t] == "result" || fullUncList[t] == "combined" ||
1134 fullUncList[t] == "statistics" || fullUncList[t] == "systematics" || fullUncList[t] == "MCreference" ||
1135 fullUncList[t] == "MChadronisation" || fullUncList[t] == "extrapolation" || fullUncList[t] == "ReducedSets" ||
1136 fullUncList[t] == "excluded_set") continue;
1137
1138 // entries that can be excluded if desired
1139 if (m_namedIndices.find(fullUncList[t]) != m_namedIndices.end()) continue;
1140
1141 TH1* hunc = dynamic_cast<TH1*>(m_cnt->GetValue(fullUncList[t].c_str()));
1142 if (not hunc){
1143 std::cerr<<"CalibrationDataEigenVariations::EigenVectorRecomposition: dynamic cast failed\n";
1144 continue;
1145 }
1146
1147 Int_t nx = hunc->GetNbinsX();
1148 Int_t ny = hunc->GetNbinsY();
1149 Int_t nz = hunc->GetNbinsZ();
1150 Int_t bin = 0;
1151 bool retain = false; // Retain the histogram?
1152
1153 // discard empty histograms
1154 // Read all bins without underflow&overflow
1155 for(Int_t binx = 1; binx <= nx; binx++)
1156 for(Int_t biny = 1; biny <= ny; biny++)
1157 for(Int_t binz = 1; binz <= nz; binz++){
1158 bin = hunc->GetBin(binx, biny, binz);
1159 if (fabs(hunc->GetBinContent(bin)) > 1E-20){
1160 retain = true;
1161 break; // exits only the innermost (binz) loop; outer loops keep scanning -- harmless since 'retain' stays true, just redundant work
1162 }
1163 }// end hist bin for-loop
1164 if (!retain){
1165 if (m_verbose) std::cout<<"Eigenvector Recomposition: Empty uncertainty "<<fullUncList.at(t)<<" is discarded."<<std::endl;
1166 continue; // discard the vector
1167 }
1168
1169 uncList.push_back(fullUncList.at(t));
1170 originSF_hvec.push_back(hunc);
1171 }
1172
1173 TH1* nom = dynamic_cast<TH1*>(m_cnt->GetValue("result")); // Nominal SF hist
1174 if (not nom){
1175 if (m_verbose) std::cout<<"Eigenvector Recomposition: dynamic cast failed\n";
1176 return false;
1177 }
1178 int dim = nom->GetDimension();
1179 Int_t nx = nom->GetNbinsX();
1180 Int_t ny = nom->GetNbinsY();
1181 Int_t nz = nom->GetNbinsZ();
1182 Int_t nbins = nx;
1183 if(dim > 1) nbins *= ny;
1184 if(dim > 2) nbins *= nz;
1185 TMatrixD matSF(uncList.size(), nbins);
1186 Int_t col = 0; // mark the column number
1187 // Fill the Delta SF Matrix: one row per uncertainty source, one column per bin
1188 for(unsigned int i = 0; i < uncList.size(); i++){
1189 col = 0;
1190 // Loop all bins except underflow&overflow bin
1191 for(int binz = 1; binz <= nz; binz++)
1192 for(int biny = 1; biny <= ny; biny++)
1193 for(int binx = 1; binx <= nx; binx++){
1194 Int_t bin = originSF_hvec.at(i)->GetBin(binx, biny, binz);
1195 TMatrixDRow(matSF,i)[col] = originSF_hvec[i]->GetBinContent(bin);
1196 col++;
1197 }
1198 }
1199
1200 // get eigen vectors of scale factors. Note that this is not the original eigen-vector.
1201 int nEigen = getNumberOfEigenVariations();
1202 TH1* up = nullptr;
1203 TH1* down = nullptr; // retrieved for the interface call below but not used in the recomposition (only 'up' is)
1204 for (int i = 0; i < nEigen; i++){
1205 if (!getEigenvectorVariation(i, up, down)){
1206 std::cerr<<"EigenVectorRecomposition: Error on retrieving eigenvector "<<i<<std::endl;
1207 return false;
1208 }
1209 //Need uncertainty value so subtract central calibration here.
1210 up->Add(nom, -1); // NOTE(review): this mutates the histogram stored inside m_eigen itself (no clone), so the cached variation is altered and a second call would subtract the nominal again -- confirm
1211 eigenSF_hvec.push_back(up);
1212 }
1213 TMatrixD matEigen(nEigen, nbins);
1214
1215 // Fill the Eigen Matrix
1216 for(int i = 0; i < nEigen; i++){
1217 col = 0;
1218 // Read all bins without underflow&overflow
1219 for(int binz = 1; binz <= nz; binz++)
1220 for(int biny = 1; biny <= ny; biny++)
1221 for(int binx = 1; binx <= nx; binx++){
1222 Int_t bin = eigenSF_hvec.at(i)->GetBin(binx, biny, binz);
1223 TMatrixDRow(matEigen,i)[col] = eigenSF_hvec[i]->GetBinContent(bin);
1224 col++;
1225 }
1226 }
1227
1228 // Sanity check:
1229 TMatrixD matEigenOriginal = matEigen;
1230 TMatrixD matEigenTranspose = matEigen;
1231 matEigenTranspose = matEigenTranspose.T();
1232 TMatrixD matOriginalTimesTranspose = matEigenOriginal*matEigenTranspose;
1233 TMatrixD matEigenInvert = matEigenTranspose*matOriginalTimesTranspose.Invert(); // right pseudo-inverse: E^T (E E^T)^-1
1234 //(matEigenOriginal*matEigenInvert).DrawClone("colz"); // This should give us an identity matrix
1235
1236 TMatrixD matCoeff = matSF*matEigenInvert;
1237 int nRows = matCoeff.GetNrows();
1238 int nCols = matCoeff.GetNcols();
1239 std::map<std::string, float> temp_map;
1240 for (int col = 0; col < nCols; col++){
1241 temp_map.clear();
1242 for(int row = 0; row < nRows; row++){
1243 temp_map[uncList[row]] = TMatrixDRow(matCoeff, row)[col];
1244 }
1245 coefficientMap["Eigen_"+label+"_"+std::to_string(col)] = temp_map;
1246 }
1247
1248 return true;
1249}
1250
1254
1255
1257// //
1258// CalibrationDataGlobalEigenVariations //
1259// //
1260// This class is intended to provide a more proper way to deal with correlations between //
1261// calibration bins across all jet flavours of a tagger/author/wp combination. //
1262// //
1263// A CalibrationDataGlobalEigenVariations object is associated with a specific set of //
1264// CalibrationDataHistogramContainer objects. It starts by constructing the covariance matrix from //
1265// the information provided by the container. Subsequently it diagonalises this covariance //
1266// matrix (this is a standard eigenvalue problem, hence the name of the class), and stores //
1267// the result as 'variation' histograms (representing the eigenvectors multiplied by the //
1268// square root of the corresponding eigenvalues). //
1269// Since it is possible for systematic uncertainty contributions to be correlated with //
1270// corresponding uncertainties in physics analyses, it is possible to exclude such sources //
1271// of uncertainty from being used in the construction of the covariance matrix (this is //
1272// since effects from the original sources of uncertainty cannot be traced anymore after //
1273// application of the eigenvalue decomposition). It is still possible to evaluate correctly //
1274// these uncertainties in the form of so-called 'named variations' (see class //
1275// CalibrationDataInterfaceROOT); however this will always treat uncertainties as being //
1276// fully correlated (or anti-correlated) between calibration bins, so it is recommended not //
1277// to exclude uncertainties that are not correlated between bins from the eigenvector //
1278// decomposition. //
1280
1281
1282
1283
1284
1285//________________________________________________________________________________
1286CalibrationDataGlobalEigenVariations::CalibrationDataGlobalEigenVariations(const std::string& cdipath, const std::string& tagger, const std::string& wp, const std::string& jetcollection, const std::vector<std::string>& flavours, CalibrationDataHistogramContainer* cnt, bool excludeRecommendedUncertaintySet) :
1287CalibrationDataEigenVariations(cdipath, tagger, wp, jetcollection, cnt, excludeRecommendedUncertaintySet, false), m_flav_eigen(), m_flavours(flavours)
1288//m_blockmatrixsize(1200)
1289{
1290 m_blockmatrixsize = 0; // <------- This needs to be computed based off of the dimensions of the result vectors (currently 4x1x5 for continuous WP, 1x300x1 for fixed cut WP, circa 2022)
1291 m_initialized = false;
1293 m_statVariations = false;
1294 // We want to retrieve all the containers that belong in the tagger/jetcollection/wp group
1295 // These are then stored, with all uncertainties extracted and stored as well, for later use in EV decomposition
1296
1297 // For now we will open up the CDI file from within the constructor, to get at the data we need
1298 TString fileName(cdipath);
1299 TFile* f = TFile::Open(fileName.Data(), "READ"); // NOTE(review): return value is not checked -- a bad path yields nullptr and the f->cd() below would crash; f is also never Close()d in this visible code, confirm intended ownership
1300 f->cd();
1301
1302 // This loop extracts all the "flavour containers", i.e. all CalibrationDataHistogramContainers pertaining to the same path, but with different flavours
1303 // It also puts all the uncertainties for all containers into a set, which we can then later loop over, to construct the total covariance matrix
1304 for (const auto& flavour : m_flavours){
1305 std::string dir = m_taggername + "/" + m_jetauthor + "/" + m_wp + "/" + flavour + "/" + "default_SF" ;
1306 TString contName(dir);
1308 f->GetObject(contName.Data(), c);
1309 if (c) {
1310 if (m_verbose) std::cout << "Found " << contName.Data() << std::endl;
1311 m_histcontainers.insert({flavour, c}); // Build the mapping between flavour and the corresponding "flavour container", i.e. the CalibrationDataHistogramContainer
1312 std::vector<std::string> uncs = c->listUncertainties();
1313 TH1* result = dynamic_cast<TH1*>(c->GetValue("result")); // let's get the size of this for later
1314 if (not result){
1315 if (m_verbose) std::cout << "Dynamic cast failed at "<<__LINE__<<"\n";
1316 continue;
1317 }
1318 m_blockmatrixsize+=result->GetNbinsX()*result->GetNbinsY()*result->GetNbinsZ(); // should be ~300 for fixed cut, something else for continuous
1319 if (m_verbose) std::cout << "m_blockmatrixsize is now " << m_blockmatrixsize << std::endl;
1320 for (const std::string& unc : uncs){
1321 if (unc.find("stat_np") != string::npos) m_statVariations = true; // flag the presence of statistical nuisance-parameter variations
1322 if ((unc=="result")||(unc=="comment")||(unc=="ReducedSets")||(unc=="systematics")||(unc=="statistics")||(unc=="extrapolation")||(unc=="MChadronisation")||(unc=="combined")||(unc=="extrapolation from charm")) {
1323 continue;
1324 } else {
1325 m_all_shared_systematics.insert(unc); // Construct the set of uncertainties to get a full tally of everything in the container group
1326 }
1327 }
1328 // If specified, add items recommended for exclusion from EV decomposition by the calibration group to the 'named uncertainties' list
1329 // This used to work on only one container, but now we do it on all four containers
1330 if (excludeRecommendedUncertaintySet) {
1331 std::vector<std::string> to_exclude = split(c->getExcludedUncertainties());
1332 if (to_exclude.size() == 0) {
1333 std::cerr << "CalibrationDataEigenVariations warning: exclusion of pre-set uncertainties list requested but no (or empty) list found" << std::endl;
1334 }
1335 for (const auto& name : to_exclude) {
1336 if (name == "") continue;
1337 excludeNamedUncertainty(name, flavour);
1338 }
1339 }
1340 }
1341 }
1342
1343 if (m_verbose) std::cout << "\n number of shared uncertainties is " << m_all_shared_systematics.size() << std::endl;
1344
1345 std::set<std::string>::iterator it = m_all_shared_systematics.begin();
1346 if (it != m_all_shared_systematics.end()){
1347 if (m_verbose) std::cout << "Printing out all shared uncertainties for " << tagger << "/" << jetcollection << "/" << wp << std::endl;
1348 if (m_verbose) std::cout << "| " << std::endl;
1349 while (it != m_all_shared_systematics.end()){
1350 if (m_verbose) std::cout << "|-- " << (*it) << std::endl;
1351 ++it;
1352 }
1353 } else {
1354 if (m_verbose) std::cout << "| no shared systematics between ";
1355 for (const auto& f : m_flavours){
1356 if (m_verbose) std::cout << f << ", ";
1357 } if (m_verbose) std::cout << std::endl;
1358 }
1359 // End of constructor
1360}
1361
1362//________________________________________________________________________________
1364{
1365 // delete all variation histograms owned by us (both the per-flavour eigenvector
1365 // variations and the per-flavour named variations). Note that operator[] on the
1365 // flavour maps may insert an empty vector for a flavour with no entries; this is
1365 // harmless during destruction.
1366 for (const auto& flavour : m_flavours){
1367 for (vector<pair<TH1*, TH1*> >::iterator it = m_flav_eigen[flavour].begin(); it != m_flav_eigen[flavour].end(); ++it) {
1368 delete it->first;
1369 delete it->second;
1370 }
1371
1372 for (vector<pair<TH1*, TH1*> >::iterator it = m_flav_named[flavour].begin(); it != m_flav_named[flavour].end(); ++it) {
1373 delete it->first;
1374 delete it->second;
1375 }
1376 }
1377}
1378
1379//________________________________________________________________________________
TMatrixDSym
{
  // Construct the block (global) covariance matrix that is to be diagonalised.
  // Note that extrapolation uncertainties (identified by the keyword "extrapolation,
  // this will pertain mostly to the extrapolation to high jet pt) are always excluded
  // since by definition they will not apply to the normal calibration bins. Instead
  // this uncertainty has to be dealt with as a named variation. In addition there are
  // other items ("combined", "systematics") that will not be dealt with correctly
  // either and hence are excluded as well).
  //
  // Note that if an explicit covariance matrix is supplied (at present this may be
  // the case only for statistical uncertainties: in the case of "continuous tagging",
  // multinomial statistics applies so bin-to-bin correlations exist) this will be
  // used instead of constructing the statistical uncertainties' covariance matrix on
  // the fly.
  //
  // Returns: the summed covariance matrix over all shared systematics, with one
  // flavour-block per entry of m_flavours (total dimension m_blockmatrixsize).

  std::map<std::string, TMatrixDSym> cov_matrices; // temporary store, just to aid in printing out the individual systematic covariance matrices
  TMatrixDSym global_covariance(m_blockmatrixsize);
  // Then loop through the list of (other) uncertainties
  for (const std::string& unc : m_all_shared_systematics){
    std::vector<int> flavs_in_common; // one 0/1 flag per flavour: does this flavour carry this uncertainty?
    TString tunc(unc);
    std::vector<double> comb_syst; // this vector combines the TH1 uncertainty bins for each flavour into one object -> stored in comb_systematics for now, but meant for covariance matrix method
    // For the fixed cut case, we want to bin by groups of flavour > pT (i.e. "blocks" of flavour)
    // For the continuous case, we want to bin by groups of flavour > tagweight > pT (i.e. "blocks" of flavour, containing tagweight blocks... more complicated, but works on same principle)
    for (const auto& flavour : m_flavours){
      Analysis::CalibrationDataHistogramContainer* c = m_histcontainers[flavour]; // pointer to the flavour container
      int flavour_size = 0; // Store the length of the uncertainty in the flavour, initialize to zero
      if (c) {
        // Now we want to get the histogram for this systematic, for this flavour
        TH1* hunc = dynamic_cast<TH1*>(c->GetValue(tunc.Data()));
        TH1* ref = dynamic_cast<TH1*>(c->GetValue("result")); // retrieving this just in case the uncertainty doesn't exist for this flavour, just need it to get dimensions right
        if (not ref){
          if (m_verbose) std::cout << " There was no uncertainty OR SF/EFF results... Are you sure you have the right CDIContainer path?" << std::endl;
          // NOTE(review): skipping here leaves neither a flavs_in_common entry nor any
          // comb_syst bins for this flavour — the [0..3] indexing below assumes this
          // never happens in practice; confirm.
          continue;
        }
        int tagweightax = c->getTagWeightAxis(); // for handling the continuous case(s)
        if (hunc){
          flavs_in_common.push_back(1); // report that we have this uncertainty in this flavour
          Int_t nbinx = hunc->GetNbinsX(), nbiny = hunc->GetNbinsY(), nbinz = hunc->GetNbinsZ();
          Int_t rows = nbinx;
          if (hunc->GetDimension() > 1) rows *= nbiny;
          if (hunc->GetDimension() > 2) rows *= nbinz;
          flavour_size = rows; // Record the number of bins in the uncertainty TH1
        } else {
          flavs_in_common.push_back(0); // If the uncertainty doesn't exist for that flavour, just report, we'll set to zero in the combined vector
          // Because the uncertainty doesn't exist for this flavour, we just get the dimensions we need
          Int_t nbinx = ref->GetNbinsX(), nbiny = ref->GetNbinsY(), nbinz = ref->GetNbinsZ();
          Int_t rows = nbinx;
          if (ref->GetDimension() > 1) rows *= nbiny;
          if (ref->GetDimension() > 2) rows *= nbinz;
          flavour_size = rows;
        }

        // Now we can loop through the bins of the flavour uncertainty, adding them onto the combined systematic
        if (tagweightax == -1){ //<--- i.e. NOT continuous, but is a fixed cut WP
          // NOTE(review): GetBin(1,i,1) walks the Y axis, i.e. for fixed-cut calibrations the
          // Y axis is assumed to be the binning (pt) axis — confirm against the histogram layout.
          for (int i = 1 ; i <= flavour_size ; i++){
            if (hunc){
              Int_t bin = hunc->GetBin(1,i,1);
              double unc_val = hunc->GetBinContent(bin);
              comb_syst.push_back(unc_val); // if uncertainty, push uncertainty bin content to combined systematic vector
            } else {
              comb_syst.push_back(0.0); // if no uncertainty, push 0's
            }
          }
        } else if (tagweightax == 0){
          // X axis is the tagweight axis, meaning Y is pt, Z is abs(eta)
          for (Int_t xbins = 1 ; xbins <= ref->GetNbinsX() ; xbins++){
            for (Int_t zbins = 1 ; zbins <= ref->GetNbinsZ() ; zbins++){
              for (Int_t ybins = 1 ; ybins <= ref->GetNbinsY() ; ybins++){
                if (hunc){
                  Int_t bin = hunc->GetBin(xbins,ybins,zbins);
                  double unc_val = hunc->GetBinContent(bin);
                  comb_syst.push_back(unc_val); // if uncertainty, push uncertainty bin content to combined systematic vector
                } else {
                  comb_syst.push_back(0.0); // if no uncertainty, push 0's
                }
                // And now we should be constructing the initial covariance matrix for block continuous correctly
              }
            }
          }
        } else if (tagweightax == 1){
          // Y axis is the tagweight axis, meaning X is pt, Z is abs(eta)
          for (Int_t ybins = 1 ; ybins <= ref->GetNbinsY() ; ybins++){
            for (Int_t zbins = 1 ; zbins <= ref->GetNbinsZ() ; zbins++){
              for (Int_t xbins = 1 ; xbins <= ref->GetNbinsX() ; xbins++){
                if (hunc){
                  Int_t bin = hunc->GetBin(xbins,ybins,zbins);
                  double unc_val = hunc->GetBinContent(bin);
                  comb_syst.push_back(unc_val); // if uncertainty, push uncertainty bin content to combined systematic vector
                } else {
                  comb_syst.push_back(0.0); // if no uncertainty, push 0's
                }
                // And now we should be constructing the initial covariance matrix for block continuous correctly
              }
            }
          }
        } else if (tagweightax == 2){
          // Z axis is the tagweight axis, meaning X is pt, Y is abs(eta)
          for (Int_t zbins = 1 ; zbins <= ref->GetNbinsZ() ; zbins++){
            for (Int_t ybins = 1 ; ybins <= ref->GetNbinsY() ; ybins++){
              for (Int_t xbins = 1 ; xbins <= ref->GetNbinsX() ; xbins++){
                if (hunc){
                  Int_t bin = hunc->GetBin(xbins,ybins,zbins);
                  double unc_val = hunc->GetBinContent(bin);
                  comb_syst.push_back(unc_val); // if uncertainty, push uncertainty bin content to combined systematic vector
                } else {
                  comb_syst.push_back(0.0); // if no uncertainty, push 0's
                }
                // And now we should be constructing the initial covariance matrix for block continuous correctly
              }
            }
          }
        }
      }
    }
    //comb_systematics.insert({unc, comb_syst}); // after having looped through the bins for each flavour, the comb_syst vector should be completed and added (or rather, made into a covariance matrix)
    TMatrixDSym unc_cov(comb_syst.size());
    if (unc == "statistics"){
      unc_cov = getStatCovarianceMatrix(comb_syst, m_statVariations); // we want to handle the "statistics" uncertainty differently
    } else {
      unc_cov = getSystCovarianceMatrix(comb_syst);
    }
    cov_matrices.insert({unc, unc_cov}); // add the covariance matrix for the uncertainty to this map, this is temporary for testing/plotting purposes

    // To look only at uncertainties that pertain to more than one flavour (2, 3 or all 4) we can store the covariances separately for later saving and plotting
    // NOTE(review): indexing entries [0..3] directly assumes exactly four flavour flags were
    // pushed above (i.e. four flavours, none skipped) — confirm m_flavours always has size 4.
    if (flavs_in_common[0]+flavs_in_common[1]+flavs_in_common[2]+flavs_in_common[3] > 1){
      m_only_shared_systematics.insert(unc) ; // mark the shared systematics...
    }

    global_covariance += unc_cov;
  }
  return global_covariance;
}
1515
1516
1517//________________________________________________________________________________
1518void
1519CalibrationDataGlobalEigenVariations::excludeNamedUncertainty(const std::string& name, const std::string& flavour)//, CalibrationDataContainer* cnt)
1520{
1521 // Exclude the source of uncertainty identified by the given name from being used
1522 // in the construction of the covariance matrix to be diagonalised.
1523 // Notes:
1524 // - Some names returned by CalibrationDataContainer::listUncertainties() are not
1525 // meaningful in this context, and specifying them is not allowed.
1526 // - Once the eigenvector diagonalisation has been carried out, this method may
1527 // not be used anymore.
1528
1529 if (m_initialized) std::cerr << "CalibrationDataGlobalEigenVariations::excludeNamedUncertainty error:" << " initialization already done" << std::endl;
1530
1531 else if (name == "comment" || name == "result" || name == "systematics" ||
1532 name == "statistics" || name == "combined" || name == "extrapolation" ||
1533 name == "MCreference" || name == "MChadronisation" || name == "ReducedSets" ||
1534 name == "excluded_set" || name == "" || name == " ")
1535 std::cerr << "CalibrationDataGlobalEigenVariations::excludeNamedUncertainty error:" << " name [" << name << "] not allowed" << std::endl;
1536
1537 // Note: we WANT to exclude named uncertainties FROM ALL FLAVOURS, even if they don't contain the uncertainty (just fill with zeros). So I won't do this check here.
1538 // only really add if the entry is not yet in the list
1539 else if (m_flav_namedIndices[flavour].find(name) == m_flav_namedIndices[flavour].end()) {
1540 m_flav_named[flavour].push_back(std::pair<TH1*, TH1*>(0, 0));
1541 m_flav_namedIndices[flavour][name] = m_flav_named[flavour].size()-1; // store the index that the name variation pair is stored with in m_named
1542 }
1543}
1544
1545
1546//________________________________________________________________________________
std::vector<std::string>
{
  // Provides the list of named variations
  //
  // flavour: calibration flavour whose registered named variations are listed.
  // Returns the names (map keys) in the order provided by the underlying std::map.
  // NOTE(review): m_flav_namedIndices.find(flavour) is dereferenced without an end()
  // check — an unknown flavour is undefined behaviour here; confirm callers guarantee it.

  std::vector<std::string> names;
  for(const auto& namedar : m_flav_namedIndices.find(flavour)->second){
    names.push_back(namedar.first);
  }
  return names;
}
1558
1559
1560//________________________________________________________________________________
void
{
  // This is this class's most important method, in the sense that it does all the
  // math and constructs all "variation" histograms (for both eigenvector and named
  // named variations). This constitutes the full initialisation of the class.
  // This method is meant to be called only after all calls (if any) to the
  // CalibrationDataGlobalEigenVariations::excludeNamedUncertainty() method.
  //
  // Overview of the steps carried out below:
  //  1) build the global (all-flavour) block covariance matrix and its correlation matrix;
  //  2) build the named ("excluded") variations and the extrapolation variation per flavour;
  //  3) eigen-decompose the (zero-row-reduced) covariance matrix and build the combined
  //     eigen-variation histograms in m_eigen;
  //  4) prune eigen-variations whose effect is everywhere below min_variance;
  //  5) split each surviving combined variation back into properly-binned per-flavour
  //     histograms, stored in m_flav_eigen.


  // First step: construct the block covariance matrix
  TMatrixDSym cov = getEigenCovarianceMatrix();

  TMatrixDSym corr(cov); // We want to construct the correlation matrix in order to compare the final eigenvariations correlation matrix to it
  for (int row = 0 ; row < cov.GetNrows() ; row++){
    for (int col = 0 ; col < cov.GetNcols() ; col++){
      double rowvar = sqrt(cov(row,row));
      double colvar = sqrt(cov(col,col));
      // NOTE(review): a zero diagonal element makes this a division by zero (NaN in corr)
      // — presumably all-zero rows are only used for the validation plots; confirm.
      corr(row,col) = corr(row,col)/(rowvar * colvar); // divide each element by the variance
    }
  }

  // Second step: create the variations for the named sources of uncertainty (easy...)
  // News flash: This isn't so easy now, as it has to be done for all the flavours to which the uncertainty pertains
  // the following are temporary data structures
  std::vector<double> combined_result;
  std::map<std::string, int> flav_bins; // store the nbins of each flavour histogram for later use in "reporting"
  // and eventually, at the end, we will want to separate the flavour blocks out...
  for (const auto& flavour : m_flavours) {
    // for each flavour, we want to combine the results and uncertainty values across flavours into one "block" vector
    // retrieve the central calibration
    Analysis::CalibrationDataHistogramContainer* c = m_histcontainers[flavour]; // pointer to the flavour container

    TH1* result = dynamic_cast<TH1*>(c->GetValue("result"));
    if (not result){
      std::cerr<<"CalibrationDataGlobalEigenVariations::initialize: dynamic cast failed\n ";
      continue;
    }
    //construct the combined_result and combined_named_variations (up and down)
    if (c->getTagWeightAxis() == -1){ // For fixed cut WP, the Y axis **should** be the pT axis (but can it can potentially be different in the future)
      flav_bins[flavour] = result->GetNbinsY(); // Add the number of bins of the result histogram with non-zero results...
      if (m_verbose) std::cout << "flav_bins["<<flavour<<"] = " << flav_bins[flavour] << std::endl;
      for(int i = 0 ; i < flav_bins[flavour] ; i++){
        // push bin content onto the combined_result vector
        Int_t bin = result->GetBin(1,i+1); // This will only pick up the CONTENTS, not of the underflow bin
        double res_value = result->GetBinContent(bin);
        combined_result.push_back(res_value);
      }
    } else if (c->getTagWeightAxis() == 0) { // for continuous WP, the taxweight axis determines which axis is pt and |eta|
      flav_bins[flavour] = result->GetNbinsX()*result->GetNbinsY();
      int tagbins = result->GetNbinsX();
      int ptbins = result->GetNbinsY();
      if (m_verbose) std::cout << "flav_bins["<<flavour<<"] = " << flav_bins[flavour] << std::endl;
      for(int i = 0 ; i < tagbins ; i++){
        for(int j = 0 ; j < ptbins ; j++){
          // push bin content onto the combined_result vector
          Int_t bin = result->GetBin(j+1,1,i+1); // This will only pick up the CONTENTS, not of the underflow bin
          double res_value = result->GetBinContent(bin);
          combined_result.push_back(res_value);
        }
      }
    } else if (c->getTagWeightAxis() == 1) {
      flav_bins[flavour] = result->GetNbinsX()*result->GetNbinsY();
      int tagbins = result->GetNbinsY();
      int ptbins = result->GetNbinsX();
      if (m_verbose) std::cout << "flav_bins["<<flavour<<"] = " << flav_bins[flavour] << std::endl;
      for(int i = 0 ; i < tagbins ; i++){
        for(int j = 0 ; j < ptbins ; j++){
          // push bin content onto the combined_result vector
          Int_t bin = result->GetBin(j+1,1,i+1); // This will only pick up the CONTENTS, not of the underflow bin
          double res_value = result->GetBinContent(bin);
          combined_result.push_back(res_value);
        }
      }
    } else if (c->getTagWeightAxis() == 2) {
      flav_bins[flavour] = result->GetNbinsX()*result->GetNbinsZ();
      int tagbins = result->GetNbinsZ();
      int ptbins = result->GetNbinsX();
      if (m_verbose) std::cout << "flav_bins["<<flavour<<"] = " << flav_bins[flavour] << std::endl;
      for(int i = 0 ; i < tagbins ; i++){
        for(int j = 0 ; j < ptbins ; j++){
          // push bin content onto the combined_result vector
          Int_t bin = result->GetBin(j+1,1,i+1); // This will only pick up the CONTENTS, not of the underflow bin
          double res_value = result->GetBinContent(bin);
          combined_result.push_back(res_value);
        }
      }
    }


    // loop over the excluded uncertainties for this flavour, and construct their actual variations...
    // the "m_flav_namedIndices" are constructed within "excludeNamedUncertainties", which is called in the constructor
    for (map<string, unsigned int>::iterator it = m_flav_namedIndices[flavour].begin(); it != m_flav_namedIndices[flavour].end(); ++it) {
      TH1* hunc = (TH1*) c->GetValue(it->first.c_str()); // this should store the name uncertainty, if it exists for this flavour
      // I need to test if this uncertainty actually exists, and if it doesn't just use a ZERO variation histogram explicitly
      // but even if it doesn't exist, we want to have it, so that the indices match between flavours
      pair<TH1*, TH1*>& p = m_flav_named[flavour][it->second];
      TString namedvar("namedVar");
      namedvar += it->first.c_str();
      TString namedvarUp(namedvar); namedvarUp += "_up";
      TString namedvarDown(namedvar); namedvarDown += "_down";
      TH1* resultVariedUp = (TH1*)result->Clone(namedvarUp);
      TH1* resultVariedDown = (TH1*)result->Clone(namedvarDown);
      if (hunc){
        // up/down variations are central result +/- the uncertainty histogram
        resultVariedUp->Add(hunc, 1.0); resultVariedUp->SetDirectory(0);
        resultVariedDown->Add(hunc, -1.0); resultVariedDown->SetDirectory(0);
      } else {
        // uncertainty absent for this flavour: both variations equal the central result
        resultVariedUp->SetDirectory(0);
        resultVariedDown->SetDirectory(0);
      }
      p.first = resultVariedUp;
      p.second = resultVariedDown;
    } // End of uncertainty in flavour loop

    // Now handle the extrapolation uncertainties per flavour...
    // Refinement: add the "extrapolation" uncertainty as a named uncertainty, if the histogram is provided
    // This is a bit special, since the extrapolation uncertainty histogram has a different size than other histograms
    if (TH1* hunc = (TH1*)c->GetValue("extrapolation")) { // this is just saying "if it exists"...
      TH1* resultVariedUp = (TH1*) hunc->Clone("extrapolation_up"); resultVariedUp->SetDirectory(0);
      TH1* resultVariedDown = (TH1*) hunc->Clone("extrapolation_down"); resultVariedDown->SetDirectory(0);
      Int_t nbinx = hunc->GetNbinsX()+2, nbiny = hunc->GetNbinsY()+2, nbinz = hunc->GetNbinsZ()+2;
      // map the first regular calibration bin onto the (larger) extrapolation histogram
      Int_t firstbinx = hunc->GetXaxis()->FindFixBin(result->GetXaxis()->GetBinCenter(1));
      Int_t firstbiny = result->GetDimension() > 1 ? hunc->GetYaxis()->FindFixBin(result->GetYaxis()->GetBinCenter(1)) : 1;
      Int_t firstbinz = result->GetDimension() > 2 ? hunc->GetZaxis()->FindFixBin(result->GetZaxis()->GetBinCenter(1)) : 1;
      for (Int_t binx = 1; binx < nbinx-1; ++binx) {
        Int_t binxResult = binx - firstbinx + 1;
        if (binxResult < 1) binxResult = 1;
        if (binxResult > result->GetNbinsX()) binxResult = result->GetNbinsX();
        for (Int_t biny = 1; biny < nbiny-1; ++biny) {
          Int_t binyResult = biny - firstbiny + 1;
          // NOTE(review): unlike the X axis there is no lower clamp (binyResult < 1) here
          // (same for Z below) — confirm whether that asymmetry is intentional.
          if (binyResult > result->GetNbinsY()) binyResult = result->GetNbinsY();
          for (Int_t binz = 1; binz < nbinz-1; ++binz) {
            Int_t binzResult = binz - firstbinz + 1;
            if (binzResult > result->GetNbinsZ()) binzResult = result->GetNbinsZ();
            Int_t bin = hunc->GetBin(binx, biny, binz);
            double contResult = result->GetBinContent(binxResult, binyResult, binzResult);
            // NOTE(review): "up" uses the bin content but "down" uses the bin *error* —
            // presumably the extrapolation histogram encodes the sizes that way; confirm.
            resultVariedUp->SetBinContent(bin, contResult + hunc->GetBinContent(bin));
            resultVariedDown->SetBinContent(bin, contResult - hunc->GetBinError(bin));
          }
        }
      }
      m_flav_named[flavour].push_back(std::make_pair(resultVariedUp, resultVariedDown));
      m_flav_namedExtrapolation[flavour] = m_flav_namedIndices[flavour]["extrapolation"] = m_flav_named[flavour].size()-1;
    }


  } // End flavour loop



  // Third step: compute the eigenvector variations corresponding to the remaining sources of uncertainty
  // First, build the combined_result vector into a TH1
  std::unique_ptr<TH1> comb_result(new TH1D("combined_result", "", combined_result.size(), 0., 1.));
  int nbins = comb_result->GetNbinsX()+2;
  int ndim = comb_result->GetDimension();
  if (ndim > 1) nbins*= (comb_result->GetNbinsY()+2);
  if (ndim > 2) nbins*= (comb_result->GetNbinsZ()+2);

  // I want to take the contents of "combined_result" and fill in "comb_result" without the under/overflow
  for (unsigned int i=0 ; i<combined_result.size() ; i++){
    // assuming dimension == 1, which should be the case...
    comb_result->SetBinContent(i+1, combined_result[i]); // setting i+1 so we don't start with the underflow bin
  }

  // Get the portions of the covariance matrix that aren't zero, this next step
  TMatrixT<double> matrixVariationJacobian = getJacobianReductionMatrix(cov); // we pass the covariance matrix in as a starting point

  int size = matrixVariationJacobian.GetNrows();

  // Reduce the matrix to one without the zeros, using a "similarity" transformation
  const TMatrixDSym matrixCovariance = cov.Similarity(matrixVariationJacobian); // <--- This step removes the zeros

  // Carry out the Eigenvector decomposition on this matrix
  TMatrixDSymEigen eigenValueMaker (matrixCovariance);
  TVectorT<double> eigenValues = eigenValueMaker.GetEigenValues();
  TMatrixT<double> eigenVectors = eigenValueMaker.GetEigenVectors();
  TMatrixT<double> matrixVariations (size,size);

  //compute the total variance by summing the eigenvalues
  m_totalvariance = eigenValues.Sum();

  for (int i = 0; i < size; ++i) {
    for (int r = 0; r < size; ++r) {
      //first index is the variation number, second corresponds to the pT bin // The "eigenvariations" matrix is the "C" matrix which has CKC^T = I with "K" being the o.g. covariance matrix, and "I" is the identity.
      matrixVariations(i,r) = -1.0*eigenVectors[r][i]*sqrt(fabs(eigenValues[i])); // So the result is a matrix (eigenvariation) which is the eigenvector scaled by the sqrt(eigenvalue)
    }
  } // <------- matrixVariations: each row is one variation, each column is the pT bin.

  TMatrixT<double> matrixVariationsWithZeros = matrixVariations * matrixVariationJacobian; // This step adds in the zero rows again

  // Construct the initial set of variations from this
  for (int i = 0; i < matrixVariationsWithZeros.GetNrows(); ++i) {
    TString superstring("eigenVar");
    superstring+=i;

    TString nameUp(superstring); nameUp += "_up"; // In the end you get something like "eigenVar5_up"
    TString nameDown(superstring); nameDown += "_down";
    // TString nameUnc(superstring); nameUnc+= "_unc";

    TH1* resultVariedUp = (TH1*)comb_result->Clone(nameUp); resultVariedUp->SetDirectory(0);
    TH1* resultVariedDown = (TH1*)comb_result->Clone(nameDown); resultVariedDown->SetDirectory(0);

    // NOTE(review): this loop runs u = 0..GetNbinsX()-1, i.e. it starts at the underflow
    // bin and never touches the last regular bin — confirm this offset is intended.
    for (int u = 0; u < comb_result->GetNbinsX(); ++u) {
      resultVariedUp->SetBinContent(u,(comb_result->GetBinContent(u) + matrixVariationsWithZeros(i,u)));
      resultVariedDown->SetBinContent(u,(comb_result->GetBinContent(u) - matrixVariationsWithZeros(i,u)));
    }

    m_eigen.push_back(std::make_pair(resultVariedUp, resultVariedDown)); //<--- This is currently storing the FULL/combined variations, which aren't binned with proper bin widths etc.
    // The "proper binning" isn't necessary for pruning purposes. Later on, after pruning, separate the flavour blocks of each eigenvariation and construct the variations for each flavour, storing results in m_flav_eigen

  } //end eigenvector size


  // Without any changes, all eigenvariations are kept due to the variation being non-negligible for SOME flavour...
  // Step 4 : Time for PRUNING
  // > Check the variation bins to see if any exceed a given threshold value of "min_variance"
  // > Value of E-6 seems to work well for SFGlobalEigen (found through a scan of possible thresholds)
  //   but this is not necessarily going to hold for future CDI's. This needs to be checked through the
  //   "validate_reduction" function, which can be used to compare how well the reduction captures the total covariance.

  // Remove variations that are below the given tolerance (effectively meaning that they don't have any effect)
  IndexSet final_set;
  size_t current_set = 0;

  // We set the custom min_variance here
  // NOTE(review): min_variance is not declared in this scope, so this overwrites a
  // member/parameter value unconditionally — any externally configured threshold is lost.
  min_variance = 1.0E-6;
  for (size_t index = 0; index < m_eigen.size(); ++index) {
    bool keep_variation = false; // guilty until proven innocent
    TH1* up = static_cast<TH1*>(m_eigen[index].first->Clone()); up->SetDirectory(0); // clone the up variation and check it out
    up->Add(comb_result.get(), -1.0); // now we're left with decimal values centered around 0, i.e. 0.02 or -0.02

    for (int bin = 1; bin <= nbins; ++bin) {
      if (fabs(up->GetBinContent(bin)) > min_variance) { // If you find even ONE bin with big enough variance, we keep the whole systematic.
        keep_variation = true;
        break;
      }
    }
    if (!keep_variation){ // At this stage, if we find no bins in the systematic with large enough variation, we insert it to "final_set" for removal/pruning
      final_set.insert(current_set);
    } else {
      m_capturedvariance += eigenValues[index];
    }
    delete up;
    ++current_set;
  }
  if (final_set.size() > 0){ // at this point, a set of the indices of negligible variations should have been gathered, proceed to remove them...
    if (m_verbose) std::cout << "CalibrationDataEigenVariations: Removing " << final_set.size() << " eigenvector variations leading to sub-tolerance effects, retaining " << m_eigen.size()-final_set.size() << " variations" << std::endl;
  }

  CalibrationDataEigenVariations::removeVariations(final_set); // This method actually performs the reduction. The above logic simply flags which variations to get rid of, inserting them into "final_set"

  // AT THIS STAGE: Pruning has already occurred, leaving us with a set of combined eigenvariations in "m_eigen", which we can thence recombine and compute the correlation matrix
  // That correlation matrix can then be compared to the original correlation matrix "corr", simply subtracting one from the other. Better approximations will have close to zero deviation.
  // What follows is the construction of this comparison, and the reporting of the comparison (saving it to file)...
  std::streamsize ss = std::cout.precision();
  if (m_verbose) std::cout << " The total variance is " << m_totalvariance << " and the reduction captured " << std::setprecision(9) << 100.0*(m_capturedvariance/m_totalvariance) << "% of this." << std::endl;
  std::cout.precision(ss); //restore precision
  if (m_validate){ // <---- this flag can be set manually in a local checkout of this package (temporary fix)
    validate_reduction(m_blockmatrixsize, corr, m_eigen, comb_result.get(), "SFGlobalEigen", m_taggername, m_wp, m_jetauthor); // This method is simply here to report the matrix comparison, for reduction scheme validation
  }


  // Let's separate out the flavour histograms now, this should be far simpler to ensure the binning is correct for each flavour.
  // At this stage, we should have constructed a set of combined eigenvariations stored in m_eigen.
  // Below, we take them one by one, separate, and store the flavour variations in m_flav_eigen, with the proper flavour heading
  // <----- The "mergeVariations" code originally merged the variations in m_eigen. But now that wouldn't work, since m_eigen stores
  // the combined histograms. We should instead merge the "reported" flavour histograms, as they're properly binned (and physically meaningful)
  // So if we construct the flavour variations first, keeping the indices all the same, then merge them.


  for(const std::pair<TH1*,TH1*>& var : m_eigen){
    // now we make use of the same old flavour loop and impose the flavour variation to the flavour container
    TString eigenvarup = var.first->GetName();
    TString eigenvardown = var.second->GetName();
    int bin_baseline = 0; // increment this by flav_bins after getting each flavour block
    for (const std::string& flavour : m_flavours){
      TH1* result = dynamic_cast<TH1*>(c->GetValue("result"));
      if (not result){
        std::cerr<<"CalibrationDataGlobalEigenVariations::initialize: dynamic cast failed\n";
        continue;
      }
      TH1* resultVariedUp = (TH1*)result->Clone(eigenvarup); resultVariedUp->SetDirectory(0); // copy flavour result, want to set bin contents according to the combined eigenvartion flavour block
      TH1* resultVariedDown = (TH1*)result->Clone(eigenvardown); resultVariedDown->SetDirectory(0);
      int up_to_bin = flav_bins[flavour];
      int current_bin = 1; // starting from 1 for filling histograms
      // NOTE(review): the loop bound uses up_to_bin (the flavour's bin count) rather than
      // bin_baseline + up_to_bin, yet flav_bin starts at bin_baseline+1 — for any flavour
      // after the first this copies fewer bins than the block holds; confirm intent.
      for(int flav_bin = bin_baseline+1 ; flav_bin < up_to_bin+1 ; flav_bin++){ // the +1's here are to deal with bin numbering yet again...
        Int_t bin = result->GetBin(1,current_bin); // increment along smaller (result) flavour variation TH1
        resultVariedUp->SetBinContent(bin, var.first->GetBinContent(flav_bin)); // Sets the result TH1 bin content to the eigenvariation bin content
        resultVariedDown->SetBinContent(bin, var.second->GetBinContent(flav_bin)); // In this way, we achieve flavour variations with proper binning
        current_bin+=1;
      }
      bin_baseline+=up_to_bin;
      m_flav_eigen[flavour].push_back(std::make_pair(resultVariedUp, resultVariedDown)); // <-------- add the flavour EV to the storage (indexed the same as in m_eigen!)
    }
  }

  for(const auto& f : m_flavours){
    if (m_verbose) std::cout << " " << f << " m_flav_eigen has " << m_flav_eigen[f].size() << std::endl;
  }

  m_initialized = true;
}
1874
1875
1876
1877//________________________________________________________________________________
TMatrixD
{
  // Construct the matrix that removes the rows and columns that fail to influence
  // the eigen-variations. To reduce the covariance matrix, do the following:
  // Note: Previous version called "getEigenCovariance" whereas here we pass it in by reference to save on compute
  // This "cov" is the "uncompressed" eigenvariation covariance matrix for all uncertainties.
  // We will subsequently compress it (removing columns/rows containing zeros only)
  // and then construct the hopefully rectangular matrix that can remove these ones from the covariance matrix
  //
  // Returns a (size x m_blockmatrixsize) matrix with a single 1 per row, mapping the
  // full-dimension space onto the space with all-zero rows/columns removed.

  int nZeros = 0;                   // count of all-zero rows/columns found
  std::vector<int> zeroComponents;  // their indices in the full matrix

  // First flag all the zeros
  for (int i = 0 ; i < m_blockmatrixsize ; i++){
    if (fabs(cov(i,0)) < 1e-10){
      bool isThereANonZero = false;
      for (int j = 0 ; j < m_blockmatrixsize ; j++){
        // Try a first (quick) identification of rows/columns of zeros by the first element in each row
        // If "successful", check the whole row in more detail
        if (fabs(cov(i,j)) > 1e-10){
          isThereANonZero = true;
          break;
        }
      }
      if (! isThereANonZero){
        ++nZeros;
        zeroComponents.push_back(i) ;
      }
    }
  }

  if (nZeros >= m_blockmatrixsize) {
    std::cerr << " Problem. Found n. " << nZeros << " while size of matrix is " << m_blockmatrixsize << std::endl;
    // NOTE(review): the declared return type is TMatrixD, so this empty TMatrixDSym is
    // converted to a zero-size TMatrixD — callers must cope with an empty matrix here.
    return TMatrixDSym();
  }

  int size = m_blockmatrixsize - nZeros; // reduced dimension after dropping zero rows
  TMatrixT<double> matrixVariationJacobian(size,m_blockmatrixsize);
  int nMissed = 0; // how many zero rows have been skipped so far

  for (int i = 0; i < m_blockmatrixsize; ++i) { // full size
    bool missed = false;
    for (unsigned int s = 0 ; s < zeroComponents.size(); ++s) { // <--- Basically what this does is it flags "missed" for a given "i" of the full bin size
      if (zeroComponents.at(s) == i) { // <--- if "i" is in "zeroComponents". Breaks (because it found that it's to be missed)
        missed = true;
        break;
      }
    }
    if (missed) { // <-------- Finally, if "i" is to be missed, increase "nMissed" by one, and....
      ++nMissed;
      continue;
    }

    matrixVariationJacobian(i-nMissed,i)=1; // <-------- ... this ALWAYS adds a one. If zero "nMissed", add to diagonal. otherwise off-diagonal
  }

  return matrixVariationJacobian;
}
1937
1938
1939
1940
1941
1942//________________________________________________________________________________
unsigned int
{
  // Return the number of eigenvector variations stored for the given flavour,
  // triggering initialization first if that has not yet happened.
  // Returns 0 (with an optional message) when the flavour is unknown.
  // NOTE(review): m_flav_eigen is searched twice (find + find) — a single stored
  // iterator would suffice; harmless, noted for a future cleanup.

  if (! m_initialized) initialize();

  if (m_flav_eigen.find(flavour) == m_flav_eigen.end()){
    if (m_verbose) std::cout << "No m_flav_eigen for flavour " << flavour << std::endl;
    return 0;
  }
  return (m_flav_eigen.find(flavour)->second).size();
}
1954
1955//________________________________________________________________________________
1956bool
1957CalibrationDataGlobalEigenVariations::getEigenvectorVariation(const std::string& flavour, unsigned int variation, TH1*& up, TH1*& down)
1958{
1959 // Return the pointers to the up- and downward variation histogram for the specified
1960 // eigenvector variation. In case of an invalid variation number, null pointers will
1961 // be returned and the return value will be false.
1962 //
1963 // variation: eigenvector variation number
1964 // up: (reference to) pointer to upward variation histogram
1965 // down: (reference to) pointer to downward variation histogram
1966
1967 if (! m_initialized) initialize();
1968 std::vector<std::pair<TH1*,TH1*>> flav_variations = m_flav_eigen.find(flavour)->second;
1969 if (variation < flav_variations.size()){
1970 up = flav_variations[variation].first;
1971 down = flav_variations[variation].second;
1972 return true;
1973 }
1974
1975 up = down = 0;
1976 return false;
1977}
1978
1979//________________________________________________________________________________
1980bool
1981CalibrationDataGlobalEigenVariations::getNamedVariation(const string& flavour, const string& name, TH1*& up, TH1*& down)
1982{
1983 // Return the pointers to the up- and downward variation histogram for the specified
1984 // named variation. In case of an invalid named variation, null pointers will
1985 // be returned and the return value will be false.
1986 //
1987 // name: named variation
1988 // up: (reference to) pointer to upward variation histogram
1989 // down: (reference to) pointer to downward variation histogram
1990
1991 // Find the named variation index (if it exists) and pass to "getNamedVariation"
1992 std::map<std::string, unsigned int>::const_iterator it = (m_flav_namedIndices.find(flavour)->second).find(name);
1993 if (it != (m_flav_namedIndices.find(flavour)->second).end()) return getNamedVariation(it->second, flavour, up, down);
1994
1995 up = down = 0;
1996 return false;
1997}
1998
1999//________________________________________________________________________________
2000bool
2001CalibrationDataGlobalEigenVariations::getNamedVariation(unsigned int nameIndex, const std::string& flavour, TH1*& up, TH1*& down)
2002{
2003 // Return the pointers to the up- and downward variation histogram for the specified
2004 // named variation. In case of an invalid named variation number, null pointers will
2005 // be returned and the return value will be false.
2006 //
2007 // nameIndex: named variation number
2008 // up: (reference to) pointer to upward variation histogram
2009 // down: (reference to) pointer to downward variation histogram
2010
2011 if (! m_initialized) initialize();
2012
2013 if (nameIndex < m_flav_named[flavour].size()) {
2014 up = m_flav_named[flavour][nameIndex].first;
2015 down = m_flav_named[flavour][nameIndex].second;
2016 return true;
2017 }
2018
2019 up = down = 0;
2020 return false;
2021}
2022
2023
2024//________________________________________________________________________________
2025unsigned int
2026CalibrationDataGlobalEigenVariations::getNamedVariationIndex(const std::string& name, const std::string& flavour) const
2027{
2028 // Return the integer index corresponding to the named variation.
2029 // Note that no checks are made on the validity of the name provided.
2030 std::map<std::string, unsigned int>::const_iterator it = (m_flav_namedIndices.find(flavour)->second).find(name);
2031 return it->second;
2032}
2033
2034
2035//________________________________________________________________________________
2036bool
2037CalibrationDataGlobalEigenVariations::isExtrapolationVariation(unsigned int nameIndex, const std::string& flavour) const
2038{
2039 // Verifies whether the given named variation index corresponds to the extrapolation
2040 // uncertainty.
2041 int extrapIndex = m_flav_namedExtrapolation.find(flavour)->second;
2042 return (extrapIndex == int(nameIndex));
2043}
2044
2045
2046
2047//________________________________________________________________________________
2048void
2050{
2051 // Merge all systematic variation starting from the given index.
2052 // The resulting merged variation replaces the first entry in the list
2053 // (i.e., the entry specified by the index).
2054
2055 IndexSet simple_set;
2056
2057 for (size_t it = index; it < m_flav_eigen[flav].size(); ++it)
2058 simple_set.insert(it);
2059 mergeVariations(simple_set, flav);
2060}
2061
2062//________________________________________________________________________________
2063void
2065{
2066 IndexSuperSet sset;
2067 sset.insert(set);
2068 mergeVariations(sset, flav);
2069}
2070
2071
2072//________________________________________________________________________________
void
// NOTE(review): the line carrying the function signature is missing from this
// extraction of the file; from the class declaration this method should be
//   CalibrationDataGlobalEigenVariations::mergeVariations(const IndexSuperSet& set, std::string& flav)
// — confirm against the original file.
{

  // Merge the (flavour specific) eigenvariations, as stored in m_flav_eigen

  // Each IndexSet within the super-set is collapsed into one merged variation; the
  // merged result replaces the lowest-index member of each set, and all other
  // members of the sets are removed at the end.

  // check for overlap: every index may appear in at most one of the subsets
  IndexSet checker;
  for (IndexSuperSet::iterator set_it = set.begin(); set_it != set.end(); ++set_it) {
    for (IndexSet::iterator subset_it = set_it->begin(); subset_it != set_it->end(); ++subset_it){
      // NOTE(review): "<=" admits an index equal to the container size (one past the
      // last valid element) — presumably "<" was intended; confirm.
      if (checker.count(*subset_it) == 0 && *subset_it <= m_flav_eigen[flav].size())
        checker.insert(*subset_it);
      else {
        // NOTE(review): the diagnostic quotes m_eigen.size() (the base-class container)
        // while the check above uses m_flav_eigen[flav].size() — confirm which is meant.
        std::cerr << "Error in CalibrationDataGlobalEigenVariations::mergeVariations: \
        IndexSets must not overlap and must lie between 1 and " << m_eigen.size() << ". Aborting!" << std::endl;
        return;
      }
    }
  }

  std::string flavour = flav;

  // NOTE(review): a non-comment line is missing from this extraction here; it defined
  // the container pointer "c" dereferenced below (presumably obtained from
  // m_histcontainers for this flavour — confirm against the original file).

  TH1* result = dynamic_cast<TH1*>(c->GetValue("result"));
  if (not result){
    std::cerr << "Error in CalibrationDataGlobalEigenVariations::mergeVariations: failed dynamic cast\n";
    return;
  }
  // retrieve the central calibration

  // indices whose variations become redundant once merged into the lowest slot
  IndexSet toDelete;
  // total number of bins, including under- and overflow, over all dimensions
  int nbins = result->GetNbinsX()+2;
  int ndim = result->GetDimension();
  if (ndim > 1) nbins *= (result->GetNbinsY()+2);
  if (ndim > 2) nbins *= (result->GetNbinsZ()+2);

  // complex sum
  for (IndexSuperSet::iterator set_it = set.begin(); set_it != set.end(); ++set_it) {
    if (set_it->empty()) continue;

    // running normalisation sums for the merged up/down variations
    double sum_H_up = 0.0, sum_H_down = 0.0;
    // the merged variation is stored in the slot of the lowest index in this set
    size_t lowest_index = *set_it->lower_bound(0);
    TH1 *total_var_up = static_cast<TH1*>(m_flav_eigen[flavour][lowest_index].first->Clone());
    TH1 *total_var_down = static_cast<TH1*>(m_flav_eigen[flavour][lowest_index].second->Clone());
    // detach the clones from any ROOT directory so ownership stays local
    total_var_up->SetDirectory(0);
    total_var_down->SetDirectory(0);

    total_var_up->Reset();
    total_var_down->Reset();

    // sum all other variations
    for (IndexSet::iterator subset_it = set_it->begin(); subset_it != set_it->end(); ++subset_it) {
      size_t actual_index = *subset_it;

      // every member except the lowest index is scheduled for removal afterwards
      if (actual_index != lowest_index) toDelete.insert(*subset_it); //

      TH1 *partial_var_up = static_cast<TH1*>(m_flav_eigen[flavour][actual_index].first->Clone());
      TH1 *partial_var_down = static_cast<TH1*>(m_flav_eigen[flavour][actual_index].second->Clone());
      partial_var_up->SetDirectory(0);
      partial_var_down->SetDirectory(0);

      // turn the absolute variation histograms into pure differences w.r.t. the result
      partial_var_up->Add(result, -1.0);
      partial_var_down->Add(result, -1.0);
      // flip the sign of the downward difference so both enter with the same convention
      for (int i = 0; i < nbins; ++i) {
        partial_var_down->SetBinContent(i, -1.0*partial_var_down->GetBinContent(i));
      }

      // accumulate per-bin sums over all bin pairs (u,v); sum_H_* accumulate the
      // grand totals used for normalisation below
      for (int u = 0; u < nbins; ++u) {
        double sum_up = total_var_up->GetBinContent(u);
        double sum_down = total_var_down->GetBinContent(u);
        for (int v = 0; v < nbins; ++v) {
          sum_up += partial_var_up->GetBinContent(u)*partial_var_up->GetBinContent(v);
          sum_H_up += partial_var_up->GetBinContent(u)*partial_var_up->GetBinContent(v);
          sum_down += partial_var_down->GetBinContent(u)*partial_var_down->GetBinContent(v);
          sum_H_down += partial_var_down->GetBinContent(u)*partial_var_down->GetBinContent(v);
        }
        total_var_up->SetBinContent(u, sum_up);
        total_var_down->SetBinContent(u, sum_down);
      }
      delete partial_var_up;
      delete partial_var_down;
    }

    // final part of complex summing: normalise by the square root of the grand totals
    for (int i = 0; i < nbins; ++i) {
      if (sum_H_up != 0.0)
        total_var_up->SetBinContent(i, total_var_up->GetBinContent(i)/sqrt(sum_H_up));
      else
        total_var_up->SetBinContent(i, 0.0);
      if (sum_H_down != 0.0)
        total_var_down->SetBinContent(i, -1.0*total_var_down->GetBinContent(i)/sqrt(sum_H_down));
      else
        total_var_down->SetBinContent(i, 0.0);
    }

    // re-add the central result so the merged histograms are absolute variations again
    total_var_up->Add(result);
    total_var_down->Add(result);

    // NOTE(review): the histograms previously held in this slot are overwritten
    // without being deleted here; only the slots in toDelete are freed below — confirm
    // whether this leak is intentional.
    m_flav_eigen[flavour][lowest_index].first = total_var_up;
    m_flav_eigen[flavour][lowest_index].second = total_var_down;
  }

  // drop (and delete) all merged-away variations
  removeVariations(toDelete, flavour);
}
2179
2180
2181//________________________________________________________________________________
2182void
2184{
2185 if (set.size() == 0) return;
2186 std::vector<std::pair<TH1*, TH1*> > new_eigen;
2187 std::vector<std::pair<TH1*, TH1*>> eigen = m_flav_eigen[flav];
2188 for (size_t index = 0; index < eigen.size(); ++index){
2189 if (set.count(index) == 0) {
2190 new_eigen.push_back(eigen[index]);
2191 } else {
2192 delete eigen[index].first; delete eigen[index].second;
2193 }
2194 }
2195
2196 m_flav_eigen[flav] = std::move(new_eigen);
2197
2198}
2199
2200//________________________________________________________________________________
2201void
2203{
2204 IndexSet simple_set;
2205
2206 for (IndexSuperSet::iterator set_it = set.begin(); set_it != set.end(); ++set_it) {
2207 for (IndexSet::iterator subset_it = set_it->begin(); subset_it != set_it->end(); ++subset_it)
2208 simple_set.insert(*subset_it);
2209 }
2210
2211 removeVariations(simple_set, flav);
2212}
const boost::regex ref(r_ef)
ClassImp(CalibrationDataEigenVariations) CalibrationDataEigenVariations
std::vector< std::string > split(const std::string &str, const char token=';')
local utility function: split string into a vector of substrings separated by a specified separator,...
static Double_t ss
This is the interface for the objects to be stored in the calibration ROOT file.
CalibrationDataHistogramContainer * m_cnt
container object containing the basic information
bool m_initialized
flag whether the initialization has been carried out
unsigned int getNumberOfNamedVariations() const
retrieve the number of named variations
void removeVariations(const IndexSet &set)
remove all variations in the given set
std::map< std::string, unsigned int > m_namedIndices
named variations
std::vector< std::pair< TH1 *, TH1 * > > m_eigen
eigenvector variations
TMatrixD getJacobianReductionMatrix()
matrix to remove unnecessary rows and columns from covariance
bool getEigenvectorVariation(unsigned int variation, TH1 *&up, TH1 *&down)
obtain the "up" and "down" variations for the given eigenvector number.
int m_namedExtrapolation
named variation index for the special case of extrapolation uncertainties
void mergeVariationsFrom(const size_t &index)
merge all variations starting from the given index
TMatrixDSym getEigenCovarianceMatrixFromVariations()
covariance matrix corresponding to eigenvector variations constructed from the eigen-variation
bool isExtrapolationVariation(unsigned int nameIndex) const
flag whether the given index corresponds to an extrapolation variation
unsigned int getNumberOfEigenVariations()
retrieve the number of eigenvector variations
void excludeNamedUncertainty(const std::string &name, CalibrationDataContainer *cnt)
exclude the source of uncertainty indicated by name from eigenvector calculations
bool m_statVariations
indicate whether statistical uncertainties are stored as variations
bool getNamedVariation(const std::string &name, TH1 *&up, TH1 *&down)
obtain the "up" and "down" variations for the named uncertainty.
virtual void initialize(double min_variance=1.0E-20)
carry out the eigenvector computations.
virtual TMatrixDSym getEigenCovarianceMatrix()
also provide (some) access to the underlying information: covariance matrix corresponding to eigenvec...
unsigned int getNamedVariationIndex(const std::string &name) const
retrieve the integer index corresponding to the named variation.
std::vector< std::pair< TH1 *, TH1 * > > m_named
void mergeVariations(const IndexSet &set)
merge all variations in the given set
std::vector< std::string > listNamedVariations() const
list the named variations
CalibrationDataEigenVariations(const std::string &cdipath, const std::string &tagger, const std::string &wp, const std::string &jetcollection, CalibrationDataHistogramContainer *cnt, bool excludeRecommendedUncertaintySet=false, bool base=true)
normal constructor.
bool EigenVectorRecomposition(const std::string &label, std::map< std::string, std::map< std::string, float > > &coefficientMap)
Eigenvector recomposition method.
std::map< std::string, std::vector< std::pair< TH1 *, TH1 * > > > m_flav_named
void initialize(double min_variance=1.0E-6)
carry out the eigenvector computations.
void removeVariations(const IndexSet &set, std::string &flav)
void excludeNamedUncertainty(const std::string &name, const std::string &flavour)
bool getEigenvectorVariation(const std::string &flavour, unsigned int variation, TH1 *&up, TH1 *&down)
std::map< std::string, Analysis::CalibrationDataHistogramContainer * > m_histcontainers
CalibrationDataGlobalEigenVariations(const std::string &cdipath, const std::string &tagger, const std::string &wp, const std::string &jetcollection, const std::vector< std::string > &flavours, CalibrationDataHistogramContainer *cnt, bool excludeRecommendedUncertaintySet=false)
TMatrixDSym getEigenCovarianceMatrix()
also provide (some) access to the underlying information: covariance matrix corresponding to eigenvec...
std::map< std::string, std::map< std::string, unsigned int > > m_flav_namedIndices
unsigned int getNamedVariationIndex(const std::string &name, const std::string &flavour) const
bool isExtrapolationVariation(unsigned int nameIndex, const std::string &flavour) const
std::map< std::string, std::vector< std::pair< TH1 *, TH1 * > > > m_flav_eigen
void mergeVariations(const IndexSet &set, std::string &flav)
void mergeVariationsFrom(const size_t &index, std::string &flav)
bool getNamedVariation(const std::string &flavour, const std::string &name, TH1 *&up, TH1 *&down)
This is the class holding information for histogram-based calibration results.
CalibrationDataEigenVariations(const std::string &cdipath, const std::string &tagger, const std::string &wp, const std::string &jetcollection, CalibrationDataHistogramContainer *cnt, bool excludeRecommendedUncertaintySet=false, bool base=true)
normal constructor.
This is the class holding information for histogram-based calibration results.
STL iterator class.
STL iterator class.
void zero(TH2 *h)
zero the contents of a 2d histogram
int r
Definition globals.cxx:22
std::string find(const std::string &s)
return a remapped string
Definition hcg.cxx:138
std::string base
Definition hcg.cxx:81
bool verbose
Definition hcg.cxx:73
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
std::string label(const std::string &format, int i)
Definition label.h:19
static TFile * fout
Definition listroot.cxx:40
std::vector< std::string > split(const std::string &str, const char token=';')
local utility function: split string into a vector of substrings separated by a specified separator,...
row
Appending html table to final .html summary file.
float j(const xAOD::IParticle &, const xAOD::TrackMeasurementValidation &hit, const Eigen::Matrix3d &jab_inv)
const std::string tagger
Definition index.py:1
void initialize()