ATLAS Offline Software
Loading...
Searching...
No Matches
CalibrationDataEigenVariations.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3*/
4
6// CalibrationDataEigenVariations.cxx, (c) ATLAS Detector software
8
12
#include <algorithm>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <limits>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "TH1.h"
#include "TH2.h"
#include "TVectorT.h"
#include "TDecompSVD.h"
#include "TMatrixDSymEigen.h"
#include "TMatrixT.h"
#include "TROOT.h"
#include "TFile.h"

#include <memory.h>
32
36using std::string;
37using std::map;
38using std::vector;
39using std::set;
40using std::pair;
41using std::setw;
42using std::setprecision;
43
44namespace {
45
46 // Utility functions...
47 void validate_reduction(int matrixsize, TMatrixDSym& corr, std::vector<std::pair<TH1*, TH1*>>& eigenvars, TH1* result, const std::string& scheme, const std::string& tagger, const std::string& wp, const std::string& jetauthor){
48 // Validating the reduction schemes - you can compare the correlation matrix constructed before and after eigenvector decomposition
49 // using whatever eigenvectors are available at either stage. The less difference, the better.
50
51 // This function simply constructs the relevant matrix relative percentage difference comparison, and saves to file of your choosing.
52 // Saving to EOS is recommended - this can then be accessed, downloaded, and plotted with whatever plotting script you prefer.
53
54 // Each systematic strategy scheme has a different way to construct the approximate covariance matrix
55 TMatrixDSym approx_cov(matrixsize); // matrix with zeros
56
57 if (scheme == "SFEigen"){
58 for (const std::pair<TH1*,TH1*>& var : eigenvars){
59 // we want to make the tensor product of (purely) the variation with itself, and store it in a TMatrix
60 // ---> get the pure variations by subtracting the result from the up_variation combined eigenvariation histogram
61 TH1* pure_var = static_cast<TH1*>(var.first->Clone()); pure_var->SetDirectory(0);
62 pure_var->Add(result, -1.0); // do the subtraction, get variation value histogram
63 // ---> convert the pure variation into a vector of doubles
64 std::vector<double> pure_var_vec;
65 for (int binz = 1 ; binz <= pure_var->GetNbinsZ() ; ++binz){
66 for (int biny = 1 ; biny <= pure_var->GetNbinsY() ; ++biny){
67 for (int binx = 1 ; binx <= pure_var->GetNbinsX() ; ++binx){
68 pure_var_vec.push_back(pure_var->GetBinContent(pure_var->GetBin(binx,biny,binz)));
69 }
70 }
71 }
72 // then compute the covariance matrix, and sum into the total (approximate) covariance matrix
73 for(int i = 0 ; i < corr.GetNrows() ; i++){
74 for(int j = 0 ; j < corr.GetNcols() ; j++){
75 approx_cov(i,j) += (pure_var_vec[i])*(pure_var_vec[j]); // do the tensor product
76 }
77 }
78 } // end of (reduced set of) eigenvariations loop
79
80 } else if (scheme == "SFGlobalEigen"){
81 TH1* comb_result = result; // renaming to match the intended usage...
82
83 for (const std::pair<TH1*,TH1*>& var : eigenvars){
84 // we want to make the tensor product of (purely) the variation with itself, and store it in a TMatrix
85 // get the pure variations by subtracting the comb_result from the up_variation combined eigenvariation histogram
86 TH1* pure_var = static_cast<TH1*>(var.first->Clone()); pure_var->SetDirectory(0);
87 pure_var->Add(comb_result, -1.0); // do the subtraction, get variation value histogram
88 for(int i = 0 ; i < approx_cov.GetNrows() ; i++){
89 for(int j = 0 ; j < approx_cov.GetNcols() ; j++){
90 approx_cov(i,j) += (pure_var->GetBinContent(i))*(pure_var->GetBinContent(j)); // do the tensor product
91 }
92 }
93 } // end of (reduced set of) eigenvariations loop
94
95 } else {
96 std::cout << " Error: The scheme " << scheme << " didn't match SFEigen or SFGlobalEigen." << std::endl;
97 return;
98 }
99
100 // make the total correlation matrix from the (approximate) total covariance matrix
101 TMatrixDSym approx_corr(approx_cov);
102 for(int row = 0 ; row < approx_cov.GetNrows() ; row++){
103 for(int col = 0 ; col < approx_cov.GetNcols() ; col++){
104 double rowvar = sqrt(approx_cov(row,row));
105 double colvar = sqrt(approx_cov(col,col));
106 approx_corr(row,col) = approx_cov(row,col)/(rowvar * colvar); // divide each element by the variance
107 }
108 }
109 // subtract the approximate correlation matrix from the true correlation matrix, and save to file
110 TMatrixDSym numerator = (approx_corr - corr);
111 TMatrixDSym denominator = corr;
112 TMatrixDSym correlation_comparison(matrixsize);
113 for(int row = 0 ; row < approx_corr.GetNrows() ; row++){
114 for(int col = 0 ; col < approx_corr.GetNcols() ; col++){
115 if (denominator(row,col)==0.0){
116 correlation_comparison(row,col) = 0.0;
117 } else {
118 correlation_comparison(row,col) = 100*std::abs(numerator(row,col)/denominator(row,col)); // store relative percent errors
119 }
120 }
121 }
122
123 string outputname = "Validate_" + scheme + "_" + tagger + "_" + wp + "_" + jetauthor + "_" + std::to_string(eigenvars.size()) + "vars" ;
124 string eospath = "/your/path/here/plotting/"; // <------ Place you want to store plots. Obviously, change this
125 string outputfilename = eospath + outputname + ".root";
126 std::cout << "ATTEMPTING TO WRITE " << outputfilename << " TO FILE..." << std::endl;
127 TFile fout(outputfilename.c_str(), "recreate");
128 fout.cd();
129 std::unique_ptr<TH2D> hout(new TH2D(correlation_comparison));
130
131 // Note: you can also look at the other matrices involved - save them to file and plot them if you wish
132 hout->SetTitle("Correlation Matrix Comparison");
133 hout->Write();
134 fout.Close();
135
136 }
137
138
139
140
141 // The covariance matrices constructed by the following two functions adhere to the TH1 binning conventions.
142 // But in order to avoid unnecessary overhead, the actual dimensionality of the histograms is accounted for.
143
144 // Construct the (diagonal) covariance matrix for the statistical uncertainties on the "ref" results
145 // Note that when statistical uncertainties are stored as variations, they are already accounted for and hence should not be duplicated; hence they are dummy here
146 // (this is not very nice, in that the result of this function will technically be wrong,
147 // but the statistical uncertainty covariance matrix is anyway not separately visible to the outside world)
148 TMatrixDSym getStatCovarianceMatrix(const TH1* hist, bool zero) {
149 Int_t nbinx = hist->GetNbinsX()+2, nbiny = hist->GetNbinsY()+2, nbinz = hist->GetNbinsZ()+2;
150 Int_t rows = nbinx;
151 if (hist->GetDimension() > 1) rows *= nbiny;
152 if (hist->GetDimension() > 2) rows *= nbinz;
153
154 // // the "2" below doesn't actually imply that two bins are used...
155 // // this is just to make the loops below work
156 // if (ref->GetDimension() <= 1) nbiny = 2;
157 // if (ref->GetDimension() <= 2) nbinz = 2;
158
159 TMatrixDSym stat(rows);
160 for (Int_t binx = 1; binx < nbinx-1; ++binx)
161 for (Int_t biny = 1; biny < nbiny-1; ++biny)
162 for (Int_t binz = 1; binz < nbinz-1; ++binz) {
163 Int_t bin = hist->GetBin(binx, biny, binz);
164 double err = zero ? 0 : hist->GetBinError(bin);
165 stat(bin, bin) = err*err;
166 }
167 return stat;
168 }
169
// Construct the covariance matrix assuming that histogram "unc" contains systematic uncertainties
// pertaining to the "ref" results, and that the uncertainties are fully correlated from bin to bin
// (unless option "doCorrelated" is false, in which case bins are assumed uncorrelated).
// One exception is made for the uncorrelated case: if we are dealing with a "continuous" calibration
// object (tagWeightAxis >= 0), then a full correlation is still assumed between different tag
// weight bins along the indicated axis (0 = X, 1 = Y, 2 = Z).
// The returned matrix follows the TH1 global bin numbering of "ref" (including
// under-/overflow slots); only in-range bins are filled.
TMatrixDSym getSystCovarianceMatrix(const TH1* ref, const TH1* unc, bool doCorrelated, int tagWeightAxis) {
  Int_t nbinx = ref->GetNbinsX()+2, nbiny = ref->GetNbinsY()+2, nbinz = ref->GetNbinsZ()+2;
  // Matrix dimension: product of the axis sizes actually used by "ref"
  Int_t rows = nbinx;
  if (ref->GetDimension() > 1) rows *= nbiny;
  if (ref->GetDimension() > 2) rows *= nbinz;

  TMatrixDSym cov(rows);

  // Carry out a minimal consistency check: "unc" must have the same binning and
  // dimensionality as "ref"; otherwise a null matrix is returned (with a message).
  if (unc->GetNbinsX()+2 != nbinx || unc->GetNbinsY()+2 != nbiny || unc->GetNbinsZ()+2 != nbinz || unc->GetDimension() != ref->GetDimension()) {
    std::cout << "getSystCovarianceMatrix: inconsistency found in histograms " << ref->GetName() << " and " << unc->GetName() << std::endl;
    return cov;
  }

  // // the "2" below doesn't actually imply that two bins are used...
  // // this is just to make the loops below work
  // if (ref->GetDimension() <= 1) nbiny = 2;
  // if (ref->GetDimension() <= 2) nbinz = 2;

  // Special case: uncertainties not correlated from bin to bin.
  // The exception here is for tag weight bins, which are always assumed to be fully correlated.
  if (! doCorrelated) {
    if (tagWeightAxis < 0) {
      // truly uncorrelated uncertainties: diagonal matrix of squared uncertainties
      for (Int_t binx = 1; binx < nbinx-1; ++binx)
        for (Int_t biny = 1; biny < nbiny-1; ++biny)
          for (Int_t binz = 1; binz < nbinz-1; ++binz) {
            Int_t bin = ref->GetBin(binx, biny, binz);
            double err = unc->GetBinContent(bin);
            cov(bin,bin) = err*err;
          }
      return cov;
    } else if (tagWeightAxis == 0) {
      // continuous histogram with tag weight X axis: full correlation between
      // all X (tag weight) bins within the same (Y, Z) cell
      for (Int_t biny = 1; biny < nbiny-1; ++biny)
        for (Int_t binz = 1; binz < nbinz-1; ++binz)
          for (Int_t binx = 1; binx < nbinx-1; ++binx) {
            Int_t bin = ref->GetBin(binx, biny, binz);
            double err = unc->GetBinContent(bin);
            for (Int_t binx2 = 1; binx2 < nbinx-1; ++binx2) {
              Int_t bin2 = ref->GetBin(binx2, biny, binz);
              double err2 = unc->GetBinContent(bin2);
              cov(bin,bin2) = err*err2;
            }
          }
      return cov;
    } else if (tagWeightAxis == 1) {
      // continuous histogram with tag weight Y axis: full correlation between
      // all Y bins within the same (X, Z) cell
      for (Int_t binx = 1; binx < nbinx-1; ++binx)
        for (Int_t binz = 1; binz < nbinz-1; ++binz)
          for (Int_t biny = 1; biny < nbiny-1; ++biny) {
            Int_t bin = ref->GetBin(binx, biny, binz);
            double err = unc->GetBinContent(bin);
            for (Int_t biny2 = 1; biny2 < nbiny-1; ++biny2) {
              Int_t bin2 = ref->GetBin(binx, biny2, binz);
              double err2 = unc->GetBinContent(bin2);
              cov(bin,bin2) = err*err2;
            }
          }
      return cov;
    } else if (tagWeightAxis == 2) {
      // continuous histogram with tag weight Z axis: full correlation between
      // all Z bins within the same (X, Y) cell
      for (Int_t binx = 1; binx < nbinx-1; ++binx)
        for (Int_t biny = 1; biny < nbiny-1; ++biny)
          for (Int_t binz = 1; binz < nbinz-1; ++binz) {
            Int_t bin = ref->GetBin(binx, biny, binz);
            double err = unc->GetBinContent(bin);
            for (Int_t binz2 = 1; binz2 < nbinz-1; ++binz2) {
              Int_t bin2 = ref->GetBin(binx, biny, binz2);
              double err2 = unc->GetBinContent(bin2);
              cov(bin,bin2) = err*err2;
            }
          }
      return cov;
    }
  }

  // Fully-correlated case: cov(bin, bin2) = unc(bin) * unc(bin2) for every pair
  // of in-range bins, i.e. the outer product of the uncertainty histogram with itself.
  for (Int_t binx = 1; binx < nbinx-1; ++binx)
    for (Int_t biny = 1; biny < nbiny-1; ++biny)
      for (Int_t binz = 1; binz < nbinz-1; ++binz) {
        Int_t bin = ref->GetBin(binx, biny, binz);
        double err = unc->GetBinContent(bin); // <------------- For every bin in the "ref" ("result") TH1*, GetBinContents of the corresponding uncertainty bin
        for (Int_t binx2 = 1; binx2 < nbinx-1; ++binx2)
          for (Int_t biny2 = 1; biny2 < nbiny-1; ++biny2)
            for (Int_t binz2 = 1; binz2 < nbinz-1; ++binz2) {
              Int_t bin2 = ref->GetBin(binx2, biny2, binz2);
              double err2 = unc->GetBinContent(bin2); // <------- Grab the unc contents of every bin, and compute the covariance matrix element
              cov(bin, bin2) = err*err2; // <------- err1 and err2 are the uncertainty content of the bins, so "cov" is real, symmetric
            } // <------- "cov" would imply that the "hunc" histogram stores "x - E[x]" differences from the mean. So in the end, it computes the covariance (as a sum of these)
      }
  return cov;
}
267
268
269 // <---------------- Custom covariance matrix method - W.L.
270 TMatrixDSym getSystCovarianceMatrix(const vector<double>& unc){
271 int rows = unc.size();
272 TMatrixDSym cov(rows);
273
274 for(int i = 0 ; i < rows ; i++){
275 for(int j = 0 ; j < rows ; j++){
276 double err1 = unc[i];
277 double err2 = unc[j];
278 cov(i,j) = err1*err2;
279 }
280 }
281 return cov;
282 }
283
284 // <---------------- Custom covariance matrix method - W.L.
285 TMatrixDSym getStatCovarianceMatrix(const std::vector<double>& unc, bool zero) {
286 // This is only a stop-gap solution, need to better consider how to do this for 1D,2D,3D binning...
287 int rows = unc.size();
288 TMatrixDSym stat(rows);
289 for (int i = 0; i < rows; i++) {
290 double err = zero ? 0 : unc[i];
291 stat(i,i) = err*err;
292 }
293 return stat;
294 }
295
296
297}
298
300// //
301// CalibrationDataEigenVariations //
302// //
303// This class is intended to provide a more proper way to deal with correlations between //
304// calibration bins than would be possible using directly the calibration containers' //
305// methods (the general issue is with the fact that the interface methods are intended to //
306// be used on a jet by jet basis; in this context it is not straightforward to account for //
307// correlations). //
308// // <-------- Many CDIHistogramContainers in one CDI File
309// A CalibrationDataEigenVariations object is associated with a specific // <-------- one CDEigenVariations object per CDHistogramContainer
310// CalibrationDataHistogramContainer. It starts by constructing the covariance matrix from //
311// the information provided by the container. Subsequently it diagonalises this covariance // <-------- Each CDHistogramContainer stores hists of central values + systematic variations
312// matrix (this is a standard eigenvalue problem, hence the name of the class), and stores //
313// the result as 'variation' histograms (representing the eigenvectors multiplied by the //
314// square root of the corresponding eigenvalues). //
315// Since it is possible for systematic uncertainty contributions to be correlated with //
316// corresponding uncertainties in physics analyses, it is possible to exclude such sources //
317// of uncertainty from being used in the construction of the covariance matrix (this is //
318// since effects from the original sources of uncertainty cannot be traced anymore after //
319// application of the eigenvalue decomposition). It is still possible to evaluate correctly //
320// these uncertainties in the form of so-called 'named variations' (see class //
321// CalibrationDataInterfaceROOT); however this will always treat uncertainties as being //
322// fully correlated (or anti-correlated) between calibration bins, so it is recommended not //
323// to exclude uncertainties that are not correlated between bins from the eigenvector //
324// decomposition. //
326
327#ifndef __CINT__
329#endif
330//________________________________________________________________________________
331CalibrationDataEigenVariations::CalibrationDataEigenVariations(const std::string& cdipath, const std::string& tagger, const std::string& wp, const std::string& jetcollection, CalibrationDataHistogramContainer* cnt, bool excludeRecommendedUncertaintySet, bool base) :
332m_cnt(cnt), m_initialized(false), m_validate(false), m_namedExtrapolation(-1), m_statVariations(false), m_cdipath(cdipath), m_taggername(tagger), m_wp(wp), m_jetauthor(jetcollection), m_totalvariance(0), m_capturedvariance(0), m_verbose(false)
333{
334
335 if (m_verbose) std::cout << " CDEV Constructor : info " << cdipath << " " << tagger << " " << wp << " " << jetcollection << std::endl;
336 // if specified, add items recommended for exclusion from EV decomposition by the calibration group to the 'named uncertainties' list
337 if (excludeRecommendedUncertaintySet && base) {
338 std::vector<std::string> to_exclude = split(m_cnt->getExcludedUncertainties());
339 for (const auto& name : to_exclude) {
340 excludeNamedUncertainty(name, const_cast<CalibrationDataHistogramContainer*>(cnt));
341 }
342 if (to_exclude.size() == 0) {
343 std::cerr << "CalibrationDataEigenVariations warning: exclusion of pre-set uncertainties list requested but no (or empty) list found" <<std::endl;
344 }
345 }
346 // also flag if statistical uncertainties stored as variations (this typically happens as a result of smoothing / pruning of SF results)
347 vector<string> uncs = m_cnt->listUncertainties(); // <-------- The "listUncertainties" method retrieves the list of "information" in the CDIHistogramContainer, basically the "list" output from "showCalibration"
348 for (const auto& name : uncs) {
349 if (name.find("stat_np") != string::npos) m_statVariations = true;
350 }
351
352}
353
//________________________________________________________________________________
// Destructor: release all variation histograms owned by this object, i.e. the
// (up, down) pairs of both the eigenvariations and the named variations.
// NOTE(review): the defining signature line is not visible in this extracted
// view (stripped by the documentation generator); presumably
// CalibrationDataEigenVariations::~CalibrationDataEigenVariations().
{
  // delete all variation histograms owned by us
  for (vector<pair<TH1*, TH1*> >::iterator it = m_eigen.begin(); it != m_eigen.end(); ++it) {
    delete it->first;   // up-variation histogram
    delete it->second;  // down-variation histogram
  }

  for (vector<pair<TH1*, TH1*> >::iterator it = m_named.begin(); it != m_named.end(); ++it) {
    delete it->first;
    delete it->second;
  }
}
368
//________________________________________________________________________________
// NOTE(review): the defining signature line is not visible in this extracted view
// (stripped by the documentation generator); from the body it presumably reads
// CalibrationDataEigenVariations::excludeNamedUncertainty(const std::string& name,
// CalibrationDataContainer* cnt) or similar — confirm against the header.
void
{
  // Exclude the source of uncertainty identified by the given name from being used
  // in the construction of the covariance matrix to be diagonalised.
  // Notes:
  // - Some names returned by CalibrationDataContainer::listUncertainties() are not
  //   meaningful in this context, and specifying them is not allowed.
  // - Once the eigenvector diagonalisation has been carried out, this method may
  //   not be used anymore.
  // - A trailing '*' acts as a prefix wildcard: every stored uncertainty whose
  //   name starts with the given prefix is excluded.

  if (m_initialized) {
    // too late: the decomposition has already been carried out
    std::cerr << "CalibrationDataEigenVariations::excludeNamedUncertainty error:" << " initialization already done" << std::endl;
  } else if (name == "comment" || name == "result" || name == "systematics" ||
             name == "statistics" || name == "combined" || name == "extrapolation" ||
             name == "MCreference" || name == "MChadronisation" || name == "ReducedSets" ||
             name == "excluded_set" || name == "" || name == " ") // <--- these last two handle some cases that may turn up
  {
    // reserved container entries that are not genuine uncertainty sources
    std::cerr << "CalibrationDataEigenVariations::excludeNamedUncertainty error:" << " name " << name << " not allowed" << std::endl;
  }
  // in case multiple uncertainties should be discarded
  else if (name.back() == '*'){
    std::string temp_name = name.substr(0, name.size()-1); //remove "*"
    std::vector<std::string> uncs = m_cnt->listUncertainties();
    std::vector<std::string> unc_subgroup;
    // keep only the uncertainties whose name starts with the requested prefix
    std::copy_if(uncs.begin(), uncs.end(), back_inserter(unc_subgroup),
                 [&temp_name](const std::string& el) {
                   return el.compare(0, temp_name.size(), temp_name) == 0;
                 });
    if (m_verbose) std::cout <<"Found a group of uncertainties to exclude: " <<name <<" found " <<unc_subgroup.size() <<" uncertainties corresponding to the query" <<std::endl;
    for (const auto& single_name : unc_subgroup){
      // only really add if the entry is not yet in the list
      if (m_namedIndices.find(single_name) == m_namedIndices.end()) {
        if (m_verbose) std::cout << "Name : " << single_name << std::endl;
        // placeholder (null) histogram pair; filled later during initialization
        m_named.push_back(std::pair<TH1*, TH1*>(0, 0));
        m_namedIndices[single_name] = m_named.size()-1;
      }
    }
  }
  else if (! cnt->GetValue(name.c_str())){
    // a single (non-wildcard) name must exist in the container
    std::cerr << "CalibrationDataEigenVariations::excludeNamedUncertainty error:" << " uncertainty named " << name << " not found" << std::endl;
  }
}
413
//________________________________________________________________________________
// NOTE(review): the defining signature line is not visible in this extracted view
// (stripped by the documentation generator); presumably
// CalibrationDataEigenVariations::getEigenCovarianceMatrix().
TMatrixDSym
{
  // Construct the covariance matrix that is to be diagonalised.
  // Note that extrapolation uncertainties (identified by the keyword "extrapolation";
  // this will pertain mostly to the extrapolation to high jet pt) are always excluded
  // since by definition they will not apply to the normal calibration bins. Instead
  // this uncertainty has to be dealt with as a named variation. In addition there are
  // other items ("combined", "systematics") that will not be dealt with correctly
  // either and hence are excluded as well.
  //
  // Note that if an explicit covariance matrix is supplied (at present this may be
  // the case only for statistical uncertainties: in the case of "continuous tagging",
  // multinomial statistics applies so bin-to-bin correlations exist) this will be
  // used instead of constructing the statistical uncertainties' covariance matrix on
  // the fly.

  // retrieve the central calibration; an empty matrix is returned on failure
  TH1* result = dynamic_cast<TH1*>(m_cnt->GetValue("result"));
  if (not result){
    std::cerr<<"CalibrationDataEigenVariations::getEigenCovarianceMatrix(): failed dynamic cast to TH1*\n";
    return TMatrixDSym();
  }

  // loop over the uncertainties to construct the covariance matrix for all uncertainties
  // to be addressed using the eigenvector method.

  // First, treat the statistics separately.
  // Account for the possibility that this is handled as a (non-trivial) covariance matrix
  TMatrixDSym* sCov = dynamic_cast<TMatrixDSym*>(m_cnt->GetValue("statistics"));

  // Alternatively, statistical uncertainties may be accounted for as variations (i.e., much like systematic uncertainties).
  // In that case, we create a null matrix here and add the statistical contributions along with the systematic ones.
  TMatrixDSym cov = (sCov) ? *sCov : getStatCovarianceMatrix(result, m_statVariations);

  // Then loop through the list of (other) uncertainties
  std::vector<string> uncs = m_cnt->listUncertainties();
  for (unsigned int t = 0; t < uncs.size(); ++t) {
    // entries that should never be included (reserved container bookkeeping entries)
    if (uncs[t] == "comment" || uncs[t] == "result" || uncs[t] == "combined" ||
        uncs[t] == "statistics" || uncs[t] == "systematics" || uncs[t] == "MCreference" ||
        uncs[t] == "MChadronisation" || uncs[t] == "extrapolation" || uncs[t] == "ReducedSets" ||
        uncs[t] == "excluded_set") continue;
    // entries that can be excluded if desired (handled instead as named variations)
    if (m_namedIndices.find(uncs[t]) != m_namedIndices.end()) continue;
    TH1* hunc = dynamic_cast<TH1*>(m_cnt->GetValue(uncs[t].c_str()));
    if (not hunc){
      std::cerr<< "CalibrationDataEigenVariations::getEigenCovarianceMatrix(): dynamic cast failed\n";
      continue;
    }
    cov += getSystCovarianceMatrix(result, hunc, m_cnt->isBinCorrelated(uncs[t]), m_cnt->getTagWeightAxis()); // <-------- This is where we add the covariance matrices together to form the "total covariance"
  }

  return cov;
}
470
//________________________________________________________________________________
// NOTE(review): the defining signature line is not visible in this extracted view
// (stripped by the documentation generator); presumably
// CalibrationDataEigenVariations::getEigenCovarianceMatrixFromVariations().
TMatrixDSym
{
  // Construct the (Eigen-variation part of the) covariance matrix from the individual variations.
  // This must be called _after_ initialize()!
  // The matrix is the sum over eigenvariations of the outer product of
  // (up-varied result - central result), in the reduced (Jacobian) bin basis.

  // retrieve the central calibration
  // NOTE(review): unlike the sibling methods, "result" is not null-checked before
  // use below — confirm GetValue("result") cannot fail once initialize() has run.
  TH1 *result = dynamic_cast<TH1*>(m_cnt->GetValue("result"));
  TMatrixD jac = getJacobianReductionMatrix();
  int nbins = jac.GetNcols();
  TMatrixDSym cov(nbins);
  auto variation = std::make_unique<double[]>(nbins);

  for (const std::pair<TH1*, TH1*>& it : m_eigen){ // m_eigen is vector of pairs of TH1* which point to TH1's representing the upVariation and downVariation respectively
    TH1* resultVariedUp = it.first; // <--------------- This is the "result" but "varied" upwards - i.e. how the result would look like if the systematic "it" was imposed
    for (unsigned int u = 0; u < (unsigned int) nbins; ++u) variation[u] = resultVariedUp->GetBinContent(u) - result->GetBinContent(u);
    // accumulate the outer product of this variation with itself
    for (int u = 0; u < nbins; ++u){
      for (int v = 0; v < nbins; ++v){
        cov(u, v) += variation[u]*variation[v];
      }
    }
  }

  return cov;
}
497
//________________________________________________________________________________
// NOTE(review): the defining signature line is not visible in this extracted view
// (stripped by the documentation generator); presumably
// CalibrationDataEigenVariations::getJacobianReductionMatrix().
TMatrixD
{
  // Construct the matrix that removes the rows and columns that fail to influence
  // the eigen-variations. To reduce the covariance matrix, do the following:
  //
  //     TMatrixDSym cov = getEigenCovarianceMatrix();
  //     TMatrixDSym jac = getJacobianReductionMatrix(); // <------------ This is an upper triangular matrix of 1's. Similarity transformation will do...
  //     TMatrixDSym redSystematicCovMatrix = cov.Similarity(jac);

  // retrieve the central calibration; an empty matrix is returned on failure
  TH1* result = dynamic_cast<TH1*>(m_cnt->GetValue("result"));
  if (not result){
    std::cerr<<"CalibrationDataEigenVariations::getJacobianReductionMatrix(): dynamic cast failed\n";
    return TMatrixD();
  }

  // loop over the uncertainties to construct the covariance matrix for all uncertainties
  // to be addressed using the eigenvector method.

  // Retrieve the un-compressed Eigenvector variation covariance matrix
  // (only needed to check for unexpected singularities)
  TMatrixDSym cov = getEigenCovarianceMatrix();

  // Compute the original number of bins (TH1 global numbering, including under-/overflow)
  int nbins = result->GetNbinsX()+2;
  int ndim = result->GetDimension();
  if (ndim > 1) nbins*= (result->GetNbinsY()+2);
  if (ndim > 2) nbins*= (result->GetNbinsZ()+2);

  // Start by "compressing" the covariance matrix (removing columns/rows containing zeros only)
  int nZeros=0;
  std::vector<int> zeroComponents;
  if (cov.GetNrows() != nbins) {
    // NOTE(review): this and the later error paths return TMatrixDSym() although the
    // declared return type is TMatrixD — relies on the implicit conversion; confirm intended.
    std::cerr << " error: covariance matrix size (" << cov.GetNrows() << ") doesn't match histogram size (" << nbins << ")" << std::endl;
    return TMatrixDSym();
  }

  // First flag all the zeros (rows/columns carrying no information)
  for (int i = 0; i < nbins; ++i) {
    // Directly identify the under- and overflow bins
    Int_t binx, biny, binz;
    result->GetBinXYZ(i, binx, biny, binz);
    if ((binx == 0 || binx == result->GetNbinsX()+1) ||
        (ndim > 1 && (biny == 0 || biny == result->GetNbinsY()+1)) ||
        (ndim > 2 && (binz == 0 || binz == result->GetNbinsZ()+1)))
    {
      ++nZeros;
      zeroComponents.push_back(i);
    }
    // Try a first (quick) identification of rows/columns of zeros by the first element in each row
    // If "successful", check the whole row in more detail
    else if (fabs(cov(i,0)) < 1e-10) {
      bool isThereANonZero(false);
      for (int j = 0; j < nbins; ++j) {
        if (fabs(cov(i,j)) > 1e-10) {
          isThereANonZero=true; break;
        }
      }
      if (! isThereANonZero) {
        ++nZeros;
        zeroComponents.push_back(i);
      }
    }
  }

  // **** COMMENTED OUT FOR NOW.
  // Leave it here in case the calibration method will change again in the future.
  // No need to reweight the SF by the efficiency of that bin (MCreference always = 0)

  // Determine whether the container is for "continuous" calibration.
  // This is important since the number of independent scale factors (for each pt or eta bin)
  // is reduced by 1 compared to the number of tag weight bins (related to the fact that the fractions
  // of events in tag weight bins have to sum up to unity).
  // int axis = m_cnt->getTagWeightAxis();
  // bool doContinuous = false; unsigned int weightAxis = 0;

  // if (axis >= 0) {
  //   doContinuous = true;
  //   // weightAxis = (unsigned int) axis;
  //   // In this case, verify that the special "uncertainty" entry that is in fact the reference MC tag
  //   // weight fractions is present. These tag weight fractions are needed in order to carry out the
  //   // diagonalisation successfully.
  //   if (! dynamic_cast<TH1*>(m_cnt->GetValue("MCreference"))) {
  //     std::cerr << " Problem: continuous calibration object found without MC reference tag weight histogram " << std::endl;
  //     return TMatrixDSym();
  //   }
  // }

  // Only relevant for continuous calibration containers, but in order to void re-computation we
  // retrieve them here
  // Int_t nbinx = result->GetNbinsX()+2, nbiny = result->GetNbinsY()+2, nbinz = result->GetNbinsZ()+2;

  // // If we are indeed dealing with a "continuous" calibration container, ignore one tag weight row
  // const int skipTagWeightBin = 1; // NB this follows the histogram's bin numbering
  // if (doContinuous) {
  //   for (Int_t binx = 1; binx < nbinx-1; ++binx)
  //     for (Int_t biny = 1; biny < nbiny-1; ++biny)
  //       for (Int_t binz = 1; binz < nbinz-1; ++binz) {
  //         if ((weightAxis == 0 && binx == skipTagWeightBin) ||
  //             (weightAxis == 1 && biny == skipTagWeightBin) ||
  //             (weightAxis == 2 && binz == skipTagWeightBin)) {
  //           // At this point we simply add these to the 'null' elements
  //           ++nZeros;
  //           zeroComponents.push_back(result->GetBin(binx, biny, binz));
  //         }
  //       }
  // }

  // Sanity check: at least one non-trivial row/column must remain
  if (nZeros >= nbins) {
    std::cerr << " Problem. Found n. " << nZeros << " while size of matrix is " << nbins << std::endl;
    return TMatrixDSym();
  }

  int size = nbins - nZeros; // <--- This is the size of the matrix after removing zeros from the covariance matrix

  TMatrixT<double> matrixVariationJacobian(size,nbins);
  int nMissed=0;
  for (int i = 0; i < nbins; ++i) { //full size
    bool missed=false;
    for (unsigned int s = 0; s < zeroComponents.size(); ++s) { // <-------- Basically what this does is it flags "missed" for a given "i" of the full bin size
      if (zeroComponents.at(s) == i) { // <-------- if "i" is in "zeroComponents". Breaks (because it found that it's to be missed)
        missed = true;
        break;
      }
    }
    if (missed) { // <-------- Finally, if "i" is to be missed, increase "nMissed" by one, and....
      ++nMissed;
      continue;
    }
    matrixVariationJacobian(i-nMissed,i)=1; // <-------- ... this ALWAYS adds a one. If zero "nMissed", add to diagonal. otherwise off-diagonal
  } // <-------- This matrix only adds 1's on/off diagonal, upper triangular matrix

  return matrixVariationJacobian;
}
634
635//________________________________________________________________________________
636void
638{
  // This is this class's most important method, in the sense that it does all the
  // math and constructs all "variation" histograms (for both eigenvector and
  // named variations). This constitutes the full initialisation of the class.
  // This method is meant to be called only after all calls (if any) to the
  // CalibrationDataEigenVariations::excludeNamedUncertainty() method.

  // retrieve the central calibration (nominal scale-factor histogram)
  TH1* result = dynamic_cast<TH1*>(m_cnt->GetValue("result"));
  if (not result){
    std::cerr<<"CalibrationDataEigenVariations::initialize: dynamic cast failed\n";
    return;
  }
  // First step: construct the original covariance matrix
  TMatrixDSym cov = getEigenCovarianceMatrix();
  TMatrixDSym corr(result->GetNbinsX()*result->GetNbinsY()*result->GetNbinsZ()); // We want to construct the correlation matrix in order to compare the final eigenvariations correlation matrix to it

  // Fill the correlation matrix, compacting away rows/columns whose variance is
  // zero or tiny (< 1e-6) so that corr contains only "active" calibration bins.
  int rowc = 0;
  for (int row = 0 ; row < cov.GetNrows() ; row++){
    int colc = 0;
    bool colb = false;
    for (int col = 0 ; col < cov.GetNcols() ; col++){
      // NOTE(review): this also skips entries whose *off-diagonal* element is exactly
      // zero, leaving corr(rowc,colc) at 0 without advancing colc -- confirm intended.
      if(!cov(row,col) || !cov(row,row) || !cov(col,col) || cov(row,row)<1.0e-6 || cov(col,col)<1.0e-6 ) continue; // really don't want to divide by zero...
      colb=true;
      long double rowvar = sqrt(cov(row,row));
      long double colvar = sqrt(cov(col,col));
      corr(rowc,colc) = cov(row,col)/(rowvar * colvar); // divide each element by the square roots of the variances
      colc++;
    }
    if (colb){
      rowc++;
    }
  }

  // Second step: create the variations for the named sources of uncertainty (easy...)
  // Each named variation is simply the nominal result +/- the uncertainty histogram.
  for (map<string, unsigned int>::iterator it = m_namedIndices.begin(); it != m_namedIndices.end(); ++it) {
    pair<TH1*, TH1*>& p = m_named[it->second];
    TH1* hunc = (TH1*) m_cnt->GetValue(it->first.c_str());
    TString namedvar("namedVar");
    namedvar += it->first.c_str();
    TString namedvarUp(namedvar);   namedvarUp   += "_up";
    TString namedvarDown(namedvar); namedvarDown += "_down";
    TH1* resultVariedUp = (TH1*)result->Clone(namedvarUp); resultVariedUp->Add(hunc, 1.0); resultVariedUp->SetDirectory(0);
    TH1* resultVariedDown = (TH1*)result->Clone(namedvarDown); resultVariedDown->Add(hunc, -1.0); resultVariedDown->SetDirectory(0);
    p.first = resultVariedUp;
    p.second = resultVariedDown;
  }
  // Refinement: add the "extrapolation" uncertainty as a named uncertainty, if the histogram is provided
  // This is a bit special, since the extrapolation uncertainty histogram has a different size than other histograms
  if (TH1* hunc = (TH1*) m_cnt->GetValue("extrapolation")) { // this is just saying "if it exists"...
    TH1* resultVariedUp = (TH1*) hunc->Clone("extrapolation_up"); resultVariedUp->SetDirectory(0);
    TH1* resultVariedDown = (TH1*) hunc->Clone("extrapolation_down"); resultVariedDown->SetDirectory(0);
    // Bin counts include under/overflow (hence the +2); the firstbin* variables locate
    // where the regular calibration binning starts inside the larger extrapolation binning.
    Int_t nbinx = hunc->GetNbinsX()+2, nbiny = hunc->GetNbinsY()+2, nbinz = hunc->GetNbinsZ()+2;
    Int_t firstbinx = hunc->GetXaxis()->FindFixBin(result->GetXaxis()->GetBinCenter(1));
    Int_t firstbiny = result->GetDimension() > 1 ? hunc->GetYaxis()->FindFixBin(result->GetYaxis()->GetBinCenter(1)) : 1;
    Int_t firstbinz = result->GetDimension() > 2 ? hunc->GetZaxis()->FindFixBin(result->GetZaxis()->GetBinCenter(1)) : 1;
    for (Int_t binx = 1; binx < nbinx-1; ++binx) {
      // map each extrapolation bin onto the nearest regular calibration bin, clamped to the valid range
      Int_t binxResult = binx - firstbinx + 1;
      if (binxResult < 1) binxResult = 1;
      if (binxResult > result->GetNbinsX()) binxResult = result->GetNbinsX();
      for (Int_t biny = 1; biny < nbiny-1; ++biny) {
        Int_t binyResult = biny - firstbiny + 1; // NOTE(review): unlike x, no lower-bound clamp for y (or z below) -- confirm intended
        if (binyResult > result->GetNbinsY()) binyResult = result->GetNbinsY();
        for (Int_t binz = 1; binz < nbinz-1; ++binz) {
          Int_t binzResult = binz - firstbinz + 1;
          if (binzResult > result->GetNbinsZ()) binzResult = result->GetNbinsZ();
          Int_t bin = hunc->GetBin(binx, biny, binz);
          double contResult = result->GetBinContent(binxResult, binyResult, binzResult);
          resultVariedUp->SetBinContent(bin, contResult + hunc->GetBinContent(bin));
          // NOTE(review): the down variation subtracts GetBinError(bin) whereas the up
          // variation adds GetBinContent(bin) -- asymmetric; confirm this is intended
          // and not a typo for GetBinContent(bin).
          resultVariedDown->SetBinContent(bin, contResult - hunc->GetBinError(bin));
        }
      }
    }
    m_named.push_back(std::make_pair(resultVariedUp, resultVariedDown));
    m_namedExtrapolation = m_namedIndices["extrapolation"] = m_named.size()-1;
  }

  // Third step: compute the eigenvector variations corresponding to the remaining sources of uncertainty
  // nbins counts *all* bins, including under- and overflow (hence the +2 per axis).
  int nbins = result->GetNbinsX()+2;
  int ndim = result->GetDimension();
  if (ndim > 1) nbins*= (result->GetNbinsY()+2);
  if (ndim > 2) nbins*= (result->GetNbinsZ()+2);


  // // Determine whether the container is for "continuous" calibration.
  // // This is important since the number of independent scale factors (for each pt or eta bin)
  // // is reduced by 1 compared to the number of tag weight bins (related to the fact that the fractions
  // // of events in tag weight bins have to sum up to unity).
  // int axis = m_cnt->getTagWeightAxis();
  // bool doContinuous = false; unsigned int weightAxis = 0;

  //if (axis >= 0) {
  //  doContinuous = true;
  //  weightAxis = (unsigned int) axis;
  //  // In this case, verify that the special "uncertainty" entry that is in fact the reference MC tag
  //  // weight fractions is present. These tag weight fractions are needed in order to carry out the
  //  // diagonalisation successfully.
  //  NOTE: MCreference is not used at the moment (always 0 in the CDI). Comment it out.
  //  if (! dynamic_cast<TH1*>(m_cnt->GetValue("MCreference"))) {
  //    std::cerr << " Problem: continuous calibration object found without MC reference tag weight histogram " << std::endl;
  //    return;
  //  }
  //}

  // Only relevant for continuous calibration containers, but in order to void re-computation we
  // retrieve them here
  // Int_t nbinx = result->GetNbinsX()+2, nbiny = result->GetNbinsY()+2, nbinz = result->GetNbinsZ()+2;

  TMatrixT<double> matrixVariationJacobian = getJacobianReductionMatrix();
  int size = matrixVariationJacobian.GetNrows();

  // Reduce the matrix to one without the zeros, using a "similarity" transformation

  const TMatrixDSym matrixCovariance = cov.Similarity(matrixVariationJacobian);

  // Carry out the Eigenvector decomposition on this matrix
  TMatrixDSymEigen eigenValueMaker (matrixCovariance);
  TVectorT<double> eigenValues   = eigenValueMaker.GetEigenValues();
  TMatrixT<double> eigenVectors  = eigenValueMaker.GetEigenVectors();
  TMatrixT<double> matrixVariations (size,size);

  //compute the total variance by summing the eigenvalues
  m_totalvariance = eigenValues.Sum(); //

  for (int i = 0; i < size; ++i) {
    //now go back
    for (int r = 0; r < size; ++r)
      //first index is the variation number, second corresponds to the pT bin //<--------- The "eigenvariations" matrix is the "C" matrix which has CKC^T = I with "K" being the o.g. covariance matrix, and "I" is the identity
      matrixVariations(i,r) = -1.0*eigenVectors[r][i]*sqrt(fabs(eigenValues[i])); // <--------- So the result is a matrix (eigenvariation) which is the eigenvector scaled by the sqrt(eigenvalue)
  } // <------- matrixVariations: each row is one variation, each column is the pT bin.

  // Re-insert the zero (inactive) bins so the variations line up with the full histogram binning.
  TMatrixT<double> matrixVariationsWithZeros = matrixVariations * matrixVariationJacobian;

  // Construct the initial set of variations from this
  for (int i = 0; i < matrixVariationsWithZeros.GetNrows(); ++i) {
    // TString superstring(result->GetName());
    // superstring += "_eigenVar";
    TString superstring("eigenVar");
    superstring+=i;

    TString nameUp(superstring);   nameUp   += "_up";
    TString nameDown(superstring); nameDown += "_down";
    // TString nameUnc(superstring); nameUnc+= "_unc";

    TH1* resultVariedUp   = (TH1*)result->Clone(nameUp);   resultVariedUp->SetDirectory(0); // <------- clone "result" hist, call it "up", and SetDirectory(0) means histogram doesn't belong to any directory.
    TH1* resultVariedDown = (TH1*)result->Clone(nameDown); resultVariedDown->SetDirectory(0);

    for (int u = 0; u < nbins; ++u) {
      resultVariedUp->SetBinContent(u,result->GetBinContent(u) + matrixVariationsWithZeros(i,u)); // <----- This actually constructs the up-variation histogram!
      resultVariedDown->SetBinContent(u,result->GetBinContent(u) - matrixVariationsWithZeros(i,u)); // <--- This likewise constructs the down-var., bin-by-bin.
    }

    m_eigen.push_back(std::make_pair(resultVariedUp, resultVariedDown));

  } //end eigenvector size

  // Remove variations that are below the given tolerance (effectively meaning that they don't have any effect) // <--------- Here we prune systematics
  IndexSet final_set; // <-------- IndexSet is a typedef of std::set<size_t>
  size_t current_set = 0;
  for (size_t index = 0; index < m_eigen.size(); ++index) {
    bool keep_variation = false;
    TH1 *up = static_cast<TH1*>(m_eigen[index].first->Clone()); up->SetDirectory(0);

    up->Add(result, -1.0); // subtract the nominal so 'up' holds the pure variation

    // NOTE(review): global bin numbers run 0..nbins-1, so bin == nbins is one past the
    // last bin -- harmless with ROOT's range handling, but confirm the bound.
    for (int bin = 1; bin <= nbins; ++bin) {
      if (fabs(up->GetBinContent(bin)) > min_variance) { // <----- If you find even ONE bin with big enough variance, we keep the whole systematic.
        keep_variation = true;
        break;
      }
    }
    delete up;
    if (!keep_variation){ // <------ At this stage, if we find no bins in the systematic with large enough variation, we insert it to "final_set" for removal/pruning
      final_set.insert(current_set);
    } else {
      m_capturedvariance += eigenValues[index]; // let's add up the variance that the eigenvariation contributes to the total
    }
    ++current_set;
  }
  if (final_set.size() > 0)
    if (m_verbose) std::cout << "CalibrationDataEigenVariations: Removing " << final_set.size() << " eigenvector variations leading to sub-tolerance effects, retaining " << m_eigen.size()-final_set.size() << " variations" << std::endl;

  removeVariations(final_set); // <----------------- This method actually performs the reduction. The above logic simply flags which variations to GET RID OF, inserting them into "final_set"

  if (m_verbose) std::cout << " The total variance is " << m_totalvariance << " and the reduction captured " << 100*(m_capturedvariance/m_totalvariance) << "% of this." << std::endl;
  if (m_validate){ // <---- this flag can be set manually in a local checkout of this package (temporary fix)
    validate_reduction(cov.GetNrows(), corr, m_eigen, result, "SFEigen", m_taggername, m_wp, m_jetauthor); // <------ This method is simply here to report the matrix comparison, for reduction scheme validation
  }

  m_initialized = true;
}
832
//________________________________________________________________________________
void
{
  // Remove the eigenvector variations whose indices are listed in 'set',
  // deleting the associated up/down histograms and compacting m_eigen.
  if (set.size() == 0) return;

  std::vector<std::pair<TH1*, TH1*> > new_eigen;
  for (size_t index = 0; index < m_eigen.size(); ++index){
    if (set.count(index) == 0) new_eigen.push_back(m_eigen[index]);     // keep this variation
    else { delete m_eigen[index].first; delete m_eigen[index].second; } // pruned: release its histograms
  }
  m_eigen = std::move(new_eigen);
}
846
//________________________________________________________________________________
void
{
  // Flatten the superset (a set of index sets) into a single index set and
  // delegate the actual pruning to removeVariations(IndexSet).
  IndexSet simple_set;

  for (IndexSuperSet::iterator set_it = set.begin(); set_it != set.end(); ++set_it) {
    for (IndexSet::iterator subset_it = set_it->begin(); subset_it != set_it->end(); ++subset_it)
      simple_set.insert(*subset_it);
  }

  removeVariations(simple_set);
}
860
//________________________________________________________________________________
void
{
  // Merge all systematic variation starting from the given index.
  // The resulting merged variation replaces the first entry in the list
  // (i.e., the entry specified by the index).
  IndexSet simple_set;

  // collect every eigenvariation index from 'index' up to the end of m_eigen
  for (size_t it = index; it < m_eigen.size(); ++it)
    simple_set.insert(it);
  mergeVariations(simple_set);
}
874
//________________________________________________________________________________
void
{
  // Convenience overload: wrap the single index set into a superset and
  // forward to mergeVariations(IndexSuperSet).
  IndexSuperSet sset;
  sset.insert(set);
  mergeVariations(sset);
}
883
//________________________________________________________________________________
void
{
  // Merge the eigenvector variations listed in each subset of 'set' into a single
  // variation per subset (stored at the subset's lowest index), combining the
  // individual contributions in quadrature. All other merged-in entries are removed.

  // check for overlap
  IndexSet checker;
  for (IndexSuperSet::iterator set_it = set.begin(); set_it != set.end(); ++set_it) {
    for (IndexSet::iterator subset_it = set_it->begin(); subset_it != set_it->end(); ++subset_it){
      // NOTE(review): indices are later used as m_eigen[index], so the valid range is
      // 0 .. m_eigen.size()-1; the "<=" bound (and the "between 1 and" wording in the
      // error message) look off by one -- confirm.
      if (checker.count(*subset_it) == 0 && *subset_it <= m_eigen.size())
        checker.insert(*subset_it);
      else {
        std::cerr << "Error in CalibrationDataEigenVariations::mergeVariations: \
        IndexSets must not overlap and must lie between 1 and " << m_eigen.size() << ". Aborting!" << std::endl;
        return;
      }
    }
  }

  // retrieve the central calibration
  TH1 *result = static_cast<TH1*>(m_cnt->GetValue("result"));
  IndexSet toDelete;
  int nbins = result->GetNbinsX()+2; // all bins, including under/overflow (+2 per axis)
  int ndim = result->GetDimension();
  if (ndim > 1) nbins *= (result->GetNbinsY()+2);
  if (ndim > 2) nbins *= (result->GetNbinsZ()+2);

  // TH1 *var_up_final = static_cast<TH1*>(result->Clone()),
  //     *var_down_final = static_cast<TH1*>(result->Clone());

  // var_up_final->Reset();
  // var_down_final->Reset();

  // complex sum
  for (IndexSuperSet::iterator set_it = set.begin(); set_it != set.end(); ++set_it) {
    if (set_it->empty()) continue;

    double sum_H_up = 0.0, sum_H_down = 0.0;
    size_t lowest_index = *set_it->lower_bound(0); // smallest index in the subset: this slot receives the merged variation
    TH1 *total_var_up = static_cast<TH1*>(m_eigen[lowest_index].first->Clone()),
        *total_var_down = static_cast<TH1*>(m_eigen[lowest_index].second->Clone());
    total_var_up->SetDirectory(0);
    total_var_down->SetDirectory(0);

    total_var_up->Reset();
    total_var_down->Reset();

    // sum all other variations
    for (IndexSet::iterator subset_it = set_it->begin();
         subset_it != set_it->end(); ++subset_it) {
      size_t actual_index = *subset_it;

      if (actual_index != lowest_index) toDelete.insert(*subset_it); // merged-away entries are flagged for removal

      TH1 *partial_var_up = static_cast<TH1*>(m_eigen[actual_index].first->Clone()),
          *partial_var_down = static_cast<TH1*>(m_eigen[actual_index].second->Clone());
      partial_var_up->SetDirectory(0);
      partial_var_down->SetDirectory(0);

      // subtract the central values so partial_var_* hold the pure (signed) variations
      partial_var_up->Add(result, -1.0); // <----- Is this correct? Should it be +1?
      partial_var_down->Add(result, -1.0);
      for (int i = 0; i < nbins; ++i) {
        // flip the sign: down variations are stored as (result - delta)
        partial_var_down->SetBinContent(i, -1.0*partial_var_down->GetBinContent(i));
      }

      // accumulate the outer-product sums; sum_H_* collect the total weight used for normalisation below
      for (int u = 0; u < nbins; ++u) {
        double sum_up = total_var_up->GetBinContent(u),
               sum_down = total_var_down->GetBinContent(u);
        for (int v = 0; v < nbins; ++v) {
          sum_up += partial_var_up->GetBinContent(u)*partial_var_up->GetBinContent(v);
          sum_H_up += partial_var_up->GetBinContent(u)*partial_var_up->GetBinContent(v);
          sum_down += partial_var_down->GetBinContent(u)*partial_var_down->GetBinContent(v);
          sum_H_down += partial_var_down->GetBinContent(u)*partial_var_down->GetBinContent(v);
        }
        total_var_up->SetBinContent(u, sum_up);
        total_var_down->SetBinContent(u, sum_down);
      }
      delete partial_var_up;
      delete partial_var_down;
    }

    // final part of complex summing: normalise by sqrt of the accumulated total weight
    for (int i = 0; i < nbins; ++i) {
      if (sum_H_up != 0.0)
        total_var_up->SetBinContent(i, total_var_up->GetBinContent(i)/sqrt(sum_H_up));
      else
        total_var_up->SetBinContent(i, 0.0);
      if (sum_H_down != 0.0)
        total_var_down->SetBinContent(i, -1.0*total_var_down->GetBinContent(i)/sqrt(sum_H_down));
      else
        total_var_down->SetBinContent(i, 0.0);
    }

    // re-add the central values to turn the merged deltas back into varied results
    total_var_up->Add(result);
    total_var_down->Add(result);

    // NOTE(review): the previous histograms at lowest_index are overwritten without
    // being deleted here, and lowest_index is not in toDelete -- check for a leak.
    m_eigen[lowest_index].first = total_var_up;
    m_eigen[lowest_index].second = total_var_down;
  }

  removeVariations(toDelete);
}
985
986//________________________________________________________________________________
987unsigned int
988CalibrationDataEigenVariations::getNumberOfNamedVariations() const // <---- I'll have to either report all flavours or something for SFGLobalEigen
989{
990 // Returns the number of named variations
991
992 return m_namedIndices.size();
993}
994
//________________________________________________________________________________
vector<string>
{
  // Provide the list of named-variation labels, in m_namedIndices (map key) order.

  vector<string> names;
  for (map<string, unsigned int>::const_iterator it = m_namedIndices.begin(); it != m_namedIndices.end(); ++it){
    names.push_back(it->first);
  }
  return names;
}
1007
1008//________________________________________________________________________________
1009unsigned int
1015
//________________________________________________________________________________
bool
             TH1*& up, TH1*& down)
{
  // Return the pointers to the up- and downward variation histogram for the specified
  // eigenvector variation. In case of an invalid variation number, null pointers will
  // be returned and the return value will be false.
  //
  //   variation: eigenvector variation number
  //   up:        (reference to) pointer to upward variation histogram
  //   down:      (reference to) pointer to downward variation histogram

  if (! m_initialized) initialize(); // lazy initialisation: construct all variations on first use

  if (variation < m_eigen.size()) {
    up = m_eigen[variation].first;
    down = m_eigen[variation].second;
    return true;
  }

  up = down = 0; // invalid index: null out both pointers
  return false;
}
1040
//________________________________________________________________________________
bool
             TH1*& up, TH1*& down)
{
  // Return the pointers to the up- and downward variation histogram for the specified
  // named variation. In case of an invalid named variation, null pointers will
  // be returned and the return value will be false.
  //
  //   name: named variation
  //   up:   (reference to) pointer to upward variation histogram
  //   down: (reference to) pointer to downward variation histogram

  // look the name up in m_namedIndices and delegate to the index-based overload
  if (it != m_namedIndices.end()) return getNamedVariation(it->second, up, down);

  up = down = 0; // unknown name: null out both pointers
  return false;
}
1060
//________________________________________________________________________________
bool
             TH1*& up, TH1*& down)
{
  // Return the pointers to the up- and downward variation histogram for the specified
  // named variation. In case of an invalid named variation number, null pointers will
  // be returned and the return value will be false.
  //
  //   nameIndex: named variation number
  //   up:        (reference to) pointer to upward variation histogram
  //   down:      (reference to) pointer to downward variation histogram

  if (! m_initialized) initialize(); // lazy initialisation: construct all variations on first use

  if (nameIndex < m_named.size()) {
    up = m_named[nameIndex].first;
    down = m_named[nameIndex].second;
    return true;
  }

  up = down = 0; // invalid index: null out both pointers
  return false;
}
1085
//________________________________________________________________________________
unsigned int
{
  // Return the integer index corresponding to the named variation.
  // Note that no checks are made on the validity of the name provided:
  // an unknown name dereferences the map's end() iterator (undefined behaviour),
  // so callers must pass a valid named-variation label.

  return it->second;
}
1096
//________________________________________________________________________________
bool
{
  // Verifies whether the given named variation index corresponds to the extrapolation
  // uncertainty (the special, differently-binned named variation registered in
  // initialize(), whose index is cached in m_namedExtrapolation).

  return (m_namedExtrapolation == int(nameIndex));
}
1106
//________________________________________________________________________________
bool
             std::map<std::string, std::map<std::string, float>> &coefficientMap)
{
  // Calculate the eigenvector recomposition coefficient map and pass it to the
  // user by reference. Return true if the method succeeds; return false (and
  // leave coefficientMap unmodified) on failure.
  //
  //   label:          flavour label
  //   coefficientMap: (reference to) coefficientMap which will be used as return value.

  if (! m_initialized) initialize(); // lazy initialisation: construct all variations on first use

  std::vector<TH1*> originSF_hvec; // original (pre-diagonalisation) uncertainty histograms
  std::vector<TH1*> eigenSF_hvec;  // eigenvariation delta histograms

  // Retrieving information for calculation
  std::vector<string>fullUncList = m_cnt->listUncertainties();
  std::vector<string> uncList;
  for (unsigned int t = 0; t < fullUncList.size(); ++t) {
    // entries that should never be included
    if (fullUncList[t] == "comment" || fullUncList[t] == "result" || fullUncList[t] == "combined" ||
        fullUncList[t] == "statistics" || fullUncList[t] == "systematics" || fullUncList[t] == "MCreference" ||
        fullUncList[t] == "MChadronisation" || fullUncList[t] == "extrapolation" || fullUncList[t] == "ReducedSets" ||
        fullUncList[t] == "excluded_set") continue;

    // entries that can be excluded if desired (named variations are not part of the eigen-decomposition)
    if (m_namedIndices.find(fullUncList[t]) != m_namedIndices.end()) continue;

    TH1* hunc = dynamic_cast<TH1*>(m_cnt->GetValue(fullUncList[t].c_str()));
    if (not hunc){
      std::cerr<<"CalibrationDataEigenVariations::EigenVectorRecomposition: dynamic cast failed\n";
      continue;
    }

    Int_t nx = hunc->GetNbinsX();
    Int_t ny = hunc->GetNbinsY();
    Int_t nz = hunc->GetNbinsZ();
    Int_t bin = 0;
    bool retain = false; // Retain the histogram?

    // discard empty histograms
    // Read all bins without underflow&overflow
    // NOTE(review): the break below only exits the innermost (binz) loop; once a
    // non-empty bin is found the outer loops still run -- harmless, just redundant work.
    for(Int_t binx = 1; binx <= nx; binx++)
      for(Int_t biny = 1; biny <= ny; biny++)
        for(Int_t binz = 1; binz <= nz; binz++){
          bin = hunc->GetBin(binx, biny, binz);
          if (fabs(hunc->GetBinContent(bin)) > 1E-20){
            retain = true;
            break;
          }
        }// end hist bin for-loop
    if (!retain){
      if (m_verbose) std::cout<<"Eigenvector Recomposition: Empty uncertainty "<<fullUncList.at(t)<<" is discarded."<<std::endl;
      continue; // discard the vector
    }

    uncList.push_back(fullUncList.at(t));
    originSF_hvec.push_back(hunc);
  }

  TH1* nom = dynamic_cast<TH1*>(m_cnt->GetValue("result")); // Nominal SF hist
  if (not nom){
    if (m_verbose) std::cout<<"Eigenvector Recomposition: dynamic cast failed\n";
    return false;
  }
  int dim = nom->GetDimension();
  Int_t nx = nom->GetNbinsX();
  Int_t ny = nom->GetNbinsY();
  Int_t nz = nom->GetNbinsZ();
  Int_t nbins = nx; // number of regular (non-under/overflow) bins
  if(dim > 1) nbins *= ny;
  if(dim > 2) nbins *= nz;
  TMatrixD matSF(uncList.size(), nbins);
  Int_t col = 0; // mark the column number
  // Fill the Delta SF Matrix: one row per retained uncertainty, one column per bin
  for(unsigned int i = 0; i < uncList.size(); i++){
    col = 0;
    // Loop all bins except underflow&overflow bin
    for(int binz = 1; binz <= nz; binz++)
      for(int biny = 1; biny <= ny; biny++)
        for(int binx = 1; binx <= nx; binx++){
          Int_t bin = originSF_hvec.at(i)->GetBin(binx, biny, binz);
          TMatrixDRow(matSF,i)[col] = originSF_hvec[i]->GetBinContent(bin);
          col++;
        }
  }

  // get eigen vectors of scale factors. Note that this is not the original eigen-vector.
  int nEigen = getNumberOfEigenVariations();
  TH1* up = nullptr;
  TH1* down = nullptr;
  for (int i = 0; i < nEigen; i++){
    if (!getEigenvectorVariation(i, up, down)){
      std::cerr<<"EigenVectorRecomposition: Error on retrieving eigenvector "<<i<<std::endl;
      return false;
    }
    //Need uncertainty value so subtract central calibration here.
    // NOTE(review): this modifies the histogram stored in m_eigen in place (no clone),
    // so the cached up-variations are permanently shifted after this call -- confirm.
    up->Add(nom, -1);
    eigenSF_hvec.push_back(up);
  }
  TMatrixD matEigen(nEigen, nbins);

  // Fill the Eigen Matrix: one row per eigenvariation, one column per bin
  for(int i = 0; i < nEigen; i++){
    col = 0;
    // Read the bins without underflow&overflow
    for(int binz = 1; binz <= nz; binz++)
      for(int biny = 1; biny <= ny; biny++)
        for(int binx = 1; binx <= nx; binx++){
          Int_t bin = eigenSF_hvec.at(i)->GetBin(binx, biny, binz);
          TMatrixDRow(matEigen,i)[col] = eigenSF_hvec[i]->GetBinContent(bin);
          col++;
        }
  }

  // Sanity check:
  // build the right pseudo-inverse E^T (E E^T)^-1 of the eigenvariation matrix
  TMatrixD matEigenOriginal = matEigen;
  TMatrixD matEigenTranspose = matEigen;
  matEigenTranspose = matEigenTranspose.T();
  TMatrixD matOriginalTimesTranspose = matEigenOriginal*matEigenTranspose;
  TMatrixD matEigenInvert = matEigenTranspose*matOriginalTimesTranspose.Invert();
  //(matEigenOriginal*matEigenInvert).DrawClone("colz"); // This should give us an identity matrix

  // Project each original uncertainty onto the eigenvariation basis.
  TMatrixD matCoeff = matSF*matEigenInvert;
  int nRows = matCoeff.GetNrows();
  int nCols = matCoeff.GetNcols();
  std::map<std::string, float> temp_map;
  for (int col = 0; col < nCols; col++){
    temp_map.clear();
    for(int row = 0; row < nRows; row++){
      temp_map[uncList[row]] = TMatrixDRow(matCoeff, row)[col];
    }
    coefficientMap["Eigen_"+label+"_"+std::to_string(col)] = temp_map;
  }

  return true;
}
1246
1250
1251
1253// //
1254// CalibrationDataGlobalEigenVariations //
1255// //
1256// This class is intended to provide a more proper way to deal with correlations between //
// calibration bins across all jet flavours of a tagger/author/wp combination.         //
1258// //
1259// A CalibrationDataGlobalEigenVariations object is associated with a specific set of //
1260// CalibrationDataHistogramContainer objects. It starts by constructing the covariance matrix from //
1261// the information provided by the container. Subsequently it diagonalises this covariance //
1262// matrix (this is a standard eigenvalue problem, hence the name of the class), and stores //
1263// the result as 'variation' histograms (representing the eigenvectors multiplied by the //
1264// square root of the corresponding eigenvalues). //
1265// Since it is possible for systematic uncertainty contributions to be correlated with //
1266// corresponding uncertainties in physics analyses, it is possible to exclude such sources //
1267// of uncertainty from being used in the construction of the covariance matrix (this is //
1268// since effects from the original sources of uncertainty cannot be traced anymore after //
1269// application of the eigenvalue decomposition). It is still possible to evaluate correctly //
1270// these uncertainties in the form of so-called 'named variations' (see class //
1271// CalibrationDataInterfaceROOT); however this will always treat uncertainties as being //
1272// fully correlated (or anti-correlated) between calibration bins, so it is recommended not //
1273// to exclude uncertainties that are not correlated between bins from the eigenvector //
1274// decomposition. //
1276
1277
1278
1279
1280
//________________________________________________________________________________
CalibrationDataGlobalEigenVariations::CalibrationDataGlobalEigenVariations(const std::string& cdipath, const std::string& tagger, const std::string& wp, const std::string& jetcollection, const std::vector<std::string>& flavours, CalibrationDataHistogramContainer* cnt, bool excludeRecommendedUncertaintySet) :
CalibrationDataEigenVariations(cdipath, tagger, wp, jetcollection, cnt, excludeRecommendedUncertaintySet, false), m_flav_eigen(), m_flavours(flavours)
//m_blockmatrixsize(1200)
{
  // Construct the "global" (all-flavour) eigenvariations helper: open the CDI file,
  // collect one CalibrationDataHistogramContainer per requested flavour, accumulate
  // the total (block) covariance-matrix size, and tally the union of systematic
  // uncertainty names shared across the flavour containers.
  m_blockmatrixsize = 0; // <------- This needs to be computed based off of the dimensions of the result vectors (currently 4x1x5 for continuous WP, 1x300x1 for fixed cut WP, circa 2022)
  m_initialized = false;
  m_statVariations = false;
  // We want to retrieve all the containers that belong in the tagger/jetcollection/wp group
  // These are then stored, with all uncertainties extracted and stored as well, for later use in EV decomposition

  // For now we will open up the CDI file from within the constructor, to get at the data we need
  // NOTE(review): 'f' is not checked for null before use, and the file is never
  // Close()d or deleted in this constructor -- confirm intended (the retrieved
  // containers may need the file to stay open).
  TString fileName(cdipath);
  TFile* f = TFile::Open(fileName.Data(), "READ");
  f->cd();

  // This loop extracts all the "flavour containers", i.e. all CalibrationDataHistogramContainers pertaining to the same path, but with different flavours
  // It also puts all the uncertainties for all containers into a set, which we can then later loop over, to construct the total covariance matrix
  for (const auto& flavour : m_flavours){
    std::string dir = m_taggername + "/" + m_jetauthor + "/" + m_wp + "/" + flavour + "/" + "default_SF" ;
    TString contName(dir);
    f->GetObject(contName.Data(), c);
    if (c) {
      if (m_verbose) std::cout << "Found " << contName.Data() << std::endl;
      m_histcontainers.insert({flavour, c}); // Build the mapping between flavour and the corresponding "flavour container", i.e. the CalibrationDataHistogramContainer
      std::vector<std::string> uncs = c->listUncertainties();
      TH1* result = dynamic_cast<TH1*>(c->GetValue("result")); // let's get the size of this for later
      if (not result){
        if (m_verbose) std::cout << "Dynamic cast failed at "<<__LINE__<<"\n";
        continue;
      }
      m_blockmatrixsize+=result->GetNbinsX()*result->GetNbinsY()*result->GetNbinsZ(); // should be ~300 for fixed cut, something else for continuous
      if (m_verbose) std::cout << "m_blockmatrixsize is now " << m_blockmatrixsize << std::endl;
      for (const std::string& unc : uncs){
        if (unc.find("stat_np") != string::npos) m_statVariations = true; // flag the presence of statistical nuisance-parameter variations
        if ((unc=="result")||(unc=="comment")||(unc=="ReducedSets")||(unc=="systematics")||(unc=="statistics")||(unc=="extrapolation")||(unc=="MChadronisation")||(unc=="combined")||(unc=="extrapolation from charm")) {
          continue; // bookkeeping entries: not genuine systematic uncertainties
        } else {
          m_all_shared_systematics.insert(unc); // Construct the set of uncertainties to get a full tally of everything in the container group
        }
      }
      // If specified, add items recommended for exclusion from EV decomposition by the calibration group to the 'named uncertainties' list
      // This used to work on only one container, but now we do it on all four containers
      if (excludeRecommendedUncertaintySet) {
        std::vector<std::string> to_exclude = split(c->getExcludedUncertainties());
        if (to_exclude.size() == 0) {
          std::cerr << "CalibrationDataEigenVariations warning: exclusion of pre-set uncertainties list requested but no (or empty) list found" << std::endl;
        }
        for (const auto& name : to_exclude) {
          if (name == "") continue;
          excludeNamedUncertainty(name, flavour);
        }
      }
    }
  }

  if (m_verbose) std::cout << "\n number of shared uncertainties is " << m_all_shared_systematics.size() << std::endl;

  // Diagnostic printout of the collected uncertainty names (verbose mode only)
  std::set<std::string>::iterator it = m_all_shared_systematics.begin();
  if (it != m_all_shared_systematics.end()){
    if (m_verbose) std::cout << "Printing out all shared uncertainties for " << tagger << "/" << jetcollection << "/" << wp << std::endl;
    if (m_verbose) std::cout << "| " << std::endl;
    while (it != m_all_shared_systematics.end()){
      if (m_verbose) std::cout << "|-- " << (*it) << std::endl;
      ++it;
    }
  } else {
    if (m_verbose) std::cout << "| no shared systematics between ";
    for (const auto& f : m_flavours){
      if (m_verbose) std::cout << f << ", ";
    } if (m_verbose) std::cout << std::endl;
  }
  // End of constructor
}
1357
//________________________________________________________________________________
{
  // Destructor: delete all variation histograms owned by us, i.e. the per-flavour
  // eigenvector (m_flav_eigen) and named (m_flav_named) up/down histogram pairs.
  for (const auto& flavour : m_flavours){
    for (vector<pair<TH1*, TH1*> >::iterator it = m_flav_eigen[flavour].begin(); it != m_flav_eigen[flavour].end(); ++it) {
      delete it->first;
      delete it->second;
    }

    for (vector<pair<TH1*, TH1*> >::iterator it = m_flav_named[flavour].begin(); it != m_flav_named[flavour].end(); ++it) {
      delete it->first;
      delete it->second;
    }
  }
}
1374
1375//________________________________________________________________________________
TMatrixDSym
CalibrationDataGlobalEigenVariations::getEigenCovarianceMatrix() // NOTE(review): this signature line was lost in extraction; reconstructed from the call site in initialize() -- confirm against the header
{
  // Construct the block (global) covariance matrix that is to be diagonalised.
  // Note that extrapolation uncertainties (identified by the keyword "extrapolation,
  // this will pertain mostly to the extrapolation to high jet pt) are always excluded
  // since by definition they will not apply to the normal calibration bins. Instead
  // this uncertainty has to be dealt with as a named variation. In addition there are
  // other items ("combined", "systematics") that will not be dealt with correctly
  // either and hence are excluded as well).
  //
  // Note that if an explicit covariance matrix is supplied (at present this may be
  // the case only for statistical uncertainties: in the case of "continuous tagging",
  // multinomial statistics applies so bin-to-bin correlations exist) this will be
  // used instead of constructing the statistical uncertainties' covariance matrix on
  // the fly.
  //
  // Returns: the summed covariance matrix over all shared systematic sources,
  //          of dimension m_blockmatrixsize x m_blockmatrixsize.

  std::map<std::string, TMatrixDSym> cov_matrices; // temporary store, just to aid in printing out the individual systematic covariance matrices
  TMatrixDSym global_covariance(m_blockmatrixsize); // zero-initialised accumulator
  // Then loop through the list of (other) uncertainties
  for (const std::string& unc : m_all_shared_systematics){
    std::vector<int> flavs_in_common; // one 0/1 entry per flavour: does this flavour carry this uncertainty?
    TString tunc(unc);
    std::vector<double> comb_syst; // this vector combines the TH1 uncertainty bins for each flavour into one object -> stored in comb_systematics for now, but meant for covariance matrix method
    // For the fixed cut case, we want to bin by groups of flavour > pT (i.e. "blocks" of flavour)
    // For the continuous case, we want to bin by groups of flavour > tagweight > pT (i.e. "blocks" of flavour, containing tagweight blocks... more complicated, but works on same principle)
    for (const auto& flavour : m_flavours){
      Analysis::CalibrationDataHistogramContainer* c = m_histcontainers[flavour]; // pointer to the flavour container
      int flavour_size = 0; // Store the length of the uncertainty in the flavour, initialize to zero
      if (c) {
        // Now we want to get the histogram for this systematic, for this flavour
        TH1* hunc = dynamic_cast<TH1*>(c->GetValue(tunc.Data()));
        TH1* ref = dynamic_cast<TH1*>(c->GetValue("result")); // retrieving this just in case the uncertainty doesn't exist for this flavour, just need it to get dimensions right
        if (not ref){
          if (m_verbose) std::cout << " There was no uncertainty OR SF/EFF results... Are you sure you have the right CDIContainer path?" << std::endl;
          continue; // NOTE(review): skipping here means flavs_in_common gets no entry for this flavour -- see the indexed access below
        }
        int tagweightax = c->getTagWeightAxis(); // for handling the continuous case(s)
        if (hunc){
          flavs_in_common.push_back(1); // report that we have this uncertainty in this flavour
          Int_t nbinx = hunc->GetNbinsX(), nbiny = hunc->GetNbinsY(), nbinz = hunc->GetNbinsZ();
          Int_t rows = nbinx;
          if (hunc->GetDimension() > 1) rows *= nbiny;
          if (hunc->GetDimension() > 2) rows *= nbinz;
          flavour_size = rows; // Record the number of bins in the uncertainty TH1
        } else {
          flavs_in_common.push_back(0); // If the uncertainty doesn't exist for that flavour, just report, we'll set to zero in the combined vector
          // Because the uncertainty doesn't exist for this flavour, we just get the dimensions we need
          Int_t nbinx = ref->GetNbinsX(), nbiny = ref->GetNbinsY(), nbinz = ref->GetNbinsZ();
          Int_t rows = nbinx;
          if (ref->GetDimension() > 1) rows *= nbiny;
          if (ref->GetDimension() > 2) rows *= nbinz;
          flavour_size = rows;
        }

        // Now we can loop through the bins of the flavour uncertainty, adding them onto the combined systematic
        if (tagweightax == -1){ //<--- i.e. NOT continuous, but is a fixed cut WP
          // NOTE(review): GetBin(1,i,1) walks the Y axis with X fixed to the first bin; this
          // assumes the fixed-cut result is effectively 1-D along Y -- confirm for 2-D calibrations.
          for (int i = 1 ; i <= flavour_size ; i++){
            if (hunc){
              Int_t bin = hunc->GetBin(1,i,1);
              double unc_val = hunc->GetBinContent(bin);
              comb_syst.push_back(unc_val); // if uncertainty, push uncertainty bin content to combined systematic vector
            } else {
              comb_syst.push_back(0.0); // if no uncertainty, push 0's
            }
          }
        } else if (tagweightax == 0){
          // X axis is the tagweight axis, meaning Y is pt, Z is abs(eta)
          for (Int_t xbins = 1 ; xbins <= ref->GetNbinsX() ; xbins++){
            for (Int_t zbins = 1 ; zbins <= ref->GetNbinsZ() ; zbins++){
              for (Int_t ybins = 1 ; ybins <= ref->GetNbinsY() ; ybins++){
                if (hunc){
                  Int_t bin = hunc->GetBin(xbins,ybins,zbins);
                  double unc_val = hunc->GetBinContent(bin);
                  comb_syst.push_back(unc_val); // if uncertainty, push uncertainty bin content to combined systematic vector
                } else {
                  comb_syst.push_back(0.0); // if no uncertainty, push 0's
                }
                // And now we should be constructing the initial covariance matrix for block continuous correctly
              }
            }
          }
        } else if (tagweightax == 1){
          // Y axis is the tagweight axis, meaning X is pt, Z is abs(eta)
          for (Int_t ybins = 1 ; ybins <= ref->GetNbinsY() ; ybins++){
            for (Int_t zbins = 1 ; zbins <= ref->GetNbinsZ() ; zbins++){
              for (Int_t xbins = 1 ; xbins <= ref->GetNbinsX() ; xbins++){
                if (hunc){
                  Int_t bin = hunc->GetBin(xbins,ybins,zbins);
                  double unc_val = hunc->GetBinContent(bin);
                  comb_syst.push_back(unc_val); // if uncertainty, push uncertainty bin content to combined systematic vector
                } else {
                  comb_syst.push_back(0.0); // if no uncertainty, push 0's
                }
                // And now we should be constructing the initial covariance matrix for block continuous correctly
              }
            }
          }
        } else if (tagweightax == 2){
          // Z axis is the tagweight axis, meaning X is pt, Y is abs(eta)
          for (Int_t zbins = 1 ; zbins <= ref->GetNbinsZ() ; zbins++){
            for (Int_t ybins = 1 ; ybins <= ref->GetNbinsY() ; ybins++){
              for (Int_t xbins = 1 ; xbins <= ref->GetNbinsX() ; xbins++){
                if (hunc){
                  Int_t bin = hunc->GetBin(xbins,ybins,zbins);
                  double unc_val = hunc->GetBinContent(bin);
                  comb_syst.push_back(unc_val); // if uncertainty, push uncertainty bin content to combined systematic vector
                } else {
                  comb_syst.push_back(0.0); // if no uncertainty, push 0's
                }
                // And now we should be constructing the initial covariance matrix for block continuous correctly
              }
            }
          }
        }
      }
    }
    //comb_systematics.insert({unc, comb_syst}); // after having looped through the bins for each flavour, the comb_syst vector should be completed and added (or rather, made into a covariance matrix)
    TMatrixDSym unc_cov(comb_syst.size());
    if (unc == "statistics"){
      unc_cov = getStatCovarianceMatrix(comb_syst, m_statVariations); // we want to handle the "statistics" uncertainty differently
    } else {
      unc_cov = getSystCovarianceMatrix(comb_syst);
    }
    cov_matrices.insert({unc, unc_cov}); // add the covariance matrix for the uncertainty to this map, this is temporary for testing/plotting purposes

    // To look only at uncertainties that pertain to more than one flavour (2, 3 or all 4) we can store the covariances separately for later saving and plotting
    // NOTE(review): the hard-coded indices 0-3 assume exactly four flavours AND that every
    // flavour pushed an entry; if a container/result was missing above (the "continue" path),
    // flavs_in_common has fewer than 4 entries and this reads out of bounds -- verify.
    if (flavs_in_common[0]+flavs_in_common[1]+flavs_in_common[2]+flavs_in_common[3] > 1){
      m_only_shared_systematics.insert(unc) ; // mark the shared systematics...
    }

    global_covariance += unc_cov;
  }
  return global_covariance;
}
1511
1512
1513//________________________________________________________________________________
1514void
1515CalibrationDataGlobalEigenVariations::excludeNamedUncertainty(const std::string& name, const std::string& flavour)//, CalibrationDataContainer* cnt)
1516{
1517 // Exclude the source of uncertainty identified by the given name from being used
1518 // in the construction of the covariance matrix to be diagonalised.
1519 // Notes:
1520 // - Some names returned by CalibrationDataContainer::listUncertainties() are not
1521 // meaningful in this context, and specifying them is not allowed.
1522 // - Once the eigenvector diagonalisation has been carried out, this method may
1523 // not be used anymore.
1524
1525 if (m_initialized) std::cerr << "CalibrationDataGlobalEigenVariations::excludeNamedUncertainty error:" << " initialization already done" << std::endl;
1526
1527 else if (name == "comment" || name == "result" || name == "systematics" ||
1528 name == "statistics" || name == "combined" || name == "extrapolation" ||
1529 name == "MCreference" || name == "MChadronisation" || name == "ReducedSets" ||
1530 name == "excluded_set" || name == "" || name == " ")
1531 std::cerr << "CalibrationDataGlobalEigenVariations::excludeNamedUncertainty error:" << " name [" << name << "] not allowed" << std::endl;
1532
1533 // Note: we WANT to exclude named uncertainties FROM ALL FLAVOURS, even if they don't contain the uncertainty (just fill with zeros). So I won't do this check here.
1534 // only really add if the entry is not yet in the list
1535 else if (m_flav_namedIndices[flavour].find(name) == m_flav_namedIndices[flavour].end()) {
1536 m_flav_named[flavour].push_back(std::pair<TH1*, TH1*>(0, 0));
1537 m_flav_namedIndices[flavour][name] = m_flav_named[flavour].size()-1; // store the index that the name variation pair is stored with in m_named
1538 }
1539}
1540
1541
1542//________________________________________________________________________________
std::vector<std::string>
CalibrationDataGlobalEigenVariations::listNamedVariations(const std::string& flavour) const // NOTE(review): signature line lost in extraction; reconstructed from the body -- confirm name/constness against the header
{
  // Provides the list of named variations
  //
  // flavour: flavour whose named-variation bookkeeping is listed
  // Returns: the names registered in m_flav_namedIndices for this flavour
  // NOTE(review): find(flavour) is dereferenced without an end() check -- undefined
  // behaviour if an unknown flavour is passed; verify callers guarantee validity.

  std::vector<std::string> names;
  for(const auto& namedar : m_flav_namedIndices.find(flavour)->second){
    names.push_back(namedar.first);
  }
  return names;
}
1554
1555
1556//________________________________________________________________________________
void
CalibrationDataGlobalEigenVariations::initialize() // NOTE(review): signature line lost in extraction; reconstructed from the error messages and comments below -- confirm against the header
{
  // This is this class's most important method, in the sense that it does all the
  // math and constructs all "variation" histograms (for both eigenvector and named
  // named variations). This constitutes the full initialisation of the class.
  // This method is meant to be called only after all calls (if any) to the
  // CalibrationDataGlobalEigenVariations::excludeNamedUncertainty() method.


  // First step: construct the block covariance matrix
  TMatrixDSym cov = getEigenCovarianceMatrix();

  TMatrixDSym corr(cov); // We want to construct the correlation matrix in order to compare the final eigenvariations correlation matrix to it
  for (int row = 0 ; row < cov.GetNrows() ; row++){
    for (int col = 0 ; col < cov.GetNcols() ; col++){
      double rowvar = sqrt(cov(row,row));
      double colvar = sqrt(cov(col,col));
      corr(row,col) = corr(row,col)/(rowvar * colvar); // divide each element by the variance
    }
  }


  // Second step: create the variations for the named sources of uncertainty (easy...)
  // News flash: This isn't so easy now, as it has to be done for all the flavours to which the uncertainty pertains
  // the following are temporary data structures
  std::vector<double> combined_result;
  std::map<std::string, int> flav_bins; // store the nbins of each flavour histogram for later use in "reporting"
  // and eventually, at the end, we will want to separate the flavour blocks out...
  for (const auto& flavour : m_flavours) {
    // for each flavour, we want to combine the results and uncertainty values across flavours into one "block" vector
    // retrieve the central calibration
    Analysis::CalibrationDataHistogramContainer* c = m_histcontainers[flavour]; // pointer to the flavour container

    TH1* result = dynamic_cast<TH1*>(c->GetValue("result"));
    if (not result){
      std::cerr<<"CalibrationDataGlobalEigenVariations::initialize: dynamic cast failed\n ";
      continue;
    }
    //construct the combined_result and combined_named_variations (up and down)
    if (c->getTagWeightAxis() == -1){ // For fixed cut WP, the Y axis **should** be the pT axis (but can it can potentially be different in the future)
      flav_bins[flavour] = result->GetNbinsY(); // Add the number of bins of the result histogram with non-zero results...
      if (m_verbose) std::cout << "flav_bins["<<flavour<<"] = " << flav_bins[flavour] << std::endl;
      for(int i = 0 ; i < flav_bins[flavour] ; i++){
        // push bin content onto the combined_result vector
        Int_t bin = result->GetBin(1,i+1); // This will only pick up the CONTENTS, not of the underflow bin
        double res_value = result->GetBinContent(bin);
        combined_result.push_back(res_value);
      }
    } else if (c->getTagWeightAxis() == 0) { // for continuous WP, the taxweight axis determines which axis is pt and |eta|
      flav_bins[flavour] = result->GetNbinsX()*result->GetNbinsY();
      int tagbins = result->GetNbinsX();
      int ptbins = result->GetNbinsY();
      if (m_verbose) std::cout << "flav_bins["<<flavour<<"] = " << flav_bins[flavour] << std::endl;
      for(int i = 0 ; i < tagbins ; i++){
        for(int j = 0 ; j < ptbins ; j++){
          // push bin content onto the combined_result vector
          Int_t bin = result->GetBin(j+1,1,i+1); // This will only pick up the CONTENTS, not of the underflow bin
          double res_value = result->GetBinContent(bin);
          combined_result.push_back(res_value);
        }
      }
    } else if (c->getTagWeightAxis() == 1) {
      flav_bins[flavour] = result->GetNbinsX()*result->GetNbinsY();
      int tagbins = result->GetNbinsY();
      int ptbins = result->GetNbinsX();
      if (m_verbose) std::cout << "flav_bins["<<flavour<<"] = " << flav_bins[flavour] << std::endl;
      for(int i = 0 ; i < tagbins ; i++){
        for(int j = 0 ; j < ptbins ; j++){
          // push bin content onto the combined_result vector
          Int_t bin = result->GetBin(j+1,1,i+1); // This will only pick up the CONTENTS, not of the underflow bin
          double res_value = result->GetBinContent(bin);
          combined_result.push_back(res_value);
        }
      }
    } else if (c->getTagWeightAxis() == 2) {
      flav_bins[flavour] = result->GetNbinsX()*result->GetNbinsZ();
      int tagbins = result->GetNbinsZ();
      int ptbins = result->GetNbinsX();
      if (m_verbose) std::cout << "flav_bins["<<flavour<<"] = " << flav_bins[flavour] << std::endl;
      for(int i = 0 ; i < tagbins ; i++){
        for(int j = 0 ; j < ptbins ; j++){
          // push bin content onto the combined_result vector
          Int_t bin = result->GetBin(j+1,1,i+1); // This will only pick up the CONTENTS, not of the underflow bin
          double res_value = result->GetBinContent(bin);
          combined_result.push_back(res_value);
        }
      }
    }


    // loop over the excluded uncertainties for this flavour, and construct their actual variations...
    // the "m_flav_namedIndices" are constructed within "excludeNamedUncertainties", which is called in the constructor
    for (map<string, unsigned int>::iterator it = m_flav_namedIndices[flavour].begin(); it != m_flav_namedIndices[flavour].end(); ++it) {
      TH1* hunc = (TH1*) c->GetValue(it->first.c_str()); // this should store the name uncertainty, if it exists for this flavour
      // I need to test if this uncertainty actually exists, and if it doesn't just use a ZERO variation histogram explicitly
      // but even if it doesn't exist, we want to have it, so that the indices match between flavours
      pair<TH1*, TH1*>& p = m_flav_named[flavour][it->second];
      TString namedvar("namedVar");
      namedvar += it->first.c_str();
      TString namedvarUp(namedvar); namedvarUp += "_up";
      TString namedvarDown(namedvar); namedvarDown += "_down";
      TH1* resultVariedUp = (TH1*)result->Clone(namedvarUp);
      TH1* resultVariedDown = (TH1*)result->Clone(namedvarDown);
      if (hunc){
        resultVariedUp->Add(hunc, 1.0); resultVariedUp->SetDirectory(0);
        resultVariedDown->Add(hunc, -1.0); resultVariedDown->SetDirectory(0);
      } else {
        // no histogram for this flavour: the "variation" is just a copy of the central result (zero shift)
        resultVariedUp->SetDirectory(0);
        resultVariedDown->SetDirectory(0);
      }
      p.first = resultVariedUp;
      p.second = resultVariedDown;
    } // End of uncertainty in flavour loop

    // Now handle the extrapolation uncertainties per flavour...
    // Refinement: add the "extrapolation" uncertainty as a named uncertainty, if the histogram is provided
    // This is a bit special, since the extrapolation uncertainty histogram has a different size than other histograms
    if (TH1* hunc = (TH1*)c->GetValue("extrapolation")) { // this is just saying "if it exists"...
      TH1* resultVariedUp = (TH1*) hunc->Clone("extrapolation_up"); resultVariedUp->SetDirectory(0);
      TH1* resultVariedDown = (TH1*) hunc->Clone("extrapolation_down"); resultVariedDown->SetDirectory(0);
      Int_t nbinx = hunc->GetNbinsX()+2, nbiny = hunc->GetNbinsY()+2, nbinz = hunc->GetNbinsZ()+2;
      // locate the calibration histogram's first bin inside the (larger) extrapolation histogram
      Int_t firstbinx = hunc->GetXaxis()->FindFixBin(result->GetXaxis()->GetBinCenter(1));
      Int_t firstbiny = result->GetDimension() > 1 ? hunc->GetYaxis()->FindFixBin(result->GetYaxis()->GetBinCenter(1)) : 1;
      Int_t firstbinz = result->GetDimension() > 2 ? hunc->GetZaxis()->FindFixBin(result->GetZaxis()->GetBinCenter(1)) : 1;
      for (Int_t binx = 1; binx < nbinx-1; ++binx) {
        Int_t binxResult = binx - firstbinx + 1;
        if (binxResult < 1) binxResult = 1;
        if (binxResult > result->GetNbinsX()) binxResult = result->GetNbinsX(); // clamp to the calibration range
        for (Int_t biny = 1; biny < nbiny-1; ++biny) {
          Int_t binyResult = biny - firstbiny + 1;
          if (binyResult > result->GetNbinsY()) binyResult = result->GetNbinsY();
          for (Int_t binz = 1; binz < nbinz-1; ++binz) {
            Int_t binzResult = binz - firstbinz + 1;
            if (binzResult > result->GetNbinsZ()) binzResult = result->GetNbinsZ();
            Int_t bin = hunc->GetBin(binx, biny, binz);
            double contResult = result->GetBinContent(binxResult, binyResult, binzResult);
            resultVariedUp->SetBinContent(bin, contResult + hunc->GetBinContent(bin));
            // NOTE(review): the up variation uses GetBinContent but the down variation uses
            // GetBinError -- this asymmetry looks unintentional; confirm against the reference implementation.
            resultVariedDown->SetBinContent(bin, contResult - hunc->GetBinError(bin));
          }
        }
      }
      m_flav_named[flavour].push_back(std::make_pair(resultVariedUp, resultVariedDown));
      m_flav_namedExtrapolation[flavour] = m_flav_namedIndices[flavour]["extrapolation"] = m_flav_named[flavour].size()-1;
    }


  } // End flavour loop



  // Third step: compute the eigenvector variations corresponding to the remaining sources of uncertainty
  // First, build the combined_result vector into a TH1
  std::unique_ptr<TH1> comb_result(new TH1D("combined_result", "", combined_result.size(), 0., 1.));
  int nbins = comb_result->GetNbinsX()+2;
  int ndim = comb_result->GetDimension();
  if (ndim > 1) nbins*= (comb_result->GetNbinsY()+2);
  if (ndim > 2) nbins*= (comb_result->GetNbinsZ()+2);

  // I want to take the contents of "combined_result" and fill in "comb_result" without the under/overflow
  for (unsigned int i=0 ; i<combined_result.size() ; i++){
    // assuming dimension == 1, which should be the case...
    comb_result->SetBinContent(i+1, combined_result[i]); // setting i+1 so we don't start with the underflow bin
  }

  // Get the portions of the covariance matrix that aren't zero, this next step
  TMatrixT<double> matrixVariationJacobian = getJacobianReductionMatrix(cov); // we pass the covariance matrix in as a starting point

  int size = matrixVariationJacobian.GetNrows();

  // Reduce the matrix to one without the zeros, using a "similarity" transformation
  const TMatrixDSym matrixCovariance = cov.Similarity(matrixVariationJacobian); // <--- This step removes the zeros

  // Carry out the Eigenvector decomposition on this matrix
  TMatrixDSymEigen eigenValueMaker (matrixCovariance);
  TVectorT<double> eigenValues = eigenValueMaker.GetEigenValues();
  TMatrixT<double> eigenVectors = eigenValueMaker.GetEigenVectors();
  TMatrixT<double> matrixVariations (size,size);

  //compute the total variance by summing the eigenvalues
  m_totalvariance = eigenValues.Sum();

  for (int i = 0; i < size; ++i) {
    for (int r = 0; r < size; ++r) {
      //first index is the variation number, second corresponds to the pT bin // The "eigenvariations" matrix is the "C" matrix which has CKC^T = I with "K" being the o.g. covariance matrix, and "I" is the identity.
      matrixVariations(i,r) = -1.0*eigenVectors[r][i]*sqrt(fabs(eigenValues[i])); // So the result is a matrix (eigenvariation) which is the eigenvector scaled by the sqrt(eigenvalue)
    }
  } // <------- matrixVariations: each row is one variation, each column is the pT bin.

  TMatrixT<double> matrixVariationsWithZeros = matrixVariations * matrixVariationJacobian; // This step adds in the zero rows again

  // Construct the initial set of variations from this
  for (int i = 0; i < matrixVariationsWithZeros.GetNrows(); ++i) {
    TString superstring("eigenVar");
    superstring+=i;

    TString nameUp(superstring); nameUp += "_up"; // In the end you get something like "eigenVar5_up"
    TString nameDown(superstring); nameDown += "_down";
    // TString nameUnc(superstring); nameUnc+= "_unc";

    TH1* resultVariedUp = (TH1*)comb_result->Clone(nameUp); resultVariedUp->SetDirectory(0);
    TH1* resultVariedDown = (TH1*)comb_result->Clone(nameDown); resultVariedDown->SetDirectory(0);

    // NOTE(review): the loop starts at bin 0 (the underflow bin) and stops before the last
    // in-range bin; confirm whether <= GetNbinsX() starting at u = 1 was intended.
    for (int u = 0; u < comb_result->GetNbinsX(); ++u) {
      resultVariedUp->SetBinContent(u,(comb_result->GetBinContent(u) + matrixVariationsWithZeros(i,u)));
      resultVariedDown->SetBinContent(u,(comb_result->GetBinContent(u) - matrixVariationsWithZeros(i,u)));
    }

    m_eigen.push_back(std::make_pair(resultVariedUp, resultVariedDown)); //<--- This is currently storing the FULL/combined variations, which aren't binned with proper bin widths etc.
    // The "proper binning" isn't necessary for pruning purposes. Later on, after pruning, separate the flavour blocks of each eigenvariation and construct the variations for each flavour, storing results in m_flav_eigen

  } //end eigenvector size


  // Without any changes, all eigenvariations are kept due to the variation being non-negligible for SOME flavour...
  // Step 4 : Time for PRUNING
  // > Check the variation bins to see if any exceed a given threshold value of "min_variance"
  // > Value of E-6 seems to work well for SFGlobalEigen (found through a scan of possible thresholds)
  // but this is not necessarily going to hold for future CDI's. This needs to be checked through the
  // "validate_reduction" function, which can be used to compare how well the reduction captures the total covariance.

  // Remove variations that are below the given tolerance (effectively meaning that they don't have any effect)
  IndexSet final_set;
  size_t current_set = 0;

  // We set the custom min_variance here
  min_variance = 1.0E-6;
  for (size_t index = 0; index < m_eigen.size(); ++index) {
    bool keep_variation = false; // guilty until proven innocent
    TH1* up = static_cast<TH1*>(m_eigen[index].first->Clone()); up->SetDirectory(0); // clone the up variation and check it out
    up->Add(comb_result.get(), -1.0); // now we're left with decimal values centered around 0, i.e. 0.02 or -0.02

    for (int bin = 1; bin <= nbins; ++bin) {
      if (fabs(up->GetBinContent(bin)) > min_variance) { // If you find even ONE bin with big enough variance, we keep the whole systematic.
        keep_variation = true;
        break;
      }
    }
    if (!keep_variation){ // At this stage, if we find no bins in the systematic with large enough variation, we insert it to "final_set" for removal/pruning
      final_set.insert(current_set);
    } else {
      m_capturedvariance += eigenValues[index];
    }
    delete up;
    ++current_set;
  }
  if (final_set.size() > 0){ // at this point, a set of the indices of negligible variations should have been gathered, proceed to remove them...
    if (m_verbose) std::cout << "CalibrationDataEigenVariations: Removing " << final_set.size() << " eigenvector variations leading to sub-tolerance effects, retaining " << m_eigen.size()-final_set.size() << " variations" << std::endl;
  }

  CalibrationDataEigenVariations::removeVariations(final_set); // This method actually performs the reduction. The above logic simply flags which variations to get rid of, inserting them into "final_set"

  // AT THIS STAGE: Pruning has already occurred, leaving us with a set of combined eigenvariations in "m_eigen", which we can thence recombine and compute the correlation matrix
  // That correlation matrix can then be compared to the original correlation matrix "corr", simply subtracting one from the other. Better approximations will have close to zero deviation.
  // What follows is the construction of this comparison, and the reporting of the comparison (saving it to file)...
  std::streamsize ss = std::cout.precision();
  if (m_verbose) std::cout << " The total variance is " << m_totalvariance << " and the reduction captured " << std::setprecision(9) << 100.0*(m_capturedvariance/m_totalvariance) << "% of this." << std::endl;
  std::cout.precision(ss); //restore precision
  if (m_validate){ // <---- this flag can be set manually in a local checkout of this package (temporary fix)
    validate_reduction(m_blockmatrixsize, corr, m_eigen, comb_result.get(), "SFGlobalEigen", m_taggername, m_wp, m_jetauthor); // This method is simply here to report the matrix comparison, for reduction scheme validation
  }


  // Let's separate out the flavour histograms now, this should be far simpler to ensure the binning is correct for each flavour.
  // At this stage, we should have constructed a set of combined eigenvariations stored in m_eigen.
  // Below, we take them one by one, separate, and store the flavour variations in m_flav_eigen, with the proper flavour heading
  // <----- The "mergeVariations" code originally merged the variations in m_eigen. But now that wouldn't work, since m_eigen stores
  // the combined histograms. We should instead merge the "reported" flavour histograms, as they're properly binned (and physically meaningful)
  // So if we construct the flavour variations first, keeping the indices all the same, then merge them.


  for(const std::pair<TH1*,TH1*>& var : m_eigen){
    // now we make use of the same old flavour loop and impose the flavour variation to the flavour container
    TString eigenvarup = var.first->GetName();
    TString eigenvardown = var.second->GetName();
    int bin_baseline = 0; // increment this by flav_bins after getting each flavour block
    for (const std::string& flavour : m_flavours){
      Analysis::CalibrationDataHistogramContainer* c = m_histcontainers[flavour]; // NOTE(review): this line was lost in extraction -- reconstructed to match the identical lookup earlier in this method
      TH1* result = dynamic_cast<TH1*>(c->GetValue("result"));
      if (not result){
        std::cerr<<"CalibrationDataGlobalEigenVariations::initialize: dynamic cast failed\n";
        continue;
      }
      TH1* resultVariedUp = (TH1*)result->Clone(eigenvarup); resultVariedUp->SetDirectory(0); // copy flavour result, want to set bin contents according to the combined eigenvartion flavour block
      TH1* resultVariedDown = (TH1*)result->Clone(eigenvardown); resultVariedDown->SetDirectory(0);
      int up_to_bin = flav_bins[flavour];
      int current_bin = 1; // starting from 1 for filling histograms
      // NOTE(review): the upper bound is up_to_bin+1, not bin_baseline+up_to_bin+1, so for any
      // flavour after the first the loop covers fewer (possibly zero) bins -- looks like a bug; verify.
      for(int flav_bin = bin_baseline+1 ; flav_bin < up_to_bin+1 ; flav_bin++){ // the +1's here are to deal with bin numbering yet again...
        Int_t bin = result->GetBin(1,current_bin); // increment along smaller (result) flavour variation TH1
        resultVariedUp->SetBinContent(bin, var.first->GetBinContent(flav_bin)); // Sets the result TH1 bin content to the eigenvariation bin content
        resultVariedDown->SetBinContent(bin, var.second->GetBinContent(flav_bin)); // In this way, we achieve flavour variations with proper binning
        current_bin+=1;
      }
      bin_baseline+=up_to_bin;
      m_flav_eigen[flavour].push_back(std::make_pair(resultVariedUp, resultVariedDown)); // <-------- add the flavour EV to the storage (indexed the same as in m_eigen!)
    }
  }

  for(const auto& f : m_flavours){
    if (m_verbose) std::cout << " " << f << " m_flav_eigen has " << m_flav_eigen[f].size() << std::endl;
  }

  m_initialized = true;
}
1870
1871
1872
1873//________________________________________________________________________________
TMatrixD
CalibrationDataGlobalEigenVariations::getJacobianReductionMatrix(const TMatrixDSym& cov) const // NOTE(review): signature line lost in extraction; reconstructed from the call site in initialize() -- confirm parameter type/constness against the header
{
  // Construct the matrix that removes the rows and columns that fail to influence
  // the eigen-variations. To reduce the covariance matrix, do the following:
  // Note: Previous version called "getEigenCovariance" whereas here we pass it in by reference to save on compute
  // This "cov" is the "uncompressed" eigenvariation covariance matrix for all uncertainties.
  // We will subsequently compress it (removing columns/rows containing zeros only)
  // and then construct the hopefully rectangular matrix that can remove these ones from the covariance matrix
  //
  // Returns: a (size - nZeros) x size selection matrix J such that J*cov*J^T drops the all-zero rows/columns.

  int nZeros = 0;
  std::vector<int> zeroComponents; // indices of rows that are entirely (numerically) zero

  // First flag all the zeros
  for (int i = 0 ; i < m_blockmatrixsize ; i++){
    if (fabs(cov(i,0)) < 1e-10){
      bool isThereANonZero = false;
      for (int j = 0 ; j < m_blockmatrixsize ; j++){
        // Try a first (quick) identification of rows/columns of zeros by the first element in each row
        // If "successful", check the whole row in more detail
        if (fabs(cov(i,j)) > 1e-10){
          isThereANonZero = true;
          break;
        }
      }
      if (! isThereANonZero){
        ++nZeros;
        zeroComponents.push_back(i) ;
      }
    }
  }

  if (nZeros >= m_blockmatrixsize) {
    std::cerr << " Problem. Found n. " << nZeros << " while size of matrix is " << m_blockmatrixsize << std::endl;
    return TMatrixDSym(); // empty (0x0) matrix signals the degenerate case to the caller
  }

  int size = m_blockmatrixsize - nZeros;
  TMatrixT<double> matrixVariationJacobian(size,m_blockmatrixsize);
  int nMissed = 0;

  for (int i = 0; i < m_blockmatrixsize; ++i) { // full size
    bool missed = false;
    for (unsigned int s = 0 ; s < zeroComponents.size(); ++s) { // <--- Basically what this does is it flags "missed" for a given "i" of the full bin size
      if (zeroComponents.at(s) == i) { // <--- if "i" is in "zeroComponents". Breaks (because it found that it's to be missed)
        missed = true;
        break;
      }
    }
    if (missed) { // <-------- Finally, if "i" is to be missed, increase "nMissed" by one, and....
      ++nMissed;
      continue;
    }

    matrixVariationJacobian(i-nMissed,i)=1; // <-------- ... this ALWAYS adds a one. If zero "nMissed", add to diagonal. otherwise off-diagonal
  }

  return matrixVariationJacobian;
}
1933
1934
1935
1936
1937
1938//________________________________________________________________________________
unsigned int
CalibrationDataGlobalEigenVariations::getNumberOfEigenVariations(const std::string& flavour) // NOTE(review): signature line lost in extraction; reconstructed from the body -- confirm against the header
{
  // Return the number of eigenvector variations stored for the given flavour,
  // triggering the (lazy) initialisation first if needed.
  // Returns 0 for a flavour with no stored variations.

  if (! m_initialized) initialize();

  if (m_flav_eigen.find(flavour) == m_flav_eigen.end()){
    if (m_verbose) std::cout << "No m_flav_eigen for flavour " << flavour << std::endl;
    return 0;
  }
  return (m_flav_eigen.find(flavour)->second).size();
}
1950
1951//________________________________________________________________________________
1952bool
1953CalibrationDataGlobalEigenVariations::getEigenvectorVariation(const std::string& flavour, unsigned int variation, TH1*& up, TH1*& down)
1954{
1955 // Return the pointers to the up- and downward variation histogram for the specified
1956 // eigenvector variation. In case of an invalid variation number, null pointers will
1957 // be returned and the return value will be false.
1958 //
1959 // variation: eigenvector variation number
1960 // up: (reference to) pointer to upward variation histogram
1961 // down: (reference to) pointer to downward variation histogram
1962
1963 if (! m_initialized) initialize();
1964 std::vector<std::pair<TH1*,TH1*>> flav_variations = m_flav_eigen.find(flavour)->second;
1965 if (variation < flav_variations.size()){
1966 up = flav_variations[variation].first;
1967 down = flav_variations[variation].second;
1968 return true;
1969 }
1970
1971 up = down = 0;
1972 return false;
1973}
1974
1975//________________________________________________________________________________
1976bool
1977CalibrationDataGlobalEigenVariations::getNamedVariation(const string& flavour, const string& name, TH1*& up, TH1*& down)
1978{
1979 // Return the pointers to the up- and downward variation histogram for the specified
1980 // named variation. In case of an invalid named variation, null pointers will
1981 // be returned and the return value will be false.
1982 //
1983 // name: named variation
1984 // up: (reference to) pointer to upward variation histogram
1985 // down: (reference to) pointer to downward variation histogram
1986
1987 // Find the named variation index (if it exists) and pass to "getNamedVariation"
1988 std::map<std::string, unsigned int>::const_iterator it = (m_flav_namedIndices.find(flavour)->second).find(name);
1989 if (it != (m_flav_namedIndices.find(flavour)->second).end()) return getNamedVariation(it->second, flavour, up, down);
1990
1991 up = down = 0;
1992 return false;
1993}
1994
1995//________________________________________________________________________________
1996bool
1997CalibrationDataGlobalEigenVariations::getNamedVariation(unsigned int nameIndex, const std::string& flavour, TH1*& up, TH1*& down)
1998{
1999 // Return the pointers to the up- and downward variation histogram for the specified
2000 // named variation. In case of an invalid named variation number, null pointers will
2001 // be returned and the return value will be false.
2002 //
2003 // nameIndex: named variation number
2004 // up: (reference to) pointer to upward variation histogram
2005 // down: (reference to) pointer to downward variation histogram
2006
2007 if (! m_initialized) initialize();
2008
2009 if (nameIndex < m_flav_named[flavour].size()) {
2010 up = m_flav_named[flavour][nameIndex].first;
2011 down = m_flav_named[flavour][nameIndex].second;
2012 return true;
2013 }
2014
2015 up = down = 0;
2016 return false;
2017}
2018
2019
2020//________________________________________________________________________________
unsigned int
CalibrationDataGlobalEigenVariations::getNamedVariationIndex(const std::string& name, const std::string& flavour) const
{
  // Return the integer index corresponding to the named variation.
  // Note that no checks are made on the validity of the name provided.
  //
  // name:    named variation (precondition: must exist for the given flavour)
  // flavour: jet flavour label (precondition: must be a key of m_flav_namedIndices)
  //
  // NOTE(review): both map::find() results are dereferenced unconditionally, so an
  // unknown flavour or name is undefined behaviour here -- callers must validate the
  // inputs first (e.g. via getNamedVariation(), which performs the checked lookup).
  std::map<std::string, unsigned int>::const_iterator it = (m_flav_namedIndices.find(flavour)->second).find(name);
  return it->second;
}
2029
2030
2031//________________________________________________________________________________
2032bool
2033CalibrationDataGlobalEigenVariations::isExtrapolationVariation(unsigned int nameIndex, const std::string& flavour) const
2034{
2035 // Verifies whether the given named variation index corresponds to the extrapolation
2036 // uncertainty.
2037 int extrapIndex = m_flav_namedExtrapolation.find(flavour)->second;
2038 return (extrapIndex == int(nameIndex));
2039}
2040
2041
2042
2043//________________________________________________________________________________
2044void
2046{
2047 // Merge all systematic variation starting from the given index.
2048 // The resulting merged variation replaces the first entry in the list
2049 // (i.e., the entry specified by the index).
2050
2051 IndexSet simple_set;
2052
2053 for (size_t it = index; it < m_flav_eigen[flav].size(); ++it)
2054 simple_set.insert(it);
2055 mergeVariations(simple_set, flav);
2056}
2057
2058//________________________________________________________________________________
2059void
2061{
2062 IndexSuperSet sset;
2063 sset.insert(set);
2064 mergeVariations(sset, flav);
2065}
2066
2067
2068//________________________________________________________________________________
2069void
2071{
2073 // Merge the (flavour specific) eigenvariations, as stored in m_flav_eigen
2075
2076 // check for overlap
2077 IndexSet checker;
2078 for (IndexSuperSet::iterator set_it = set.begin(); set_it != set.end(); ++set_it) {
2079 for (IndexSet::iterator subset_it = set_it->begin(); subset_it != set_it->end(); ++subset_it){
2080 if (checker.count(*subset_it) == 0 && *subset_it <= m_flav_eigen[flav].size())
2081 checker.insert(*subset_it);
2082 else {
2083 std::cerr << "Error in CalibrationDataGlobalEigenVariations::mergeVariations: \
2084 IndexSets must not overlap and must lie between 1 and " << m_eigen.size() << ". Aborting!" << std::endl;
2085 return;
2086 }
2087 }
2088 }
2089
2090 std::string flavour = flav;
2091
2093
2094 TH1* result = dynamic_cast<TH1*>(c->GetValue("result"));
2095 if (not result){
2096 std::cerr << "Error in CalibrationDataGlobalEigenVariations::mergeVariations: failed dynamic cast\n";
2097 return;
2098 }
2099 // retrieve the central calibration
2100
2101 IndexSet toDelete;
2102 int nbins = result->GetNbinsX()+2;
2103 int ndim = result->GetDimension();
2104 if (ndim > 1) nbins *= (result->GetNbinsY()+2);
2105 if (ndim > 2) nbins *= (result->GetNbinsZ()+2);
2106
2107 // complex sum
2108 for (IndexSuperSet::iterator set_it = set.begin(); set_it != set.end(); ++set_it) {
2109 if (set_it->empty()) continue;
2110
2111 double sum_H_up = 0.0, sum_H_down = 0.0;
2112 size_t lowest_index = *set_it->lower_bound(0);
2113 TH1 *total_var_up = static_cast<TH1*>(m_flav_eigen[flavour][lowest_index].first->Clone());
2114 TH1 *total_var_down = static_cast<TH1*>(m_flav_eigen[flavour][lowest_index].second->Clone());
2115 total_var_up->SetDirectory(0);
2116 total_var_down->SetDirectory(0);
2117
2118 total_var_up->Reset();
2119 total_var_down->Reset();
2120
2121 // sum all other variations
2122 for (IndexSet::iterator subset_it = set_it->begin(); subset_it != set_it->end(); ++subset_it) {
2123 size_t actual_index = *subset_it;
2124
2125 if (actual_index != lowest_index) toDelete.insert(*subset_it); //
2126
2127 TH1 *partial_var_up = static_cast<TH1*>(m_flav_eigen[flavour][actual_index].first->Clone());
2128 TH1 *partial_var_down = static_cast<TH1*>(m_flav_eigen[flavour][actual_index].second->Clone());
2129 partial_var_up->SetDirectory(0);
2130 partial_var_down->SetDirectory(0);
2131
2132 partial_var_up->Add(result, -1.0);
2133 partial_var_down->Add(result, -1.0);
2134 for (int i = 0; i < nbins; ++i) {
2135 partial_var_down->SetBinContent(i, -1.0*partial_var_down->GetBinContent(i));
2136 }
2137
2138 for (int u = 0; u < nbins; ++u) {
2139 double sum_up = total_var_up->GetBinContent(u);
2140 double sum_down = total_var_down->GetBinContent(u);
2141 for (int v = 0; v < nbins; ++v) {
2142 sum_up += partial_var_up->GetBinContent(u)*partial_var_up->GetBinContent(v);
2143 sum_H_up += partial_var_up->GetBinContent(u)*partial_var_up->GetBinContent(v);
2144 sum_down += partial_var_down->GetBinContent(u)*partial_var_down->GetBinContent(v);
2145 sum_H_down += partial_var_down->GetBinContent(u)*partial_var_down->GetBinContent(v);
2146 }
2147 total_var_up->SetBinContent(u, sum_up);
2148 total_var_down->SetBinContent(u, sum_down);
2149 }
2150 delete partial_var_up;
2151 delete partial_var_down;
2152 }
2153
2154 // final part of complex summing
2155 for (int i = 0; i < nbins; ++i) {
2156 if (sum_H_up != 0.0)
2157 total_var_up->SetBinContent(i, total_var_up->GetBinContent(i)/sqrt(sum_H_up));
2158 else
2159 total_var_up->SetBinContent(i, 0.0);
2160 if (sum_H_down != 0.0)
2161 total_var_down->SetBinContent(i, -1.0*total_var_down->GetBinContent(i)/sqrt(sum_H_down));
2162 else
2163 total_var_down->SetBinContent(i, 0.0);
2164 }
2165
2166 total_var_up->Add(result);
2167 total_var_down->Add(result);
2168
2169 m_flav_eigen[flavour][lowest_index].first = total_var_up;
2170 m_flav_eigen[flavour][lowest_index].second = total_var_down;
2171 }
2172
2173 removeVariations(toDelete, flavour);
2174}
2175
2176
2177//________________________________________________________________________________
2178void
2180{
2181 if (set.size() == 0) return;
2182 std::vector<std::pair<TH1*, TH1*> > new_eigen;
2183 std::vector<std::pair<TH1*, TH1*>> eigen = m_flav_eigen[flav];
2184 for (size_t index = 0; index < eigen.size(); ++index){
2185 if (set.count(index) == 0) {
2186 new_eigen.push_back(eigen[index]);
2187 } else {
2188 delete eigen[index].first; delete eigen[index].second;
2189 }
2190 }
2191
2192 m_flav_eigen[flav] = std::move(new_eigen);
2193
2194}
2195
2196//________________________________________________________________________________
2197void
2199{
2200 IndexSet simple_set;
2201
2202 for (IndexSuperSet::iterator set_it = set.begin(); set_it != set.end(); ++set_it) {
2203 for (IndexSet::iterator subset_it = set_it->begin(); subset_it != set_it->end(); ++subset_it)
2204 simple_set.insert(*subset_it);
2205 }
2206
2207 removeVariations(simple_set, flav);
2208}
const boost::regex ref(r_ef)
ClassImp(CalibrationDataEigenVariations) CalibrationDataEigenVariations
std::vector< std::string > split(const std::string &str, const char token=';')
local utility function: split string into a vector of substrings separated by a specified separator,...
static Double_t ss
This is the interface for the objects to be stored in the calibration ROOT file.
CalibrationDataHistogramContainer * m_cnt
container object containing the basic information
bool m_initialized
flag whether the initialization has been carried out
unsigned int getNumberOfNamedVariations() const
retrieve the number of named variations
void removeVariations(const IndexSet &set)
remove all variations in the given set
std::map< std::string, unsigned int > m_namedIndices
named variations
std::vector< std::pair< TH1 *, TH1 * > > m_eigen
eigenvector variations
TMatrixD getJacobianReductionMatrix()
matrix to remove unecessary rows and columns from covariance
bool getEigenvectorVariation(unsigned int variation, TH1 *&up, TH1 *&down)
obtain the "up" and "down" variations for the given eigenvector number.
int m_namedExtrapolation
named variation index for the special case of extrapolation uncertainties
void mergeVariationsFrom(const size_t &index)
merge all variations starting from the given index
TMatrixDSym getEigenCovarianceMatrixFromVariations()
covariance matrix corresponding to eigenvector variations constructed from the eigen-variation
bool isExtrapolationVariation(unsigned int nameIndex) const
flag whether the given index corresponds to an extrapolation variation
unsigned int getNumberOfEigenVariations()
retrieve the number of eigenvector variations
void excludeNamedUncertainty(const std::string &name, CalibrationDataContainer *cnt)
exclude the source of uncertainty indicated by name from eigenvector calculations
bool m_statVariations
indicate whether statistical uncertainties are stored as variations
bool getNamedVariation(const std::string &name, TH1 *&up, TH1 *&down)
obtain the "up" and "down" variations for the named uncertainty.
virtual void initialize(double min_variance=1.0E-20)
carry out the eigenvector computations.
virtual TMatrixDSym getEigenCovarianceMatrix()
also provide (some) access to the underlying information: covariance matrix corresponding to eigenvec...
unsigned int getNamedVariationIndex(const std::string &name) const
retrieve the integer index corresponding to the named variation.
std::vector< std::pair< TH1 *, TH1 * > > m_named
void mergeVariations(const IndexSet &set)
merge all variations in the given set
std::vector< std::string > listNamedVariations() const
list the named variations
CalibrationDataEigenVariations(const std::string &cdipath, const std::string &tagger, const std::string &wp, const std::string &jetcollection, CalibrationDataHistogramContainer *cnt, bool excludeRecommendedUncertaintySet=false, bool base=true)
normal constructor.
bool EigenVectorRecomposition(const std::string &label, std::map< std::string, std::map< std::string, float > > &coefficientMap)
Eigenvector recomposition method.
std::map< std::string, std::vector< std::pair< TH1 *, TH1 * > > > m_flav_named
void initialize(double min_variance=1.0E-6)
carry out the eigenvector computations.
void removeVariations(const IndexSet &set, std::string &flav)
void excludeNamedUncertainty(const std::string &name, const std::string &flavour)
bool getEigenvectorVariation(const std::string &flavour, unsigned int variation, TH1 *&up, TH1 *&down)
std::map< std::string, Analysis::CalibrationDataHistogramContainer * > m_histcontainers
CalibrationDataGlobalEigenVariations(const std::string &cdipath, const std::string &tagger, const std::string &wp, const std::string &jetcollection, const std::vector< std::string > &flavours, CalibrationDataHistogramContainer *cnt, bool excludeRecommendedUncertaintySet=false)
TMatrixDSym getEigenCovarianceMatrix()
also provide (some) access to the underlying information: covariance matrix corresponding to eigenvec...
std::map< std::string, std::map< std::string, unsigned int > > m_flav_namedIndices
unsigned int getNamedVariationIndex(const std::string &name, const std::string &flavour) const
bool isExtrapolationVariation(unsigned int nameIndex, const std::string &flavour) const
std::map< std::string, std::vector< std::pair< TH1 *, TH1 * > > > m_flav_eigen
void mergeVariations(const IndexSet &set, std::string &flav)
void mergeVariationsFrom(const size_t &index, std::string &flav)
bool getNamedVariation(const std::string &flavour, const std::string &name, TH1 *&up, TH1 *&down)
This is the class holding information for histogram-based calibration results.
CalibrationDataEigenVariations(const std::string &cdipath, const std::string &tagger, const std::string &wp, const std::string &jetcollection, CalibrationDataHistogramContainer *cnt, bool excludeRecommendedUncertaintySet=false, bool base=true)
normal constructor.
This is the class holding information for histogram-based calibration results.
STL iterator class.
STL iterator class.
void zero(TH2 *h)
zero the contents of a 2d histogram
int r
Definition globals.cxx:22
std::string find(const std::string &s)
return a remapped string
Definition hcg.cxx:138
std::string base
Definition hcg.cxx:81
bool verbose
Definition hcg.cxx:73
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
std::string label(const std::string &format, int i)
Definition label.h:19
static TFile * fout
Definition listroot.cxx:40
std::vector< std::string > split(const std::string &str, const char token=';')
local utility function: split string into a vector of substrings separated by a specified separator,...
row
Appending html table to final .html summary file.
const std::string tagger
Definition index.py:1
void initialize()