28 m_typesToFill(selectTypesToFill(
client)),
29 m_convertWhenMissing(convertWhenMissing)
51 if(kv.second.size())
return true;
67 std::map<unsigned, EfficiencyTable::BoundType> cachedParamVals;
79 bool found_central =
false;
81 eff->uncertainties.clear();
91 for(
auto&
table : relevantTables->second)
94 if(
status < 0)
return false;
99 error =
"while retrieving " +
getTypeAsString(
type) +
", found two non-orthogonal tables providing the central value";
102 found_central =
true;
111 if(
type != wantedType)
113 if(
eff == &
pd.fake_factor)
117 for(
auto& kv :
eff->uncertainties) kv.second *=
k;
119 else if(
eff == &
pd.fake_efficiency)
123 for(
auto& kv :
eff->uncertainties) kv.second *=
k;
134 for(
const auto&
dim :
table.m_dimensions)
138 auto&
val = ins.first->second;
143 error =
"can't retrieve value of parameter \"" + param.name +
"\"";
148 auto ubound = std::upper_bound(
first, last,
val,
149 [=](
auto x,
auto y){
return param.integer() ? (
x.as_int<
y.as_int) : (
x.as_float<
y.as_float); });
150 if(ubound==
first || ubound==last)
157 if(
bin < 0)
return 0;
162 error =
"unknown table type (tool implementation incomplete!)";
166 for(
auto& kv :
eff.uncertainties) kv.second *=
ref.nominal;
167 for(
auto& kv :
ref.uncertainties)
171 if(!
eff.uncertainties.emplace(kv.first,
eff.nominal*kv.second).second)
173 error =
"central values and corrections must use different systematic uncertainties";
178 eff.nominal *=
ref.nominal;
192 auto begpos =
xml.tellg();
194 std::size_t bufferSize = 1.05 *
static_cast<std::size_t
>(
xml.tellg() - begpos);
196 if(bufferSize > 0x100000) bufferSize = 0x100000;
208 while(
line.length() && (
line.back()==
'\n' ||
line.back()==
'\r'))
line.pop_back();
227 else throw(
XmlError(
stream) <<
"unknown/unexpected XML tag \"" <<
tag.str() <<
"\"");
241 std::string
pattern =
"^\\s*(<([[:alnum:]]+)((?:\\s+\\|?[_[:alnum:]]+\\|?\\s*=\\s*\"[_[:alnum:]\\s,-\\[\\]\\.\\|]+\")*)\\s*>)(.*?)</\\2>\\s*";
248 tag.set(
stream.ptr+cmr.position(2), cmr.length(2));
252 auto cpos = cmr.size()-1;
255 stream.ptr += cmr.length();
264 for(
int i=0;
i<nAttr;++
i)
pattern +=
"\\s+(\\|?[_[:alnum:]]+\\|?)\\s*=\\s*\"([_[:alnum:]\\s,-\\[\\]\\.\\|]+)\"";
271 for(
unsigned i=1;
i<cmr.size();
i+=2)
273 auto attr = cmr[
i].str();
277 throw(
XmlError(
stream.ptr+cmr.position(
i), attr.length()) <<
"invalid attribute \"" << attr <<
"\"");
281 throw(
XmlError(
stream.ptr+cmr.position(
i), attr.length()) <<
"empty value for attribute \"" << attr <<
"\"");
283 auto& attrVal = itr->second;
284 if(attrVal)
throw(
XmlError(
stream.ptr+cmr.position(
i), attr.length()) <<
"the attribute \"" << attr <<
"\" has already been specified for that tag");
285 attrVal.set(
stream.ptr+cmr.position(
i+1), cmr.length(
i+1));
293 while(std::regex_search(
buffer, smr, rx))
301 buffer = smr.prefix().str() + smr.suffix().str();
307 const std::vector<std::string>
keys = {
"<efficiencies>",
"</efficiencies>"};
311 while((ipos =
buffer.find(
key)) != std::string::npos)
321 std::vector<std::string>
words;
324 while(std::getline(
ss,
w,
','))
326 std::size_t
i =
w.find_first_not_of(
" \t");
327 std::size_t j =
w.find_last_not_of(
" \t");
328 if(
i == std::string::npos)
330 throw(
XmlError(
stream) <<
"this should be a comma-separated list of names");
332 words.push_back(
w.substr(
i, j-
i+1));
337 template<
typename ReturnValue>
341 throw(
XmlError(attr) <<
"unsupported parameter type \"" << attr.
str() <<
"\"");
344 template<
typename ReturnValue,
typename...
Args>
351 template<
typename ReturnValue,
typename...
Args>
354 std::string attrname =
tag.str() +
"/" +
type;
356 if(!attr)
throw(
XmlError(
tag) <<
"unspecified value for attribute \"" <<
type <<
"\"");
379 std::bitset<N_EFFICIENCY_TYPES> affects;
382 auto targetMatches = [&](
const char*
a,
const char*
b) ->
bool
394 if(!
matched)
throw(
XmlError(
tag) <<
"the value \"" <<
target <<
"\" specified for the attribute \"affects\" is not recognized");
396 if(affects.none())
throw(
XmlError(
tag) <<
"missing or empty attribute \"affects\"");
400 if(std::any_of(
m_systs.begin(),
m_systs.end(), [&](
const SystDef&
s){ return s.name==name && (affects&s.affects).any(); }))
402 throw(
XmlError(
contents) <<
"the systematic \"" <<
name <<
"\" was already declared previously; duplicates are only allowed if their \"affects\" attributes do not overlap, which is not the case here");
413 if(
pos)
throw(
XmlError(
pos) <<
"exceeded max number of statistical uncertainties");
414 else throw(
GenericError() <<
"exceeded max number of statistical uncertainties");
434 unsigned short globalStatUID = 0;
443 const TH1*
hist =
nullptr;
444 if(
tag==
"bin" ||
tag==
"table" ||
tag==
"TH1")
448 if(!
source)
throw(
XmlError(
tag) <<
"histograms can only be imported inside <ROOT>...</ROOT> blocks!");
449 if(!subattributes.at(
"TH1/X"))
throw(
XmlError(
tag) <<
"the attribute 'X' should be specified (as well as 'Y' for 2D histograms, 'Z' for 3D histograms)");
450 auto name = subcontents.trim();
452 if(!
hist)
throw(
XmlError(subcontents) <<
"can't find any histogram named \"" <<
name <<
"\" in the file " <<
source->GetName());
453 auto&
norm = subattributes.at(
"TH1/norm");
455 throw(
XmlError(
norm) <<
"normalization of input histograms is only accepted for 'input=\"correction\"'");
457 importNominalTH1(
hist,
type, subattributes.at(
"TH1/X"), subattributes.at(
"TH1/Y"), subattributes.at(
"TH1/Z"),
scale, statMode, globalStatUID, subcontents);
463 auto initialNumberOfBins =
table.numberOfBins();
464 std::map<StringRef, unsigned> dimBins;
465 for(
unsigned uid=0; uid<
m_params.size(); ++uid)
474 if(
tag ==
"bin" &&
table.numberOfBins() > 1)
throw(
XmlError(
tag) <<
"use a <table> instead of a <bin> tag to hold several values");
477 else if(
table.numberOfBins() != initialNumberOfBins)
throw(
XmlError(
tag) <<
"extra binned dimensions do not make sense");
478 if(
tag==
"TH1" ||
tag==
"bin")
480 auto&
label = subattributes.at(
tag +
"/label");
483 float normFactor {0};
487 assert (
tag ==
"bin");
488 normFactor = 1.f /
table.m_efficiencies[0].nominal;
490 if(!std::isnormal(normFactor) || normFactor<=0.)
throw(
XmlError(
label) <<
"computed normalization factor is 0 / NaN / infinite / negative");
496 else throw(
XmlError(
tag) <<
"unknown/unexpected XML tag \"" <<
tag.str() <<
"\"");
507 throw(
XmlError(
pos) <<
"unexpected parsing error (leftover data \"" <<
line <<
"\")");
515 const bool integer = param.integer();
516 const std::string
fp =
"[+-]?[0-9]*\\.?[0-9]+(?:[Ee][+-]?[0-9]+)?";
517 const std::string
pattern =
"^\\s*\\[\\s*(?:(?:-inf\\s*,\\s*|-?)inf|(?:-inf\\s*,\\s*)?"
518 +
fp +
"(?:\\s*,\\s*" +
fp +
")*(?:\\s*,\\s*inf)?)\\s*\\]\\s*$";
524 throw(
XmlError(
contents) <<
"invalid format for the range of the parameter " << param.name);
528 auto& bounds =
table.m_bounds;
529 table.m_dimensions.emplace_back();
530 auto&
dim =
table.m_dimensions.back();
531 dim.paramUID = paramUID;
532 dim.iMinBound =
table.m_bounds.size();
535 if(
integer &&
dim.nBounds < 1)
throw(
XmlError(
contents) <<
"should specify at least one bin boundary for parameter " << param.name);
536 if(!
integer && (
dim.nBounds < 2))
throw(
XmlError(
contents) <<
"should specify at least two bin boundaries for parameter " << param.name);
537 for(
auto&
c :
line)
if(
c==
',' ||
c==
'[' ||
c==
']')
c =
' ';
538 std::stringstream
ss(
line);
539 for(
int i=0;
i<
dim.nBounds;++
i)
543 else ss >>
x.as_float;
546 if(
i==0 ||
i==(
dim.nBounds-1))
553 if(x_s==
"inf" || x_s==
"-inf")
555 bool defMax = (x_s.front() !=
'-');
561 if(
ss.fail())
throw(
XmlError(
contents) <<
"parsing error (can't read int/float boundary)");
565 if(
integer ? (bounds.back().as_int >
x.as_int) : (bounds.back().as_float >
x.as_float))
567 throw(
XmlError(
contents) <<
"bin boundaries must be sorted in increasing order");
576 bounds.push_back(bounds.back());
577 bounds.back().as_int += 1;
583 const std::string fpv =
"(?:[0-9]+\\.)?[0-9]+(?:[Ee][+-]?[0-9]+)?", fpu = fpv +
"\\s*\\%?";
584 const std::string
pattern =
"^\\s*" + fpv +
"(?:\\s*(?:\\+(?:\\s*" + fpu +
"\\s*)?-|-(?:\\s*" + fpu +
"\\s*)?\\+)\\s*" + fpu +
"\\s*\\([_[:alnum:]]+\\))*\\s*";
594 if(!std::regex_search(
ptr,
contents.endptr,
cm, rxValidFormat))
597 throw(
XmlError(lineref) <<
"the central value(s) and uncertainties are not in the expected format; first issue found with value " << lineref.str().substr(0, 32) <<
" [...]");
601 std::string
value = valref.str();
602 value.erase(std::remove_if(
value.begin(),
value.end(), [](
char c){ return std::isspace(c); }),
value.end());
604 for(
auto&
c :
value)
if(
c==
'(' ||
c==
')')
c =
' ';
607 table.m_efficiencies.emplace_back();
608 auto&
eff =
table.m_efficiencies.back();
610 bool foundStat =
false;
611 for(
unsigned i=0;
i<nErrs;++
i)
618 if(
c2==
'+' ||
c2==
'-')
620 ss >>
c2 >> uncval.
up >> sysname;
623 uncval.
up *= 0.01f *
eff.nominal;
633 uncval.
up *= 0.01f *
eff.nominal;
636 ss >> uncval.
down >> sysname;
639 uncval.
down *= 0.01f *
eff.nominal;
643 if(
ss.bad())
throw(
XmlError(valref) <<
"unexpected parsing error");
644 if(std::signbit(uncval.
up) != std::signbit(uncval.
down))
throw(
XmlError(valref) <<
"one-sided up/down errors");
647 uncval.
up = -uncval.
up;
652 if(sysname ==
"stat")
654 if(foundStat)
throw(
XmlError(valref) <<
"there can be only one source of statistical uncertainty per bin");
662 [&](
const SystDef&
sd){ return sd.name==sysname && sd.affects[type]; });
663 if(
sys ==
m_systs.end())
throw(
XmlError(valref) <<
"the systematic \"" << sysname <<
"\" has either not been defined, or does not affect this type of efficiency");
667 if(!
eff.uncertainties.emplace(uid, uncval).second)
669 throw(
XmlError(valref) <<
"source of uncertainty \"" << sysname <<
"\" specified twice");
674 if(
table.m_efficiencies.size() !=
table.numberOfBins())
677 <<
") is inconsistent with the number of bins (" <<
table.numberOfBins() <<
")");
684 if(!
filename.length())
throw(
XmlError(rootTag) <<
"the 'file' attribute must be specified!");
715 const std::vector<std::string>
parts = {
"electron",
"muon",
"tau"};
716 for(
const auto&
p :
parts)
744 const std::string
prefix =
"^(FakeFactor|FakeEfficiency|RealEfficiency|FakeRate|FakeRateSF)",
suffix =
"_([[:w:]][^_]+)(__[[:w:]]+)?$";
760 unsigned short dummy;
765 for(
int i=0;
i<
keys->GetSize();++
i)
767 TKey*
key =
static_cast<TKey*
>(
keys->At(
i));
769 std::string keyType =
key->GetClassName();
771 if(keyType==
"TH1F" || keyType==
"TH1D") nDims = 1 * std::regex_match(
key->GetName(),
mr, rxTH1);
772 else if(keyType==
"TH2F" || keyType==
"TH2D") nDims = 2 * std::regex_match(
key->GetName(),
mr, rxTH2);
773 else if(keyType==
"TH3F" || keyType==
"TH3D") nDims = 3 * std::regex_match(
key->GetName(),
mr, rxTH3);
775 if(nDims < 1)
throw(
GenericError() <<
"don't know what to do with histogram named \"" <<
key->GetName() <<
"\" (please check naming conventions)");
776 TH1*
hist =
static_cast<TH1*
>(
key->ReadObj());
778 std::string sss =
mr[1].str() +
"-" +
mr[2].str();
785 bool systTH1 = (
mr[
mr.size()-1].str() !=
"");
786 if(
step==0 && !systTH1)
806 if(
hist->GetNbinsX()!=1 ||
hist->GetNbinsY()!=1 ||
hist->GetNbinsZ()!=1)
810 for(
int i=1;
i<=
hist->GetNbinsX();++
i)
811 for(
int j=1;j<=
hist->GetNbinsY();++j)
812 for(
int k=1;
k<=
hist->GetNbinsZ();++
k)
814 double x =
hist->GetBinContent(
i, j,
k);
815 if(
x == 0.)
continue;
816 double w =
hist->GetBinError(
i, j,
k);
817 if(
w == 0.)
throw(
XmlError(xmlStream) <<
"bin with error = 0 encountered when trying to normalize histogram " <<
hist->GetName() <<
" to weighted bins average");
824 else avg = 1. /
hist->GetBinContent(1);
825 if(!std::isnormal(
avg) ||
avg<=0.)
throw(
XmlError(xmlStream) <<
"something bad happened when trying to compute the weighted average of histogram \""
826 <<
hist->GetName() <<
"\" bins, the result ended up 0 / NaN / infinite / negative");
833 if(!
norm)
return 1.f;
834 auto normType =
norm.str();
836 else if(normType !=
"none")
848 const bool useDefaults = !xmlStream;
850 if(useDefaults &&
m_tables[
type].
size())
throw(
GenericError() <<
"already filled that table, please use an XML to describe how to interpret the more complex ROOT files");
854 const int nDims = paramZ? 3 : paramY? 2 : 1;
855 if(
hist->GetDimension() != nDims)
857 if(xmlStream)
throw(
XmlError(xmlStream) <<
"histogram " <<
hist->GetName() <<
" doesn't have the expected dimension");
858 else throw(
GenericError() <<
"histogram " <<
hist->GetName() <<
" doesn't have the expected dimension");
862 for(
int j=0;j<nDims;++j)
864 std::string
name = ((j==2)? paramZ : (j==1)? paramY : paramX).
str();
865 const TAxis*
axis = (j==2)?
hist->GetZaxis() : (j==1)?
hist->GetYaxis() :
hist->GetXaxis();
866 if(useDefaults &&
name ==
"eta" &&
axis->GetBinLowEdge(1) >= 0)
name =
"|eta|";
867 table.m_dimensions.emplace_back();
868 auto&
dim =
table.m_dimensions.back();
879 else throw(
XmlError(j? paramY : paramX) <<
"parameter \"" <<
name <<
"\" has not been defined beforehand");
886 dim.iMinBound =
table.m_bounds.size();
887 dim.nBounds =
axis->GetNbins() + 1;
889 table.m_bounds.emplace_back();
891 else table.m_bounds.back().as_float = std::numeric_limits<float>::lowest();
892 for(
int k=1;
k<
dim.nBounds-1;++
k)
894 table.m_bounds.emplace_back();
896 else table.m_bounds.back().as_float =
axis->GetBinUpEdge(
k);
898 table.m_bounds.emplace_back();
905 const unsigned xmax =
table.m_dimensions.front().nBounds;
906 const unsigned ymax =
table.m_dimensions.size()>1?
table.m_dimensions[1].nBounds : 2;
907 const unsigned zmax =
table.m_dimensions.size()>2?
table.m_dimensions[2].nBounds : 2;
912 table.m_efficiencies.emplace_back();
913 auto&
eff =
table.m_efficiencies.back();
920 eff.uncertainties.emplace(uid, uncdata);
927 if(!
m_tables[
type].
size())
throw(
GenericError() <<
"there should be another histogram containing central values to accompany the histogram " <<
hist->GetName());
929 const int xmax =
table.m_dimensions.front().nBounds;
930 const int ymax =
table.m_dimensions.size()>1?
table.m_dimensions[1].nBounds : 2;
931 const int zmax =
table.m_dimensions.size()>2?
table.m_dimensions[2].nBounds : 2;
934 throw(
GenericError() <<
"binning mismatch between the nominal histogram and " <<
hist->GetName());
942 itr->affects.set(
type);
953 bool syst_central_equal_nom_central =
true;
954 bool syst_errors_equal_zero =
true;
955 bool syst_errors_equal_nom_errors =
true;
957 auto eff =
table.m_efficiencies.begin();
962 if (fabs ((
float)
eff->nominal - (
float)
hist->GetBinContent(
x,
y,
z)) > 0.001 ){ syst_central_equal_nom_central =
false;}
963 if (
hist->GetBinError(
x,
y,
z) != 0 ) { syst_errors_equal_zero =
false;}
965 for(
auto& kv :
eff->uncertainties)
968 stat_up = kv.second.up;
break;
970 if ( fabs((
float)
hist->GetBinError(
x,
y,
z) - (
float) stat_up ) > 0.001) { syst_errors_equal_nom_errors =
false;}
990 if (syst_central_equal_nom_central){
991 if (syst_errors_equal_nom_errors ){
992 throw(
GenericError() <<
"The central values and uncertainties for this systematic are identical to the nominal+stat uncertainties. This is ambiguous: did you mean to assign a 100% uncertainty? If so, please set all (unused) error bars to zero. ");
993 }
else if (syst_errors_equal_zero ) {
1004 if(!
eff->uncertainties.emplace(uid, uncdata).second)
1006 throw(
GenericError() <<
"unexpected error: tried filling twice the same systematic");
1014 #ifdef FAKEBKGTOOLS_ATLAS_ENVIRONMENT
1015 float energy_scale = (
m_useGeV? 0.001f : 1.f);
1017 float energy_scale = 1;
1023 if(param.
name==
"pt")
val.as_float = energy_scale *
p.pt();
1024 else if(param.
name==
"eta")
val.as_float =
p.eta();
1025 else if(param.
name==
"|eta|")
val.as_float = fabs(
p.eta());
1026 else if(param.
name==
"phi")
val.as_float =
p.phi();
1043 val.as_float =
acc(eventInfo);
1047 val.as_int =
acc(eventInfo);
1086 std::bitset<N_EFFICIENCY_TYPES>
result;
1089 result[ELECTRON_REAL_EFFICIENCY] =
true;
1090 result[MUON_REAL_EFFICIENCY] =
true;
1091 result[TAU_REAL_EFFICIENCY] =
true;
1092 result[ELECTRON_FAKE_EFFICIENCY] =
true;
1093 result[MUON_FAKE_EFFICIENCY] =
true;
1094 result[TAU_FAKE_EFFICIENCY] =
true;
1098 result[ELECTRON_FAKE_FACTOR] =
true;
1099 result[MUON_FAKE_FACTOR] =
true;
1100 result[TAU_FAKE_FACTOR] =
true;
1104 result[PHOTON_ELE_FAKE_FACTOR] =
true;
1105 result[PHOTON_ELE_FAKE_FACTOR_SF] =
true;
1107 if(
result.none())
throw(
GenericError() <<
"unrecognized client type, implementation incomplete");
1113 auto tables =
m_tables.find(wantedType);
1168 return std::string(
beg,
end+1);