16 #include <dqm_core/AlgorithmConfig.h>
17 #include <dqm_core/AlgorithmManager.h>
53 const TObject &
object,
54 const dqm_core::AlgorithmConfig &
config )
59 if (
object.
IsA()->InheritsFrom(
"TH2") ) {
60 throw dqm_core::BadConfig( ERS_HERE ,
name ,
" dimension > 1 ");
64 if (
object.
IsA()->InheritsFrom(
"TH1") ) {
67 throw dqm_core::BadConfig( ERS_HERE ,
name ,
" does not inherit from TH1");
76 std::ostringstream
msg;
77 msg <<
" Parameter \"SkipBinModulo\" is out of bounds. Value given = " << skipmod;
78 msg <<
"\n Range of values acceptable: integer >= 1";
79 msg <<
"\n Default Value: skip none. Do not set \"SkipBinModulo\" in your config to use default";
80 throw dqm_core::BadConfig(ERS_HERE,
name,
msg.str());
85 if (signL < 0.01 || signL > 0.95) {
86 std::ostringstream
msg;
87 msg <<
" Parameter \"Significance\" is out of bounds. Value given = " << signL;
88 msg <<
"\n Range of values acceptable: floating point number [0.01, 0.95]";
89 msg <<
"\n Default Value: 0.04. Do not set \"Significance\" in your config to use default";
90 throw dqm_core::BadConfig(ERS_HERE,
name,
msg.str());
96 std::ostringstream
msg;
97 msg <<
" Parameter \"MinStatPerBin\" is out of bounds. Value given = " << signL;
98 msg <<
"\n Range of values acceptable: integer >= 0";
99 msg <<
"\n Default Value: 10. Do not set \"MinStatPerBin\" in your config to use default";
100 throw dqm_core::BadConfig(ERS_HERE,
name,
msg.str());
105 const double notFound = -99999;
108 const int minbin = (
xmin == notFound) ? 1 :
histogram->GetXaxis()->FindBin(
xmin);
110 ERS_DEBUG(1,
"xmin = " <<
xmin <<
", xmax = " <<
xmax <<
" , minbin = " << minbin <<
", maxbin = " << maxbin);
114 ERS_DEBUG(1,
"useMinStat = " << useMinStat);
122 }
catch ( dqm_core::Exception & ex ) {
123 throw dqm_core::BadConfig(ERS_HERE,
name,
"Paramter: 'Threshold' is mandatory, cannot continue");
126 if ( grThr > 1.0 || reThr > 1.0) {
127 throw dqm_core::BadConfig(ERS_HERE,m_name,
"Configuration Error: Threshold>100%");
132 std::vector<float> binCountVec;
133 std::vector<size_t> binIndices;
136 for(
int k = minbin;
k < maxbin;
k++) {
138 if (
k % skipmod == 0) {}
141 binCountVec.push_back(binval);
142 if((binval <= minStat) && useMinStat) {
143 binIndices.push_back(binc);
150 int minOutliers = binIndices.size();
151 int maxOutliers, nOutliers = minOutliers;
152 int nbinsDenominator = binCountVec.size();
156 std::ostringstream
msg;
159 maxOutliers =
GrubbsTest(binCountVec,binIndices,signL);
162 msg<<
"maxOutliers = " << maxOutliers <<
"\t bincountVecsize = " << nbinsDenominator <<
"\t";
163 if(binIndices.size() <= binCountVec.size())
164 for(
size_t g= 0;
g < binIndices.size();
g++)
165 msg << binIndices[
g] <<
":" << binCountVec[binIndices[
g]] <<
"\n";
166 ERS_DEBUG(1,
msg.str());
168 }
catch ( dqm_core::Exception & ex ) {
169 msg <<
" Exception caught in GrubbsTest \n";
170 throw dqm_core::BadConfig(ERS_HERE,
name,
msg.str());
174 if(maxOutliers >= 0) {
175 nOutliers += (useMinStat ? maxOutliers : 0);
177 result->tags_.insert(std::make_pair(
"NumOfOutliers",nOutliers));
180 float totalBins = nbinsDenominator > 0 ? nbinsDenominator :
histogram->GetNbinsX();
181 float outlierFraction = 0.;
183 bool IsInf = isinf(
float(nOutliers) / totalBins);
184 bool IsNaN = isnan(
float(nOutliers) / totalBins);
186 bool IsInf = std::isinf(
float(nOutliers) / totalBins);
187 bool IsNaN = std::isnan(
float(nOutliers) / totalBins);
190 outlierFraction =
float(nOutliers) / totalBins;
192 result->tags_.insert(std::make_pair(
"OutlierFraction",outlierFraction));
194 msg <<
"Outlier fraction = "<< nOutliers <<
"/" << totalBins <<
" = " << outlierFraction;
195 ERS_DEBUG(1,
msg.str());
201 if ( outlierFraction>reThr )
203 ERS_DEBUG(1,
"[RED] Result : "<<outlierFraction);
207 else if ( outlierFraction > grThr )
209 ERS_DEBUG(1,
"[YELLOW] Result : "<<outlierFraction);
210 result->status_=dqm_core::Result::Yellow;
214 if ( outlierFraction < reThr )
216 ERS_DEBUG(1,
"[RED] Result : "<<outlierFraction);
220 else if ( outlierFraction < grThr )
222 ERS_DEBUG(1,
"[YELLOW] Result : "<<outlierFraction);
223 result->status_=dqm_core::Result::Yellow;
229 ERS_DEBUG(1,
"[GREEN] Result");
245 if(
vec.empty())
return -1;
247 size_t nItems =
vec.size();
248 if(
vec.size() < 2)
return -2;
250 float epsilon = 1.e-5;
251 if(signLevel > 1.0)
return -3;
253 if((signLevel-0.001) < epsilon)
return -4;
255 bool notEmpty = !binIndices.empty();
259 for(
size_t j = 0; j < nItems; j++)
mean +=
vec[j];
264 for(
size_t j = 0; j < nItems; j++) {
271 float zval = 0., tval = 0.,
prob = 0.;
274 for(
size_t j = 0; j < nItems; j++) {
280 tval = std::sqrt(nItems*(nItems-2)*(zval*zval)/((nItems-1)*(nItems-1)-nItems*(zval*zval)));
284 prob = 2*(1. - TMath::StudentI(tval,nItems-2));
287 if(
prob < signLevel) {
290 for(
size_t k = 0;
k < binIndices.size();
k++) {
291 if(binIndices[
k] == j) { storeMe =
false;
break; }
294 binIndices.push_back(j);
298 binIndices.push_back(j);
311 out <<
"-------------- Grubbs' Test for Outliers ----------------------------------------------------" << std::endl;
312 out <<
"Identify outlier bins in a 1d histogram. Given N = {n1, n2, n3...} where n1 = frequency in 1st bin etc..\n";
313 out <<
"Define test statistic: Z_i = |n_i - mean| / sigma, where mean = sum(n_i)/sizeof(N) and sigma = standard deviation of N\n";
314 out <<
"For each Z_i calculate the critical value (tval) of the test-statistic and find out its corresponding p-value. if p-value\n";
315 out <<
"p-value is calculated using \"two-sided t-distribution\" pdf with sizeof(N)-2 degrees of freedom.\n";
316 out <<
"If p-value <= significance, reject null hypothesis (H_0 = no outliers).\n";
317 out <<
"----------------------------------------------------------------------------------------------" << std::endl;
318 out <<
"Parameters : IsMinStatOutlier, Min, Max, MinStatPerBin, Significance, SkipBinModulo\n";
319 out <<
"----------------------------------------------------------------------------------------------" << std::endl;
320 out <<
"IsMinStatOutlier: If bin contents are less than MinStatPerBin, is it an outlier? [type = bool] [values = 0, 1] [ default: 1]\n";
321 out <<
"Min, Max : Process bin numbers corresponding to Min, Max (on x-axis). [type = float] [default: all bins]\n";
322 out <<
"MinStatPerBin : Min number of entries per bin in order to be considered for testing. [type = integer] [values >= 0] [default = 9]\n";
323 out <<
"Significance : Null hypothesis (no outliers) is rejected if critical value > significance [type = float] [range = [0.04, 0.99], default = 0.04]\n";
324 out <<
"SkipBinModulo : Skip every N'th bin. [type = integer] [values = [0, #of bins in 1d hist], default: skip none]\n";
325 out <<
"----------------------------------------------------------------------------------------------" << std::endl;