59{
60 namespace po = boost::program_options;
61 po::options_description poDescription("Common options");
62 poDescription.add_options()
63 ("help", "produce help message")
64 ("require-same-branches", "require both trees to have the same branches")
65 ("tree-name", po::value<std::string>()->required(), "tree name")
66 (
"reference-file", po::value<std::string>()->
required(),
"reference file(s), wildcards supported")
67 ("test-file", po::value<std::string>()->required(), "test file(s), wildcards supported")
68 ("branch-name", po::value<std::string>(), "base branch name (optional)");
69
70 po::options_description poDescriptionAdvanced("Advanced options");
71 poDescriptionAdvanced.add_options()
72 ("scale", "scale histograms that both have the same event count")
73 ("rebin", "do smart rebinning")
74 ("benchmark", "benchmark the code")
75 ("verbose", "verbose logging");
76
77 po::options_description poDescriptionAll;
78 poDescriptionAll.add(poDescription).add(poDescriptionAdvanced);
79
80 po::positional_options_description poPositionalOptions;
81 poPositionalOptions.add("tree-name", 1);
82 poPositionalOptions.add("reference-file", 1);
83 poPositionalOptions.add("test-file", 1);
84 poPositionalOptions.add("branch-name", 1);
85
86 po::variables_map poVariablesMap;
87 po::store(po::command_line_parser(argc, argv)
89 .positional(poPositionalOptions).
run(),
90 poVariablesMap);
91
92 if (poVariablesMap.count("help"))
93 {
94 std::cout << "Usage: compareFlatTrees [OPTION] tree-name reference-file test-file [branch-name]" << std::endl;
95 std::cout << poDescriptionAll << std::endl;
96 return 0;
97 }
98
99 po::notify(poVariablesMap);
100
101
102 std::string
treeName = poVariablesMap[
"tree-name"].as<std::string>();
104 std::replace( treeNameOut.begin(), treeNameOut.end(), '/', '_');
105 std::string referenceInput = poVariablesMap["reference-file"].as<std::string>();
106 std::string testInput = poVariablesMap["test-file"].as<std::string>();
107 std::string baseBranchName;
108 std::string outputPDF;
109 if (poVariablesMap.count("branch-name") > 0)
110 {
111 baseBranchName = poVariablesMap["branch-name"].as<std::string>();
112 outputPDF = "comparison_" + treeNameOut + "_" + baseBranchName + ".pdf";
113 }
114 else
115 {
116 outputPDF = "comparison_" + treeNameOut + ".pdf";
117 }
118
119 bool scale = poVariablesMap.count(
"scale") > 0;
120 bool rebin = poVariablesMap.count(
"rebin") > 0;
121 bool benchmark = poVariablesMap.count("benchmark") > 0;
122 bool verbose = poVariablesMap.count(
"verbose") > 0;
123
124
125
126
127
128 gROOT->SetBatch(kTRUE);
129
131 {
133 }
134
135 gStyle->SetOptStat(0);
136
137 gStyle->SetEndErrorSize(0);
138
139 ROOT::EnableImplicitMT(4);
140
141
142 ROOT::RDataFrame dataFrameRefr(treeName, referenceInput);
143 ROOT::RDataFrame dataFrameTest(treeName, testInput);
144
145
146 auto eventsRefr{dataFrameRefr.Count()};
147 auto eventsTest{dataFrameTest.Count()};
148 double eventRatio{static_cast<float>(eventsRefr.GetValue()) / static_cast<float>(eventsTest.GetValue())};
149
150
151 auto colNamesRefr = dataFrameRefr.GetColumnNames();
152 auto colNamesTest = dataFrameTest.GetColumnNames();
153 auto colNames =
intersection(colNamesRefr, colNamesTest);
154
155 bool checkMissColumns = false;
156 std::vector<std::string> missColNamesRefr, missColNamesTest;
157 if (poVariablesMap.count("require-same-branches"))
158 {
159 checkMissColumns = true;
160 missColNamesRefr =
remainder (colNamesRefr, colNames);
161 missColNamesTest =
remainder (colNamesTest, colNames);
162 }
163
164
165 std::vector<std::string> requiredColumns;
166 std::cout << "Will attempt to plot the following columns:" << std::endl;
167 for (auto &&colName : colNames)
168 {
169 if ((baseBranchName.empty() || colName.find(baseBranchName) != std::string::npos) &&
170 (colName.find("Link") == std::string::npos) &&
171 (colName.find("m_persIndex") == std::string::npos) &&
172 (colName.find("m_persKey") == std::string::npos) &&
173 (colName.find("Parent") == std::string::npos) &&
174 (colName.find("original") == std::string::npos) &&
175 (colName.find("EventInfoAuxDyn.detDescrTags") == std::string::npos) &&
176 (dataFrameRefr.GetColumnType(colName).find("xAOD") == std::string::npos) &&
177 (dataFrameRefr.GetColumnType(colName) != "ROOT::VecOps::RVec<string>") &&
178 (!dataFrameRefr.GetColumnType(colName).starts_with("ROOT::VecOps::RVec<pair")) &&
179 (dataFrameRefr.GetColumnType(colName).find("vector") == std::string::npos))
180 {
181 requiredColumns.push_back(colName);
182 std::cout << " " << colName << " " << dataFrameRefr.GetColumnType(colName) << std::endl;
183 }
184 }
185
186
188 size_t nBinsU =
static_cast<size_t>(
nBins);
189
190
191
192 bool fileOpen{};
194 size_t failedCount{};
195 std::chrono::seconds totalDuration{};
196 std::unordered_map<std::string, ROOT::RDF::RResultPtr<double>> mapMinValuesRefr;
197 std::unordered_map<std::string, ROOT::RDF::RResultPtr<double>> mapMinValuesTest;
198 std::unordered_map<std::string, ROOT::RDF::RResultPtr<double>> mapMaxValuesRefr;
199 std::unordered_map<std::string, ROOT::RDF::RResultPtr<double>> mapMaxValuesTest;
200 std::unordered_map<std::string, ROOT::RDF::RResultPtr<TH1D>> mapHistRefr;
201 std::unordered_map<std::string, ROOT::RDF::RResultPtr<TH1D>> mapHistTest;
202
203 std::cout << "Preparing ranges..." << std::endl;
204 for (const std::string &colName : requiredColumns)
205 {
206 mapMinValuesRefr.emplace(colName, dataFrameRefr.Min(colName));
207 mapMinValuesTest.emplace(colName, dataFrameTest.Min(colName));
208 mapMaxValuesRefr.emplace(colName, dataFrameRefr.Max(colName));
209 mapMaxValuesTest.emplace(colName, dataFrameTest.Max(colName));
210 }
211
212 std::cout << "Preparing histograms..." << std::endl;
213 auto start = std::chrono::high_resolution_clock::now();
214 for (auto it = requiredColumns.begin(); it != requiredColumns.end();)
215 {
216 const std::string &colName = *
it;
217 const char* colNameCh = colName.c_str();
218
219
220 double min = std::min(mapMinValuesRefr[colName].GetValue(), mapMinValuesTest[colName].GetValue());
221 double max = std::max(mapMaxValuesRefr[colName].GetValue(), mapMaxValuesTest[colName].GetValue()) * 1.02;
222 if (std::isinf(
min) || std::isinf(
max))
223 {
224 std::cout << " skipping " << colName << " ..." << std::endl;
225 it = requiredColumns.erase(it);
226 continue;
227 } else {
229 }
230
231 if (
max > 250e3 &&
min > 0.0)
232 {
234 }
235
236
237 mapHistRefr.emplace(colName, dataFrameRefr.Histo1D({colNameCh, colNameCh, nBins, min, max}, colNameCh));
238 mapHistTest.emplace(colName, dataFrameTest.Histo1D({colNameCh, colNameCh, nBins, min, max}, colNameCh));
239 }
240 auto stop = std::chrono::high_resolution_clock::now();
241 auto duration = std::chrono::duration_cast<std::chrono::seconds>(stop - start);
243 if (benchmark)
244 {
245 std::cout <<
" Time for this step: " <<
duration.count() <<
" seconds " << std::endl;
246 std::cout << " Elapsed time: " << totalDuration.count() << " seconds (" << std::chrono::duration_cast<std::chrono::minutes>(totalDuration).count() << " minutes)" << std::endl;
247 }
248
249 if (rebin)
250 {
251 std::cout << "Rebinning histograms..." << std::endl;
252 auto start = std::chrono::high_resolution_clock::now();
253 for (const std::string &colName : requiredColumns)
254 {
255 const char* colNameCh = colName.c_str();
256 auto &histRefr = mapHistRefr[colName];
257 auto &histTest = mapHistTest[colName];
258
259
260 double min = std::min(mapMinValuesRefr[colName].GetValue(), mapMinValuesTest[colName].GetValue());
261 double max = std::max(mapMaxValuesRefr[colName].GetValue(), mapMaxValuesTest[colName].GetValue()) * 1.02;
262 if (
max > 250e3 &&
min > 0.0)
263 {
265 }
266
267
268
269 bool rangeSatisfactory{};
270 size_t rangeItrCntr{};
271 while (!rangeSatisfactory && rangeItrCntr < 10)
272 {
273 ++rangeItrCntr;
275 {
276 std::cout << std::endl
277 << " Range tuning... iteration number " << rangeItrCntr << std::endl;
278 }
279 double entriesFirstBin = histRefr.GetPtr()->GetBinContent(1);
280 double entriesLastBin = histRefr.GetPtr()->GetBinContent(nBins);
281 double entriesOtherBins{};
282 for (
size_t i{2};
i < nBinsU; ++
i)
283 {
284 entriesOtherBins += histRefr.GetPtr()->GetBinContent(i);
285 }
286 bool firstBinOK{((entriesOtherBins + entriesLastBin) / entriesFirstBin > 0.001f)};
287 bool lastBinOK{((entriesOtherBins + entriesFirstBin) / entriesLastBin > 0.001f)};
288 rangeSatisfactory = ((firstBinOK && lastBinOK) || entriesOtherBins == 0.0f);
289 if (!rangeSatisfactory)
290 {
292 {
293 std::cout <<
"Min " <<
min << std::endl;
294 std::cout <<
"Max " <<
max << std::endl;
295 std::cout << "1st " << entriesFirstBin << std::endl;
296 std::cout << "Mid " << entriesOtherBins << std::endl;
297 std::cout << "End " << entriesLastBin << std::endl;
298 std::cout << "R/F " << (entriesOtherBins + entriesLastBin) / entriesFirstBin << std::endl;
299 std::cout << "R/L " << (entriesOtherBins + entriesFirstBin) / entriesLastBin << std::endl;
300 }
301 if (!firstBinOK)
302 {
303 max = (
max -
min) /
static_cast<double>(nBins);
304 histRefr = dataFrameRefr.Histo1D({colNameCh, colNameCh,
nBins,
min,
max}, colNameCh);
305 histTest = dataFrameTest.Histo1D({colNameCh, colNameCh,
nBins,
min,
max}, colNameCh);
306 }
307 if (!lastBinOK)
308 {
309 min =
max * (1.0f - (1.0f /
static_cast<double>(
nBins)));
310 histRefr = dataFrameRefr.Histo1D({colNameCh, colNameCh,
nBins,
min,
max}, colNameCh);
311 histTest = dataFrameTest.Histo1D({colNameCh, colNameCh,
nBins,
min,
max}, colNameCh);
312 }
313 }
314 }
315 }
316
317 auto stop = std::chrono::high_resolution_clock::now();
318 auto duration = std::chrono::duration_cast<std::chrono::seconds>(stop - start);
320 if (benchmark)
321 {
322 std::cout <<
" Time for this step: " <<
duration.count() <<
" seconds " << std::endl;
323 std::cout << " Elapsed time: " << totalDuration.count() << " seconds (" << std::chrono::duration_cast<std::chrono::minutes>(totalDuration).count() << " minutes)" << std::endl;
324 }
325 }
326
327 std::cout << "Running comparisons..." << std::endl;
328 start = std::chrono::high_resolution_clock::now();
329
330
331 std::unique_ptr<TCanvas> lastCanvas;
332
333 for (const std::string &colName : requiredColumns)
334 {
336
337 std::cout <<
"Processing column " <<
counter <<
" of " << requiredColumns.size() <<
" : " << colName <<
" ... ";
338
339 auto h1 = mapHistRefr[colName].GetPtr();
340 auto h2 = mapHistTest[colName].GetPtr();
341
342 if (scale)
343 {
344 h2->Scale(eventRatio);
345 }
346 h2->SetMarkerStyle(20);
347 h2->SetMarkerSize(0.8);
348
350 {
352 }
353 auto c1 = std::make_unique<TCanvas>();
354 auto rp = std::make_unique<TRatioPlot>(h2, h1);
356 {
358 }
359
360 rp->SetH1DrawOpt(
"PE");
361 rp->SetH2DrawOpt(
"hist");
362 rp->SetGraphDrawOpt(
"PE");
364 rp->GetUpperRefXaxis()->SetTitle(colName.c_str());
365 rp->GetUpperRefYaxis()->SetTitle(
"Count");
366 rp->GetLowerRefYaxis()->SetTitle(
"Test / Ref.");
367 rp->GetLowerRefGraph()->SetMarkerStyle(20);
368 rp->GetLowerRefGraph()->SetMarkerSize(0.8);
369
371 for (
int i{};
i <
rp->GetLowerRefGraph()->GetN();
i++)
372 {
373 if (
rp->GetLowerRefGraph()->GetY()[i] != 1.0)
374 {
376 break;
377 }
378 }
379 if (valid)
380 {
381 std::cout << "PASS" << std::endl;
382 continue;
383 }
384 else
385 {
386 std::cout << "FAILED" << std::endl;
387 ++failedCount;
388 }
389
390 rp->GetLowerRefGraph()->SetMinimum(0.5);
391 rp->GetLowerRefGraph()->SetMaximum(1.5);
392 rp->GetLowYaxis()->SetNdivisions(505);
393
394 if (!fileOpen)
395 {
396
397 c1->Print((outputPDF +
"[").c_str());
398 fileOpen = true;
399 }
400
401 c1->Print(outputPDF.c_str());
403 lastCanvas = std::move(c1);
404 }
405
406 if (fileOpen)
407 {
408
409 lastCanvas->Print((outputPDF + "]").c_str());
410 lastCanvas.reset();
411 }
412
413 stop = std::chrono::high_resolution_clock::now();
414 duration = std::chrono::duration_cast<std::chrono::seconds>(stop - start);
416 if (benchmark)
417 {
418 std::cout <<
" Time for this step: " <<
duration.count() <<
" seconds " << std::endl;
419 std::cout << " Elapsed time: " << totalDuration.count() << " seconds (" << std::chrono::duration_cast<std::chrono::minutes>(totalDuration).count() << " minutes)" << std::endl;
420 }
421
422 std::cout << "========================" << std::endl;
423 std::cout << "Reference events: " << eventsRefr.GetValue() << std::endl;
424 std::cout << "Test events: " << eventsTest.GetValue() << std::endl;
425 std::cout << "Ratio: " << eventRatio << std::endl;
426 std::cout << "========================" << std::endl;
427 std::cout << "Tested columns: " << requiredColumns.size() << std::endl;
428 std::cout << "Passed: " << requiredColumns.size() - failedCount << std::endl;
429 std::cout << "Failed: " << failedCount << std::endl;
430 std::cout << "========================" << std::endl;
431 if (checkMissColumns)
432 {
433 std::cout << "Columns only in reference: " << missColNamesRefr.size();
434 for (const auto& column : missColNamesRefr)
435 std::cout <<
" " <<
column;
436 std::cout << std::endl;
437 std::cout << "Columns only in test: " << missColNamesTest.size();
438 for (const auto& column : missColNamesTest)
439 std::cout <<
" " <<
column;
440 std::cout << std::endl;
441 failedCount += missColNamesRefr.size() + missColNamesTest.size();
442 std::cout << "========================" << std::endl;
443 }
444
445 if (failedCount)
446 {
447 return 1;
448 }
449
450 return 0;
451}
std::vector< std::string > intersection(std::vector< std::string > &v1, std::vector< std::string > &v2)
std::vector< std::string > remainder(const std::vector< std::string > &v1, const std::vector< std::string > &v2)
int run(int argc, char *argv[])