ATLAS Offline Software
Loading...
Searching...
No Matches
physvalPostProcessingTools.py
Go to the documentation of this file.
1# Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
2
3import ROOT
4import yaml
5import logging
6
7crash_on_error = False
8
9
10def find_histo(root_file, path, should_delete=False):
11 obj = root_file.Get(path)
12 if not obj:
13 if crash_on_error:
14 raise RuntimeError(f"Object not found at path: {path}")
15 else:
16 logging.warning(f"Object not found at path: {path}")
17 return None
18 if should_delete:
19 # Extract directory and name for deletion
20 path_parts = path.strip("/").split("/")
21 *dirs, name = path_parts
22 current_dir = root_file
23 for d in dirs:
24 current_dir = current_dir.Get(d)
25 if not current_dir:
26 if crash_on_error:
27 raise RuntimeError(f"Directory '{d}' not found in path '{'/'.join(dirs)}' for deletion.")
28 else:
29 logging.warning(f"Directory '{d}' not found in path '{'/'.join(dirs)}' for deletion.")
30 return None
31 # logging.info(f"Deleting '{name}' in '{'/'.join(dirs)}'")
32 current_dir.Delete(f"{name};*")
33 return None
34 return obj
35
36
37def ensure_directory_exists(root_file, path):
38 current = root_file
39 for part in path.strip("/").split("/"):
40 next_dir = current.Get(part)
41 if not next_dir:
42 logging.info(f"Saving directory not exist. Creating directory '{part}' in '{current.GetName()}'")
43 current.mkdir(part)
44 current.cd(part)
45 current = ROOT.gDirectory
46
48 def __init__(self):
49 self.configs = {}
50
51 def add_configuration(self, name, config_file):
52 with open(config_file, "r") as f:
53 config_data = yaml.safe_load(f)
54 self.configs[name] = config_data
55 logging.info(f"Configuration for '{name}' loaded from {config_file}")
56
57 def get_config(self, name):
58 """Returns the config for a specific domain (e.g., 'Electron')"""
59 return self.configs.get(name, {})
60
61 def get_all_configs(self):
62 """Returns all configs as (name, config) pairs"""
63 return self.configs.items()
64
66 def __init__(self, input, output, rebin_factor, delete_original=False):
67 self.input = input
68 self.output = output
69 self.rebin_factor = rebin_factor
70 self.delete_original = delete_original
71
72 def __call__(self, root_file):
73 hist = find_histo(root_file, self.input)
74 if not hist:
75 if crash_on_error:
76 raise RuntimeError(f"Histogram '{self.input}' not found for rebinning.")
77 else:
78 logging.error(f"Histogram '{self.input}' not found for rebinning.")
79 return
80
81 path_parts = self.output.strip("/").split("/")
82 *dir_parts, output_name = path_parts
83 directory_path = "/".join(dir_parts)
84 ensure_directory_exists(root_file, directory_path)
85 root_file.cd(directory_path)
86
87 if hist.GetDimension() == 1:
88 rebinned = hist.Rebin(self.rebin_factor, output_name)
89 rebinned.Write(output_name, ROOT.TObject.kOverwrite)
90 logging.info(f"Rebinned 1D histogram and saved as '{output_name}' in '{directory_path}'.")
91 elif hist.GetDimension() == 2:
92 hist.RebinX(self.rebin_factor)
93 hist.RebinY(self.rebin_factor)
94 hist.SetName(output_name)
95 hist.Write(output_name, ROOT.TObject.kOverwrite)
96 logging.info(f"Rebinned 2D histogram and saved as '{output_name}' in '{directory_path}'.")
97 if self.delete_original:
98 find_histo(root_file, self.input, should_delete=True)
99
100 @staticmethod
101 def from_yaml(fragment):
102 return RebinningOperation(
103 input=fragment["input"],
104 output=fragment["output"],
105 rebin_factor=fragment["rebin"],
106 delete_original=fragment.get("delete_original", False)
107 )
108
110 def __init__(self, numerator, denominator, output):
111 self.numerator = numerator
112 self.denominator = denominator
113 self.output = output
114
115 def __call__(self, root_file):
116 num = find_histo(root_file, self.numerator)
117 den = find_histo(root_file, self.denominator)
118 if not num or not den:
119 if crash_on_error:
120 raise RuntimeError(f"Missing histogram: {self.numerator} or {self.denominator}")
121 else:
122 logging.error(f"Missing histogram: {self.numerator} or {self.denominator}")
123 return
124
125 if not ROOT.TEfficiency.CheckConsistency(num, den):
126 logging.warning("Inconsistency detected between numerator and denominator histograms.")
127 logging.info("Attempting to fix...")
128 for i in range(1, num.GetNbinsX() + 1):
129 num_val = num.GetBinContent(i)
130 den_val = den.GetBinContent(i)
131 if den_val == 0:
132 den.SetBinContent(i, 1e-6)
133 den_val = 1e-6 # avoid zero denominator by assigning a tiny epsilon
134 logging.info(f"Setting denominator to a smaller value for bin {i} as it is zero. Any other options?")
135 if num_val > den_val:
136 logging.info(f"For Bin {i}: Num ({num_val}) > Den ({den_val})! Adjusting by setting Den to Num. Any other options?")
137 den.SetBinContent(i, num_val)
138 if not ROOT.TEfficiency.CheckConsistency(num, den):
139 logging.error("Unable to fix histogram inconsistencies. Aborting efficiency calculation.")
140 return
141
142 eff = ROOT.TEfficiency(num, den)
143 path_parts = self.output.strip("/").split("/")
144 *dir_path, obj_name = path_parts
145 directory_path = "/".join(dir_path)
146 eff.SetName(obj_name)
147 eff.SetTitle(f"{self.numerator}/{self.denominator};{num.GetXaxis().GetTitle()};Efficiency")
148 ensure_directory_exists(root_file, directory_path)
149 root_file.cd(directory_path)
150 eff.Write()
151 logging.info(f"Saved efficiency '{obj_name}' in '{directory_path}'.")
152
153 @staticmethod
154 def from_yaml(fragment):
156 numerator=fragment["numerator"],
157 denominator=fragment["denominator"],
158 output=fragment["output"]
159 )
160
162 # Computes the s68 and s90 resolution widths for a histogram.
163 def __init__(self, histogram_name):
164 self.histogram_name = histogram_name
165
166 def __call__(self, root_file):
167 hist = find_histo(root_file, self.histogram_name)
168 if not hist:
169 if crash_on_error:
170 raise RuntimeError(f"Histogram '{self.histogram_name}' not found.")
171 else:
172 logging.error(f"Histogram '{self.histogram_name}' not found.")
173 return
174
175 # Step 1: Compute normalized cumulative sum
176 cum = [hist.GetBinContent(i) for i in range(0, hist.GetNbinsX() + 2)]
177 for i in range(1, len(cum)):
178 cum[i] += cum[i - 1]
179 total = cum[-1] or 1.0
180 cum = [x / total for x in cum]
181
182 # Step 2: Compute smallest intervals
183 s68 = self._compute_s(hist, cum, 0.683)
184 s90 = self._compute_s(hist, cum, 0.90)
185
186 logging.info(f"Resolution widths for {self.histogram_name} -> s68: {s68:.4f}, s90: {s90:.4f}")
187 logging.info("Todo: Take a 2D histo, take the quantity say xaxis, make its small intervals %s", "and then compute the Resolution widths and then save as a 1D histo with quantity on xaxis and Resolution widths on yaxis")
188
189 def _compute_s(self, hist, cum, prob):
190 # Compute the smallest interval that contains the given probability.
191 min_width = float("inf")
192 best = (0, 0)
193 for i in range(len(cum)):
194 target = cum[i] + prob
195 if target > 1:
196 break
197 for j in range(i + 1, len(cum)):
198 if cum[j] >= target:
199 width = hist.GetBinCenter(j) - hist.GetBinCenter(i)
200 if width < min_width:
201 min_width = width
202 best = (i, j)
203 break
204 return 0.5 * (hist.GetBinCenter(best[1]) - hist.GetBinCenter(best[0]))
205
206 @staticmethod
207 def from_yaml(fragment):
209 histogram_name=fragment["histogram"]
210 )
211
213 def __init__(self, signal_path, background_path, output):
214 self.signal_path = signal_path
215 self.background_path = background_path
216 self.output = output
217
218 def __call__(self, root_file):
219 sig = find_histo(root_file, self.signal_path)
220 bkg = find_histo(root_file, self.background_path)
221
222 if not sig or not bkg:
223 if crash_on_error:
224 raise RuntimeError(f"Missing histogram: {self.signal_path} or {self.background_path}")
225 else:
226 logging.error(f"Missing histogram: {self.signal_path} or {self.background_path}")
227 return
228
229 graph = self.compute_roc(sig, bkg)
230 if not graph:
231 logging.error("ROC graph is empty or could not be constructed.")
232 return
233
234 path_parts = self.output.strip("/").split("/")
235 *dir_path, obj_name = path_parts
236 directory_path = "/".join(dir_path)
237
238 graph.SetName(obj_name)
239 graph.SetTitle(f"{obj_name};Signal Efficiency;Background Rejection")
240
241 ensure_directory_exists(root_file, directory_path)
242 root_file.cd(directory_path)
243 graph.Write()
244 logging.info(f"Saved ROC curve '{obj_name}' in '{directory_path}'.")
245
246 def compute_roc(self, sig, bkg):
247 n_bins = sig.GetNbinsX()
248 total_sig = sig.Integral()
249 total_bkg = bkg.Integral()
250
251 x_vals, y_vals = [], []
252 if total_sig == 0 or total_bkg == 0:
253 logging.error("Zero total signal or background counts; cannot compute ROC.")
254 return None
255
256 for cut_bin in range(1, n_bins + 1): # Loops over histogram bins, each corresponding to a cut value on the tagger output.
257 sig_pass = sig.Integral(cut_bin, n_bins)
258 bkg_pass = bkg.Integral(cut_bin, n_bins)
259
260 eff = sig_pass / total_sig # Signal efficiency eff = (number of b-jets passing cut) / (total b-jets).
261
262 if bkg_pass <= 0:
263 # logging.info(f"Skipping cut bin {cut_bin} due to zero or negative background pass count: {bkg_pass}. Any other options?")
264 continue
265
266 rej = total_bkg / bkg_pass # Background rejection rej = (total background jets) / (number of background jets passing cut).
267
268 x_vals.append(eff)
269 y_vals.append(rej)
270
271 if not x_vals:
272 return None
273
274 graph = ROOT.TGraphErrors(len(x_vals))
275 for i, (x, y) in enumerate(zip(x_vals, y_vals)):
276 graph.SetPoint(i, x, y)
277
278 return graph
279
280 @staticmethod
281 def from_yaml(fragment):
282 return ROCCurveComputation(
283 signal_path=fragment["signal"],
284 background_path=fragment["background"],
285 output=fragment["output"]
286 )
287
289 def __init__(self, input_hist, bins_x, bins_y, projection_axis, output=None):
290 self.input_hist = input_hist
291 self.bins_x = bins_x
292 self.bins_y = bins_y
293 self.x_proj = projection_axis == "x"
294 self.y_proj = projection_axis == "y"
295 self.output = output
296
297 def __call__(self, root_file):
298 hist = find_histo(root_file, self.input_hist)
299 if not hist:
300 if crash_on_error:
301 raise RuntimeError(f"Histogram '{self.input_hist}' not found.")
302 else:
303 logging.error(f"Histogram '{self.input_hist}' not found.")
304 return
305
306 b1, b2 = hist.GetYaxis().FindBin(self.bins_x[0]), hist.GetYaxis().FindBin(self.bins_x[1])
307 b3, b4 = hist.GetXaxis().FindBin(self.bins_y[0]), hist.GetXaxis().FindBin(self.bins_y[1])
308
309 path_parts = self.output.strip("/").split("/")
310 *dir_path, output_name = path_parts
311 directory_path = "/".join(dir_path)
312 ensure_directory_exists(root_file, directory_path)
313 root_file.cd(directory_path)
314
315 if self.x_proj:
316 proj_x = hist.ProjectionX(output_name, b1, b2).Clone(output_name)
317 self.project(proj_x, hist.GetXaxis().GetTitle(), "X")
318 if self.y_proj:
319 proj_y = hist.ProjectionY(output_name, b3, b4).Clone(output_name)
320 self.project(proj_y, hist.GetYaxis().GetTitle(), "Y")
321
322 def project(self, projection, axis_title, axis):
323 projection.SetTitle(f";{axis_title};Entries")
324 projection.SetStats(False)
325 projection.Write()
326 logging.info(f"Saved projection by integrating over {axis} axis as '{projection.GetName()}'.")
327
328 @staticmethod
329 def from_yaml(fragment):
331 input_hist=fragment["hist"],
332 bins_x=fragment.get("bins_projection_x", [10, 200]),
333 bins_y=fragment.get("bins_projection_y", [-2.47, 2.47]),
334 projection_axis=fragment.get("projection_axis", "x"),
335 output=fragment.get("output")
336 )
337
339 def __init__(self, histo1, histo2, output, delete_inputs=False):
340 self.histo1 = histo1
341 self.histo2 = histo2
342 self.output = output
343 self.delete_inputs = delete_inputs
344
345 def __call__(self, root_file):
346 histo1 = find_histo(root_file, self.histo1)
347 histo2 = find_histo(root_file, self.histo2)
348 if not histo1 or not histo2:
349 if crash_on_error:
350 raise RuntimeError(f"Missing histograms: {self.histo1} or {self.histo2}")
351 else:
352 logging.error(f"Missing histograms: {self.histo1} or {self.histo2}")
353 return
354
355 path_parts = self.output.strip("/").split("/")
356 *dir_path, output_name = path_parts
357 directory_path = "/".join(dir_path)
358 summed = histo1.Clone(output_name)
359 summed.Add(histo2)
360 ensure_directory_exists(root_file, directory_path)
361 root_file.cd(directory_path)
362 summed.Write(output_name, ROOT.TObject.kOverwrite)
363 logging.info(f"Saved added histogram '{output_name}' in '{directory_path}'.")
364 if self.delete_inputs:
365 # logging.info(f"Deleting input histograms '{self.histo1}' and '{self.histo2}' after addition.")
366 find_histo(root_file, self.histo1, should_delete=True)
367 find_histo(root_file, self.histo2, should_delete=True)
368
369 @staticmethod
370 def from_yaml(fragment):
371 return HistogramAddition(
372 histo1=fragment["histo1_name"],
373 histo2=fragment["histo2_name"],
374 output=fragment["output"],
375 delete_inputs=fragment.get("delete_inputs", False)
376 )
377
379 def __init__(self, input_hist, new_path, x_label=None, y_label=None, xlow=None, xhigh=None):
380 self.input_hist = input_hist
381 self.new_path = new_path
382 self.x_label = x_label
383 self.y_label = y_label
384 self.xlow = xlow
385 self.xhigh = xhigh
386
387 def __call__(self, root_file):
388 ROOT.TH1.AddDirectory(False)
389 hist = find_histo(root_file, self.input_hist)
390 if not hist:
391 if crash_on_error:
392 raise RuntimeError(f"Histogram '{self.input_hist}' not found.")
393 else:
394 logging.error(f"Histogram '{self.input_hist}' not found.")
395 return
396
397 path_parts = self.new_path.strip("/").split("/")
398 *dir_path, new_name = path_parts
399 directory_path = "/".join(dir_path)
400 ensure_directory_exists(root_file, directory_path)
401 root_file.cd(directory_path)
402
403 if self.xlow is None or self.xhigh is None:
404 h_out = hist.Clone(new_name)
405 else:
406 ax = hist.GetXaxis()
407 bin_low = max(1, min(ax.FindBin(self.xlow), ax.GetNbins()))
408 bin_high = max(1, min(ax.FindBin(self.xhigh), ax.GetNbins()))
409 nbins = bin_high - bin_low
410 h_out = ROOT.TH1D(new_name, hist.GetTitle(), nbins + 1, float(self.xlow), float(self.xhigh))
411 for i in range(nbins + 1):
412 h_out.SetBinContent(i + 1, hist.GetBinContent(i + bin_low))
413
414 if self.x_label:
415 h_out.GetXaxis().SetTitle(self.x_label)
416 if self.y_label:
417 h_out.GetYaxis().SetTitle(self.y_label)
418 h_out.Write(new_name, ROOT.TObject.kOverwrite)
419 logging.info(f"Renamed and updated histogram '{self.input_hist}' → '{self.new_path}' in '{directory_path}' | X-axis: '{self.x_label}' | Y-axis: '{self.y_label}' | Range: ({self.xlow}, {self.xhigh})")
420
421 @staticmethod
422 def from_yaml(fragment):
423 return HistogramAdjustment(
424 input_hist=fragment["hist"],
425 new_path=fragment.get("final_histo_name", fragment["hist"]),
426 x_label=fragment.get("x_axis_label"),
427 y_label=fragment.get("y_axis_label"),
428 xlow=fragment.get("xlow"),
429 xhigh=fragment.get("xhigh")
430 )
431
433 def __init__(self, toDelete):
434 self.toDelete = toDelete
435
436 def __call__(self, root_file):
437 find_histo(root_file, self.toDelete, should_delete=True)
438
439 @staticmethod
440 def from_yaml(fragment):
441 return HistogramBlackList(
442 toDelete=fragment["toDelete"]
443 )
#define min(a, b)
Definition cfImp.cxx:40
#define max(a, b)
Definition cfImp.cxx:41
__init__(self, histo1, histo2, output, delete_inputs=False)
__init__(self, input_hist, new_path, x_label=None, y_label=None, xlow=None, xhigh=None)
__init__(self, input_hist, bins_x, bins_y, projection_axis, output=None)
__init__(self, input, output, rebin_factor, delete_original=False)
T * get(TKey *tobj)
get a TObject* from a TKey* (why can't a TObject be a TKey?)
Definition hcg.cxx:130
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
ensure_directory_exists(root_file, path)
— HELPER — ###
find_histo(root_file, path, should_delete=False)
— HELPER — ###