3 __author__ =
'Christopher Bock - LMU'
7 def __init__(self, root_file_name='ISF_HitAnalysispion.root', base_output_folder='RegressionOutput'):
8 """ Most of the important settings are configured/set within this function.
10 TODO: Make these options configurable via an external options file to ease the use of the script.
11 TODO: Make remaining hardcoded options configurable here.
50 "PreSamplerE",
"EME1",
"EME2",
"EME3",
51 "HEC0",
"HEC1",
"HEC2",
"HEC3",
52 "TileBar0",
"TileBar1",
"TileBar2",
53 "TileGap1",
"TileGap2",
"TileGap3",
54 "TileExt0",
"TileExt1",
"TileExt2",
55 "FCAL0",
"FCAL1",
"FCAL2"]
62 from array
import array
80 'Cumulative eta phi energy fraction distributions')
90 """ Uses the supplied histogram to fill the regression tree with values. Depending on self.log_efrac the trees
91 are either filled with the logarithm of the energy fraction or with just the energy fraction.
95 n_bins_x = cumulative_histogram.GetNbinsX()
96 n_bins_y = cumulative_histogram.GetNbinsY()
97 from past.builtins
import xrange
98 for x
in xrange(n_bins_x + 1):
99 eta_value = cumulative_histogram.GetXaxis().GetBinCenter(x)
100 for y
in xrange(n_bins_y + 1):
101 value = cumulative_histogram.GetBinContent(x, y)
105 phi_value = cumulative_histogram.GetYaxis().GetBinCenter(y)
106 r_value = math.sqrt(phi_value*phi_value + eta_value*eta_value)
126 factory = ROOT.TMVA.Factory(self.
mlp_base_file_name, output_file,
"!V:!Silent:!Color:DrawProgressBar")
128 factory.AddVariable(
"d_eta")
129 factory.AddVariable(
"d_phi")
130 factory.AddVariable(
"r")
132 factory.AddTarget(
"energy_fraction")
138 factory.PrepareTrainingAndTestTree(cut,
'nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V')
141 factory.BookMethod(ROOT.TMVA.Types.kMLP,
"MLP",
142 "!H:!V:VarTransform=Norm:NeuronType=%s:NCycles=20000:HiddenLayers=N+%i:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator"
145 factory.TrainAllMethods()
146 factory.TestAllMethods()
147 factory.EvaluateAllMethods()
154 """ This function iterates over all events and calls for each event the process_entry function. The event loop
155 breaks either once process_entry returns false or once all events in the input file have been processed.
159 print(
'--------------------------------------------')
160 print(
'Welcome to ShowerShapeRegressor.py')
163 print(
'--------------------------------------------')
168 chain = ROOT.gDirectory.Get(
'ISF_HitAnalysis/CaloHitAna')
169 entries = chain.GetEntriesFast()
171 for current_entry
in range(entries):
172 print(
' Loading entry: %i' % current_entry)
173 j = chain.LoadTree(current_entry)
178 bites_read = chain.GetEntry(current_entry)
191 """ This processes a single event in the chain. It first checks whether this event should be processed by
192 calling the take_event_into_account function. If this function returns true processing of this event continues
193 else the event will be skipped.
194 Only a single layer (self.selected_layer) will be processed. A histogram will be filled with the energy
195 distribution vs eta and phi. Corrections by the track to calo extrapolator will be applied here. Once all
196 events have been finished the histogram will be divided by its integral.
197 Once the desired number of events (self.cumulated_events_threshold) has been reached the self.create_output()
198 function will be called.
205 num_hits = len(chain.HitX)
207 eta_phi_efrac_dist = ROOT.TH2D(
'eta_phi_efrac_dist_%i' % event_number,
'eta vs phi vs efrac',
211 for i
in range(num_hits):
212 layer_id = chain.HitSampling[i]
217 pos = ROOT.TVector3(chain.HitX[i], chain.HitY[i], chain.HitZ[i])
218 eta = pos.PseudoRapidity()
221 eta_correction = chain.TTC_entrance_eta[0][layer_id]
222 phi_correction = chain.TTC_entrance_phi[0][layer_id]
224 if eta_correction < -900:
226 print(
'no valid eta_correction found')
228 if phi_correction < -900:
230 print(
'no valid phi_correction found')
232 d_eta = eta - eta_correction
235 eta_phi_efrac_dist.Fill(d_eta, d_phi, chain.HitE[i])
239 if eta_phi_efrac_dist.Integral() > 0:
240 eta_phi_efrac_dist.Scale(1.0/eta_phi_efrac_dist.Integral())
250 """ This function is used to filter events.
253 print(
' Pion does not pass energy threshold of %f skipping process_entry!' %
260 """ This function creates the average shower shape from all the energy fraction distributions and plots it. In
261 case self.do_regression is set to True, it will initialize the input trees used by TMVA (using
262 self.initialize_cumulative_trees), it will fill them (using self.fill_regression_tree) and run the regression
263 by calling self.run_regression. Once TMVA finished, regression test/control plots and our shower shape control
264 plots will be created.
272 cumulative_histogram.Add(hist)
274 if cumulative_histogram.Integral() > 0:
275 cumulative_histogram.Scale(1.0/cumulative_histogram.Integral())
279 n_bins_x = x_axis.GetNbins()
282 n_bins_y = y_axis.GetNbins()
283 for x_bin
in range(n_bins_x + 1):
284 x_center = x_axis.GetBinCenter(x_bin)
285 for y_bin
in range(n_bins_y + 1):
286 y_center = x_axis.GetBinCenter(y_bin)
288 value = hist.GetBinContent(x_bin, y_bin)
289 cumulative_histogram.Fill(x_center, y_center, value)
292 x_axis = cumulative_histogram.GetXaxis()
293 n_bins_x = x_axis.GetNbins()
295 y_axis = cumulative_histogram.GetYaxis()
296 n_bins_y = y_axis.GetNbins()
297 for x_bin
in range(n_bins_x + 1):
298 x_bin_width = x_axis.GetBinWidth(x_bin)
299 for y_bin
in range(n_bins_y + 1):
300 y_bin_width = y_axis.GetBinWidth(y_bin)
301 area = x_bin_width*y_bin_width
303 cumulative_content = cumulative_histogram.GetBinContent(x_bin, y_bin) / area
304 cumulative_histogram.SetBinContent(x_bin, y_bin, cumulative_content)
306 if cumulative_histogram.Integral() > 0:
307 cumulative_histogram.Scale(1.0/cumulative_histogram.Integral())
310 cumulative_etaphi_file_name = path.join(self.
base_output_folder,
'cumulative_eta_phi',
312 cumulative_etaphi_root_file_name = path.join(self.
base_output_folder,
'cumulative_eta_phi',
315 root_file = ROOT.TFile(cumulative_etaphi_root_file_name,
'RECREATE')
317 self.
plot_root_hist2d(cumulative_histogram.Clone(), cumulative_etaphi_file_name,
'#eta',
'#phi',
323 if cumulative_histogram.Integral() > 0:
324 cumulative_histogram.Scale(1.0/cumulative_histogram.Integral())
344 =========================================== Output Folder Functions ===============================================
345 The following are functions used to create the output folders and to obtain the names of the outputs.
346 ===================================================================================================================
353 if not path.exists(folder):
354 print(
'Output folder %s does not exist, will try to create it.' % folder)
356 if not path.exists(folder):
357 raise Exception(
'ERROR: Could not create output folder!')
359 print(
'Basic output folder successfully created :)')
382 =============================================== Plot Functions ====================================================
383 The following functions areused to create the plots. The "plot_regression_control_plots" function is a
384 reimplementation of a similar function found in TMVA's examples. It searches for TMVA's control plots inside the
385 root file and plots them.
386 ===================================================================================================================
390 """ This is a helper function to create the eta phi histograms either with equidistant or non-equidistant
391 binning depending on the settings. Please use this function to avoid code duplication.
396 eta_phi_histogram = ROOT.TH2D(name, title,
402 x_binning = [-0.4, -0.1, -0.05, -0.04, -0.03, -0.02, -0.016, -0.014, -0.012, -0.01, -0.008, -0.006,
403 -0.004, -0.002, 0, 0.002, 0.004, 0.006, 0.008, 0.01, 0.012, 0.014, 0.016, 0.02, 0.03, 0.04,
405 y_binning = x_binning
407 eta_phi_histogram = ROOT.TH2D(name, title,
408 len(x_binning) - 1, array.array(
'd', x_binning),
409 len(y_binning) - 1, array.array(
'd', y_binning))
411 return eta_phi_histogram
414 """ This function uses the input histogram's GetRandom2 function to draw random numbers according to the
415 distribution found in the input histogram and fills 50000 entries into a new histogram and in the end prints
420 ROOT.gROOT.SetBatch(
True)
422 ROOT.gStyle.SetNumberContours(999)
424 d_eta = ROOT.Double(0)
425 d_phi = ROOT.Double(0)
428 'eta vs phi vs efrac - using TH2F::GetRandom2')
431 accepted_points_target = 50000
432 while accepted_points < accepted_points_target:
433 histogram.GetRandom2(d_eta, d_phi)
435 eta_phi_efrac_dist.Fill(d_eta, d_phi, 1)
438 if eta_phi_efrac_dist.Integral() > 0:
439 eta_phi_efrac_dist.Scale(1.0/eta_phi_efrac_dist.Integral())
448 '#eta',
'#phi', inverted_ratio=
True)
453 """ This function uses the NN obtained by TMVA to plot the fitted shower shape and the ratio to the reference.
454 In case randomize_pos is set to true (which is the default), random positions (flat distributed) in the eta-phi
455 plane will drawn. The NN will be evaluated there and the value will be filled into the output histogram. This
456 method is faster than drawing a third random number and accepting the point based on whether this rnd number is
457 greater than the NN at this point or not.
458 If randomize_pos is set to false, a histogram will be filled by evaluating the NN at each point and filling the
459 value into the histogram.
463 ROOT.gROOT.SetBatch(
True)
465 ROOT.gStyle.SetNumberContours(999)
467 reader = ROOT.TMVA.Reader(
'!Color')
469 from array
import array
470 d_eta =
array(
'f', [0.])
471 d_phi =
array(
'f', [0.])
473 reader.AddVariable(
'd_eta', d_eta)
474 reader.AddVariable(
'd_phi', d_phi)
475 reader.AddVariable(
'r', r)
480 weight_file = path.join(
'weights',
'%s_%s.weights.xml' % (base_weight_name, method_name))
491 accepted_points_target = 50000
492 while accepted_points < accepted_points_target:
493 d_eta[0] = ROOT.gRandom.Rndm() - 0.5
494 d_phi[0] = ROOT.gRandom.Rndm() - 0.5
495 r[0] = math.sqrt(d_eta[0]*d_eta[0] + d_phi[0]*d_phi[0])
500 value = math.exp(value)
502 eta_phi_efrac_dist.Fill(d_eta[0], d_phi[0], value)
505 x_axis = reference.GetXaxis()
506 y_axis = reference.GetYaxis()
507 n_bins_x = x_axis.GetNbins()
508 n_bins_y = y_axis.GetNbins()
509 for x
in range(n_bins_x + 1):
510 d_eta[0] = x_axis.GetBinCenter(x)
511 for y
in range(n_bins_y + 1):
512 d_phi[0] = y_axis.GetBinCenter(y)
513 r[0] = math.sqrt(d_eta[0]*d_eta[0] + d_phi[0]*d_phi[0])
518 value = math.exp(value)
520 eta_phi_efrac_dist.Fill(d_eta[0], d_phi[0], value)
522 if eta_phi_efrac_dist.Integral() > 0:
523 eta_phi_efrac_dist.Scale(1.0/eta_phi_efrac_dist.Integral())
542 """ This is a pythonic translation of the deviations function found in the TMVA examples.
545 ROOT.gROOT.SetBatch(
True)
548 ROOT.gStyle.SetNumberContours(999)
552 input_file = ROOT.TFile(file_name)
555 for key
in input_file.GetListOfKeys():
556 print(
'Key: %s' % key.GetName())
558 if 'Method_' not in key.GetName():
560 if not ROOT.gROOT.GetClass(key.GetClassName()).InheritsFrom(
'TDirectory'):
563 method_name = key.GetName().
replace(
'Method_',
'')
565 print(
'Now plotting control plots for method %s' % method_name)
567 directory = key.ReadObj()
569 for sub_method
in directory.GetListOfKeys():
570 print(
'sub_method: %s' % sub_method.GetName())
572 if not ROOT.gROOT.GetClass(sub_method.GetClassName()).InheritsFrom(
'TDirectory'):
575 sub_method_name = sub_method.GetName()
577 print(
'Now plotting control plots for sub-method %s' % sub_method_name)
579 sub_directory = sub_method.ReadObj()
581 for plot_key
in sub_directory.GetListOfKeys():
582 print(
'plot_key: %s' % plot_key.GetName())
584 plot_obj = plot_key.ReadObj()
585 if isinstance(plot_obj, ROOT.TH2F):
586 plot_name = plot_key.GetName()
587 if '_reg_' not in plot_name:
590 if not ((show_target
and '_tgt' in plot_name)
or
591 (
not show_target
and (
'_tgt' not in plot_name))):
594 if not ((compare_to_train
and 'train' in plot_name)
or
595 (
not compare_to_train
and 'test' in plot_name)):
604 type_string =
'training'
606 canvas = ROOT.TCanvas(
'canvas_%i' % current_canvas,
607 'Regression output deviation versus %s for method %s' % (inortarg,
608 method_name), 800, 600)
610 canvas.SetRightMargin(0.10)
612 plot_obj.SetTitle(
'Output deviation for method: %s (%s sample)' % (sub_method_name,
614 plot_obj.Draw(
'colz')
616 line = ROOT.TLine(plot_obj.GetXaxis().GetXmin(), 0, plot_obj.GetXaxis().GetXmax(), 0)
627 canvas.Print(output_name,
'pdf')
634 def plot_root_hist2d(histogram, file_name='root_hist2d.pdf', x_label='X-Axis', y_label='Y-Axis',
635 z_label='Probability', logscale=False, normalize=False, root_file=None):
637 ROOT.gROOT.SetBatch(
True)
638 ROOT.gStyle.SetOptStat(1)
641 ext = path.splitext(path.basename(file_name))[1]
643 canv = ROOT.TCanvas(
'canvas',
'test', 0, 0, 600, 600)
646 canv.SetRightMargin(0.15)
653 integral = histogram.Integral()
655 histogram.Scale(1.0 / integral)
657 histogram.GetXaxis().SetTitle(x_label)
658 histogram.GetYaxis().SetTitle(y_label)
659 histogram.GetZaxis().SetTitle(z_label)
667 histogram.Draw(
'COLZ')
670 canv.Print(file_name, ext)
673 histogram.SetDirectory(root_file)
679 ========================================== Utility Functions ======================================================
680 The following section will contain utility functions to ease some repetitive tasks. Those functions should be self-
682 ===================================================================================================================
687 clone = reference.Clone(reference.GetName() +
'diff_to_ref_clone')
690 clone = histogram.Clone(histogram.GetName() +
'diff_to_ref_clone')
693 clone.Add(base, -1.0)
697 z_label =
'reference - output / output'
699 z_label =
'output - reference / reference'
701 self.
plot_root_hist2d(clone, output_name, x_label, y_label, z_label, logscale=
True)
706 if layer_id < 0
or layer_id >= len(self.
layer_names):
725 plotter.cumulated_events_threshold = 1000
727 plotter.neuron_type = neuron_type
728 plotter.num_neurons = num_neurons
730 plotter.log_efrac =
True
737 if __name__ ==
"__main__":
744 from distutils
import archive_util
746 archive_util.make_zipfile(
'reg_out_%s' % (time.strftime(
"%y-%m-%d_%H-%M")),
748 archive_util.make_zipfile(
'weights_%s' % (time.strftime(
"%y-%m-%d_%H-%M")),