ATLAS Offline Software
copyRootTree.cxx
Go to the documentation of this file.
1 /*
2 Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3 */
4 
5 #include "copyRootTree.h"
6 
7 // Root tree copy function
8 //
9 // NOTE: I don't recommend trying to understand the variable buffer
10 // classes until you see the general workflow.
11 //
12 // The main work is done in copy_root_tree below, skip down to that to
13 // understand what is going on.
14 //
15 
16 #include "HDF5Utils/HdfTuple.h"
17 
18 #include "TFile.h"
19 #include "TTree.h"
20 #include "TLeaf.h"
21 
22 #include <memory>
23 #include <regex>
24 #include <iostream>
25 #include <set>
26 #include <cmath>
27 
28 // ______________________________________________________________________
29 // Variable buffer classes
30 //
31 // These classes act as glue between HDF and ROOT: they hold pointers
32 // to the buffers that ROOT will read into, and as such are
33 // "ROOT-side" buffers.
34 //
35 // The constructors are given a reference to a `VariableFillers`
36 // container, which contains the "HDF5-side" logic. Each of the
37 // VariableFiller objects contains a "filler" function that the HDF5
38 // writer will call to read from the ROOT buffer.
39 //
40 // Some of the filler functions close over an index vector. This
41 // vector is incremented by the HDF5 Writer to read out successive
42 // entires in a std::vector stored in the root file.
43 //
44 // If this all seems complicated, it's because it's the deep internals
45 // of the code. Try to understand the copy_root_tree function first.
46 
47 // Base class, just to hold a virtual destructor
48 namespace H5Utils {
49  namespace {
50  class IBuffer
51  {
52  public:
53  virtual ~IBuffer() {}
54  };
55 
56 // Simple variables types are stored in the `Buffer` class.
57  template <typename T>
58  class Buffer: public IBuffer
59  {
60  public:
61  Buffer(VariableFillers& vars, TTree& tt, const std::string& name);
62  private:
64  };
65 
66 // Buffer for vector types
67  template <typename T>
68  class VBuf: public IBuffer
69  {
70  public:
71  // These require an index for the vector (`idx`)
72  VBuf(VariableFillers& vars, std::vector<size_t>& idx, TTree& tt,
73  const std::string& name, T default_value = T());
74  ~VBuf();
75  private:
76  std::vector<T>* m_buffer;
77  };
78 
79 // Buffer for vectors of vectors
80  template <typename T>
81  class VVBuf: public IBuffer
82  {
83  public:
84  VVBuf(VariableFillers& vars, std::vector<size_t>& idx, TTree& tt,
85  const std::string& name, T default_value = T());
86  ~VVBuf();
87  private:
88  std::vector<std::vector<T> >* m_buffer;
89  };
90 
91 
92  // Selection function
93  //
94  // Define a bool function that returns false if event doesn't
95  // pass TTreeFormula cut.
96  bool passTTreeCut(TTreeFormula* selection) {
97  // GetNdim looks to see how many valid arguements there are.
98  // Returns false for none.
99  if ( !selection || !selection->GetNdim()) return true;
100  Int_t ndata = selection->GetNdata();
101  for(Int_t current = 0; current < ndata; current++) {
102  if (selection->EvalInstance(current) == 0) return false;
103  }
104  return true;
105  }
106 
107 
108  } // close anonymous namespace
109 
110 // _____________________________________________________________________
111 // main copy root tree function
112 //
113 // The basic workflow is as follows:
114 //
115 // - Define the ROOT-side buffers that we'll read the information
116 // into. At the same time, define HDF-side objects that read out of
117 // these buffers and copy the information into the HDF5 buffer.
118 //
119 // - Build the output HDF5 files
120 //
121 // - Loop over the input tree and fill the output files
122 //
123 // Note that this means the event loop happens last: most of the work
124 // is just setting up the read and write buffers.
125 
126  void copyRootTree(TTree& tt, H5::Group& fg, const TreeCopyOpts& opts) {
127 
128  // define the buffers for root to read into
129  std::vector<std::unique_ptr<IBuffer> > buffers;
130 
131  // this keeps track of the things we couldn't read
132  std::set<std::string> skipped;
133 
134 
135  // Each `VariableFiller` must be constructed with a "filler"
136  // function (or callable object), which takes no arguments and
137  // returns the variable we want to write out. In this case they are
138  // implemented as closures over the buffers that ROOT is reading
139  // into.
140 
141  // This is the 1d variables
142  VariableFillers vars;
143  std::vector<size_t> idx_dummy;
144 
145  // These are 2d variables (i.e. vector<T> in the root file)
146  //
147  // We also need an index which the HDF5 writer increments as it
148  // fills. This is shared with the ROOT buffers to index entries in
149  // std::vectors
150  VariableFillers vars2d;
151  std::vector<size_t> idx(1,0);
152 
153  // 3d variables (index is now 2d)
154  VariableFillers vars3d;
155  std::vector<size_t> idx2(2,0);
156 
157  // Iterate over all the leaf names. There are some duplicates in the
158  // list of keys, so we have to build the set ourselves.
159  std::regex branch_filter(opts.branch_regex);
160  TIter next(tt.GetListOfLeaves());
161  TLeaf* leaf;
162  std::set<std::string> leaf_names;
163  while ((leaf = dynamic_cast<TLeaf*>(next()))) {
164  leaf_names.insert(leaf->GetName());
165  }
166  if (leaf_names.size() == 0) throw std::logic_error("no branches found");
167 
168  // Loop over all the leafs, assign buffers to each
169  //
170  // These `Buffer` classes are defined above. The buffers turn the
171  // branchs on, so we can set them all off to start.
172  tt.SetBranchStatus("*", false);
173  for (const auto& lname: leaf_names) {
174  bool keep = true;
175  if (opts.branch_regex.size() > 0) {
176  keep = std::regex_search(lname, branch_filter);
177  }
178  if (opts.verbose) {
179  std::cout << (keep ? "found " : "rejecting ") << lname << std::endl;
180  }
181  if (!keep) continue;
182 
183  leaf = tt.GetLeaf(lname.c_str());
184  std::string branchName = leaf->GetBranch()->GetName();
185  std::string leaf_type = leaf->GetTypeName();
186  if (leaf_type == "Int_t") {
187  buffers.emplace_back(new Buffer<int>(vars, tt, branchName));
188  } else if (leaf_type == "Float_t") {
189  buffers.emplace_back(new Buffer<float>(vars, tt, branchName));
190  } else if (leaf_type == "Double_t") {
191  buffers.emplace_back(new Buffer<double>(vars, tt, branchName));
192  } else if (leaf_type == "Bool_t") {
193  buffers.emplace_back(new Buffer<bool>(vars, tt, branchName));
194  } else if (leaf_type == "Long64_t") {
195  buffers.emplace_back(new Buffer<long long>(vars, tt, branchName));
196  } else if (leaf_type == "ULong64_t") {
197  buffers.emplace_back(new Buffer<unsigned long long>(vars, tt, branchName));
198  } else if (leaf_type == "UInt_t") {
199  buffers.emplace_back(new Buffer<unsigned int>(vars, tt, branchName));
200  } else if (leaf_type == "UChar_t") {
201  buffers.emplace_back(new Buffer<unsigned char>(vars, tt, branchName));
202  } else if (leaf_type == "vector<float>") {
203  buffers.emplace_back(new VBuf<float>(vars2d, idx, tt, branchName, NAN));
204  } else if (leaf_type == "vector<double>") {
205  buffers.emplace_back(new VBuf<double>(vars2d, idx, tt, branchName, NAN));
206  } else if (leaf_type == "vector<int>") {
207  buffers.emplace_back(new VBuf<int>(vars2d, idx, tt, branchName, 0));
208  } else if (leaf_type == "vector<unsigned int>") {
209  buffers.emplace_back(new VBuf<unsigned int>(vars2d, idx, tt, branchName, 0));
210  } else if (leaf_type == "vector<unsigned char>") {
211  buffers.emplace_back(new VBuf<unsigned char>(vars2d, idx, tt, branchName, 0));
212  } else if (leaf_type == "vector<bool>") {
213  buffers.emplace_back(new VBuf<bool>(vars2d, idx, tt, branchName, 0));
214  } else if (leaf_type == "vector<vector<int> >") {
215  buffers.emplace_back(new VVBuf<int>(vars3d, idx2, tt, branchName, 0));
216  } else if (leaf_type == "vector<vector<unsigned int> >") {
217  buffers.emplace_back(new VVBuf<unsigned int>(vars3d, idx2, tt, branchName, 0));
218  } else if (leaf_type == "vector<vector<unsigned char> >") {
219  buffers.emplace_back(new VVBuf<unsigned char>(vars3d, idx2, tt, branchName, 0));
220  } else if (leaf_type == "vector<vector<float> >") {
221  buffers.emplace_back(new VVBuf<float>(vars3d, idx2, tt, branchName, NAN));
222  } else if (leaf_type == "vector<vector<double> >") {
223  buffers.emplace_back(new VVBuf<double>(vars3d, idx2, tt, branchName, NAN));
224  } else if (leaf_type == "vector<vector<bool> >") {
225  buffers.emplace_back(new VVBuf<bool>(vars3d, idx2, tt, branchName, 0));
226  } else {
227  skipped.insert(leaf_type);
228  }
229  }
230 
231  // Build HDF5 Outputs
232  //
233  // In the simple case where we're not reading vectors, we store one
234  // dataset with the same name as the tree. If there are vectors, we
235  // instead create a group with the same name as the tree, and name
236  // the datasets 1d, 2d, etc.
237  //
238  const std::string tree_name = tt.GetName();
239 
240  std::unique_ptr<WriterXd> writer1d;
241  std::unique_ptr<WriterXd> writer2d;
242  std::unique_ptr<WriterXd> writer3d;
243  std::unique_ptr<H5::Group> top_group;
244  if (opts.vector_lengths.size() > 0) {
245  if (opts.vector_lengths.size() > 2) throw std::logic_error(
246  "we don't support outputs with rank > 3");
247  size_t length = opts.vector_lengths.at(0);
248  top_group.reset(new H5::Group(fg.createGroup(tree_name)));
249  if (opts.vector_lengths.size() > 1) {
250  size_t length2 = opts.vector_lengths.at(1);
251  if (vars3d.size() > 0) {
252  writer3d.reset(new WriterXd(*top_group, "3d", vars3d,
253  {length, length2}, opts.chunk_size));
254  }
255  }
256  if (vars2d.size() > 0) {
257  writer2d.reset(new WriterXd(*top_group, "2d", vars2d,
258  {length}, opts.chunk_size));
259  }
260  if (vars.size() > 0) {
261  writer1d.reset(new WriterXd(*top_group, "1d",
262  vars, {}, opts.chunk_size));
263  }
264  } else {
265  if (vars.size() > 0) {
266  writer1d.reset(new WriterXd(fg, tree_name, vars, {}, opts.chunk_size));
267  }
268  }
269 
270  // Main event loop
271  //
272  // Very little actually happens here since the buffers are already
273  // defined, as are the HDF5 reader functions.
274  //
275 
276  // Get the selection string and build a new TTreeFormula
277  std::string cut_string = opts.selection;
278  const char * cut_char = cut_string.c_str();
279  TTreeFormula *cut =0;
280  if(!cut_string.empty()){
281  // This is so a cut can be applied without requiring the
282  // branch to be output to the hdf5 file.
283  tt.SetBranchStatus("*", true);
284  cut = new TTreeFormula("selection", cut_char, &tt);
285  }
286 
287  size_t n_entries = tt.GetEntries();
288  if (opts.n_entries) n_entries = std::min(n_entries, opts.n_entries);
289  int print_interval = opts.print_interval;
290  if (print_interval == -1) {
291  print_interval = std::max(1UL, n_entries / 100);
292  }
293 
294  for (size_t iii = 0; iii < n_entries; iii++) {
295  if (print_interval && (iii % print_interval == 0)) {
296  std::cout << "events processed: " << iii
297  << " (" << std::round(iii*1e2 / n_entries) << "% of "
298  << n_entries << ")" << std::endl;
299  }
300  tt.GetEntry(iii);
301  if(cut) cut->UpdateFormulaLeaves();
302  if (!passTTreeCut(cut)) continue;
303  if (writer1d) writer1d->fillWhileIncrementing(idx_dummy);
304  if (writer2d) writer2d->fillWhileIncrementing(idx);
305  if (writer3d) writer3d->fillWhileIncrementing(idx2);
306  }
307 
308  // Flush the memory buffers on the HDF5 side. (This is done by the
309  // destructor automatically, but we do it here to make any errors
310  // more explicit.)
311  if (writer1d) writer1d->flush();
312  if (writer2d) writer2d->flush();
313  if (writer3d) writer3d->flush();
314 
315  // Print the names of any classes that we were't able to read from
316  // the root file.
317  if (opts.verbose) {
318  for (const auto& name: skipped) {
319  std::cerr << "could not read branch of type " << name << std::endl;
320  }
321  }
322  } // end copyRootTree
323 
324 // ______________________________________________________________________
325 // Buffer implementation
326 
327  namespace {
328 // 1d buffer
329 
330  template <typename T>
331  Buffer<T>::Buffer(VariableFillers& vars, TTree& tt,
332  const std::string& name)
333  {
334  tt.SetBranchStatus(name.c_str(), true);
335  tt.SetBranchAddress(name.c_str(), &m_buffer);
336  T& buf = m_buffer;
337  vars.add<T>(name, [&buf](){return buf;});
338  }
339 
340 // 2d buffer
341  template <typename T>
342  VBuf<T>::VBuf(VariableFillers& vars, std::vector<size_t>& idx, TTree& tt,
343  const std::string& name, T default_value):
344  m_buffer(new std::vector<T>)
345  {
346  tt.SetBranchStatus(name.c_str(), true);
347  tt.SetBranchAddress(name.c_str(), &m_buffer);
348  std::vector<T>& buf = *m_buffer;
349  auto filler = [&buf, &idx, default_value]() -> T {
350  if (idx.at(0) < buf.size()) {
351  return buf.at(idx.at(0));
352  } else {
353  return default_value;
354  }
355  };
356  vars.add<T>(name, filler);
357  }
358  template <typename T>
359  VBuf<T>::~VBuf() {
360  delete m_buffer;
361  }
362 
363 
364 // 3d buffers
365  template <typename T>
366  VVBuf<T>::VVBuf(VariableFillers& vars, std::vector<size_t>& idx, TTree& tt,
367  const std::string& name, T default_value):
368  m_buffer(new std::vector<std::vector<T> >)
369  {
370  tt.SetBranchStatus(name.c_str(), true);
371  tt.SetBranchAddress(name.c_str(), &m_buffer);
372  std::vector<std::vector<T> >& buf = *m_buffer;
373  auto filler = [&buf, &idx, default_value]() -> T {
374  size_t idx1 = idx.at(0);
375  size_t idx2 = idx.at(1);
376  if (idx1 < buf.size()) {
377  if (idx2 < buf.at(idx1).size()) {
378  return buf.at(idx1).at(idx2);
379  }
380  }
381  return default_value;
382  };
383  vars.add<T>(name, filler);
384  }
385  template <typename T>
386  VVBuf<T>::~VVBuf() {
387  delete m_buffer;
388  }
389 
390  } // close anonymous namespace
391 } // close H5 namespace
H5Utils::TreeCopyOpts
Definition: treeCopyOpts.h:18
fillPileUpNoiseLumi.current
current
Definition: fillPileUpNoiseLumi.py:52
TrigDefs::Group
Group
Properties of a chain group.
Definition: GroupProperties.h:13
HdfTuple.h
max
constexpr double max()
Definition: ap_fixedTest.cxx:33
min
constexpr double min()
Definition: ap_fixedTest.cxx:26
H5Utils::WriterXd::fillWhileIncrementing
void fillWhileIncrementing(std::vector< size_t > &indices)
Definition: HdfTuple.cxx:90
MuonGM::round
float round(const float toRound, const unsigned int decimals)
Definition: Mdt.cxx:27
CSV_InDetExporter.new
new
Definition: CSV_InDetExporter.py:145
H5Utils::WriterXd
WriterXd.
Definition: HdfTuple.h:154
copyRootTree.h
PrepareReferenceFile.regex
regex
Definition: PrepareReferenceFile.py:43
m_buffer
T m_buffer
Definition: copyRootTree.cxx:63
fillPileUpNoiseLumi.next
next
Definition: fillPileUpNoiseLumi.py:52
python.DecayParser.buf
buf
print ("=> [%s]"cmd)
Definition: DecayParser.py:27
ReadCalibFromCool.keep
keep
Definition: ReadCalibFromCool.py:85
vector
Definition: MultiHisto.h:13
BindingsTest.cut
cut
This script demonstrates how to call a C++ class from Python Also how to use PyROOT is shown.
Definition: BindingsTest.py:13
H5Utils::copyRootTree
void copyRootTree(TTree &tt, H5::Group &fg, const TreeCopyOpts &opts)
Definition: copyRootTree.cxx:126
H5Utils
HDF5 Tuple Writer.
Definition: common.h:20
selection
const std::string selection
Definition: fbtTestBasics.cxx:75
name
std::string name
Definition: Control/AthContainers/Root/debug.cxx:240
Buffer::Buffer
Buffer(std::unique_ptr< EventStorage::DataReader > &&reader, size_t size)
Definition: trigbs_orderedMerge.cxx:140
Buffer
Definition: trigbs_orderedMerge.cxx:114
H5Utils::WriterXd::flush
void flush()
Definition: HdfTuple.cxx:115
egammaEnergyPositionAllSamples::e2
double e2(const xAOD::CaloCluster &cluster)
return the uncorrected cluster energy in 2nd sampling
LArNewCalib_DelayDump_OFC_Cali.idx
idx
Definition: LArNewCalib_DelayDump_OFC_Cali.py:69
H5Utils::VariableFillers
Variable filler arrays.
Definition: HdfTuple.h:118
athena.opts
opts
Definition: athena.py:88
passTTreeCut
bool passTTreeCut(TTreeFormula &cut)
TileDCSDataPlotter.tt
tt
Definition: TileDCSDataPlotter.py:876
length
double length(const pvec &v)
Definition: FPGATrackSimLLPDoubletHoughTransformTool.cxx:26
TSU::T
unsigned long long T
Definition: L1TopoDataTypes.h:35