Loading [MathJax]/extensions/tex2jax.js
ATLAS Offline Software
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
histogram.icc
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3 */
4 
5 #include "H5Cpp.h"
6 #include "hdf5_hl.h"
7 
8 #include <vector>
9 #include <cassert>
10 #include <array>
11 #include <numeric>
12 #include <set>
13 
14 #include <boost/histogram.hpp>
15 
16 
17 namespace H5Utils::hist::detail {
18 
19  // various stuff to inspect the boost histograms
20 
21  // inspect the accumulator to see what we can read out
22  template <typename T>
23  concept HasValue = requires(T t) {
24  t.begin()->value();
25  };
26 
27  // figure out the output array type
28  template <typename T>
29  struct array_type
30  {
31  using type = typename T::value_type;
32  };
33  // this needs further specialization in some cases
34  template <typename T>
35  using iter_value_t = decltype(
36  std::declval<T>().begin()->value());
37  template <HasValue T>
38  struct array_type<T>
39  {
40  using type = std::remove_cv_t<std::remove_reference_t<iter_value_t<T>>>;
41  };
42  template <typename T>
43  using array_t = typename array_type<T>::type;
44 
45  // H5 typing magic
46  template <typename T>
47  const H5::DataType hdf5_t;
48  template<>
49  const H5::DataType hdf5_t<double> = H5::PredType::NATIVE_DOUBLE;
50  template<>
51  const H5::DataType hdf5_t<float> = H5::PredType::NATIVE_FLOAT;
52  template<>
53  const H5::DataType hdf5_t<int> = H5::PredType::NATIVE_INT;
54  template<>
55  const H5::DataType hdf5_t<unsigned long> = H5::PredType::NATIVE_ULONG;
56  template<>
57  const H5::DataType hdf5_t<unsigned long long> = H5::PredType::NATIVE_ULLONG;
58 
59 
60  // function to flatten the histogram array
61  template <typename T, typename F>
62  std::vector<array_t<T>> get_flat_hist_array(const T& hist,
63  const F& value_getter)
64  {
65  using out_t = std::vector<array_t<T>>;
66  namespace bh = boost::histogram;
67  std::vector<int> hist_dims(hist.rank());
68  for (size_t dim = 0; dim < hist.rank(); dim++) {
69  hist_dims.at(dim) = hist.axis(dim).size();
70  }
71 
72  // calculate a global index for the flat output array
73  auto global_index = [&hist](const auto& hist_idx)
74  {
75  // This is unrolling the index to a c-style array. We calculate
76  // a global index, starting with the last dimension. The last
77  // dimension is just added to the global index. Each previous
78  // dimension needs to multiply the index by the cumulative
79  // product of the maximum index of subsequent dimensions.
80  size_t global = 0;
81  size_t stride = 1;
82  for (size_t dim_p1 = hist.rank(); dim_p1 != 0; dim_p1--) {
83  size_t dim = dim_p1 - 1;
84  auto ops = hist.axis(dim).options();
85  size_t underflow = (ops & bh::axis::option::underflow) ? 1 : 0;
86  size_t overflow = (ops & bh::axis::option::overflow) ? 1 : 0;
87  // ugly check to decide if we have to add underflow offset
88  int h5idx_signed = hist_idx.index(dim) + underflow;
89  if (h5idx_signed < 0) throw std::logic_error("neg signed index");
90  size_t h5idx = static_cast<size_t>(h5idx_signed);
91  global += h5idx * stride;
92  stride *= hist.axis(dim).size() + underflow + overflow;
93  }
94  return global;
95  };
96 
97  out_t out(hist.size());
98  std::set<size_t> used_indices;
99  for (const auto& x : bh::indexed(hist, bh::coverage::all)) {
100  size_t global = global_index(x);
101  if (!used_indices.insert(global).second) {
102  throw std::logic_error(
103  "repeat global index: " + std::to_string(global));
104  }
105  out.at(global) = value_getter(x);
106  }
107  return out;
108  }
109 
110  struct Axis
111  {
112  std::vector<float> edges{};
113  std::string name{};
114  int extra_bins_in_hist{};
115  };
116 
117  template <typename T>
118  std::vector<Axis> get_axes(const T& hist) {
119  using out_t = std::vector<Axis>;
120  namespace opts = boost::histogram::axis::option;
121  out_t edges;
122  for (size_t ax_n = 0; ax_n < hist.rank(); ax_n++) {
123  const auto& axis = hist.axis(ax_n);
124  out_t::value_type ax;
125  bool underflow = (axis.options() & opts::underflow);
126  bool overflow = (axis.options() & opts::overflow);
127  // there are n normal bins + 1 edges, so we start with one less
128  // bin in the histogram then in the edges. Then we add under /
129  // overflow.
130  ax.extra_bins_in_hist = -1 + (underflow ? 1 : 0) + (overflow ? 1: 0);
131  for (const auto& bin: axis) {
132  ax.edges.push_back(bin.lower());
133  }
134  // add the upper edge too
135  if (axis.size() > 0) {
136  const auto& last = *axis.rbegin();
137  // for integer binning, which has the same upper and lower bin
138  // value, we have one more bonus bin in the hist and don't save
139  // all the edges
140  if (last.upper() == last.lower()) {
141  ax.extra_bins_in_hist += 1;
142  } else {
143  ax.edges.push_back(last.upper());
144  }
145  }
146  ax.name = axis.metadata();
147  edges.push_back(ax);
148  }
149  return edges;
150  }
151 
152  inline void chkerr(herr_t code, const std::string& error) {
153  if (code < 0) throw std::runtime_error("error setting " + error);
154  }
155 
156  template <typename T>
157  auto product(const std::vector<T>& v) {
158  return std::accumulate(
159  v.begin(), v.end(), T(1), std::multiplies<hsize_t>{});
160  }
161 }
162 
163 namespace H5Utils::hist {
164 
165  // main hist writing function, this should be all anyone needs to
166  // call
167  template <typename T>
168  void write_hist_to_group(
169  H5::Group& parent_group,
170  const T& hist,
171  const std::string& name)
172  {
173  using namespace detail;
174 
175  // build the data space
176  auto axes = get_axes(hist);
177  std::vector<hsize_t> ds_dims;
178  for (const auto& axis: axes) {
179  int total_size = axis.edges.size() + axis.extra_bins_in_hist;
180  if (total_size < 0) throw std::logic_error("negative bin count");
181  ds_dims.push_back(static_cast<hsize_t>(total_size));
182  }
183  H5::DataSpace space(ds_dims.size(), ds_dims.data());
184 
185  // set up group, props, type of dataset
186  H5::Group group = parent_group.createGroup(name);
187  H5::DSetCreatPropList props;
188  props.setChunk(ds_dims.size(), ds_dims.data());
189  props.setDeflate(7);
190  chkerr(
191  H5Pset_dset_no_attrs_hint(props.getId(), true),
192  "no attribute hint");
193  H5::DataType type = hdf5_t<array_t<T>>;
194 
195  // add the edge datasets
196  H5::Group axgroup = group.createGroup("axes");
197  std::vector<H5::DataSet> axes_ds;
198  for (const auto& axis: axes) {
199  std::array<hsize_t,1> axdims{axis.edges.size()};
200  H5::DataSpace ax_space(axdims.size(), axdims.data());
201  H5::DSetCreatPropList ax_props;
202  ax_props.copy(props);
203  ax_props.setChunk(axdims.size(), axdims.data());
204  const auto& ax_type = hdf5_t<float>;
205  auto ax_ds = axgroup.createDataSet(
206  axis.name,
207  ax_type,
208  ax_space,
209  ax_props);
210  ax_ds.write(axis.edges.data(), ax_type);
211  axes_ds.push_back(ax_ds);
212  }
213 
214  // Generic dataset adder. We need to call this multiple times, for
215  // each type of data that the histogram is saving.
216  auto add_moment = [&hist, &type, &space, &props, &group, &axes_ds](
217  const std::string& name,
218  const auto& func) {
219  auto flat = get_flat_hist_array<T>(hist, func);
220  assert(static_cast<std::size_t>(space.getSelectNpoints()) == flat.size());
221  H5::DataSet dataset = group.createDataSet(
222  name,
223  type,
224  space,
225  props);
226  dataset.write(flat.data(), type);
227  // attach the axes
228  size_t axnum = 0;
229  for (const H5::DataSet& ax: axes_ds) {
230  chkerr(
231  H5DSattach_scale(
232  dataset.getId(), ax.getId(), axnum++),
233  "dimenstion scale");
234  }
235  };
236 
237  // Read in the data and write to a dataset. The behavior here
238  // depends on which accumulator was used.
239  if constexpr (HasValue<T>) {
240  add_moment("histogram",
241  [](const auto& x) { return x->value(); } );
242  } else {
243  add_moment("histogram",
244  [](const auto& x) { return *x; } );
245  }
246  // add the variance
247  if constexpr (requires(T t) { t.begin()->variance(); }) {
248  add_moment(
249  "variance",
250  [](const auto& x) { return x->variance();}
251  );
252  }
253  // add the counts
254  if constexpr (requires(T t) { t.begin()->count(); }) {
255  add_moment(
256  "count",
257  [](const auto& x) { return x->count(); }
258  );
259  }
260  // add the sum of weights
261  if constexpr (requires(T t) { t.begin()->sum_of_weights(); }) {
262  add_moment(
263  "sum_of_weights",
264  [](const auto& x) { return x->sum_of_weights(); }
265  );
266  }
267  // add the sum of weights squared
268  if constexpr (requires(T t) { t.begin()->sum_of_weights_squared(); }) {
269  add_moment(
270  "sum_of_weights_squared",
271  [](const auto& x) { return x->sum_of_weights_squared(); }
272  );
273  }
274 
275  }
276 }