2 Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
14 #include <boost/histogram.hpp>
17 namespace H5Utils::hist::detail {
19 // various stuff to inspect the boost histograms
21 // inspect the accumulator to see what we can read out
23 concept HasValue = requires(T t) {
27 // figure out the output array type
31 using type = typename T::value_type;
33 // this needs further specialization in some cases
35 using iter_value_t = decltype(
36 std::declval<T>().begin()->value());
40 using type = std::remove_cv_t<std::remove_reference_t<iter_value_t<T>>>;
43 using array_t = typename array_type<T>::type;
47 const H5::DataType hdf5_t;
49 const H5::DataType hdf5_t<double> = H5::PredType::NATIVE_DOUBLE;
51 const H5::DataType hdf5_t<float> = H5::PredType::NATIVE_FLOAT;
53 const H5::DataType hdf5_t<int> = H5::PredType::NATIVE_INT;
55 const H5::DataType hdf5_t<unsigned long> = H5::PredType::NATIVE_ULONG;
57 const H5::DataType hdf5_t<unsigned long long> = H5::PredType::NATIVE_ULLONG;
60 // function to flatten the histogram array
61 template <typename T, typename F>
62 std::vector<array_t<T>> get_flat_hist_array(const T& hist,
63 const F& value_getter)
65 using out_t = std::vector<array_t<T>>;
66 namespace bh = boost::histogram;
67 std::vector<int> hist_dims(hist.rank());
68 for (size_t dim = 0; dim < hist.rank(); dim++) {
69 hist_dims.at(dim) = hist.axis(dim).size();
72 // calculate a global index for the flat output array
73 auto global_index = [&hist](const auto& hist_idx)
75 // This is unrolling the index to a c-style array. We calculate
76 // a global index, starting with the last dimension. The last
77 // dimension is just added to the global index. Each previous
78 // dimension needs to multiply the index by the cumulative
79 // product of the maximum index of subsequent dimensions.
82 for (size_t dim_p1 = hist.rank(); dim_p1 != 0; dim_p1--) {
83 size_t dim = dim_p1 - 1;
84 auto ops = hist.axis(dim).options();
85 size_t underflow = (ops & bh::axis::option::underflow) ? 1 : 0;
86 size_t overflow = (ops & bh::axis::option::overflow) ? 1 : 0;
87 // ugly check to decide if we have to add underflow offset
88 int h5idx_signed = hist_idx.index(dim) + underflow;
89 if (h5idx_signed < 0) throw std::logic_error("neg signed index");
90 size_t h5idx = static_cast<size_t>(h5idx_signed);
91 global += h5idx * stride;
92 stride *= hist.axis(dim).size() + underflow + overflow;
97 out_t out(hist.size());
98 std::set<size_t> used_indices;
99 for (const auto& x : bh::indexed(hist, bh::coverage::all)) {
100 size_t global = global_index(x);
101 if (!used_indices.insert(global).second) {
102 throw std::logic_error(
103 "repeat global index: " + std::to_string(global));
105 out.at(global) = value_getter(x);
112 std::vector<float> edges{};
114 int extra_bins_in_hist{};
117 template <typename T>
118 std::vector<Axis> get_axes(const T& hist) {
119 using out_t = std::vector<Axis>;
120 namespace opts = boost::histogram::axis::option;
122 for (size_t ax_n = 0; ax_n < hist.rank(); ax_n++) {
123 const auto& axis = hist.axis(ax_n);
124 out_t::value_type ax;
125 bool underflow = (axis.options() & opts::underflow);
126 bool overflow = (axis.options() & opts::overflow);
127 // there are n normal bins + 1 edges, so we start with one less
128 // bin in the histogram then in the edges. Then we add under /
130 ax.extra_bins_in_hist = -1 + (underflow ? 1 : 0) + (overflow ? 1: 0);
131 for (const auto& bin: axis) {
132 ax.edges.push_back(bin.lower());
134 // add the upper edge too
135 if (axis.size() > 0) {
136 const auto& last = *axis.rbegin();
137 // for integer binning, which has the same upper and lower bin
138 // value, we have one more bonus bin in the hist and don't save
140 if (last.upper() == last.lower()) {
141 ax.extra_bins_in_hist += 1;
143 ax.edges.push_back(last.upper());
146 ax.name = axis.metadata();
152 inline void chkerr(herr_t code, const std::string& error) {
153 if (code < 0) throw std::runtime_error("error setting " + error);
156 template <typename T>
157 auto product(const std::vector<T>& v) {
158 return std::accumulate(
159 v.begin(), v.end(), T(1), std::multiplies<hsize_t>{});
163 namespace H5Utils::hist {
165 // main hist writing function, this should be all anyone needs to
167 template <typename T>
168 void write_hist_to_group(
169 H5::Group& parent_group,
171 const std::string& name)
173 using namespace detail;
175 // build the data space
176 auto axes = get_axes(hist);
177 std::vector<hsize_t> ds_dims;
178 for (const auto& axis: axes) {
179 int total_size = axis.edges.size() + axis.extra_bins_in_hist;
180 if (total_size < 0) throw std::logic_error("negative bin count");
181 ds_dims.push_back(static_cast<hsize_t>(total_size));
183 H5::DataSpace space(ds_dims.size(), ds_dims.data());
185 // set up group, props, type of dataset
186 H5::Group group = parent_group.createGroup(name);
187 H5::DSetCreatPropList props;
188 props.setChunk(ds_dims.size(), ds_dims.data());
191 H5Pset_dset_no_attrs_hint(props.getId(), true),
192 "no attribute hint");
193 H5::DataType type = hdf5_t<array_t<T>>;
195 // add the edge datasets
196 H5::Group axgroup = group.createGroup("axes");
197 std::vector<H5::DataSet> axes_ds;
198 for (const auto& axis: axes) {
199 std::array<hsize_t,1> axdims{axis.edges.size()};
200 H5::DataSpace ax_space(axdims.size(), axdims.data());
201 H5::DSetCreatPropList ax_props;
202 ax_props.copy(props);
203 ax_props.setChunk(axdims.size(), axdims.data());
204 const auto& ax_type = hdf5_t<float>;
205 auto ax_ds = axgroup.createDataSet(
210 ax_ds.write(axis.edges.data(), ax_type);
211 axes_ds.push_back(ax_ds);
214 // Generic dataset adder. We need to call this multiple times, for
215 // each type of data that the histogram is saving.
216 auto add_moment = [&hist, &type, &space, &props, &group, &axes_ds](
217 const std::string& name,
219 auto flat = get_flat_hist_array<T>(hist, func);
220 assert(static_cast<std::size_t>(space.getSelectNpoints()) == flat.size());
221 H5::DataSet dataset = group.createDataSet(
226 dataset.write(flat.data(), type);
229 for (const H5::DataSet& ax: axes_ds) {
232 dataset.getId(), ax.getId(), axnum++),
237 // Read in the data and write to a dataset. The behavior here
238 // depends on which accumulator was used.
239 if constexpr (HasValue<T>) {
240 add_moment("histogram",
241 [](const auto& x) { return x->value(); } );
243 add_moment("histogram",
244 [](const auto& x) { return *x; } );
247 if constexpr (requires(T t) { t.begin()->variance(); }) {
250 [](const auto& x) { return x->variance();}
254 if constexpr (requires(T t) { t.begin()->count(); }) {
257 [](const auto& x) { return x->count(); }
260 // add the sum of weights
261 if constexpr (requires(T t) { t.begin()->sum_of_weights(); }) {
264 [](const auto& x) { return x->sum_of_weights(); }
267 // add the sum of weights squared
268 if constexpr (requires(T t) { t.begin()->sum_of_weights_squared(); }) {
270 "sum_of_weights_squared",
271 [](const auto& x) { return x->sum_of_weights_squared(); }