2 Copyright (C) 2002-2026 CERN for the benefit of the ATLAS collaboration
18#include <boost/histogram.hpp>
21namespace H5Utils::hist::detail {
23 // various stuff to inspect the boost histograms
25 // Concepts to inspect which members the accumulator exposes.
27 concept HasValue = requires(T t) { t.begin()->value(); };
29 concept HasVariance = requires(T t) { t.begin()->variance(); };
31 concept HasCount = requires(T t) { t.begin()->count(); };
33 concept HasSumOfWeights = requires(T t) { t.begin()->sum_of_weights(); };
35 concept HasSumOfWeightsSquared = requires(T t) {
36 t.begin()->sum_of_weights_squared();
39 // figure out the output array type
43 using type = typename T::value_type;
45 // this needs further specialization in some cases
47 using iter_value_t = decltype(
48 std::declval<T>().begin()->value());
52 using type = std::remove_cv_t<std::remove_reference_t<iter_value_t<T>>>;
55 using array_t = typename array_type<T>::type;
59 const H5::DataType hdf5_t;
61 const H5::DataType hdf5_t<double> = H5::PredType::NATIVE_DOUBLE;
63 const H5::DataType hdf5_t<float> = H5::PredType::NATIVE_FLOAT;
65 const H5::DataType hdf5_t<int> = H5::PredType::NATIVE_INT;
67 const H5::DataType hdf5_t<long> = H5::PredType::NATIVE_LONG;
69 const H5::DataType hdf5_t<long long> = H5::PredType::NATIVE_LLONG;
71 const H5::DataType hdf5_t<unsigned long> = H5::PredType::NATIVE_ULONG;
73 const H5::DataType hdf5_t<unsigned long long> = H5::PredType::NATIVE_ULLONG;
76 // function to flatten the histogram array
77 template <typename T, typename F>
78 std::vector<array_t<T>> get_flat_hist_array(const T& hist,
79 const F& value_getter)
81 using out_t = std::vector<array_t<T>>;
82 namespace bh = boost::histogram;
84 // calculate a global index for the flat output array
85 auto global_index = [&hist](const auto& hist_idx)
87 // This is unrolling the index to a c-style array. We calculate
88 // a global index, starting with the last dimension. The last
89 // dimension is just added to the global index. Each previous
90 // dimension needs to multiply the index by the cumulative
91 // product of the maximum index of subsequent dimensions.
94 for (size_t dim_p1 = hist.rank(); dim_p1 != 0; dim_p1--) {
95 size_t dim = dim_p1 - 1;
96 auto ops = hist.axis(dim).options();
97 size_t underflow = (ops & bh::axis::option::underflow) ? 1 : 0;
98 size_t overflow = (ops & bh::axis::option::overflow) ? 1 : 0;
99 // ugly check to decide if we have to add underflow offset
100 int h5idx_signed = hist_idx.index(dim) + underflow;
101 if (h5idx_signed < 0) throw std::logic_error("neg signed index");
102 size_t h5idx = static_cast<size_t>(h5idx_signed);
103 global += h5idx * stride;
104 stride *= hist.axis(dim).size() + underflow + overflow;
109 out_t out(hist.size());
110 std::set<size_t> used_indices;
111 for (const auto& x : bh::indexed(hist, bh::coverage::all)) {
112 size_t global = global_index(x);
113 if (!used_indices.insert(global).second) {
114 throw std::logic_error(
115 "repeat global index: " + std::to_string(global));
117 out.at(global) = value_getter(x);
122 // Returns true when all bin widths are equal and positive (regular axis).
123 inline bool is_regular_axis(const std::vector<float>& edges) {
124 if (edges.size() < 2) return false;
125 const double width0 = edges.at(1) - edges.at(0);
126 if (width0 <= 0.0) return false;
127 for (size_t i = 2; i < edges.size(); i++) {
128 const double width = edges.at(i) - edges.at(i-1);
129 if (std::abs(width - width0) > 1e-5 * std::abs(width0)) return false;
136 std::vector<float> edges{};
138 bool underflow{false};
139 bool overflow{false};
140 bool is_int_or_category{false};
141 // set iff the axis has equal-width bins (regular axis)
142 std::optional<int> n_regular_bins{};
145 template <typename T>
146 std::vector<Axis> get_axes(const T& hist) {
147 using out_t = std::vector<Axis>;
148 namespace opts = boost::histogram::axis::option;
150 for (size_t ax_n = 0; ax_n < hist.rank(); ax_n++) {
151 const auto& axis = hist.axis(ax_n);
152 out_t::value_type ax;
153 bool underflow = (axis.options() & opts::underflow);
154 bool overflow = (axis.options() & opts::overflow);
155 ax.underflow = underflow;
156 ax.overflow = overflow;
157 for (const auto& bin: axis) {
158 ax.edges.push_back(bin.lower());
160 // add the upper edge too, unless this is an integer/category axis
161 // where every bin has the same lower and upper value — in that case
162 // we have exactly N edges for N bins rather than N+1
163 if (axis.size() > 0) {
164 const auto& last = *axis.rbegin();
165 if (last.upper() == last.lower()) {
166 ax.is_int_or_category = true;
168 ax.edges.push_back(last.upper());
171 ax.name = axis.metadata();
173 if (is_regular_axis(ax.edges)) {
174 ax.n_regular_bins = static_cast<int>(ax.edges.size()) - 1;
177 axes.push_back(std::move(ax));
182 inline void chkerr(herr_t code, const std::string& error) {
183 if (code < 0) throw std::runtime_error("error setting " + error);
186 template <typename T>
187 auto product(const std::vector<T>& v) {
188 return std::accumulate(
189 v.begin(), v.end(), T(1), std::multiplies<hsize_t>{});
194 inline void write_str_attr(H5::H5Object& obj,
195 const std::string& key,
196 const std::string& val)
198 H5::StrType strtype(H5::PredType::C_S1, H5T_VARIABLE);
199 H5::DataSpace scalar(H5S_SCALAR);
200 H5::Attribute attr = obj.createAttribute(key, strtype, scalar);
201 const char* cstr = val.c_str();
202 attr.write(strtype, &cstr);
205 inline void write_bool_attr(H5::H5Object& obj,
206 const std::string& key,
209 H5::DataSpace scalar(H5S_SCALAR);
210 uint8_t v = val ? 1 : 0;
211 H5::Attribute attr = obj.createAttribute(
212 key, H5::PredType::NATIVE_UINT8, scalar);
213 attr.write(H5::PredType::NATIVE_UINT8, &v);
216 inline void write_int_attr(H5::H5Object& obj,
217 const std::string& key,
220 H5::DataSpace scalar(H5S_SCALAR);
221 H5::Attribute attr = obj.createAttribute(
222 key, H5::PredType::NATIVE_INT64, scalar);
223 attr.write(H5::PredType::NATIVE_INT64, &val);
226 inline void write_double_attr(H5::H5Object& obj,
227 const std::string& key,
230 H5::DataSpace scalar(H5S_SCALAR);
231 H5::Attribute attr = obj.createAttribute(
232 key, H5::PredType::NATIVE_DOUBLE, scalar);
233 attr.write(H5::PredType::NATIVE_DOUBLE, &val);
236 // Return the UHI storage type string for histogram type T.
237 template <typename T>
238 std::string storage_type_name() {
239 if constexpr (HasSumOfWeightsSquared<T>) {
240 return "weighted_mean";
241 } else if constexpr (HasCount<T>) {
243 } else if constexpr (HasVariance<T>) {
245 } else if constexpr (std::is_integral_v<array_t<T>>) {
253namespace H5Utils::hist {
255 // main hist writing function, this should be all anyone needs to
257 template <typename T>
258 void write_hist_to_group(
259 H5::Group& parent_group,
261 const std::string& name)
263 using namespace detail;
265 // build the data space
266 auto axes = get_axes(hist);
267 std::vector<hsize_t> ds_dims;
268 for (const auto& axis: axes) {
269 int total_size = static_cast<int>(axis.edges.size())
270 - (axis.is_int_or_category ? 0 : 1)
271 + (axis.underflow ? 1 : 0)
272 + (axis.overflow ? 1 : 0);
273 if (total_size < 0) throw std::logic_error("negative bin count");
274 ds_dims.push_back(static_cast<hsize_t>(total_size));
276 H5::DataSpace space(ds_dims.size(), ds_dims.data());
278 // Create histogram group and write UHI schema version
279 H5::Group group = parent_group.createGroup(name);
280 write_int_attr(group, "uhi_schema", 1);
281 group.createGroup("metadata");
282 group.createGroup("writer_info");
284 // Dataset creation properties for storage datasets
285 H5::DSetCreatPropList props;
286 props.setChunk(ds_dims.size(), ds_dims.data());
289 H5Pset_dset_no_attrs_hint(props.getId(), true),
290 "no attribute hint");
291 H5::DataType type = hdf5_t<array_t<T>>;
294 H5::Group ref_axes_group = group.createGroup("ref_axes");
295 for (size_t ax_n = 0; ax_n < axes.size(); ax_n++) {
296 const auto& ax = axes.at(ax_n);
297 H5::Group ax_group = ref_axes_group.createGroup(
298 "axis_" + std::to_string(ax_n));
300 if (ax.n_regular_bins) {
301 write_str_attr(ax_group, "type", "regular");
302 write_double_attr(ax_group, "lower",
303 static_cast<double>(ax.edges.front()));
304 write_double_attr(ax_group, "upper",
305 static_cast<double>(ax.edges.back()));
306 write_int_attr(ax_group, "bins", *ax.n_regular_bins);
308 write_str_attr(ax_group, "type", "variable");
309 std::array<hsize_t,1> edgedims{ax.edges.size()};
310 H5::DataSpace edge_space(1, edgedims.data());
311 H5::DSetCreatPropList edge_props;
312 edge_props.setChunk(1, edgedims.data());
313 edge_props.setDeflate(7);
314 H5::DataSet edge_ds = ax_group.createDataSet(
315 "edges", hdf5_t<float>, edge_space, edge_props);
316 edge_ds.write(ax.edges.data(), hdf5_t<float>);
319 write_bool_attr(ax_group, "underflow", ax.underflow);
320 write_bool_attr(ax_group, "overflow", ax.overflow);
321 write_bool_attr(ax_group, "circular", false);
323 // Write axis name into a metadata subgroup (key matches Python output)
324 H5::Group ax_meta = ax_group.createGroup("metadata");
325 if (!ax.name.empty()) {
326 write_str_attr(ax_meta, "metadata", ax.name);
328 ax_group.createGroup("writer_info");
331 // Write "axes" object-reference dataset (required by the UHI reader)
333 std::vector<hobj_ref_t> axis_refs(axes.size());
334 for (size_t ax_n = 0; ax_n < axes.size(); ax_n++) {
335 std::string ax_path = "ref_axes/axis_" + std::to_string(ax_n);
336 chkerr(H5Rcreate(&axis_refs.at(ax_n), group.getId(),
337 ax_path.c_str(), H5R_OBJECT, -1),
338 "axis object reference");
340 hsize_t ref_dims[1] = {axes.size()};
341 H5::DataSpace ref_space(1, ref_dims);
342 group.createDataSet("axes", H5::PredType::STD_REF_OBJ, ref_space)
343 .write(axis_refs.data(), H5::PredType::STD_REF_OBJ);
346 // Write storage group
347 H5::Group storage = group.createGroup("storage");
348 write_str_attr(storage, "type", storage_type_name<T>());
350 // Helper to write a named dataset into the storage group
351 auto add_dataset = [&hist, &type, &space, &props, &storage](
352 const std::string& ds_name,
354 auto flat = get_flat_hist_array<T>(hist, func);
355 assert(std::cmp_equal(space.getSelectNpoints(),flat.size()));
356 storage.createDataSet(ds_name, type, space, props)
357 .write(flat.data(), type);
360 // values (the primary bin contents)
361 if constexpr (HasValue<T>) {
362 add_dataset("values",
363 [](const auto& x) { return x->value(); });
365 add_dataset("values",
366 [](const auto& x) { return *x; });
369 if constexpr (HasVariance<T>) {
370 add_dataset("variances",
371 [](const auto& x) { return x->variance(); });
373 // count (mean / weighted_mean accumulators)
374 if constexpr (HasCount<T>) {
376 [](const auto& x) { return x->count(); });
378 // sum_of_weights (weighted_mean accumulator)
379 if constexpr (HasSumOfWeights<T>) {
380 add_dataset("sum_of_weights",
381 [](const auto& x) { return x->sum_of_weights(); });
383 // sum_of_weights_squared (weighted_mean accumulator)
384 if constexpr (HasSumOfWeightsSquared<T>) {
385 add_dataset("sum_of_weights_squared",
386 [](const auto& x) { return x->sum_of_weights_squared(); });