2 Copyright (C) 2002-2026 CERN for the benefit of the ATLAS collaboration
6#include "HDF5Utils/HistCommon.h"
18#include <boost/histogram.hpp>
21namespace H5Utils::hist::detail {
23 // various stuff to inspect the boost histograms
25 // Concepts to inspect which members the accumulator exposes.
27 concept HasValue = requires(T t) { t.begin()->value(); };
29 concept HasVariance = requires(T t) { t.begin()->variance(); };
31 concept HasCount = requires(T t) { t.begin()->count(); };
33 concept HasSumOfWeights = requires(T t) { t.begin()->sum_of_weights(); };
35 concept HasSumOfWeightsSquared = requires(T t) {
36 t.begin()->sum_of_weights_squared();
39 // figure out the output array type
43 using type = typename T::value_type;
45 // this needs further specialization in some cases
47 using iter_value_t = decltype(
48 std::declval<T>().begin()->value());
52 using type = std::remove_cv_t<std::remove_reference_t<iter_value_t<T>>>;
55 using array_t = typename array_type<T>::type;
58 // function to flatten the histogram array
59 template <typename T, typename F>
60 std::vector<array_t<T>> get_flat_hist_array(const T& hist,
61 const F& value_getter)
63 using out_t = std::vector<array_t<T>>;
64 namespace bh = boost::histogram;
66 // calculate a global index for the flat output array
67 auto global_index = [&hist](const auto& hist_idx)
69 // This is unrolling the index to a c-style array. We calculate
70 // a global index, starting with the last dimension. The last
71 // dimension is just added to the global index. Each previous
72 // dimension needs to multiply the index by the cumulative
73 // product of the maximum index of subsequent dimensions.
76 for (size_t dim_p1 = hist.rank(); dim_p1 != 0; dim_p1--) {
77 size_t dim = dim_p1 - 1;
78 auto ops = hist.axis(dim).options();
79 int underflow = (ops & bh::axis::option::underflow) ? 1 : 0;
80 int overflow = (ops & bh::axis::option::overflow) ? 1 : 0;
81 // ugly check to decide if we have to add underflow offset
82 int h5idx_signed = hist_idx.index(dim) + underflow;
83 if (h5idx_signed < 0) throw std::logic_error("neg signed index");
84 size_t h5idx = static_cast<size_t>(h5idx_signed);
85 global += h5idx * stride;
86 stride *= hist.axis(dim).size() + underflow + overflow;
91 out_t out(hist.size());
92 std::set<size_t> used_indices;
93 for (const auto& x : bh::indexed(hist, bh::coverage::all)) {
94 size_t global = global_index(x);
95 if (!used_indices.insert(global).second) {
96 throw std::logic_error(
97 "repeat global index: " + std::to_string(global));
99 out.at(global) = value_getter(x);
107 // Runtime functions to check axes, this information can't always be
108 // determined statically with the public api.
110 // Returns true when all bin widths are equal and positive (regular
112 inline bool is_regular_axis(const std::vector<double>& edges) {
113 if (edges.size() < 2) return false;
114 const double width0 = edges[1] - edges[0];
115 if (width0 <= 0.0) return false;
116 for (size_t i = 2; i < edges.size(); i++) {
117 const double width = edges[i] - edges[i-1];
118 if (std::abs(width - width0) > 1e-5 * std::abs(width0)) { return false; }
123 // Returns true if bins are contiguous integer sequence
124 inline bool is_contiguous(const std::vector<int64_t>& vals) {
125 if (vals.size() < 2) return true;
126 for (size_t i = 1; i < vals.size(); ++i) {
127 if (vals[i] != vals[i-1] + 1) return false;
132 // Helper: construct an Axis with name/underflow/overflow filled from the
133 // boost axis options. Each overload then only needs to set ax.edges.
134 inline Axis make_axis(const auto& bh_ax, const std::string& name)
136 namespace opts = boost::histogram::axis::option;
139 ax.underflow = (bh_ax.options() & opts::underflow) ? true : false;
140 ax.overflow = (bh_ax.options() & opts::overflow) ? true : false;
145 // Concepts to label descrete axes
147 // A random-access sequence of string labels.
148 // Matches std::vector<string>, std::deque<string>, etc.
150 concept LabelList = requires(const L& lst, size_t i) {
151 { lst[i] } -> std::convertible_to<std::string>;
152 { lst.size() } -> std::convertible_to<size_t>;
155 // A map-like container with key_type and at(key) -> string.
156 // Matches std::map<K, string> but not std::vector<string>.
158 concept LabelMap = requires(const M& m) {
159 { m.at(std::declval<typename M::key_type>()) }
160 -> std::convertible_to<std::string>;
161 { m.size() } -> std::convertible_to<size_t>;
164 // Either a positional label list or a keyed label map.
166 concept LabelOverride = LabelList<T> || LabelMap<T>;
168 // Positional list of per-axis label overrides.
169 // Supports std::vector, std::deque (operator[] + size() on const ref).
171 concept AxisOverrideSeq = requires(const L& l, size_t i) {
173 { l.size() } -> std::convertible_to<size_t>;
176 // String-keyed map of per-axis label overrides.
177 // at(axis_name) returns the label info; find/end used for existence
180 concept AxisOverrideMap = requires(const M& m) {
181 m.at(std::declval<const std::string&>());
182 m.find(std::declval<const std::string&>()) != m.end();
185 // General axis override match
186 template <typename Labels>
187 concept AxisOverrides = AxisOverrideSeq<Labels> || AxisOverrideMap<Labels>;
189 // Axis whose bins expose lower()/upper() (regular, variable).
190 // Discrete axes (integer, category) iterate to raw values — no lower().
191 template<typename Ax>
192 concept ContinuousAxis = requires(const Ax& ax) {
193 { ax.begin()->lower() } -> std::convertible_to<double>;
196 // Concept: axis with no lower()/upper() per bin (category, integer).
197 template<typename Ax>
198 concept DiscreteAxis = !ContinuousAxis<Ax>;
201 // Axis reading functions
203 // There are several overloads to provide maximum flexibility, both
204 // in what axes are used from boost::histogram and how the user
205 // decides to provide labels for the discrete axis cases.
207 // discrete axis where the user has provided labels for each value
208 template<LabelList List>
209 Axis read_one_axis(const DiscreteAxis auto& bh_ax,
210 const std::pair<std::string, List>& meta)
212 auto ax = make_axis(bh_ax, meta.first);
214 const auto& labels = meta.second;
215 if (labels.size() != static_cast<size_t>(bh_ax.size())) {
216 throw std::invalid_argument(
218 "axis '{}': label count ({}) != bin count ({})",
219 ax.name, labels.size(), bh_ax.size() ));
222 std::vector<std::string> edge_labels;
223 edge_labels.reserve(labels.size());
224 for (size_t i = 0; i < labels.size(); ++i)
225 edge_labels.push_back(labels[i]);
226 ax.edges = std::move(edge_labels);
230 // Map-based labeled categorical overload: key = category value,
233 // Reorders map entries to axis bin order, then delegates to
234 // LabelList overload.
235 template<LabelMap Map>
236 Axis read_one_axis(const DiscreteAxis auto& bh_ax,
237 const std::pair<std::string, Map>& meta)
239 std::vector<std::string> ordered_labels;
240 for (const auto& bin : bh_ax) {
241 ordered_labels.push_back(meta.second.at(bin));
243 return read_one_axis(bh_ax,
244 std::pair<std::string, std::vector<std::string>>{
245 meta.first, std::move(ordered_labels)});
248 // Continuous-axis overload (regular, variable): bins are interval_view.
249 Axis read_one_axis(const ContinuousAxis auto& bh_ax,
250 const std::string& meta)
252 auto ax = make_axis(bh_ax, meta);
254 std::vector<double> dedges;
255 for (const auto& bin : bh_ax) dedges.push_back(bin.lower());
256 if (bh_ax.size() > 0) dedges.push_back(bh_ax.rbegin()->upper());
257 if (is_regular_axis(dedges)) {
258 ax.edges = regular_axis_t{
259 dedges.front(), dedges.back(), dedges.size() - 1};
261 ax.edges = std::move(dedges);
266 // Continuous axes reject non-empty label overrides; name taken from
268 template<LabelOverride Override>
269 Axis read_one_axis(const ContinuousAxis auto& bh_ax,
270 const std::pair<std::string, Override>& meta)
272 if (meta.second.size() > 0) {
273 throw std::invalid_argument(std::format(
274 "axis '{}' is continuous (regular/variable): "
275 "{} label(s) provided but labels are not valid "
276 "for this axis type",
277 meta.first, meta.second.size()));
279 return read_one_axis(bh_ax, meta.first);
282 // Discrete-axis overload (integer, category): bins are raw values
284 Axis read_one_axis(const auto& bh_ax, const std::string& meta)
286 auto ax = make_axis(bh_ax, meta);
288 std::vector<int64_t> int_vals;
289 for (const auto& bin : bh_ax) int_vals.push_back(static_cast<int64_t>(bin));
290 if (is_contiguous(int_vals)) {
291 ax.edges = std::pair<int64_t,int64_t>{int_vals.front(), int_vals.back()};
293 ax.edges = std::move(int_vals);
298 // Extract axis name from plain-string or pair<string,T> metadata.
299 inline const std::string& get_axis_name(const std::string& m) {
303 inline const std::string& get_axis_name(
304 const std::pair<std::string, T>& m) {
309 // Top-level Axis builder functions
311 // Basic case with no labeling overrides. Labels provided via via
312 // per-axis metadata.
313 template <typename T>
314 std::vector<Axis> get_axes(const T& hist) {
315 std::vector<Axis> axes;
316 hist.for_each_axis([&axes](const auto& bh_ax) {
317 axes.push_back(read_one_axis(bh_ax, bh_ax.metadata()));
322 // Positional-list override: labels[i] is the LabelList or LabelMap
324 template<typename Hist, AxisOverrideSeq Labels>
325 std::vector<Axis> get_axes(const Hist& hist, const Labels& labels)
327 if (labels.size() != hist.rank()) {
328 throw std::invalid_argument(std::format(
329 "label list has {} entries but histogram has {} axes",
330 labels.size(), hist.rank()));
332 std::vector<Axis> axes;
334 hist.for_each_axis([&](const auto& bh_ax) {
335 const auto& lbl = labels[i++];
336 using lbl_t = std::decay_t<decltype(lbl)>;
337 axes.push_back(read_one_axis(
339 std::pair<std::string, lbl_t>{
340 get_axis_name(bh_ax.metadata()), lbl}));
345 // Map-based override: axes absent from the map fall back to metadata.
346 template<typename Hist, AxisOverrideMap Labels>
347 std::vector<Axis> get_axes(const Hist& hist, const Labels& labels)
349 std::vector<Axis> axes;
350 hist.for_each_axis([&](const auto& bh_ax) {
351 const std::string& name = get_axis_name(bh_ax.metadata());
352 auto it = labels.find(name);
353 if (it != labels.end()) {
354 if constexpr (ContinuousAxis<std::decay_t<decltype(bh_ax)>>) {
355 throw std::invalid_argument(std::format(
356 "axis '{}' is continuous (regular/variable) and cannot "
357 "have string labels; remove it from the label map",
360 const auto& lbl = it->second;
361 using lbl_t = std::decay_t<decltype(lbl)>;
362 axes.push_back(read_one_axis(
364 std::pair<std::string, lbl_t>{name, lbl}));
366 axes.push_back(read_one_axis(bh_ax, bh_ax.metadata()));
373 // Main histogram storage writing functions
375 // Simple utility to calculate integer products
376 template <typename T>
377 auto product(const std::vector<T>& v) {
378 return std::accumulate(
379 v.begin(), v.end(), T(1), std::multiplies<hsize_t>{});
382 // Return the UHI storage type string for histogram type T.
383 template <typename T>
384 std::string storage_type_name() {
385 if constexpr (HasSumOfWeightsSquared<T>) {
386 return "weighted_mean";
387 } else if constexpr (HasCount<T>) {
389 } else if constexpr (HasVariance<T>) {
391 } else if constexpr (std::is_integral_v<array_t<T>>) {
398 // internal top level histogram writer
399 template <typename T>
400 void write_hist_impl(
401 H5::Group& parent_group,
403 const std::string& name,
404 const std::vector<Axis>& axes)
406 // build the data space
407 std::vector<hsize_t> ds_dims;
408 for (const auto& axis: axes) {
410 std::visit([](const auto& e) { return n_bins(e); }, axis.edges)
411 + (axis.underflow ? 1u : 0u)
412 + (axis.overflow ? 1u : 0u);
413 ds_dims.push_back(static_cast<hsize_t>(total_size));
415 H5::DataSpace space(ds_dims.size(), ds_dims.data());
417 // Create histogram group and write UHI schema version
418 H5::Group group = parent_group.createGroup(name);
419 write_int_attr(group, "uhi_schema", 1);
420 group.createGroup("metadata");
421 group.createGroup("writer_info");
423 // Dataset creation properties for storage datasets
424 H5::DSetCreatPropList props;
425 props.setChunk(ds_dims.size(), ds_dims.data());
428 H5Pset_dset_no_attrs_hint(props.getId(), true),
429 "no attribute hint");
430 H5::DataType type = hdf5_t<array_t<T>>;
433 write_axes(group, axes);
435 // Write storage group
436 H5::Group storage = group.createGroup("storage");
437 write_str_attr(storage, "type", storage_type_name<T>());
439 // Helper to write a named dataset into the storage group
440 auto add_dataset = [&hist, &type, &space, &props, &storage](
441 const std::string& ds_name,
443 auto flat = get_flat_hist_array<T>(hist, func);
445 space.getSelectNpoints() == static_cast<hssize_t>(flat.size()));
446 storage.createDataSet(ds_name, type, space, props)
447 .write(flat.data(), type);
450 // values (the primary bin contents)
451 if constexpr (HasValue<T>) {
452 add_dataset("values",
453 [](const auto& x) { return x->value(); });
455 add_dataset("values",
456 [](const auto& x) { return *x; });
459 if constexpr (HasVariance<T>) {
460 add_dataset("variances",
461 [](const auto& x) { return x->variance(); });
463 // count (mean / weighted_mean accumulators)
464 if constexpr (HasCount<T>) {
466 [](const auto& x) { return x->count(); });
468 // sum_of_weights (weighted_mean accumulator)
469 if constexpr (HasSumOfWeights<T>) {
470 add_dataset("sum_of_weights",
471 [](const auto& x) { return x->sum_of_weights(); });
473 // sum_of_weights_squared (weighted_mean accumulator)
474 if constexpr (HasSumOfWeightsSquared<T>) {
475 add_dataset("sum_of_weights_squared",
476 [](const auto& x) { return x->sum_of_weights_squared(); });
481namespace H5Utils::hist {
483 // main hist writing function, this should be all anyone needs to
485 template <typename T>
486 void write_hist_to_group(
487 H5::Group& parent_group,
489 const std::string& name)
491 detail::write_hist_impl(
492 parent_group, hist, name, detail::get_axes(hist));
495 // Overload with external label override (positional list or name map).
496 template <typename T, detail::AxisOverrides Labels>
497 void write_hist_to_group(
498 H5::Group& parent_group,
500 const std::string& name,
501 const Labels& labels)
503 detail::write_hist_impl(
504 parent_group, hist, name, detail::get_axes(hist, labels));