ATLAS Offline Software
Loading...
Searching...
No Matches
histogram.icc
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2026 CERN for the benefit of the ATLAS collaboration
3*/
4
5#include "H5Cpp.h"
6
7#include <vector>
8#include <cassert>
9#include <array>
10#include <cmath>
11#include <cstdint>
12#include <numeric>
13#include <optional>
14#include <set>
15#include <string>
16#include <utility>
17
18#include <boost/histogram.hpp>
19
20
21namespace H5Utils::hist::detail {
22
23 // various stuff to inspect the boost histograms
24
25 // Concepts to inspect which members the accumulator exposes.
26 template <typename T>
27 concept HasValue = requires(T t) { t.begin()->value(); };
28 template <typename T>
29 concept HasVariance = requires(T t) { t.begin()->variance(); };
30 template <typename T>
31 concept HasCount = requires(T t) { t.begin()->count(); };
32 template <typename T>
33 concept HasSumOfWeights = requires(T t) { t.begin()->sum_of_weights(); };
34 template <typename T>
35 concept HasSumOfWeightsSquared = requires(T t) {
36 t.begin()->sum_of_weights_squared();
37 };
38
39 // figure out the output array type
40 template <typename T>
41 struct array_type
42 {
43 using type = typename T::value_type;
44 };
45 // this needs further specialization in some cases
46 template <typename T>
47 using iter_value_t = decltype(
48 std::declval<T>().begin()->value());
49 template <HasValue T>
50 struct array_type<T>
51 {
52 using type = std::remove_cv_t<std::remove_reference_t<iter_value_t<T>>>;
53 };
54 template <typename T>
55 using array_t = typename array_type<T>::type;
56
57 // H5 typing magic
58 template <typename T>
59 const H5::DataType hdf5_t;
60 template<>
61 const H5::DataType hdf5_t<double> = H5::PredType::NATIVE_DOUBLE;
62 template<>
63 const H5::DataType hdf5_t<float> = H5::PredType::NATIVE_FLOAT;
64 template<>
65 const H5::DataType hdf5_t<int> = H5::PredType::NATIVE_INT;
66 template<>
67 const H5::DataType hdf5_t<long> = H5::PredType::NATIVE_LONG;
68 template<>
69 const H5::DataType hdf5_t<long long> = H5::PredType::NATIVE_LLONG;
70 template<>
71 const H5::DataType hdf5_t<unsigned long> = H5::PredType::NATIVE_ULONG;
72 template<>
73 const H5::DataType hdf5_t<unsigned long long> = H5::PredType::NATIVE_ULLONG;
74
75
76 // function to flatten the histogram array
77 template <typename T, typename F>
78 std::vector<array_t<T>> get_flat_hist_array(const T& hist,
79 const F& value_getter)
80 {
81 using out_t = std::vector<array_t<T>>;
82 namespace bh = boost::histogram;
83
84 // calculate a global index for the flat output array
85 auto global_index = [&hist](const auto& hist_idx)
86 {
87 // This is unrolling the index to a c-style array. We calculate
88 // a global index, starting with the last dimension. The last
89 // dimension is just added to the global index. Each previous
90 // dimension needs to multiply the index by the cumulative
91 // product of the maximum index of subsequent dimensions.
92 size_t global = 0;
93 size_t stride = 1;
94 for (size_t dim_p1 = hist.rank(); dim_p1 != 0; dim_p1--) {
95 size_t dim = dim_p1 - 1;
96 auto ops = hist.axis(dim).options();
97 size_t underflow = (ops & bh::axis::option::underflow) ? 1 : 0;
98 size_t overflow = (ops & bh::axis::option::overflow) ? 1 : 0;
99 // ugly check to decide if we have to add underflow offset
100 int h5idx_signed = hist_idx.index(dim) + underflow;
101 if (h5idx_signed < 0) throw std::logic_error("neg signed index");
102 size_t h5idx = static_cast<size_t>(h5idx_signed);
103 global += h5idx * stride;
104 stride *= hist.axis(dim).size() + underflow + overflow;
105 }
106 return global;
107 };
108
109 out_t out(hist.size());
110 std::set<size_t> used_indices;
111 for (const auto& x : bh::indexed(hist, bh::coverage::all)) {
112 size_t global = global_index(x);
113 if (!used_indices.insert(global).second) {
114 throw std::logic_error(
115 "repeat global index: " + std::to_string(global));
116 }
117 out.at(global) = value_getter(x);
118 }
119 return out;
120 }
121
122 // Returns true when all bin widths are equal and positive (regular axis).
123 inline bool is_regular_axis(const std::vector<float>& edges) {
124 if (edges.size() < 2) return false;
125 const double width0 = edges.at(1) - edges.at(0);
126 if (width0 <= 0.0) return false;
127 for (size_t i = 2; i < edges.size(); i++) {
128 const double width = edges.at(i) - edges.at(i-1);
129 if (std::abs(width - width0) > 1e-5 * std::abs(width0)) return false;
130 }
131 return true;
132 }
133
134 struct Axis
135 {
136 std::vector<float> edges{};
137 std::string name{};
138 bool underflow{false};
139 bool overflow{false};
140 bool is_int_or_category{false};
141 // set iff the axis has equal-width bins (regular axis)
142 std::optional<int> n_regular_bins{};
143 };
144
145 template <typename T>
146 std::vector<Axis> get_axes(const T& hist) {
147 using out_t = std::vector<Axis>;
148 namespace opts = boost::histogram::axis::option;
149 out_t axes;
150 for (size_t ax_n = 0; ax_n < hist.rank(); ax_n++) {
151 const auto& axis = hist.axis(ax_n);
152 out_t::value_type ax;
153 bool underflow = (axis.options() & opts::underflow);
154 bool overflow = (axis.options() & opts::overflow);
155 ax.underflow = underflow;
156 ax.overflow = overflow;
157 for (const auto& bin: axis) {
158 ax.edges.push_back(bin.lower());
159 }
160 // add the upper edge too, unless this is an integer/category axis
161 // where every bin has the same lower and upper value — in that case
162 // we have exactly N edges for N bins rather than N+1
163 if (axis.size() > 0) {
164 const auto& last = *axis.rbegin();
165 if (last.upper() == last.lower()) {
166 ax.is_int_or_category = true;
167 } else {
168 ax.edges.push_back(last.upper());
169 }
170 }
171 ax.name = axis.metadata();
172
173 if (is_regular_axis(ax.edges)) {
174 ax.n_regular_bins = static_cast<int>(ax.edges.size()) - 1;
175 }
176
177 axes.push_back(std::move(ax));
178 }
179 return axes;
180 }
181
182 inline void chkerr(herr_t code, const std::string& error) {
183 if (code < 0) throw std::runtime_error("error setting " + error);
184 }
185
186 template <typename T>
187 auto product(const std::vector<T>& v) {
188 return std::accumulate(
189 v.begin(), v.end(), T(1), std::multiplies<hsize_t>{});
190 }
191
192 // Attribute helpers
193
194 inline void write_str_attr(H5::H5Object& obj,
195 const std::string& key,
196 const std::string& val)
197 {
198 H5::StrType strtype(H5::PredType::C_S1, H5T_VARIABLE);
199 H5::DataSpace scalar(H5S_SCALAR);
200 H5::Attribute attr = obj.createAttribute(key, strtype, scalar);
201 const char* cstr = val.c_str();
202 attr.write(strtype, &cstr);
203 }
204
205 inline void write_bool_attr(H5::H5Object& obj,
206 const std::string& key,
207 bool val)
208 {
209 H5::DataSpace scalar(H5S_SCALAR);
210 uint8_t v = val ? 1 : 0;
211 H5::Attribute attr = obj.createAttribute(
212 key, H5::PredType::NATIVE_UINT8, scalar);
213 attr.write(H5::PredType::NATIVE_UINT8, &v);
214 }
215
216 inline void write_int_attr(H5::H5Object& obj,
217 const std::string& key,
218 int64_t val)
219 {
220 H5::DataSpace scalar(H5S_SCALAR);
221 H5::Attribute attr = obj.createAttribute(
222 key, H5::PredType::NATIVE_INT64, scalar);
223 attr.write(H5::PredType::NATIVE_INT64, &val);
224 }
225
226 inline void write_double_attr(H5::H5Object& obj,
227 const std::string& key,
228 double val)
229 {
230 H5::DataSpace scalar(H5S_SCALAR);
231 H5::Attribute attr = obj.createAttribute(
232 key, H5::PredType::NATIVE_DOUBLE, scalar);
233 attr.write(H5::PredType::NATIVE_DOUBLE, &val);
234 }
235
236 // Return the UHI storage type string for histogram type T.
237 template <typename T>
238 std::string storage_type_name() {
239 if constexpr (HasSumOfWeightsSquared<T>) {
240 return "weighted_mean";
241 } else if constexpr (HasCount<T>) {
242 return "mean";
243 } else if constexpr (HasVariance<T>) {
244 return "weighted";
245 } else if constexpr (std::is_integral_v<array_t<T>>) {
246 return "int";
247 } else {
248 return "double";
249 }
250 }
251}
252
253namespace H5Utils::hist {
254
255 // main hist writing function, this should be all anyone needs to
256 // call
257 template <typename T>
258 void write_hist_to_group(
259 H5::Group& parent_group,
260 const T& hist,
261 const std::string& name)
262 {
263 using namespace detail;
264
265 // build the data space
266 auto axes = get_axes(hist);
267 std::vector<hsize_t> ds_dims;
268 for (const auto& axis: axes) {
269 int total_size = static_cast<int>(axis.edges.size())
270 - (axis.is_int_or_category ? 0 : 1)
271 + (axis.underflow ? 1 : 0)
272 + (axis.overflow ? 1 : 0);
273 if (total_size < 0) throw std::logic_error("negative bin count");
274 ds_dims.push_back(static_cast<hsize_t>(total_size));
275 }
276 H5::DataSpace space(ds_dims.size(), ds_dims.data());
277
278 // Create histogram group and write UHI schema version
279 H5::Group group = parent_group.createGroup(name);
280 write_int_attr(group, "uhi_schema", 1);
281 group.createGroup("metadata");
282 group.createGroup("writer_info");
283
284 // Dataset creation properties for storage datasets
285 H5::DSetCreatPropList props;
286 props.setChunk(ds_dims.size(), ds_dims.data());
287 props.setDeflate(7);
288 chkerr(
289 H5Pset_dset_no_attrs_hint(props.getId(), true),
290 "no attribute hint");
291 H5::DataType type = hdf5_t<array_t<T>>;
292
293 // Write ref_axes
294 H5::Group ref_axes_group = group.createGroup("ref_axes");
295 for (size_t ax_n = 0; ax_n < axes.size(); ax_n++) {
296 const auto& ax = axes.at(ax_n);
297 H5::Group ax_group = ref_axes_group.createGroup(
298 "axis_" + std::to_string(ax_n));
299
300 if (ax.n_regular_bins) {
301 write_str_attr(ax_group, "type", "regular");
302 write_double_attr(ax_group, "lower",
303 static_cast<double>(ax.edges.front()));
304 write_double_attr(ax_group, "upper",
305 static_cast<double>(ax.edges.back()));
306 write_int_attr(ax_group, "bins", *ax.n_regular_bins);
307 } else {
308 write_str_attr(ax_group, "type", "variable");
309 std::array<hsize_t,1> edgedims{ax.edges.size()};
310 H5::DataSpace edge_space(1, edgedims.data());
311 H5::DSetCreatPropList edge_props;
312 edge_props.setChunk(1, edgedims.data());
313 edge_props.setDeflate(7);
314 H5::DataSet edge_ds = ax_group.createDataSet(
315 "edges", hdf5_t<float>, edge_space, edge_props);
316 edge_ds.write(ax.edges.data(), hdf5_t<float>);
317 }
318
319 write_bool_attr(ax_group, "underflow", ax.underflow);
320 write_bool_attr(ax_group, "overflow", ax.overflow);
321 write_bool_attr(ax_group, "circular", false);
322
323 // Write axis name into a metadata subgroup (key matches Python output)
324 H5::Group ax_meta = ax_group.createGroup("metadata");
325 if (!ax.name.empty()) {
326 write_str_attr(ax_meta, "metadata", ax.name);
327 }
328 ax_group.createGroup("writer_info");
329 }
330
331 // Write "axes" object-reference dataset (required by the UHI reader)
332 {
333 std::vector<hobj_ref_t> axis_refs(axes.size());
334 for (size_t ax_n = 0; ax_n < axes.size(); ax_n++) {
335 std::string ax_path = "ref_axes/axis_" + std::to_string(ax_n);
336 chkerr(H5Rcreate(&axis_refs.at(ax_n), group.getId(),
337 ax_path.c_str(), H5R_OBJECT, -1),
338 "axis object reference");
339 }
340 hsize_t ref_dims[1] = {axes.size()};
341 H5::DataSpace ref_space(1, ref_dims);
342 group.createDataSet("axes", H5::PredType::STD_REF_OBJ, ref_space)
343 .write(axis_refs.data(), H5::PredType::STD_REF_OBJ);
344 }
345
346 // Write storage group
347 H5::Group storage = group.createGroup("storage");
348 write_str_attr(storage, "type", storage_type_name<T>());
349
350 // Helper to write a named dataset into the storage group
351 auto add_dataset = [&hist, &type, &space, &props, &storage](
352 const std::string& ds_name,
353 const auto& func) {
354 auto flat = get_flat_hist_array<T>(hist, func);
355 assert(std::cmp_equal(space.getSelectNpoints(),flat.size()));
356 storage.createDataSet(ds_name, type, space, props)
357 .write(flat.data(), type);
358 };
359
360 // values (the primary bin contents)
361 if constexpr (HasValue<T>) {
362 add_dataset("values",
363 [](const auto& x) { return x->value(); });
364 } else {
365 add_dataset("values",
366 [](const auto& x) { return *x; });
367 }
368 // variances
369 if constexpr (HasVariance<T>) {
370 add_dataset("variances",
371 [](const auto& x) { return x->variance(); });
372 }
373 // count (mean / weighted_mean accumulators)
374 if constexpr (HasCount<T>) {
375 add_dataset("count",
376 [](const auto& x) { return x->count(); });
377 }
378 // sum_of_weights (weighted_mean accumulator)
379 if constexpr (HasSumOfWeights<T>) {
380 add_dataset("sum_of_weights",
381 [](const auto& x) { return x->sum_of_weights(); });
382 }
383 // sum_of_weights_squared (weighted_mean accumulator)
384 if constexpr (HasSumOfWeightsSquared<T>) {
385 add_dataset("sum_of_weights_squared",
386 [](const auto& x) { return x->sum_of_weights_squared(); });
387 }
388
389 }
390}