32std::string read_str_attr(
const H5::H5Object& obj,
33 const std::string& key)
35 H5::Attribute attr =
obj.openAttribute(key);
37 attr.read(attr.getDataType(), val);
41bool read_bool_attr(
const H5::H5Object& obj,
const std::string& key) {
42 H5::Attribute attr =
obj.openAttribute(key);
44 attr.read(H5::PredType::NATIVE_UINT8, &val);
48double read_double_attr(
const H5::H5Object& obj,
const std::string& key) {
49 H5::Attribute attr =
obj.openAttribute(key);
51 attr.read(H5::PredType::NATIVE_DOUBLE, &val);
55int64_t read_int_attr(
const H5::H5Object& obj,
const std::string& key) {
56 H5::Attribute attr =
obj.openAttribute(key);
58 attr.read(H5::PredType::NATIVE_INT64, &val);
65std::vector<std::string> read_string_categories(
const H5::DataSet& ds)
67 H5::StrType strtype(H5::PredType::C_S1, H5T_VARIABLE);
68 H5::DataSpace
sp =
ds.getSpace();
69 hsize_t npts =
static_cast<hsize_t
>(
sp.getSimpleExtentNpoints());
70 std::vector<char*> ptrs(npts,
nullptr);
78 ~VlGuard() { H5Treclaim(tid, sid, H5P_DEFAULT, buf); }
79 } guard{strtype.getId(),
sp.getId(), ptrs.data()};
80 ds.read(ptrs.data(), strtype);
81 std::vector<std::string>
labels;
83 for (
char* p : ptrs) {
84 labels.emplace_back(p ? p :
"");
92std::vector<Axis> read_axes(
const H5::Group& grp) {
93 H5::Group ref_axes = grp.openGroup(
"ref_axes");
94 std::vector<Axis> axes;
95 for (
int n = 0; ; ++
n) {
96 std::string ax_name =
"axis_" + std::to_string(n);
97 htri_t
exists = H5Lexists(ref_axes.getId(), ax_name.c_str(), H5P_DEFAULT);
100 H5::Group ax_grp = ref_axes.openGroup(ax_name);
103 std::string
type = read_str_attr(ax_grp,
"type");
104 if (
type ==
"regular") {
105 double lower = read_double_attr(ax_grp,
"lower");
106 double upper = read_double_attr(ax_grp,
"upper");
107 int64_t
bins = read_int_attr (ax_grp,
"bins");
109 }
else if (
type ==
"variable") {
110 H5::DataSet edge_ds = ax_grp.openDataSet(
"edges");
111 hsize_t npts = edge_ds.getSpace().getSimpleExtentNpoints();
112 std::vector<double> edges(npts);
113 edge_ds.read(edges.data(), H5::PredType::NATIVE_DOUBLE);
114 ax.edges = std::move(edges);
115 }
else if (
type ==
"integer") {
116 int64_t
start = read_int_attr(ax_grp,
"start");
117 int64_t
stop = read_int_attr(ax_grp,
"stop");
118 ax.edges = std::pair<int64_t,int64_t>{
start,
stop};
119 }
else if (
type ==
"category") {
120 H5::DataSet cat_ds = ax_grp.openDataSet(
"categories");
121 H5::DataType
dtype = cat_ds.getDataType();
122 hsize_t npts = cat_ds.getSpace().getSimpleExtentNpoints();
123 if (
dtype.getClass() == H5T_STRING) {
124 ax.edges = read_string_categories(cat_ds);
126 std::vector<int64_t> int_vals(npts);
127 cat_ds.read(int_vals.data(), H5::PredType::NATIVE_INT64);
128 ax.edges = std::move(int_vals);
132 ax.underflow = read_bool_attr(ax_grp,
"underflow");
133 ax.overflow = read_bool_attr(ax_grp,
"overflow");
136 htri_t has_meta = H5Lexists(ax_grp.getId(),
"metadata", H5P_DEFAULT);
138 H5::Group
meta = ax_grp.openGroup(
"metadata");
139 if (
meta.attrExists(
"metadata")) {
140 ax.name = read_str_attr(
meta,
"metadata");
144 axes.push_back(std::move(ax));
154std::vector<hsize_t> read_dims(
const H5::Group& grp) {
155 H5::DataSet
ds = grp.openGroup(
"storage").openDataSet(
"values");
156 H5::DataSpace
sp =
ds.getSpace();
157 int ndims =
sp.getSimpleExtentNdims();
158 std::vector<hsize_t> dims(
static_cast<size_t>(ndims));
159 sp.getSimpleExtentDims(dims.data());
163size_t flat_size(
const std::vector<hsize_t>& dims) {
164 return std::accumulate(dims.begin(), dims.end(),
165 hsize_t(1), std::multiplies<hsize_t>{});
175std::pair<std::vector<T>, H5::DataType>
176read_dataset(
const H5::Group& grp,
const std::string& name)
178 H5::DataSet
ds = grp.openDataSet(name);
179 H5::DataType
dtype =
ds.getDataType();
180 hsize_t
n =
ds.getSpace().getSimpleExtentNpoints();
181 std::vector<T>
data(n);
187auto get_type_checker(
const H5::DataType& expected) {
188 return [expected](
auto p) {
189 if (
p.second != expected)
190 throw std::runtime_error(
191 "histogram datasets have inconsistent HDF5 types");
192 return std::move(
p.first);
199void write_dataset(H5::Group& grp,
200 const std::string& name,
201 const std::vector<T>&
data,
202 const std::vector<hsize_t>& dims,
203 const H5::DataType& file_dtype)
205 H5::DataSpace space(
static_cast<int>(dims.size()), dims.data());
206 H5::DSetCreatPropList props;
207 props.setChunk(
static_cast<int>(dims.size()), dims.data());
209 chkerr(H5Pset_dset_no_attrs_hint(props.getId(),
true),
"no attribute hint");
210 if (space.getSelectNpoints() !=
static_cast<hssize_t
>(
data.size()))
211 throw std::runtime_error(
212 "write_dataset: dataspace size does not match data size");
213 grp.createDataSet(name, file_dtype, space, props)
220void check_axes_consistent(
const std::vector<Axis>& stored,
221 const std::vector<Axis>& incoming)
223 if (stored != incoming)
224 throw std::runtime_error(
"histogram axes are inconsistent between input files");
230H5::Group open_or_create_group(H5::Group& parent,
const std::string& name) {
233 return parent.createGroup(name);
246 SimpleHistogram(std::vector<Axis> axes,
247 std::vector<hsize_t> dims)
248 : m_axes(std::move(axes))
249 , m_dims(std::move(dims))
250 , m_values(flat_size(m_dims),
T(0))
253 void add(
const H5::Group& src)
override {
254 check_axes_consistent(m_axes, read_axes(src));
255 H5::Group storage =
src.openGroup(
"storage");
256 auto [
v,
dtype] = read_dataset<T>(storage,
"values");
257 m_storage_dtype =
dtype;
258 if (
v.size() != m_values.size())
259 throw std::runtime_error(
260 "histogram bin count in input file does not match expected size");
261 for (
size_t i = 0;
i < m_values.size(); ++
i) m_values[i] += v[i];
264 void write(H5::Group& parent,
const std::string& name)
const override {
265 H5::Group grp =
parent.createGroup(name);
267 grp.createGroup(
"metadata");
268 grp.createGroup(
"writer_info");
271 H5::Group storage = grp.createGroup(
"storage");
272 write_str_attr(storage,
"type", std::is_integral_v<T> ?
"int" :
"double");
273 write_dataset<T>(storage,
"values", m_values, m_dims, m_storage_dtype);
277 std::vector<Axis> m_axes;
278 std::vector<hsize_t> m_dims;
279 std::vector<T> m_values;
280 H5::DataType m_storage_dtype;
288 WeightedHistogram(std::vector<Axis> axes, std::vector<hsize_t> dims)
289 : m_axes(std::move(axes))
290 , m_dims(std::move(dims))
291 , m_values(flat_size(m_dims), 0.0)
292 , m_variances(flat_size(m_dims), 0.0)
295 void add(
const H5::Group& src)
override {
296 check_axes_consistent(m_axes, read_axes(src));
297 H5::Group storage =
src.openGroup(
"storage");
298 auto [
v,
dtype] = read_dataset<double>(storage,
"values");
299 m_storage_dtype =
dtype;
300 auto check = get_type_checker(dtype);
301 std::vector<double>
w =
check(read_dataset<double>(storage,
"variances"));
302 if (
v.size() != m_values.size() ||
w.size() != m_variances.size())
303 throw std::runtime_error(
304 "histogram bin count in input file does not match expected size");
305 for (
size_t i = 0;
i < m_values.size(); ++
i) {
307 m_variances[
i] +=
w[
i];
311 void write(H5::Group& parent,
const std::string& name)
const override {
312 H5::Group grp =
parent.createGroup(name);
314 grp.createGroup(
"metadata");
315 grp.createGroup(
"writer_info");
318 H5::Group storage = grp.createGroup(
"storage");
320 write_dataset<double>(storage,
"values", m_values, m_dims, m_storage_dtype);
321 write_dataset<double>(storage,
"variances", m_variances, m_dims, m_storage_dtype);
325 std::vector<Axis> m_axes;
326 std::vector<hsize_t> m_dims;
327 std::vector<double> m_values;
328 std::vector<double> m_variances;
329 H5::DataType m_storage_dtype;
341 MeanHistogram(std::vector<Axis> axes, std::vector<hsize_t> dims)
342 : m_axes(std::move(axes))
343 , m_dims(std::move(dims))
344 , m_n(flat_size(m_dims), 0.0)
345 , m_sums(flat_size(m_dims), 0.0)
346 , m_M2(flat_size(m_dims), 0.0)
349 void add(
const H5::Group& src)
override {
350 check_axes_consistent(m_axes, read_axes(src));
351 H5::Group storage =
src.openGroup(
"storage");
352 auto [means,
dtype] = read_dataset<double>(storage,
"values");
353 m_storage_dtype =
dtype;
354 auto check = get_type_checker(dtype);
355 std::vector<double>
counts =
check(read_dataset<double>(storage,
"count"));
357 std::vector<double> variances(m_n.size(), 0.0);
358 htri_t has_var = H5Lexists(storage.getId(),
"variances", H5P_DEFAULT);
360 variances =
check(read_dataset<double>(storage,
"variances"));
363 if (
counts.size() != m_n.size() || means.size() != m_n.size())
364 throw std::runtime_error(
365 "histogram bin count in input file does not match expected size");
367 for (
size_t i = 0;
i < m_n.size(); ++
i) {
372 if (nc == 0.0)
continue;
374 double mean_b = means[
i];
375 double mean_a = (na > 0.0) ? m_sums[i] / na : 0.0;
376 double delta = mean_b - mean_a;
382 double M2_b = (
nb > 1.0) ? variances[i] * (nb - 1.0) : 0.0;
383 m_M2[
i] += M2_b + delta * delta * na *
nb /
nc;
384 m_sums[
i] = na * mean_a +
nb * mean_b;
389 void write(H5::Group& parent,
const std::string& name)
const override {
390 H5::Group grp =
parent.createGroup(name);
392 grp.createGroup(
"metadata");
393 grp.createGroup(
"writer_info");
396 H5::Group storage = grp.createGroup(
"storage");
399 size_t n = m_n.size();
400 std::vector<double>
values(n);
401 std::vector<double> variances(n);
402 for (
size_t i = 0;
i <
n; ++
i) {
406 variances[
i] = (m_n[
i] > 1.0) ? m_M2[i] / (m_n[i] - 1.0) : 0.0;
410 write_dataset<double>(storage,
"count", m_n, m_dims, m_storage_dtype);
411 write_dataset<double>(storage,
"values", values, m_dims, m_storage_dtype);
412 write_dataset<double>(storage,
"variances", variances, m_dims, m_storage_dtype);
416 std::vector<Axis> m_axes;
417 std::vector<hsize_t> m_dims;
418 std::vector<double> m_n;
419 std::vector<double> m_sums;
420 std::vector<double> m_M2;
421 H5::DataType m_storage_dtype;
433 WeightedMeanHistogram(std::vector<Axis> axes, std::vector<hsize_t> dims)
434 : m_axes(std::move(axes))
435 , m_dims(std::move(dims))
436 , m_sum_w(flat_size(m_dims), 0.0)
437 , m_sum_w2(flat_size(m_dims), 0.0)
438 , m_counts(flat_size(m_dims), 0.0)
439 , m_weighted_sums(flat_size(m_dims), 0.0)
442 void add(
const H5::Group& src)
override {
443 check_axes_consistent(m_axes, read_axes(src));
444 H5::Group storage =
src.openGroup(
"storage");
445 auto [
values,
dtype] = read_dataset<double>(storage,
"values");
446 m_storage_dtype =
dtype;
447 auto check = get_type_checker(dtype);
448 std::vector<double> sum_w =
check(
449 read_dataset<double>(storage,
"sum_of_weights"));
450 std::vector<double> sum_w2 =
check(
451 read_dataset<double>(storage,
"sum_of_weights_squared"));
453 if (
values.size() != m_sum_w.size())
454 throw std::runtime_error(
455 "histogram bin count in input file does not match expected size");
458 std::vector<double>
counts(m_sum_w.size(), 0.0);
459 htri_t has_count = H5Lexists(storage.getId(),
"count", H5P_DEFAULT);
461 counts =
check(read_dataset<double>(storage,
"count"));
464 for (
size_t i = 0;
i < m_sum_w.size(); ++
i) {
465 m_weighted_sums[
i] += sum_w[
i] *
values[
i];
466 m_sum_w[
i] += sum_w[
i];
467 m_sum_w2[
i] += sum_w2[
i];
472 void write(H5::Group& parent,
const std::string& name)
const override {
473 H5::Group grp =
parent.createGroup(name);
475 grp.createGroup(
"metadata");
476 grp.createGroup(
"writer_info");
479 H5::Group storage = grp.createGroup(
"storage");
482 size_t n = m_sum_w.size();
483 std::vector<double>
values(n, 0.0);
484 for (
size_t i = 0;
i <
n; ++
i) {
485 if (m_sum_w[i] != 0.0) {
486 values[
i] = m_weighted_sums[
i] / m_sum_w[
i];
490 write_dataset<double>(storage,
"values", values, m_dims, m_storage_dtype);
491 write_dataset<double>(storage,
"sum_of_weights", m_sum_w, m_dims, m_storage_dtype);
492 write_dataset<double>(storage,
"sum_of_weights_squared", m_sum_w2, m_dims, m_storage_dtype);
493 write_dataset<double>(storage,
"count", m_counts, m_dims, m_storage_dtype);
497 std::vector<Axis> m_axes;
498 std::vector<hsize_t> m_dims;
499 std::vector<double> m_sum_w;
500 std::vector<double> m_sum_w2;
501 std::vector<double> m_counts;
502 std::vector<double> m_weighted_sums;
503 H5::DataType m_storage_dtype;
517std::unique_ptr<IHistogram>
519 std::vector<Axis> axes = read_axes(src);
520 std::vector<hsize_t> dims = read_dims(src);
522 H5::Group storage = src.openGroup(
"storage");
523 std::string
type = read_str_attr(storage,
"type");
525 if (
type ==
"double") {
526 return std::make_unique<SimpleHistogram<double>>(
527 std::move(axes), std::move(dims));
530 return std::make_unique<SimpleHistogram<int64_t>>(
531 std::move(axes), std::move(dims));
533 if (
type ==
"weighted") {
534 return std::make_unique<WeightedHistogram>(
535 std::move(axes), std::move(dims));
537 if (
type ==
"mean") {
538 return std::make_unique<MeanHistogram>(
539 std::move(axes), std::move(dims));
541 if (
type ==
"weighted_mean") {
542 return std::make_unique<WeightedMeanHistogram>(
543 std::move(axes), std::move(dims));
545 throw std::runtime_error(
"HistogramMerger: unknown UHI storage type: " +
type);
558 std::vector<std::string> parts;
559 std::istringstream
ss(path);
561 while (std::getline(
ss, token,
'/')) {
562 if (!token.empty()) parts.push_back(token);
564 if (parts.empty())
continue;
567 H5::Group parent = root;
568 for (
size_t i = 0; i + 1 < parts.size(); ++i) {
569 parent = open_or_create_group(parent, parts[i]);
571 hist->write(parent, parts.back());
char data[hepevt_bytes_allocation_ATLAS]
static const std::vector< std::string > bins
std::unique_ptr< IHistogram > make(const H5::Group &src)
Factory: read the storage type from src and return a zero-initialised concrete IHistogram of the appr...
std::map< std::string, std::unique_ptr< IHistogram > > m_hists
void add(const std::string &path, const H5::Group &src)
Accumulate one UHI histogram group.
void write(H5::Group &root) const
Write every accumulated histogram into the output file.
Abstract interface for a mergeable histogram.
bool exists(const std::string &filename)
does a file exist
bool add(const std::string &hname, TKey *tobj)
void chkerr(herr_t code, const std::string &error)
void write_axes(H5::Group &hist_grp, const std::vector< Axis > &axes)
void write_str_attr(H5::H5Object &obj, const std::string &key, const std::string &val)
const H5::DataType hdf5_t
void write_int_attr(H5::H5Object &obj, const std::string &key, int64_t val)