2   Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
 
   14 #include <boost/histogram.hpp>
 
   17 namespace H5Utils::hist::detail {
 
   19   // various stuff to inspect the boost histograms
 
   21   // inspect the accumulator to see what we can read out
 
   23   concept HasValue = requires(T t) {
 
   27   // figure out the output array type
 
   31     using type = typename T::value_type;
 
   33   // this needs further specialization in some cases
 
   35   using iter_value_t = decltype(
 
   36     std::declval<T>().begin()->value());
 
   40     using type = std::remove_cv_t<std::remove_reference_t<iter_value_t<T>>>;
 
   43   using array_t = typename array_type<T>::type;
 
   47   const H5::DataType hdf5_t;
 
   49   const H5::DataType hdf5_t<double> = H5::PredType::NATIVE_DOUBLE;
 
   51   const H5::DataType hdf5_t<float> = H5::PredType::NATIVE_FLOAT;
 
   53   const H5::DataType hdf5_t<int> = H5::PredType::NATIVE_INT;
 
   55   const H5::DataType hdf5_t<unsigned long> = H5::PredType::NATIVE_ULONG;
 
   57   const H5::DataType hdf5_t<unsigned long long> = H5::PredType::NATIVE_ULLONG;
 
   60   // function to flatten the histogram array
 
   61   template <typename T, typename F>
 
   62   std::vector<array_t<T>> get_flat_hist_array(const T& hist,
 
   63                                               const F& value_getter)
 
   65     using out_t = std::vector<array_t<T>>;
 
   66     namespace bh = boost::histogram;
 
   67     std::vector<int> hist_dims(hist.rank());
 
   68     for (size_t dim = 0; dim < hist.rank(); dim++) {
 
   69       hist_dims.at(dim) = hist.axis(dim).size();
 
   72     // calculate a global index for the flat output array
 
   73     auto global_index = [&hist](const auto& hist_idx)
 
   75       // This is unrolling the index to a c-style array. We calculate
 
   76       // a global index, starting with the last dimension. The last
 
   77       // dimension is just added to the global index. Each previous
 
   78       // dimension needs to multiply the index by the cumulative
 
   79       // product of the maximum index of subsequent dimensions.
 
   82       for (size_t dim_p1 = hist.rank(); dim_p1 != 0; dim_p1--) {
 
   83         size_t dim = dim_p1 - 1;
 
   84         auto ops = hist.axis(dim).options();
 
   85         size_t underflow = (ops & bh::axis::option::underflow) ? 1 : 0;
 
   86         size_t overflow = (ops & bh::axis::option::overflow) ? 1 : 0;
 
   87         // ugly check to decide if we have to add underflow offset
 
   88         int h5idx_signed = hist_idx.index(dim) + underflow;
 
   89         if (h5idx_signed < 0) throw std::logic_error("neg signed index");
 
   90         size_t h5idx = static_cast<size_t>(h5idx_signed);
 
   91         global += h5idx * stride;
 
   92         stride *= hist.axis(dim).size() + underflow + overflow;
 
   97     out_t out(hist.size());
 
   98     std::set<size_t> used_indices;
 
   99     for (const auto& x : bh::indexed(hist, bh::coverage::all)) {
 
  100       size_t global = global_index(x);
 
  101       if (!used_indices.insert(global).second) {
 
  102         throw std::logic_error(
 
  103           "repeat global index: " + std::to_string(global));
 
  105       out.at(global) = value_getter(x);
 
  112     std::vector<float> edges{};
 
  114     int extra_bins_in_hist{};
 
  117   template <typename T>
 
  118   std::vector<Axis> get_axes(const T& hist) {
 
  119     using out_t = std::vector<Axis>;
 
  120     namespace opts = boost::histogram::axis::option;
 
  122     for (size_t ax_n = 0; ax_n < hist.rank(); ax_n++) {
 
  123       const auto& axis = hist.axis(ax_n);
 
  124       out_t::value_type ax;
 
  125       bool underflow = (axis.options() & opts::underflow);
 
  126       bool overflow = (axis.options() & opts::overflow);
 
  127       // there are n normal bins + 1 edges, so we start with one less
 
  128       // bin in the histogram then in the edges. Then we add under /
 
  130       ax.extra_bins_in_hist = -1 + (underflow ? 1 : 0) + (overflow ? 1: 0);
 
  131       for (const auto& bin: axis) {
 
  132         ax.edges.push_back(bin.lower());
 
  134       // add the upper edge too
 
  135       if (axis.size() > 0) {
 
  136         const auto& last = *axis.rbegin();
 
  137         // for integer binning, which has the same upper and lower bin
 
  138         // value, we have one more bonus bin in the hist and don't save
 
  140         if (last.upper() == last.lower()) {
 
  141           ax.extra_bins_in_hist += 1;
 
  143           ax.edges.push_back(last.upper());
 
  146       ax.name = axis.metadata();
 
  152   inline void chkerr(herr_t code, const std::string& error) {
 
  153     if (code < 0) throw std::runtime_error("error setting " + error);
 
  156   template <typename T>
 
  157   auto product(const std::vector<T>& v) {
 
  158     return std::accumulate(
 
  159       v.begin(), v.end(), T(1), std::multiplies<hsize_t>{});
 
  163 namespace H5Utils::hist {
 
  165   // main hist writing function, this should be all anyone needs to
 
  167   template <typename T>
 
  168   void write_hist_to_group(
 
  169     H5::Group& parent_group,
 
  171     const std::string& name)
 
  173     using namespace detail;
 
  175     // build the data space
 
  176     auto axes = get_axes(hist);
 
  177     std::vector<hsize_t> ds_dims;
 
  178     for (const auto& axis: axes) {
 
  179       int total_size = axis.edges.size() + axis.extra_bins_in_hist;
 
  180       if (total_size < 0) throw std::logic_error("negative bin count");
 
  181       ds_dims.push_back(static_cast<hsize_t>(total_size));
 
  183     H5::DataSpace space(ds_dims.size(), ds_dims.data());
 
  185     // set up group, props, type of dataset
 
  186     H5::Group group = parent_group.createGroup(name);
 
  187     H5::DSetCreatPropList props;
 
  188     props.setChunk(ds_dims.size(), ds_dims.data());
 
  191       H5Pset_dset_no_attrs_hint(props.getId(), true),
 
  192       "no attribute hint");
 
  193     H5::DataType type = hdf5_t<array_t<T>>;
 
  195     // add the edge datasets
 
  196     H5::Group axgroup = group.createGroup("axes");
 
  197     std::vector<H5::DataSet> axes_ds;
 
  198     for (const auto& axis: axes) {
 
  199       std::array<hsize_t,1> axdims{axis.edges.size()};
 
  200       H5::DataSpace ax_space(axdims.size(), axdims.data());
 
  201       H5::DSetCreatPropList ax_props;
 
  202       ax_props.copy(props);
 
  203       ax_props.setChunk(axdims.size(), axdims.data());
 
  204       const auto& ax_type = hdf5_t<float>;
 
  205       auto ax_ds = axgroup.createDataSet(
 
  210       ax_ds.write(axis.edges.data(), ax_type);
 
  211       axes_ds.push_back(ax_ds);
 
  214     // Generic dataset adder. We need to call this multiple times, for
 
  215     // each type of data that the histogram is saving.
 
  216     auto add_moment = [&hist, &type, &space, &props, &group, &axes_ds](
 
  217       const std::string& name,
 
  219       auto flat = get_flat_hist_array<T>(hist, func);
 
  220       assert(static_cast<std::size_t>(space.getSelectNpoints()) == flat.size());
 
  221       H5::DataSet dataset = group.createDataSet(
 
  226       dataset.write(flat.data(), type);
 
  229       for (const H5::DataSet& ax: axes_ds) {
 
  232             dataset.getId(), ax.getId(), axnum++),
 
  237     // Read in the data and write to a dataset. The behavior here
 
  238     // depends on which accumulator was used.
 
  239     if constexpr (HasValue<T>) {
 
  240       add_moment("histogram",
 
  241         [](const auto& x) { return x->value(); } );
 
  243       add_moment("histogram",
 
  244         [](const auto& x) { return *x; } );
 
  247     if constexpr (requires(T t) { t.begin()->variance(); }) {
 
  250         [](const auto& x) { return x->variance();}
 
  254     if constexpr (requires(T t) { t.begin()->count(); }) {
 
  257         [](const auto& x) { return x->count(); }
 
  260     // add the sum of weights
 
  261     if constexpr (requires(T t) { t.begin()->sum_of_weights(); }) {
 
  264         [](const auto& x) { return x->sum_of_weights(); }
 
  267     // add the sum of weights squared
 
  268     if constexpr (requires(T t) { t.begin()->sum_of_weights_squared(); }) {
 
  270         "sum_of_weights_squared",
 
  271         [](const auto& x) { return x->sum_of_weights_squared(); }