ATLAS Offline Software
Loading...
Searching...
No Matches
Writer.h
Go to the documentation of this file.
1// this is -*- C++ -*-
2/*
3 Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
4*/
5#ifndef HDF_TUPLE_HH
6#define HDF_TUPLE_HH
7
15
16#include "WriterConfiguration.h"
17#include "H5Traits.h"
18#include "CompressedTypes.h"
19#include "common.h"
20#include "defaults.h"
21
22#include "H5Cpp.h"
23
#include <algorithm>
#include <array>
#include <cassert>
#include <cstddef>
#include <functional>
#include <iterator>
#include <memory>
#include <set>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
29
30namespace H5Utils {
31
34
35 namespace internal {
36
45 template <typename I>
47 {
48 public:
49 virtual ~IDataConsumer() {}
50 virtual data_buffer_t getBuffer(I) const = 0;
51 virtual data_buffer_t getDefault() const = 0;
52 virtual H5::DataType getType() const = 0;
53 virtual H5::DataType getWriteType() const = 0;
54 virtual std::string name() const = 0;
55 typedef I input_type;
56 };
57
59 template <typename T, typename I>
60 class DataConsumer: public IDataConsumer<I>
61 {
62 public:
63 DataConsumer(const std::string&,
64 const std::function<T(I)>&,
65 const T default_value = T(),
67 data_buffer_t getBuffer(I) const override;
68 data_buffer_t getDefault() const override;
69 H5::DataType getType() const override;
70 H5::DataType getWriteType() const override;
71 std::string name() const override;
72 private:
73 std::function<T(I)> m_getter;
74 std::string m_name;
76 H5::DataType m_write_type;
77 };
78 template <typename T, typename I>
80 const std::function<T(I)>& func,
81 const T default_value,
82 Compression comp):
83 m_getter(func),
84 m_name(name),
85 m_default_value(default_value),
87 {
88 }
89 template <typename T, typename I>
91 data_buffer_t buffer;
92 H5Traits<T>::ref(buffer) = m_getter(args);
93 return buffer;
94 }
95 template <typename T, typename I>
97 data_buffer_t default_value;
98 H5Traits<T>::ref(default_value) = m_default_value;
99 return default_value;
100 }
101 template <typename T, typename I>
102 H5::DataType DataConsumer<T, I>::getType() const {
103 return H5Traits<T>::type;
104 }
105 template <typename T, typename I>
106 H5::DataType DataConsumer<T, I>::getWriteType() const {
107 return m_write_type;
108 }
109 template <typename T, typename I>
110 std::string DataConsumer<T, I>::name() const {
111 return m_name;
112 }
113
114 }
116
125 template <typename I>
126 using SharedConsumer = std::shared_ptr<internal::IDataConsumer<I> >;
127 template <typename I>
129 {
130 public:
131
133 template <typename T>
134 void add(const std::string& name, const std::function<T(I)>&,
135 const T& default_value = T(),
137
139 template <
140 typename F,
141 typename R=decltype(std::declval<F>()(std::declval<I>())),
142 typename T=R
143 >
144 void add(const std::string& name, const F func, const T& def = T(),
146 add<R>(name, std::function<R(I)>(std::move(func)), R(def), comp);
147 }
148
149
150 const std::vector<SharedConsumer<I> > & getConsumers() const;
151
152 using input_type = I;
153 template <typename T>
154 using function_type = std::function<T(I)>;
155
156 private:
157 std::vector<SharedConsumer<I> > m_consumers;
158 std::set<std::string> m_used;
159 };
160
161
162 template <typename I>
163 template <typename T>
164 void Consumers<I>::add(const std::string& name,
165 const std::function<T(I)>& fun,
166 const T& def_val,
167 Compression comp)
168 {
169 if (m_used.count(name)) {
170 throw std::logic_error("tried to insert '" + name + "' twice");
171 }
172 m_consumers.push_back(
173 std::make_shared<internal::DataConsumer<T,I>>(name, fun, def_val, comp));
174 m_used.insert(name);
175 }
176
177 template <typename I>
178 const std::vector<SharedConsumer<I>> & Consumers<I>::getConsumers() const {
179 return m_consumers;
180 }
181
182
185 namespace internal {
186
// CheckType inspects the argument type T passed to fill(...) so that
// misuse can be reported through static_assert with a readable
// message rather than a deep template error.
//
// It exposes three compile-time values:
//  - ok_type:  the element found after unwrapping at most N levels of
//              iterable nesting is convertible to the consumer input I
//  - any_type: T, or an element type at some nesting level examined,
//              is convertible to I
//  - depth:    number of iterable levels that were unwrapped
//
// A type counts as iterable when `*begin(std::declval<T&>())` is a
// valid expression; `using std::begin` makes std::begin a candidate in
// addition to whatever argument-dependent lookup finds.
//
// The members are `static constexpr` (implicitly inline in C++17) so
// they can be odr-used, e.g. bound to a const reference, without a
// separate out-of-class definition.
using std::begin;

// Primary template: T is not iterable, so it is a leaf.
template <size_t N, typename T, typename I, typename = void>
struct CheckType {
  static constexpr bool ok_type = std::is_convertible<T,I>::value;
  static constexpr bool any_type = ok_type;
  static constexpr int depth = 0;
};

// T is iterable and there is rank left to unwrap: recurse into the
// element type with N-1.
template <size_t N, typename T, typename I>
struct CheckType <N,T,I,decltype(*begin(std::declval<T&>()), void())> {
  typedef CheckType<N-1,decltype(*begin(std::declval<T&>())),I> subtype;
  static constexpr bool ok_type = subtype::ok_type;
  static constexpr bool any_type = (
    subtype::any_type || std::is_convertible<T,I>::value);
  static constexpr int depth = subtype::depth + 1;
};

// T is iterable but the rank is exhausted (N == 0): T itself has to be
// convertible to I. The element type is still inspected, with N pinned
// at 0, so that any_type and depth describe the full nesting.
template <typename T, typename I>
struct CheckType <0,T,I,decltype(*begin(std::declval<T&>()), void())> {
  typedef CheckType<0,decltype(*begin(std::declval<T&>())),I> subtype;
  static constexpr bool ok_type = std::is_convertible<T,I>::value;
  static constexpr bool any_type = subtype::any_type || ok_type;
  static constexpr int depth = subtype::depth + 1;
};
215
223 template <size_t N, typename F, typename T, size_t M = N>
225
226 std::vector<data_buffer_t> buffer;
227 std::vector<std::array<hsize_t, N> > element_offsets;
228 DataFlattener(const F& filler, T args,
229 const std::array<hsize_t,M>& extent):
230 buffer() {
231 hsize_t offset = 0;
232 for (const auto& arg: args) {
233 const size_t this_dim_max = extent.at(extent.size() - N);
234 if (offset >= this_dim_max) return;
235 DataFlattener<N-1, F, decltype(arg), M> in(filler, arg, extent);
236 buffer.insert(buffer.end(), in.buffer.begin(), in.buffer.end());
237 for (const auto& in_ele: in.element_offsets){
238 std::array<hsize_t, N> element_pos;
239 element_pos.at(0) = offset;
240 std::copy(in_ele.begin(), in_ele.end(), element_pos.begin() + 1);
241 element_offsets.push_back(element_pos);
242 }
243 offset++;
244 }
245 }
246 };
247 template <typename F, typename T, size_t M>
248 struct DataFlattener<0, F, T, M> {
249 std::vector<data_buffer_t> buffer;
250 std::vector<std::array<hsize_t, 0> > element_offsets;
251 DataFlattener(const F& f, T args,
252 const std::array<hsize_t,M>& /*extent*/):
253 buffer(),
254 element_offsets(1) {
255 for (const auto& filler: f) {
256 buffer.push_back(filler->getBuffer(args));
257 }
258 }
259 };
260
263 template<typename I>
264 H5::CompType buildType(const std::vector<SharedConsumer<I> >& consumers) {
265 if (consumers.size() < 1) {
266 throw std::logic_error(
267 "you must specify at least one consumer when initializing the HDF5"
268 "writer");
269 }
270
271 H5::CompType type(consumers.size() * sizeof(data_buffer_t));
272 size_t dt_offset = 0;
273 for (const SharedConsumer<I>& filler: consumers) {
274 type.insertMember(filler->name(), dt_offset, filler->getType());
275 dt_offset += sizeof(data_buffer_t);
276 }
277 return type;
278 }
279 template<typename I>
280 H5::CompType buildWriteType(const std::vector<SharedConsumer<I> >& con) {
281 H5::CompType type(con.size() * sizeof(data_buffer_t));
282 size_t dt_offset = 0;
283 for (const SharedConsumer<I>& filler: con) {
284 type.insertMember(filler->name(), dt_offset, filler->getWriteType());
285 dt_offset += sizeof(data_buffer_t);
286 }
287 type.pack();
288 return type;
289 }
290
292 template <typename I, size_t N>
294 DSParameters(const std::vector<SharedConsumer<I> >& fillers,
295 const std::array<hsize_t,N>& extent,
296 hsize_t batch_size);
297 H5::CompType type{};
298 std::array<hsize_t,N> extent{};
299 hsize_t batch_size{};
300 };
301
302 // DS parameters
303 template <typename I, size_t N>
305 const std::array<hsize_t,N>& extent_,
306 hsize_t batch_size_):
307 type(buildType(cons)),
308 extent(extent_),
309 batch_size(batch_size_)
310 {
311 }
312
313
314 template<typename I>
315 std::vector<data_buffer_t> buildDefault(const std::vector<SharedConsumer<I> >& f) {
316 std::vector<data_buffer_t> def;
317 for (const SharedConsumer<I>& filler: f) {
318 def.push_back(filler->getDefault());
319 }
320 return def;
321 }
322
323 // some internal functions take a vector, others take arrays
324 template <size_t N>
325 std::vector<hsize_t> vec(std::array<hsize_t,N> a) {
326 return std::vector<hsize_t>(a.begin(),a.end());
327 }
328
329 // default initalizer for writers where the extent isn't specified
330 template <hsize_t N>
331 std::array<hsize_t, N> uniform(size_t val) {
332 std::array<hsize_t, N> ar;
333 ar.fill(val);
334 //coverity[UNINIT:FALSE]
335 return ar;
336 }
337
338
339 }
341
349 template <size_t N, typename I>
350 class Writer {
351 public:
352 Writer(H5::Group& group, const std::string& name,
353 const Consumers<I>& consumers,
354 const std::array<hsize_t, N>& extent = internal::uniform<N>(5),
355 hsize_t batch_size = defaults::batch_size);
356 Writer(H5::Group& group,
357 const Consumers<I>& consumers,
359 Writer(const Writer&) = delete;
360 Writer(Writer&&) = default;
363 template <typename T>
364 void fill(T);
365 void flush();
366 size_t index() const;
368 using input_type = I;
369 template <typename T>
370 using function_type = typename consumer_type::template function_type<T>;
372 private:
374 hsize_t m_offset;
376 std::vector<internal::data_buffer_t> m_buffer;
377 std::vector<SharedConsumer<I> > m_consumers;
378 H5::DataSet m_ds;
379 H5::DataSpace m_file_space;
380 };
381
382 template <size_t N, typename I>
383 Writer<N, I>::Writer(H5::Group& group, const std::string& name,
384 const Consumers<I>& consumers,
385 const std::array<hsize_t,N>& extent,
386 hsize_t batch_size):
387 Writer<N,I>(
388 group, consumers, WriterConfiguration<N>{
389 name, // name
390 extent, // extent
391 batch_size, // batch_size
392 extent, // chunks
393 defaults::deflate // deflate
394 })
395 {}
396
397 template <size_t N, typename I>
398 Writer<N, I>::Writer(H5::Group& group,
399 const Consumers<I>& consumers,
400 const WriterConfiguration<N>& cfg):
401 m_par(consumers.getConsumers(), cfg.extent,
402 cfg.batch_size ? *cfg.batch_size : defaults::batch_size),
403 m_offset(0),
404 m_buffer_rows(0),
405 m_consumers(consumers.getConsumers()),
406 m_file_space(H5S_SIMPLE)
407 {
409 if (m_par.batch_size < 1) {
410 throw std::logic_error("batch size must be > 0");
411 }
412 // create space
413 H5::DataSpace space = internal::getUnlimitedSpace(
414 internal::vec(cfg.extent));
415
416 // create params
417 H5::DSetCreatPropList params = internal::getChunckedDatasetParams(cfg);
418 std::vector<data_buffer_t> default_value = internal::buildDefault(
419 consumers.getConsumers());
420 params.setFillValue(m_par.type, default_value.data());
421
422 // create ds
423 internal::throwIfExists(cfg.name, group);
424 H5::CompType packed_type = buildWriteType(consumers.getConsumers());
425 m_ds = group.createDataSet(cfg.name, packed_type, space, params);
426 m_file_space = m_ds.getSpace();
427 m_file_space.selectNone();
428 }
429
430 template <size_t N, typename I>
432 using namespace H5Utils;
433 try {
434 flush();
435 } catch (H5::Exception& err) {
436 internal::printDestructorError(err.getDetailMsg());
437 } catch (std::exception& err) {
439 }
440 }
441
442 template <size_t N, typename I>
443 template <typename T>
444 void Writer<N, I>::fill(T arg) {
445 if (m_buffer_rows == m_par.batch_size) {
446 flush();
447 }
448
449 // make some assertions to simplify debugging, the errors can be
450 // pretty nasty with all these templates.
451 typedef internal::CheckType<N, T, I> checkType;
452 static_assert(
453 checkType::depth >= N,
454 "\n\n"
455 " ** H5 Writer rank is greater than the depth of fill(...) input! **"
456 " \n");
457 static_assert(
458 // Suppress cppcheck warning here. It reports that the condition
459 // below is always true if `N = 0`. In N > 0 cases this
460 // condition might be false though, so I consider this a
461 // spurious warning.
462 //
463 // <3 Dan Guest, 2024-07-16 <3
464 //
465 // cppcheck-suppress incorrectLogicOperator
466 !(checkType::any_type && !checkType::ok_type),
467 "\n\n"
468 " ** H5 Writer input type matches fill(...), but rank is incorrect! **"
469 " \n");
470 static_assert(
471 checkType::any_type,
472 "\n\n"
473 " ** H5 Writer input type doesn't match input for fill(...)! **"
474 " \n");
475
476 internal::DataFlattener<N, decltype(m_consumers), T> buf(
477 m_consumers, std::move(arg), m_par.extent);
478 hsize_t n_el = buf.element_offsets.size();
479 std::vector<hsize_t> elements;
480 for (const auto& el_local: buf.element_offsets) {
481 std::array<hsize_t, N+1> el_global;
482 el_global[0] = m_offset + m_buffer_rows;
483 std::copy(el_local.begin(), el_local.end(), el_global.begin() + 1);
484 elements.insert(elements.end(), el_global.begin(), el_global.end());
485 }
486 if (n_el > 0) {
487 m_file_space.selectElements(H5S_SELECT_APPEND, n_el, elements.data());
488 }
489 m_buffer.insert(m_buffer.end(), buf.buffer.begin(), buf.buffer.end());
491 }
492
493 template <size_t N, typename I>
495 const hsize_t buffer_size = m_buffer_rows;
496 if (buffer_size == 0) return;
497
498 // extend the ds
499 std::vector<hsize_t> total_dims{buffer_size + m_offset};
500 total_dims.insert(total_dims.end(),
501 m_par.extent.begin(),
502 m_par.extent.end());
503 m_ds.extend(total_dims.data());
504 m_file_space.setExtentSimple(total_dims.size(), total_dims.data());
505
506 // write out
507 hsize_t n_buffer_pts = m_buffer.size() / m_consumers.size();
508 assert(m_file_space.getSelectNpoints() >= 0);
509 assert(static_cast<hsize_t>(m_file_space.getSelectNpoints())
510 == n_buffer_pts);
511 H5::DataSpace mem_space(1, &n_buffer_pts);
512 m_ds.write(m_buffer.data(), m_par.type, mem_space, m_file_space);
513 m_offset += buffer_size;
514 m_buffer.clear();
515 m_buffer_rows = 0;
516 m_file_space.selectNone();
517 }
518
519 template <size_t N, typename I>
520 size_t Writer<N, I>::index() const {
521 return m_buffer_rows + m_offset;
522 }
523
534 template <size_t N, class I>
536 H5::Group& group, const std::string& name,
537 const Consumers<I>& consumers,
538 const std::array<hsize_t, N>& extent = internal::uniform<N>(5),
539 hsize_t batch_size = defaults::batch_size) {
540 return Writer<N,I>(group, name, consumers, extent, batch_size);
541 }
542
549 template <typename T>
551
552
559 template <size_t N, typename T>
561
562
569 template <typename T>
571
572}
573
574#endif
std::vector< size_t > vec
static Double_t a
#define F(x, y, z)
Definition MD5.cxx:112
#define I(x, y, z)
Definition MD5.cxx:116
const std::vector< SharedConsumer< I > > & getConsumers() const
Definition Writer.h:178
std::function< T(I)> function_type
Definition Writer.h:154
void add(const std::string &name, const std::function< T(I)> &, const T &default_value=T(), Compression=Compression::STANDARD)
This should be the only method you need in this class.
Definition Writer.h:164
std::vector< SharedConsumer< const T & > > m_consumers
Definition Writer.h:157
std::set< std::string > m_used
Definition Writer.h:158
void add(const std::string &name, const F func, const T &def=T(), Compression comp=Compression::STANDARD)
overload to cast lambdas into functions
Definition Writer.h:144
Writer.
Definition Writer.h:350
Writer(H5::Group &group, const std::string &name, const Consumers< I > &consumers, const std::array< hsize_t, N > &extent=internal::uniform< N >(5), hsize_t batch_size=defaults::batch_size)
Definition Writer.h:383
Writer(const Writer &)=delete
Writer(Writer &&)=default
Writer & operator=(Writer &)=delete
std::vector< internal::data_buffer_t > m_buffer
Definition Writer.h:376
std::vector< SharedConsumer< const T & > > m_consumers
Definition Writer.h:377
Writer(H5::Group &group, const Consumers< I > &consumers, const WriterConfiguration< N > &=WriterConfiguration< N >())
Definition Writer.h:398
void fill(T)
Definition Writer.h:444
const internal::DSParameters< const T &, N > m_par
Definition Writer.h:373
void flush()
Definition Writer.h:494
Consumers< I > consumer_type
Definition Writer.h:367
WriterConfiguration< N > configuration_type
Definition Writer.h:371
size_t index() const
Definition Writer.h:520
typename consumer_type::template function_type< T > function_type
Definition Writer.h:370
implementation for variable filler
Definition Writer.h:61
data_buffer_t getDefault() const override
Definition Writer.h:96
DataConsumer(const std::string &, const std::function< T(I)> &, const T default_value=T(), Compression=Compression::STANDARD)
Definition Writer.h:79
H5::DataType getWriteType() const override
Definition Writer.h:106
H5::DataType getType() const override
Definition Writer.h:102
std::string name() const override
Definition Writer.h:110
data_buffer_t getBuffer(I) const override
Definition Writer.h:90
std::function< T(I)> m_getter
Definition Writer.h:73
DataConsumer classes.
Definition Writer.h:47
virtual std::string name() const =0
virtual data_buffer_t getDefault() const =0
virtual H5::DataType getWriteType() const =0
virtual data_buffer_t getBuffer(I) const =0
virtual H5::DataType getType() const =0
const hsize_t batch_size
Definition defaults.h:9
const int deflate
Definition defaults.h:10
classes to add type traits for H5
Definition common.h:21
H5::DSetCreatPropList getChunckedDatasetParams(const WriterConfiguration< N > &)
H5::CompType buildWriteType(const std::vector< SharedConsumer< I > > &con)
Definition Writer.h:280
std::vector< hsize_t > vec(std::array< hsize_t, N > a)
Definition Writer.h:325
H5::DataType getCompressedType(Compression comp)
H5::DataSpace getUnlimitedSpace(const std::vector< hsize_t > &max_length)
Definition common.cxx:31
std::array< hsize_t, N > uniform(size_t val)
Definition Writer.h:331
void printDestructorError(const std::string &msg)
Definition common.cxx:24
const H5::DataType H5Traits< int >::type
Definition H5Traits.cxx:16
void throwIfExists(const std::string &name, const H5::Group &in_group)
Definition common.cxx:46
std::vector< data_buffer_t > buildDefault(const std::vector< SharedConsumer< I > > &f)
Definition Writer.h:315
H5::CompType buildType(const std::vector< SharedConsumer< I > > &consumers)
Adapter to translate configuration info into the objects needed by the writer.
Definition Writer.h:264
HDF5 Tuple Writer.
Definition common.h:20
std::shared_ptr< internal::IDataConsumer< I > > SharedConsumer
Consumer Class.
Definition Writer.h:126
Writer< N, const T & > CRefWriter
CRefWriter.
Definition Writer.h:560
Writer< N, I > makeWriter(H5::Group &group, const std::string &name, const Consumers< I > &consumers, const std::array< hsize_t, N > &extent=internal::uniform< N >(5), hsize_t batch_size=defaults::batch_size)
makeWriter
Definition Writer.h:535
Writer< 0, const T & > SimpleWriter
SimpleWriter.
Definition Writer.h:570
Consumers< const T & > CRefConsumer
CRefConsumer.
Definition Writer.h:550
CheckType< 0, decltype(*begin(std::declval< T & >())), I > subtype
Definition Writer.h:210
CheckType< N-1, decltype(*begin(std::declval< T & >())), I > subtype
Definition Writer.h:202
Constant parameters for the writer.
Definition Writer.h:293
std::array< hsize_t, N > extent
Definition Writer.h:298
DSParameters(const std::vector< SharedConsumer< I > > &fillers, const std::array< hsize_t, N > &extent, hsize_t batch_size)
Definition Writer.h:304
std::vector< data_buffer_t > buffer
Definition Writer.h:249
DataFlattener(const F &f, T args, const std::array< hsize_t, M > &)
Definition Writer.h:251
std::vector< std::array< hsize_t, 0 > > element_offsets
Definition Writer.h:250
Data flattener class: this is used by the writer to read in the elements one by one and put them in a...
Definition Writer.h:224
DataFlattener(const F &filler, T args, const std::array< hsize_t, M > &extent)
Definition Writer.h:228
std::vector< data_buffer_t > buffer
Definition Writer.h:226
std::vector< std::array< hsize_t, N > > element_offsets
Definition Writer.h:227
We have lots of code to get around HDF5's rather weak typing.
Definition H5Traits.h:54
consumer_t getConsumers()