ATLAS Offline Software
Loading...
Searching...
No Matches
Writer.h
Go to the documentation of this file.
1// this is -*- C++ -*-
2/*
3 Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
4*/
5#ifndef HDF_TUPLE_HH
6#define HDF_TUPLE_HH
7
15
16#include "WriterConfiguration.h"
17#include "H5Traits.h"
18#include "CompressedTypes.h"
19#include "common.h"
20#include "defaults.h"
21
22#include "H5Cpp.h"
23
24#include <functional>
25#include <vector>
26#include <memory>
27#include <cassert>
28#include <set>
29#include <mutex>
30
31namespace H5Utils {
32
35
36 namespace internal {
37
46 template <typename I>
48 {
49 public:
50 virtual ~IDataConsumer() {}
51 virtual data_buffer_t getBuffer(I) const = 0;
52 virtual data_buffer_t getDefault() const = 0;
53 virtual H5::DataType getType() const = 0;
54 virtual H5::DataType getWriteType() const = 0;
55 virtual std::string name() const = 0;
56 typedef I input_type;
57 };
58
60 template <typename T, typename I>
61 class DataConsumer: public IDataConsumer<I>
62 {
63 public:
64 DataConsumer(const std::string&,
65 const std::function<T(I)>&,
66 const T default_value = T(),
68 data_buffer_t getBuffer(I) const override;
69 data_buffer_t getDefault() const override;
70 H5::DataType getType() const override;
71 H5::DataType getWriteType() const override;
72 std::string name() const override;
73 private:
74 std::function<T(I)> m_getter;
75 std::string m_name;
77 H5::DataType m_write_type;
78 };
79 template <typename T, typename I>
81 const std::function<T(I)>& func,
82 const T default_value,
83 Compression comp):
84 m_getter(func),
85 m_name(name),
86 m_default_value(default_value),
88 {
89 }
90 template <typename T, typename I>
92 data_buffer_t buffer;
93 H5Traits<T>::ref(buffer) = m_getter(args);
94 return buffer;
95 }
96 template <typename T, typename I>
98 data_buffer_t default_value;
99 H5Traits<T>::ref(default_value) = m_default_value;
100 return default_value;
101 }
// Return the HDF5 datatype registered for T in H5Traits<T>::type.
// buildType() uses this value as the member type when it assembles the
// compound type laid out in data_buffer_t-sized slots.
102 template <typename T, typename I>
103 H5::DataType DataConsumer<T, I>::getType() const {
104 return H5Traits<T>::type;
105 }
// Return the HDF5 datatype stored in m_write_type. buildWriteType() uses
// it as the member type of the compound type the dataset is created with.
// NOTE(review): the initializer of m_write_type is not visible in this
// listing; presumably it is derived from the constructor's Compression
// argument -- confirm against the constructor.
106 template <typename T, typename I>
107 H5::DataType DataConsumer<T, I>::getWriteType() const {
108 return m_write_type;
109 }
// Return (by value) the name this consumer was constructed with.
// buildType() and buildWriteType() use it as the member name inside the
// compound datatype.
110 template <typename T, typename I>
111 std::string DataConsumer<T, I>::name() const {
112 return m_name;
113 }
114
115 }
117
126 template <typename I>
127 using SharedConsumer = std::shared_ptr<internal::IDataConsumer<I> >;
128 template <typename I>
130 {
131 public:
132
134 template <typename T>
135 void add(const std::string& name, const std::function<T(I)>&,
136 const T& default_value = T(),
138
140 template <
141 typename F,
142 typename R=decltype(std::declval<F>()(std::declval<I>())),
143 typename T=R
144 >
145 void add(const std::string& name, const F func, const T& def = T(),
147 add<R>(name, std::function<R(I)>(std::move(func)), R(def), comp);
148 }
149
150
151 const std::vector<SharedConsumer<I> > & getConsumers() const;
152
153 using input_type = I;
154 template <typename T>
155 using function_type = std::function<T(I)>;
156
157 private:
158 std::vector<SharedConsumer<I> > m_consumers;
159 std::set<std::string> m_used;
160 };
161
162
// Register a new output variable.
//
// name:    member name for the variable; must not have been added before.
// fun:     callable that extracts a T from an input of type I.
// def_val: value handed to the DataConsumer as its default.
// comp:    compression setting forwarded to the DataConsumer.
//
// Throws std::logic_error if `name` was already registered on this object.
163 template <typename I>
164 template <typename T>
165 void Consumers<I>::add(const std::string& name,
166 const std::function<T(I)>& fun,
167 const T& def_val,
168 Compression comp)
169 {
// Reject duplicates before touching any state.
170 if (m_used.count(name)) {
171 throw std::logic_error("tried to insert '" + name + "' twice");
172 }
// Consumers are kept in insertion order; the name is recorded only after
// the consumer has been appended.
173 m_consumers.push_back(
174 std::make_shared<internal::DataConsumer<T,I>>(name, fun, def_val, comp));
175 m_used.insert(name);
176 }
177
// Read-only access to the registered consumers, in the order in which
// they were appended by add(). The reference refers to this object's own
// vector, so it is only usable while this Consumers object is alive.
178 template <typename I>
179 const std::vector<SharedConsumer<I>> & Consumers<I>::getConsumers() const {
180 return m_consumers;
181 }
182
183
186 namespace internal {
187
188 // This class exists to inspect the way the fill(...) function is
189 // called, so that errors can be caught with a static assert and
190 // give a much less cryptic error message.
191 //
192 // We check to make sure the depth of the input matches the rank
193 // of the output, and that the types match.
194 using std::begin;
// Primary template: the fallback used when none of the partial
// specializations (which require `*begin(std::declval<T&>())` to be a
// valid expression) applies. T is treated as a leaf:
//   ok_type  - T is convertible to the writer input type I
//   any_type - same as ok_type at this level
//   depth    - 0, no container levels were unwrapped
// N is not used at this level.
195 template <size_t N, typename T, typename I, typename = void>
196 struct CheckType {
197 static const bool ok_type = std::is_convertible<T,I>::value;
198 static const bool any_type = ok_type;
199 static const int depth = 0;
200 };
// Recursive case, selected when `*begin(std::declval<T&>())` is well
// formed. It unwraps one level and recurses with N-1 on the type obtained
// by dereferencing begin().
//   ok_type  - taken unchanged from the nested level
//   any_type - true if the nested level reports any_type, or if T itself
//              is convertible to I
//   depth    - nested depth plus one for the level unwrapped here
201 template <size_t N, typename T, typename I>
202 struct CheckType <N,T,I,decltype(*begin(std::declval<T&>()), void())> {
203 typedef CheckType<N-1,decltype(*begin(std::declval<T&>())),I> subtype;
204 static const bool ok_type = subtype::ok_type;
205 static const bool any_type = (
206 subtype::any_type || std::is_convertible<T,I>::value);
207 static const int depth = subtype::depth + 1;
208 };
209 template <typename T, typename I>
210 struct CheckType <0,T,I,decltype(*begin(std::declval<T&>()), void())> {
212 static const bool ok_type = std::is_convertible<T,I>::value;
213 static const bool any_type = subtype::any_type || ok_type;
214 static const int depth = subtype::depth + 1;
215 };
216
224 template <size_t N, typename F, typename T, size_t M = N>
226
227 std::vector<data_buffer_t> buffer;
228 std::vector<std::array<hsize_t, N> > element_offsets;
229 DataFlattener(const F& filler, T args,
230 const std::array<hsize_t,M>& extent):
231 buffer() {
232 hsize_t offset = 0;
233 for (const auto& arg: args) {
234 const size_t this_dim_max = extent.at(extent.size() - N);
235 if (offset >= this_dim_max) return;
236 DataFlattener<N-1, F, decltype(arg), M> in(filler, arg, extent);
237 buffer.insert(buffer.end(), in.buffer.begin(), in.buffer.end());
238 for (const auto& in_ele: in.element_offsets){
239 std::array<hsize_t, N> element_pos;
240 element_pos.at(0) = offset;
241 std::copy(in_ele.begin(), in_ele.end(), element_pos.begin() + 1);
242 element_offsets.push_back(element_pos);
243 }
244 offset++;
245 }
246 }
247 };
// Rank-0 specialization of DataFlattener: `args` is passed as-is to every
// consumer instead of being iterated over.
//
// buffer:          one data_buffer_t per entry of `f`, in the order of `f`,
//                  each produced by getBuffer(args).
// element_offsets: exactly one zero-length index array.
//
// The extent argument is accepted but not used at this rank.
248 template <typename F, typename T, size_t M>
249 struct DataFlattener<0, F, T, M> {
250 std::vector<data_buffer_t> buffer;
251 std::vector<std::array<hsize_t, 0> > element_offsets;
252 DataFlattener(const F& f, T args,
253 const std::array<hsize_t,M>& /*extent*/):
254 buffer(),
255 element_offsets(1) {
// Ask every consumer for its value of this single element.
256 for (const auto& filler: f) {
257 buffer.push_back(filler->getBuffer(args));
258 }
259 }
260 };
261
// Build the compound datatype that describes one buffered row.
//
// Every consumer contributes one member, named filler->name() and typed
// filler->getType(). Member i is placed at byte offset
// i * sizeof(data_buffer_t), and the compound's total size is
// consumers.size() * sizeof(data_buffer_t).
//
// consumers: the consumers to describe; must not be empty.
// Returns the assembled H5::CompType.
// Throws std::logic_error if `consumers` is empty.
264 template<typename I>
265 H5::CompType buildType(const std::vector<SharedConsumer<I> >& consumers) {
266 if (consumers.empty()) {
267 throw std::logic_error(
// Adjacent literals are concatenated verbatim, so the separating space
// has to be part of one of them.
268 "you must specify at least one consumer when initializing the HDF5"
269 " writer");
270 }
271
272 H5::CompType type(consumers.size() * sizeof(data_buffer_t));
273 size_t dt_offset = 0;
274 for (const SharedConsumer<I>& filler: consumers) {
275 type.insertMember(filler->name(), dt_offset, filler->getType());
// Each member occupies one full data_buffer_t slot.
276 dt_offset += sizeof(data_buffer_t);
277 }
278 return type;
279 }
// Build the compound datatype used when the dataset is created.
//
// The members are laid out as in buildType() (one data_buffer_t-sized
// slot per consumer, same names), but each member uses the consumer's
// getWriteType(). The finished type is then pack()ed -- per the HDF5 API
// this removes padding between members; see the H5::CompType
// documentation.
//
// Unlike buildType(), this function does not check for an empty list.
280 template<typename I>
281 H5::CompType buildWriteType(const std::vector<SharedConsumer<I> >& con) {
282 H5::CompType type(con.size() * sizeof(data_buffer_t));
283 size_t dt_offset = 0;
284 for (const SharedConsumer<I>& filler: con) {
285 type.insertMember(filler->name(), dt_offset, filler->getWriteType());
286 dt_offset += sizeof(data_buffer_t);
287 }
288 type.pack();
289 return type;
290 }
291
293 template <typename I, size_t N>
295 DSParameters(const std::vector<SharedConsumer<I> >& fillers,
296 const std::array<hsize_t,N>& extent,
297 hsize_t batch_size);
298 H5::CompType type{};
299 std::array<hsize_t,N> extent{};
300 hsize_t batch_size{};
301 };
302
303 // DS parameters
304 template <typename I, size_t N>
306 const std::array<hsize_t,N>& extent_,
307 hsize_t batch_size_):
308 type(buildType(cons)),
309 extent(extent_),
310 batch_size(batch_size_)
311 {
312 }
313
314
// Collect the default value of every consumer, in consumer order.
// The result has one data_buffer_t per consumer; the Writer constructor
// passes it to setFillValue() as the dataset's fill value.
315 template<typename I>
316 std::vector<data_buffer_t> buildDefault(const std::vector<SharedConsumer<I> >& f) {
317 std::vector<data_buffer_t> def;
318 for (const SharedConsumer<I>& filler: f) {
319 def.push_back(filler->getDefault());
320 }
321 return def;
322 }
323
324 // some internal functions take a vector, others take arrays
// Convert a fixed-size array of hsize_t into a std::vector<hsize_t>
// holding the same elements in the same order. The array is taken by
// value.
325 template <size_t N>
326 std::vector<hsize_t> vec(std::array<hsize_t,N> a) {
327 return std::vector<hsize_t>(a.begin(),a.end());
328 }
329
330 // default initializer for writers where the extent isn't specified
// Return an array of N elements, each set to `val`.
// Note that the template parameter is declared as hsize_t here, while it
// is used as the size argument of std::array.
331 template <hsize_t N>
332 std::array<hsize_t, N> uniform(size_t val) {
// Declared without an initializer; fill() below assigns every element
// before the array is returned.
333 std::array<hsize_t, N> ar;
334 ar.fill(val);
335 //coverity[UNINIT:FALSE]
336 return ar;
337 }
338
339
340 }
342
350 template <size_t N, typename I>
351 class Writer {
352 public:
353 Writer(H5::Group& group, const std::string& name,
354 const Consumers<I>& consumers,
355 const std::array<hsize_t, N>& extent = internal::uniform<N>(5),
356 hsize_t batch_size = defaults::batch_size);
357 Writer(H5::Group& group,
358 const Consumers<I>& consumers,
360 Writer(const Writer&) = delete;
361 Writer(Writer&&) = default;
364 template <typename T>
365 void fill(T);
366 void flush();
367 size_t index() const;
369 using input_type = I;
370 template <typename T>
371 using function_type = typename consumer_type::template function_type<T>;
373 private:
375 hsize_t m_offset;
377 std::vector<internal::data_buffer_t> m_buffer;
378 std::vector<SharedConsumer<I> > m_consumers;
379 H5::DataSet m_ds;
380 H5::DataSpace m_file_space;
381 std::recursive_mutex m_mutex;
382 };
383
// Convenience constructor: builds a WriterConfiguration<N> from the
// individual arguments and delegates to the configuration-based
// constructor.
//
// group:      HDF5 group the dataset is created in.
// name:       dataset name.
// consumers:  the variables to write.
// extent:     extent of the dataset; also used as the chunk shape here.
// batch_size: batch size stored in the configuration.
//
// The deflate level is taken from defaults::deflate and no property-list
// callbacks are installed.
384 template <size_t N, typename I>
385 Writer<N, I>::Writer(H5::Group& group, const std::string& name,
386 const Consumers<I>& consumers,
387 const std::array<hsize_t,N>& extent,
388 hsize_t batch_size):
389 Writer<N,I>(
// The designators below rely on the member order of WriterConfiguration,
// which is declared in WriterConfiguration.h.
390 group, consumers, WriterConfiguration<N>{
391 .name = name, // name
392 .extent = extent, // extent
393 .batch_size = batch_size, // batch_size
394 .chunks = extent, // chunks
395 .deflate = defaults::deflate, // deflate
396 .plist_callbacks = {} // plist_callbacks
397 })
398 {}
399
400 template <size_t N, typename I>
401 Writer<N, I>::Writer(H5::Group& group,
402 const Consumers<I>& consumers,
403 const WriterConfiguration<N>& cfg):
404 m_par(consumers.getConsumers(), cfg.extent,
405 cfg.batch_size ? *cfg.batch_size : defaults::batch_size),
406 m_offset(0),
407 m_buffer_rows(0),
408 m_consumers(consumers.getConsumers()),
409 m_file_space(H5S_SIMPLE)
410 {
412 if (m_par.batch_size < 1) {
413 throw std::logic_error("batch size must be > 0");
414 }
415 // create space
416 H5::DataSpace space = internal::getUnlimitedSpace(
417 internal::vec(cfg.extent));
418
419 // create params
420 H5::DSetCreatPropList params = internal::getChunckedDatasetParams(cfg);
421 std::vector<data_buffer_t> default_value = internal::buildDefault(
422 consumers.getConsumers());
423 params.setFillValue(m_par.type, default_value.data());
424
425 // create ds
426 internal::throwIfExists(cfg.name, group);
427 H5::CompType packed_type = buildWriteType(consumers.getConsumers());
428 m_ds = group.createDataSet(cfg.name, packed_type, space, params);
429 m_file_space = m_ds.getSpace();
430 m_file_space.selectNone();
431 }
432
433 template <size_t N, typename I>
435 using namespace H5Utils;
436 try {
437 flush();
438 } catch (H5::Exception& err) {
439 internal::printDestructorError(err.getDetailMsg());
440 } catch (std::exception& err) {
442 }
443 }
444
445 template <size_t N, typename I>
446 template <typename T>
447 void Writer<N, I>::fill(T arg) {
448 // lock within a scope here to check the buffer size and
449 // (potentially) flush, but we can release it to compute the
450 // output array since that's thread local
451 {
452 std::lock_guard lock(m_mutex);
453 if (m_buffer_rows == m_par.batch_size) {
454 flush();
455 }
456 }
457
458 // make some assertions to simplify debugging, the errors can be
459 // pretty nasty with all these templates.
460 typedef internal::CheckType<N, T, I> checkType;
461 static_assert(
462 checkType::depth >= N,
463 "\n\n"
464 " ** H5 Writer rank is greater than the depth of fill(...) input! **"
465 " \n");
466 static_assert(
467 // Suppress cppcheck warning here. It reports that the condition
468 // below is always true if `N = 0`. In N > 0 cases this
469 // condition might be false though, so I consider this a
470 // spurious warning.
471 //
472 // <3 Dan Guest, 2024-07-16 <3
473 //
474 // cppcheck-suppress incorrectLogicOperator
475 !(checkType::any_type && !checkType::ok_type),
476 "\n\n"
477 " ** H5 Writer input type matches fill(...), but rank is incorrect! **"
478 " \n");
479 static_assert(
480 checkType::any_type,
481 "\n\n"
482 " ** H5 Writer input type doesn't match input for fill(...)! **"
483 " \n");
484
485 internal::DataFlattener<N, decltype(m_consumers), T> buf(
486 m_consumers, std::move(arg), m_par.extent);
487 hsize_t n_el = buf.element_offsets.size();
488 std::vector<hsize_t> elements;
489
490 // lock again here since we're done with the local stuff: there's
491 // some access to class variables below which we need to do one
492 // thread at a time.
493 std::lock_guard lock(m_mutex);
494 for (const auto& el_local: buf.element_offsets) {
495 std::array<hsize_t, N+1> el_global;
496 el_global[0] = m_offset + m_buffer_rows;
497 std::copy(el_local.begin(), el_local.end(), el_global.begin() + 1);
498 elements.insert(elements.end(), el_global.begin(), el_global.end());
499 }
500 if (n_el > 0) {
501 m_file_space.selectElements(H5S_SELECT_APPEND, n_el, elements.data());
502 }
503 m_buffer.insert(m_buffer.end(), buf.buffer.begin(), buf.buffer.end());
505 }
506
507 template <size_t N, typename I>
509 std::lock_guard lock(m_mutex);
510 const hsize_t buffer_size = m_buffer_rows;
511 if (buffer_size == 0) return;
512
513 // extend the ds
514 std::vector<hsize_t> total_dims{buffer_size + m_offset};
515 total_dims.insert(total_dims.end(),
516 m_par.extent.begin(),
517 m_par.extent.end());
518 m_ds.extend(total_dims.data());
519 m_file_space.setExtentSimple(total_dims.size(), total_dims.data());
520
521 // write out
522 hsize_t n_buffer_pts = m_buffer.size() / m_consumers.size();
523 assert(m_file_space.getSelectNpoints() >= 0);
524 assert(static_cast<hsize_t>(m_file_space.getSelectNpoints())
525 == n_buffer_pts);
526 H5::DataSpace mem_space(1, &n_buffer_pts);
527 m_ds.write(m_buffer.data(), m_par.type, mem_space, m_file_space);
528 m_offset += buffer_size;
529 m_buffer.clear();
530 m_buffer_rows = 0;
531 m_file_space.selectNone();
532 }
533
// Return m_buffer_rows + m_offset. flush() adds the flushed row count to
// m_offset and resets m_buffer_rows to zero, so the sum is unchanged by a
// flush.
// NOTE(review): unlike fill() and flush(), this accessor does not take
// m_mutex before reading the two counters -- confirm whether callers may
// invoke it concurrently with fill().
534 template <size_t N, typename I>
535 size_t Writer<N, I>::index() const {
536 return m_buffer_rows + m_offset;
537 }
538
549 template <size_t N, class I>
551 H5::Group& group, const std::string& name,
552 const Consumers<I>& consumers,
553 const std::array<hsize_t, N>& extent = internal::uniform<N>(5),
554 hsize_t batch_size = defaults::batch_size) {
556 config.name = name;
557 config.extent = extent;
558 config.batch_size = batch_size;
559 return Writer<N,I>(group, consumers, config);
560 }
561 template <size_t N, class I>
563 H5::Group& group,
564 const Consumers<I>& consumers,
566 return Writer<N,I>(group, consumers, config);
567 }
568
575 template <typename T>
577
578
585 template <size_t N, typename T>
587
588
595 template <typename T>
597
598}
599
600#endif
std::vector< size_t > vec
static Double_t a
#define F(x, y, z)
Definition MD5.cxx:112
#define I(x, y, z)
Definition MD5.cxx:116
const std::vector< SharedConsumer< I > > & getConsumers() const
Definition Writer.h:179
std::function< T(I)> function_type
Definition Writer.h:155
void add(const std::string &name, const std::function< T(I)> &, const T &default_value=T(), Compression=Compression::STANDARD)
This should be the only method you need in this class.
Definition Writer.h:165
std::vector< SharedConsumer< const T & > > m_consumers
Definition Writer.h:158
std::set< std::string > m_used
Definition Writer.h:159
void add(const std::string &name, const F func, const T &def=T(), Compression comp=Compression::STANDARD)
overload to cast lambdas into functions
Definition Writer.h:145
Writer.
Definition Writer.h:351
Writer(const Writer &)=delete
Writer(Writer &&)=default
Writer & operator=(Writer &)=delete
std::vector< internal::data_buffer_t > m_buffer
Definition Writer.h:377
std::vector< SharedConsumer< const T & > > m_consumers
Definition Writer.h:378
void fill(T)
Definition Writer.h:447
Writer(H5::Group &group, const Consumers< I > &consumers, const WriterConfiguration< N > &=WriterConfiguration< N >())
Definition Writer.h:401
void flush()
Definition Writer.h:508
const internal::DSParameters< const T &, N > m_par
Definition Writer.h:374
Writer(H5::Group &group, const std::string &name, const Consumers< I > &consumers, const std::array< hsize_t, N > &extent=internal::uniform< N >(5), hsize_t batch_size=defaults::batch_size)
Definition Writer.h:385
std::recursive_mutex m_mutex
Definition Writer.h:381
Consumers< I > consumer_type
Definition Writer.h:368
WriterConfiguration< N > configuration_type
Definition Writer.h:372
size_t index() const
Definition Writer.h:535
typename consumer_type::template function_type< T > function_type
Definition Writer.h:371
implementation for variable filler
Definition Writer.h:62
data_buffer_t getDefault() const override
Definition Writer.h:97
DataConsumer(const std::string &, const std::function< T(I)> &, const T default_value=T(), Compression=Compression::STANDARD)
Definition Writer.h:80
H5::DataType getWriteType() const override
Definition Writer.h:107
H5::DataType getType() const override
Definition Writer.h:103
std::string name() const override
Definition Writer.h:111
data_buffer_t getBuffer(I) const override
Definition Writer.h:91
std::function< T(I)> m_getter
Definition Writer.h:74
DataConsumer classes.
Definition Writer.h:48
virtual std::string name() const =0
virtual data_buffer_t getDefault() const =0
virtual H5::DataType getWriteType() const =0
virtual data_buffer_t getBuffer(I) const =0
virtual H5::DataType getType() const =0
const hsize_t batch_size
Definition defaults.h:9
const int deflate
Definition defaults.h:10
classes to add type traits for H5
Definition common.h:21
H5::DSetCreatPropList getChunckedDatasetParams(const WriterConfiguration< N > &)
H5::CompType buildWriteType(const std::vector< SharedConsumer< I > > &con)
Definition Writer.h:281
std::vector< hsize_t > vec(std::array< hsize_t, N > a)
Definition Writer.h:326
H5::DataType getCompressedType(Compression comp)
H5::DataSpace getUnlimitedSpace(const std::vector< hsize_t > &max_length)
Definition common.cxx:31
std::array< hsize_t, N > uniform(size_t val)
Definition Writer.h:332
void printDestructorError(const std::string &msg)
Definition common.cxx:24
const H5::DataType H5Traits< int >::type
Definition H5Traits.cxx:16
void throwIfExists(const std::string &name, const H5::Group &in_group)
Definition common.cxx:46
std::vector< data_buffer_t > buildDefault(const std::vector< SharedConsumer< I > > &f)
Definition Writer.h:316
H5::CompType buildType(const std::vector< SharedConsumer< I > > &consumers)
Adapter to translate configuration info into the objects needed by the writer.
Definition Writer.h:265
HDF5 Tuple Writer.
Definition common.h:20
std::shared_ptr< internal::IDataConsumer< I > > SharedConsumer
Consumer Class.
Definition Writer.h:127
Writer< N, const T & > CRefWriter
CRefWriter.
Definition Writer.h:586
Writer< N, I > makeWriter(H5::Group &group, const std::string &name, const Consumers< I > &consumers, const std::array< hsize_t, N > &extent=internal::uniform< N >(5), hsize_t batch_size=defaults::batch_size)
makeWriter
Definition Writer.h:550
Writer< 0, const T & > SimpleWriter
SimpleWriter.
Definition Writer.h:596
Consumers< const T & > CRefConsumer
CRefConsumer.
Definition Writer.h:576
CheckType< 0, decltype(*begin(std::declval< T & >())), I > subtype
Definition Writer.h:211
CheckType< N-1, decltype(*begin(std::declval< T & >())), I > subtype
Definition Writer.h:203
Constant parameters for the writer.
Definition Writer.h:294
std::array< hsize_t, N > extent
Definition Writer.h:299
DSParameters(const std::vector< SharedConsumer< I > > &fillers, const std::array< hsize_t, N > &extent, hsize_t batch_size)
Definition Writer.h:305
std::vector< data_buffer_t > buffer
Definition Writer.h:250
DataFlattener(const F &f, T args, const std::array< hsize_t, M > &)
Definition Writer.h:252
std::vector< std::array< hsize_t, 0 > > element_offsets
Definition Writer.h:251
Data flattener class: this is used by the writer to read in the elements one by one and put them in a...
Definition Writer.h:225
DataFlattener(const F &filler, T args, const std::array< hsize_t, M > &extent)
Definition Writer.h:229
std::vector< data_buffer_t > buffer
Definition Writer.h:227
std::vector< std::array< hsize_t, N > > element_offsets
Definition Writer.h:228
We have lots of code to get around HDF5's rather weak typing.
Definition H5Traits.h:54
consumer_t getConsumers()