ATLAS Offline Software
Namespaces | Classes | Typedefs | Enumerations | Functions | Variables
H5Utils Namespace Reference

HDF5 Tuple Writer. More...

Namespaces

 defaults
 
 internal
 classes to add type traits for H5
 
 Print
 

Classes

struct  AppOpts
 
class  Consumers
 
class  DefaultMerger
 
class  IH5Merger
 
struct  IOOpts
 
struct  TreeCopyOpts
 
class  VariableFillers
 Variable filler arrays. More...
 
class  Writer
 Writer. More...
 
struct  WriterConfiguration
 
class  WriterXd
 WriterXd. More...
 

Typedefs

template<typename T >
using CRefConsumer = Consumers< const T & >
 CRefConsumer. More...
 
template<size_t N, typename T >
using CRefWriter = Writer< N, const T & >
 CRefWriter. More...
 
template<typename T >
using SimpleWriter = Writer< 0, const T & >
 SimpleWriter. More...
 

Enumerations

enum  Compression { Compression::STANDARD, Compression::HALF_PRECISION, Compression::HALF_PRECISION_LARGE }
 

Functions

bool checkDatasetsToMerge (const H5::DataSet &target, const H5::DataSet &source, hsize_t mergeAxis)
 Make sure that two datasets can be merged. More...
 
bool checkDatasetsToMerge (const H5::DataSet &target, const H5::DataSet &source, hsize_t mergeAxis, std::string &errMsg)
 Make sure that two datasets can be merged. More...
 
void mergeDatasets (H5::DataSet &target, const H5::DataSet &source, hsize_t mergeAxis, std::size_t bufferSize=-1)
 Merge two datasets. More...
 
H5::DataSet createDataSet (H5::H5Location &targetLocation, const H5::DataSet &source, hsize_t mergeAxis, int chunkSize=-1, int mergeExtent=-1)
 Make a new dataset using the properties of another. More...
 
std::size_t getRowSize (const H5::DataSet &ds, hsize_t axis)
 Calculate the size of a row of a dataset in bytes. More...
 
template<size_t N, class I >
Writer< N, I > makeWriter (H5::Group &group, const std::string &name, const Consumers< I > &consumers, const std::array< hsize_t, N > &extent=internal::uniform< N >(5), hsize_t batch_size=defaults::batch_size)
 makeWriter More...
 
void copyRootTree (TTree &tt, H5::Group &fg, const TreeCopyOpts &opts)
 
std::string getTree (const std::string &file_name)
 
AppOpts getTreeCopyOpts (int argc, char *argv[])
 

Variables

const size_t CHUNK_SIZE = 128
 

Detailed Description

HDF5 Tuple Writer.

HDF5 Writer.

This is a tool to write N-dimensional arrays of compound data types to HDF5 files.

Skip down to the Writer and Consumers classes below to see the classes you will usually interact with (WriterXd and VariableFillers are their older counterparts).

Typedef Documentation

◆ CRefConsumer

template<typename T >
using H5Utils::CRefConsumer = Consumers<const T&>

CRefConsumer.

Convenience wrapper: CRefConsumer<T> is equivalent to H5Utils::Consumers<const T&>.

Definition at line 540 of file Writer.h.

◆ CRefWriter

template<size_t N, typename T >
using H5Utils::CRefWriter = Writer<N, const T&>

CRefWriter.

Convenience wrapper: CRefWriter<N,T> is equivalent to H5Utils::Writer<N, const T&>.

Definition at line 550 of file Writer.h.

◆ SharedConsumer

template<typename I >
using H5Utils::SharedConsumer = std::shared_ptr<internal::IDataConsumer<I> >

Consumer Class.

The elements added to this container each specify one element in the output HDF5 DataSet. You need to give each variable a name and a function that fills the variable.

Definition at line 126 of file Writer.h.

◆ SimpleWriter

template<typename T >
using H5Utils::SimpleWriter = Writer<0, const T&>

SimpleWriter.

Convenience wrapper: SimpleWriter<T> is equivalent to H5Utils::Writer<0, const T&>.

Definition at line 560 of file Writer.h.

Enumeration Type Documentation

◆ Compression

enum class H5Utils::Compression
Enumerator
STANDARD 
HALF_PRECISION 
HALF_PRECISION_LARGE 

Definition at line 11 of file CompressionEnums.h.

Function Documentation

◆ checkDatasetsToMerge() [1/2]

bool H5Utils::checkDatasetsToMerge ( const H5::DataSet &  target,
const H5::DataSet &  source,
hsize_t  mergeAxis 
)

Make sure that two datasets can be merged.

Parameters
target: The dataset to merge into
source: The dataset to merge from
mergeAxis: The axis to merge along
Returns
False if the datasets cannot be merged

Definition at line 53 of file MergeUtils.cxx.

{
  std::string sink;
  return checkDatasetsToMerge(target, source, mergeAxis, sink);
}

◆ checkDatasetsToMerge() [2/2]

bool H5Utils::checkDatasetsToMerge ( const H5::DataSet &  target,
const H5::DataSet &  source,
hsize_t  mergeAxis,
std::string &  errMsg 
)

Make sure that two datasets can be merged.

Parameters
target: The dataset to merge into
source: The dataset to merge from
mergeAxis: The axis to merge along
[out] errMsg: If the datasets cannot be merged, this string is filled with an explanation
Returns
False if the datasets cannot be merged

Definition at line 62 of file MergeUtils.cxx.

{
  // Check that the datasets hold the same types
  // Note that H5 *can* do type comparisons but this function assumes that we
  // should only merge the same types
  if (target.getDataType() != source.getDataType() ) {
    errMsg = "Target and source datasets hold different types.";
    return false;
  }

  // Get the dataspaces
  H5::DataSpace targetSpace = target.getSpace();
  H5::DataSpace sourceSpace = source.getSpace();
  if (!targetSpace.isSimple() || !sourceSpace.isSimple() ) {
    errMsg = "Only simple dataspaces are understood.";
    return false;
  }

  // Make sure that the dataspaces have the same dimensions
  int nDims = targetSpace.getSimpleExtentNdims();
  if (nDims != sourceSpace.getSimpleExtentNdims() ) {
    errMsg = "Target and source dataspaces have different dimensions, " +
      std::to_string(nDims) + " and " +
      std::to_string(sourceSpace.getSimpleExtentNdims() ) + " respectively";
    return false;
  }

  // Make sure that the merge axis fits in the dimension
  if (nDims <= static_cast<int>(mergeAxis)) {
    errMsg = "Dataset dimension " + std::to_string(nDims) +
      " is not compatible with the merge axis " +
      std::to_string(mergeAxis);
    return false;
  }

  // Now make sure that the extent matches
  std::vector<hsize_t> targetDims(nDims, 0);
  std::vector<hsize_t> maxTargetDims(nDims, 0);
  targetSpace.getSimpleExtentDims(targetDims.data(), maxTargetDims.data() );
  std::vector<hsize_t> sourceDims(nDims, 0);
  sourceSpace.getSimpleExtentDims(sourceDims.data() );

  for (int ii = 0; ii < nDims; ++ii) {
    // Skip the merge axis in this check
    if (ii == static_cast<int>(mergeAxis) )
      continue;
    if (targetDims.at(ii) != sourceDims.at(ii) ) {
      errMsg = "Target and source databases dimensions differ on axis " +
        std::to_string(ii) + ", " + std::to_string(targetDims.at(ii) ) +
        " and " + std::to_string(sourceDims.at(ii) ) + " respectively";
      return false;
    }
  }

  // Check the maximum extent is sufficient
  if (maxTargetDims.at(mergeAxis) < (
      targetDims.at(mergeAxis) + sourceDims.at(mergeAxis) ) ) {
    errMsg = "Merged dataset will not fit into target dataset";
    return false;
  }

  return true;
} //> end function checkDatasetsToMerge
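The shape rules enforced above can be exercised in isolation. Below is a minimal, self-contained sketch (the function and variable names are illustrative, not part of H5Utils) of the same checks applied to plain extent vectors: equal rank, identical extents on every axis except the merge axis, and a maximum target extent large enough to absorb the source.

```cpp
#include <cstddef>
#include <vector>

// Sketch of the mergeability rules from checkDatasetsToMerge, applied
// to plain extent vectors instead of H5::DataSpace objects.
bool shapesAreMergeable(const std::vector<std::size_t>& targetDims,
                        const std::vector<std::size_t>& maxTargetDims,
                        const std::vector<std::size_t>& sourceDims,
                        std::size_t mergeAxis) {
  if (targetDims.size() != sourceDims.size()) return false;  // same rank
  if (mergeAxis >= targetDims.size()) return false;          // axis in range
  for (std::size_t ii = 0; ii < targetDims.size(); ++ii) {
    if (ii == mergeAxis) continue;                 // merge axis may differ
    if (targetDims[ii] != sourceDims[ii]) return false;  // others must match
  }
  // the merged extent must fit under the target's maximum extent
  return maxTargetDims[mergeAxis] >=
         targetDims[mergeAxis] + sourceDims[mergeAxis];
}
```

For example, a 10x4 target with maximum extent 100x4 can absorb a 6x4 source along axis 0, but not a 6x5 source (the non-merge axis differs).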

◆ copyRootTree()

void H5Utils::copyRootTree ( TTree &  tt,
H5::Group &  fg,
const TreeCopyOpts opts 
)

Definition at line 125 of file copyRootTree.cxx.

{

  // define the buffers for root to read into
  std::vector<std::unique_ptr<IBuffer> > buffers;

  // this keeps track of the things we couldn't read
  std::set<std::string> skipped;

  // Each `VariableFiller` must be constructed with a "filler"
  // function (or callable object), which takes no arguments and
  // returns the variable we want to write out. In this case they are
  // implemented as closures over the buffers that ROOT is reading
  // into.

  // These are the 1d variables
  VariableFillers vars;
  std::vector<size_t> idx_dummy;

  // These are 2d variables (i.e. vector<T> in the root file)
  //
  // We also need an index which the HDF5 writer increments as it
  // fills. This is shared with the ROOT buffers to index entries in
  // std::vectors
  VariableFillers vars2d;
  std::vector<size_t> idx(1,0);

  // 3d variables (index is now 2d)
  VariableFillers vars3d;
  std::vector<size_t> idx2(2,0);

  // Iterate over all the leaf names. There are some duplicates in the
  // list of keys, so we have to build the set ourselves.
  std::regex branch_filter(opts.branch_regex);
  TIter next(tt.GetListOfLeaves());
  TLeaf* leaf;
  std::set<std::string> leaf_names;
  while ((leaf = dynamic_cast<TLeaf*>(next()))) {
    leaf_names.insert(leaf->GetName());
  }
  if (leaf_names.size() == 0) throw std::logic_error("no branches found");

  // Loop over all the leaves, assign buffers to each
  //
  // These `Buffer` classes are defined above. The buffers turn the
  // branches on, so we can set them all off to start.
  tt.SetBranchStatus("*", false);
  for (const auto& lname: leaf_names) {
    bool keep = true;
    if (opts.branch_regex.size() > 0) {
      keep = std::regex_search(lname, branch_filter);
    }
    if (opts.verbose) {
      std::cout << (keep ? "found " : "rejecting ") << lname << std::endl;
    }
    if (!keep) continue;

    leaf = tt.GetLeaf(lname.c_str());
    std::string branchName = leaf->GetBranch()->GetName();
    std::string leaf_type = leaf->GetTypeName();
    if (leaf_type == "Int_t") {
      buffers.emplace_back(new Buffer<int>(vars, tt, branchName));
    } else if (leaf_type == "Float_t") {
      buffers.emplace_back(new Buffer<float>(vars, tt, branchName));
    } else if (leaf_type == "Double_t") {
      buffers.emplace_back(new Buffer<double>(vars, tt, branchName));
    } else if (leaf_type == "Bool_t") {
      buffers.emplace_back(new Buffer<bool>(vars, tt, branchName));
    } else if (leaf_type == "Long64_t") {
      buffers.emplace_back(new Buffer<long long>(vars, tt, branchName));
    } else if (leaf_type == "ULong64_t") {
      buffers.emplace_back(new Buffer<unsigned long long>(vars, tt, branchName));
    } else if (leaf_type == "UInt_t") {
      buffers.emplace_back(new Buffer<unsigned int>(vars, tt, branchName));
    } else if (leaf_type == "UChar_t") {
      buffers.emplace_back(new Buffer<unsigned char>(vars, tt, branchName));
    } else if (leaf_type == "vector<float>") {
      buffers.emplace_back(new VBuf<float>(vars2d, idx, tt, branchName, NAN));
    } else if (leaf_type == "vector<double>") {
      buffers.emplace_back(new VBuf<double>(vars2d, idx, tt, branchName, NAN));
    } else if (leaf_type == "vector<int>") {
      buffers.emplace_back(new VBuf<int>(vars2d, idx, tt, branchName, 0));
    } else if (leaf_type == "vector<unsigned int>") {
      buffers.emplace_back(new VBuf<unsigned int>(vars2d, idx, tt, branchName, 0));
    } else if (leaf_type == "vector<unsigned char>") {
      buffers.emplace_back(new VBuf<unsigned char>(vars2d, idx, tt, branchName, 0));
    } else if (leaf_type == "vector<bool>") {
      buffers.emplace_back(new VBuf<bool>(vars2d, idx, tt, branchName, 0));
    } else if (leaf_type == "vector<vector<int> >") {
      buffers.emplace_back(new VVBuf<int>(vars3d, idx2, tt, branchName, 0));
    } else if (leaf_type == "vector<vector<unsigned int> >") {
      buffers.emplace_back(new VVBuf<unsigned int>(vars3d, idx2, tt, branchName, 0));
    } else if (leaf_type == "vector<vector<unsigned char> >") {
      buffers.emplace_back(new VVBuf<unsigned char>(vars3d, idx2, tt, branchName, 0));
    } else if (leaf_type == "vector<vector<float> >") {
      buffers.emplace_back(new VVBuf<float>(vars3d, idx2, tt, branchName, NAN));
    } else if (leaf_type == "vector<vector<double> >") {
      buffers.emplace_back(new VVBuf<double>(vars3d, idx2, tt, branchName, NAN));
    } else if (leaf_type == "vector<vector<bool> >") {
      buffers.emplace_back(new VVBuf<bool>(vars3d, idx2, tt, branchName, 0));
    } else {
      skipped.insert(leaf_type);
    }
  }

  // Build HDF5 Outputs
  //
  // In the simple case where we're not reading vectors, we store one
  // dataset with the same name as the tree. If there are vectors, we
  // instead create a group with the same name as the tree, and name
  // the datasets 1d, 2d, etc.
  //
  const std::string tree_name = tt.GetName();

  std::unique_ptr<WriterXd> writer1d;
  std::unique_ptr<WriterXd> writer2d;
  std::unique_ptr<WriterXd> writer3d;
  std::unique_ptr<H5::Group> top_group;
  if (opts.vector_lengths.size() > 0) {
    if (opts.vector_lengths.size() > 2) throw std::logic_error(
      "we don't support outputs with rank > 3");
    size_t length = opts.vector_lengths.at(0);
    top_group.reset(new H5::Group(fg.createGroup(tree_name)));
    if (opts.vector_lengths.size() > 1) {
      size_t length2 = opts.vector_lengths.at(1);
      if (vars3d.size() > 0) {
        writer3d.reset(new WriterXd(*top_group, "3d", vars3d,
                                    {length, length2}, opts.chunk_size));
      }
    }
    if (vars2d.size() > 0) {
      writer2d.reset(new WriterXd(*top_group, "2d", vars2d,
                                  {length}, opts.chunk_size));
    }
    if (vars.size() > 0) {
      writer1d.reset(new WriterXd(*top_group, "1d",
                                  vars, {}, opts.chunk_size));
    }
  } else {
    if (vars.size() > 0) {
      writer1d.reset(new WriterXd(fg, tree_name, vars, {}, opts.chunk_size));
    }
  }

  // Main event loop
  //
  // Very little actually happens here since the buffers are already
  // defined, as are the HDF5 reader functions.
  //

  // Get the selection string and build a new TTreeFormula
  std::string cut_string = opts.selection;
  const char* cut_char = cut_string.c_str();
  TTreeFormula* cut = 0;
  if (!cut_string.empty()) {
    // This is so a cut can be applied without requiring the
    // branch to be output to the hdf5 file.
    tt.SetBranchStatus("*", true);
    cut = new TTreeFormula("selection", cut_char, &tt);
  }

  size_t n_entries = tt.GetEntries();
  if (opts.n_entries) n_entries = std::min(n_entries, opts.n_entries);
  int print_interval = opts.print_interval;
  if (print_interval == -1) {
    print_interval = std::max(1UL, n_entries / 100);
  }

  for (size_t iii = 0; iii < n_entries; iii++) {
    if (print_interval && (iii % print_interval == 0)) {
      std::cout << "events processed: " << iii
                << " (" << std::round(iii*1e2 / n_entries) << "% of "
                << n_entries << ")" << std::endl;
    }
    tt.GetEntry(iii);
    if (cut) cut->UpdateFormulaLeaves();
    if (!passTTreeCut(cut)) continue;
    if (writer1d) writer1d->fillWhileIncrementing(idx_dummy);
    if (writer2d) writer2d->fillWhileIncrementing(idx);
    if (writer3d) writer3d->fillWhileIncrementing(idx2);
  }

  // Flush the memory buffers on the HDF5 side. (This is done by the
  // destructor automatically, but we do it here to make any errors
  // more explicit.)
  if (writer1d) writer1d->flush();
  if (writer2d) writer2d->flush();
  if (writer3d) writer3d->flush();

  // Print the names of any classes that we weren't able to read from
  // the root file.
  if (opts.verbose) {
    for (const auto& name: skipped) {
      std::cerr << "could not read branch of type " << name << std::endl;
    }
  }
} // end copyRootTree

◆ createDataSet()

H5::DataSet H5Utils::createDataSet ( H5::H5Location &  targetLocation,
const H5::DataSet &  source,
hsize_t  mergeAxis,
int  chunkSize = -1,
int  mergeExtent = -1 
)

Make a new dataset using the properties of another.

Parameters
targetLocation: The location in which to place the new dataset
source: The dataset to create from
mergeAxis: The axis to merge along
chunkSize: The chunk size to use; if negative, the chunk size from the source is used
mergeExtent: The maximum extent to allow along the merge axis; -1 means unlimited

This will not merge the source dataset into the new one!

Definition at line 222 of file MergeUtils.cxx.

{
  H5::DataSpace sourceSpace = source.getSpace();
  // Get the new extent
  std::vector<hsize_t> DSExtent(sourceSpace.getSimpleExtentNdims(), 0);
  sourceSpace.getSimpleExtentDims(DSExtent.data() );
  // Set the merge axis to be 0 length to begin with
  DSExtent.at(mergeAxis) = 0;
  std::vector<hsize_t> maxDSExtent = DSExtent;
  maxDSExtent.at(mergeAxis) = mergeExtent;

  // Get the existing dataset creation properties
  H5::DSetCreatPropList cList = source.getCreatePlist();
  if (chunkSize > 0) {
    std::vector<hsize_t> chunks = DSExtent;
    chunks.at(mergeAxis) = chunkSize;
    cList.setChunk(chunks.size(), chunks.data() );
  }

  // Create the new space
  H5::DataSpace space(DSExtent.size(), DSExtent.data(), maxDSExtent.data());
  // This does nothing with the acc property list because I don't know
  // what it is
  return targetLocation.createDataSet(
    source.getObjName(), source.getDataType(), space, cList);
}
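The extent bookkeeping above is the interesting part: the new dataset starts empty along the merge axis, its maximum extent there is mergeExtent, and the chunk shape copies the source shape with chunkSize on the merge axis. A self-contained sketch of just that derivation (the struct and function names here are illustrative, not the real implementation):

```cpp
#include <cstddef>
#include <vector>

// Illustrative stand-in for the three shapes createDataSet derives
// from a source dataset's extent.
struct NewExtents {
  std::vector<std::size_t> initial;  // starting dims: 0 on the merge axis
  std::vector<std::size_t> maximum;  // max dims: mergeExtent on the merge axis
  std::vector<std::size_t> chunks;   // chunk dims: chunkSize on the merge axis
};

NewExtents deriveExtents(std::vector<std::size_t> sourceDims,
                         std::size_t mergeAxis,
                         std::size_t mergeExtent,
                         std::size_t chunkSize) {
  NewExtents out;
  sourceDims.at(mergeAxis) = 0;  // start with nothing along the merge axis
  out.initial = sourceDims;
  out.maximum = sourceDims;
  out.maximum.at(mergeAxis) = mergeExtent;
  out.chunks = sourceDims;
  out.chunks.at(mergeAxis) = chunkSize;
  return out;
}
```

So a 7x3 source merged along axis 0 with mergeExtent 1000 and chunkSize 128 yields an initial extent of 0x3, a maximum of 1000x3, and 128x3 chunks.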

◆ getRowSize()

std::size_t H5Utils::getRowSize ( const H5::DataSet &  ds,
hsize_t  axis 
)

Calculate the size of a row of a dataset in bytes.

Parameters
ds: The dataset to use
axis: The axis that the row is orthogonal to

A row is the hyperplane orthogonal to the axis. This will throw an overflow error if the row size overflows a std::size_t. That is rather unlikely: it would mean there are not enough memory addresses to hold a single row in memory!

Definition at line 254 of file MergeUtils.cxx.

{
  // The size of one element
  std::size_t eleSize = ds.getDataType().getSize();

  // The dimensions of the space
  H5::DataSpace space = ds.getSpace();
  std::vector<hsize_t> spaceDims(space.getSimpleExtentNdims(), 0);
  space.getSimpleExtentDims(spaceDims.data() );

  std::size_t nRowElements = 1;
  for (std::size_t ii = 0; ii < spaceDims.size(); ++ii)
    if (ii != axis)
      nRowElements *= spaceDims.at(ii);

  // Double check that this fits. This is probably over cautious but fine...
  if (std::size_t(-1) / nRowElements < eleSize)
    throw std::overflow_error("The size of one row would overflow the register!");

  return eleSize * nRowElements;
}
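The arithmetic is simple enough to demonstrate on plain extent vectors: a row's element count is the product of every extent except the one on `axis`, and the byte size is that product times the element size. A self-contained sketch (illustrative names, not the H5Utils code; the zero-extent guard is an addition in this sketch, since the division check would otherwise divide by zero):

```cpp
#include <cstddef>
#include <stdexcept>
#include <vector>

// Sketch of the getRowSize arithmetic on plain extents.
std::size_t rowSizeBytes(const std::vector<std::size_t>& dims,
                         std::size_t axis,
                         std::size_t elementSize) {
  std::size_t nRowElements = 1;
  for (std::size_t ii = 0; ii < dims.size(); ++ii)
    if (ii != axis)
      nRowElements *= dims[ii];
  if (nRowElements == 0) return 0;  // guard added here: empty dataset
  // overflow check: nRowElements * elementSize must fit in std::size_t
  if (std::size_t(-1) / nRowElements < elementSize)
    throw std::overflow_error("row size would overflow std::size_t");
  return elementSize * nRowElements;
}
```

For a 100x4x6 dataset of 8-byte elements merged along axis 0, a row holds 4*6 = 24 elements, i.e. 192 bytes.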

◆ getTree()

std::string H5Utils::getTree ( const std::string &  file_name)

Definition at line 36 of file getTree.cxx.

{
  if (!exists(file_name) && !is_remote(file_name)) {
    throw std::logic_error(file_name + " doesn't exist");
  }
  std::unique_ptr<TFile> file(TFile::Open(file_name.c_str()));
  if (!file || !file->IsOpen() || file->IsZombie()) {
    throw std::logic_error("can't open " + file_name);
  }
  std::set<std::string> keys;
  int n_keys = file->GetListOfKeys()->GetSize();
  if (n_keys == 0) {
    throw std::logic_error("no keys found in file");
  }
  for (int keyn = 0; keyn < n_keys; keyn++) {
    keys.insert(file->GetListOfKeys()->At(keyn)->GetName());
  }
  size_t n_unique = keys.size();
  if (n_unique > 1) {
    std::string prob = "Can't decide which tree to use, choose one of {";
    size_t uniq_n = 0;
    for (const auto& key: keys) {
      prob.append(key);
      uniq_n++;
      if (uniq_n < n_unique) prob.append(", ");
    }
    prob.append("} with the --tree-name option");
    throw std::logic_error(prob);
  }
  auto* key = dynamic_cast<TKey*>(file->GetListOfKeys()->At(0));
  std::string name = key->GetName();
  file->Close();
  return name;
}

◆ getTreeCopyOpts()

AppOpts H5Utils::getTreeCopyOpts ( int  argc,
char *  argv[] 
)

Definition at line 12 of file treeCopyOpts.cxx.

{
  namespace po = boost::program_options;
  AppOpts app;
  std::string usage = "usage: " + std::string(argv[0]) + " <files>..."
    + " -o <output> [-h] [opts...]\n";
  po::options_description opt(usage + "\nConvert a root tree to HDF5");
  opt.add_options()
    ("in-file",
     po::value(&app.file.in)->required()->multitoken(),
     "input file name")
    ("out-file,o",
     po::value(&app.file.out)->required(),
     "output file name")
    ("tree-name,t",
     po::value(&app.file.tree)->default_value("", "found"),
     "tree to use, use whatever is there by default (or crash if multiple)")
    ("help,h", "Print help messages")
    ("branch-regex,r",
     po::value(&app.tree.branch_regex)->default_value(""),
     "regex to filter branches")
    ("vector-lengths,l",
     po::value(&app.tree.vector_lengths)->multitoken()->value_name("args..."),
     "max size of vectors to write")
    ("verbose,v",
     po::bool_switch(&app.tree.verbose),
     "print branches copied")
    ("n-entries,n",
     po::value(&app.tree.n_entries)->default_value(0, "all")->implicit_value(1),
     "number of entries to copy")
    ("chunk-size,c",
     po::value(&app.tree.chunk_size)->default_value(CHUNK_SIZE),
     "chunk size in HDF5 file")
    ("selection,s",
     po::value(&app.tree.selection)->default_value(""),
     "selection string applied to ntuples")
    ("print-interval,p",
     po::value(&app.tree.print_interval)->default_value(0, "never")->implicit_value(-1, "1%"),
     "print progress")
    ;
  po::positional_options_description pos_opts;
  pos_opts.add("in-file", -1);

  po::variables_map vm;
  try {
    po::store(po::command_line_parser(argc, argv).options(opt)
              .positional(pos_opts).run(), vm);
    if ( vm.count("help") ) {
      std::cout << opt << std::endl;
      app.exit_code = 1;
    }
    po::notify(vm);
  } catch (po::error& err) {
    std::cerr << usage << "ERROR: " << err.what() << std::endl;
    app.exit_code = 1;
  }
  return app;
}

◆ makeWriter()

template<size_t N, class I >
Writer<N,I> H5Utils::makeWriter ( H5::Group &  group,
const std::string &  name,
const Consumers< I > &  consumers,
const std::array< hsize_t, N > &  extent = internal::uniform<N>(5),
hsize_t  batch_size = defaults::batch_size 
)

makeWriter

Convenience function to make a writer from an existing list of Consumers. Allows you to deduce the input type from consumers.

To be used like

auto writer = H5Utils::makeWriter<2>(group, name, consumers);

Definition at line 525 of file Writer.h.

{
  return Writer<N,I>(group, name, consumers, extent, batch_size);
}
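The point of this helper is that only the rank N has to be spelled out; the input type I is deduced from the consumer list. That deduction can be shown with self-contained stand-ins (the `Consumers`/`Writer` templates below are illustrative stubs, not the real H5Utils classes):

```cpp
#include <cstddef>
#include <type_traits>

// Illustrative stubs mirroring the shape of H5Utils::Consumers and
// H5Utils::Writer, just to demonstrate the template deduction.
template <class I> struct Consumers {};

template <std::size_t N, class I>
struct Writer {
  using input_type = I;
};

// Like H5Utils::makeWriter: N is explicit, I is deduced from consumers.
template <std::size_t N, class I>
Writer<N, I> makeWriter(const Consumers<I>& /*consumers*/) {
  return Writer<N, I>{};
}
```

With `Consumers<const float&> consumers`, the call `makeWriter<2>(consumers)` yields a `Writer<2, const float&>` without the input type ever being written out.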

◆ mergeDatasets()

void H5Utils::mergeDatasets ( H5::DataSet &  target,
const H5::DataSet &  source,
hsize_t  mergeAxis,
std::size_t  bufferSize = -1 
)

Merge two datasets.

Parameters
target: The dataset to merge into
source: The dataset to merge from
mergeAxis: The axis to merge along
bufferSize: The maximum size of the buffer to use, in bytes. Take care when setting this: if it is too large the job may run into memory issues!

Note that this does nothing to dataset attributes. This function ignores the chunking of the source and target datasets, only splitting up the source dataset along the merge axis.

Definition at line 130 of file MergeUtils.cxx.

{
  std::string errMsg;
  if (!checkDatasetsToMerge(target, source, mergeAxis, errMsg) )
    throw std::invalid_argument(errMsg);

  // Get information about the target and source datasets
  H5::DataSpace targetSpace = target.getSpace();
  H5::DataSpace sourceSpace = source.getSpace();
  int nDims = targetSpace.getSimpleExtentNdims();

  // Now make sure that the extent matches
  std::vector<hsize_t> targetDims(nDims, 0);
  targetSpace.getSimpleExtentDims(targetDims.data() );
  std::vector<hsize_t> sourceDims(nDims, 0);
  sourceSpace.getSimpleExtentDims(sourceDims.data() );

  // Start by extending the target dataset
  std::vector<hsize_t> newDims = targetDims;
  newDims.at(mergeAxis) += sourceDims.at(mergeAxis);
  target.extend(newDims.data() );
  targetSpace.setExtentSimple(newDims.size(), newDims.data() );

  // Now we need to work out how far we need to subdivide the source dataset
  // to fit it inside the buffer.
  std::size_t rowSize = getRowSize(source, mergeAxis);
  // How many rows can we fit into one buffer
  std::size_t nRowsBuffer = bufferSize / rowSize;
  if (nRowsBuffer == 0)
    throw std::invalid_argument(
      "Allocated buffer is smaller than a single row! Merging is impossible.");

  // We have to allocate an area in memory for the buffer. Unlike normally in
  // C++ we aren't allocating a space for an object but a specific size. This
  // means that we have to use malloc.
  // Smart pointers require some annoying syntax to use with malloc, but we
  // can implement the same pattern with a simple struct.
  SmartMalloc buffer;

  // Keep track of the offset from the target dataset
  std::vector<hsize_t> targetOffset(nDims, 0);
  // Start it from its end point before we extended it
  targetOffset.at(mergeAxis) = targetDims.at(mergeAxis);

  // Step through the source dataset in increments equal to the number of
  // source rows that can fit into the buffer.
  std::size_t nSourceRows = sourceDims.at(mergeAxis);
  for (std::size_t iRow = 0; iRow < nSourceRows; iRow += nRowsBuffer) {
    // Construct the size and offset of the source slab
    std::vector<hsize_t> sourceOffset(nDims, 0);
    sourceOffset.at(mergeAxis) = iRow;
    // The number of rows to write
    std::size_t nRowsToWrite = std::min(nSourceRows-iRow, nRowsBuffer);
    std::vector<hsize_t> sourceSize(sourceDims);
    sourceSize.at(mergeAxis) = nRowsToWrite;
    // Create the source hyperslab
    sourceSpace.selectNone();
    sourceSpace.selectHyperslab(
      H5S_SELECT_SET,
      sourceSize.data(),
      sourceOffset.data() );

    // Create the target hyperslab
    targetSpace.selectNone();
    targetSpace.selectHyperslab(
      H5S_SELECT_SET,
      sourceSize.data(),
      targetOffset.data() );

    H5::DataSpace memorySpace(sourceSize.size(), sourceSize.data() );
    memorySpace.selectAll();

    // Prepare the buffer
    buffer.allocate(nRowsToWrite*rowSize);
    // Read into it
    source.read(buffer.data, source.getDataType(), memorySpace, sourceSpace);
    // Write from it
    target.write(buffer.data, target.getDataType(), memorySpace, targetSpace);
    // Increment the target offset
    targetOffset.at(mergeAxis) += nRowsToWrite;
  }
  // Sanity check - make sure that the final targetOffset is where we think it
  // should be
  if (targetOffset.at(mergeAxis) != newDims.at(mergeAxis) )
    throw std::logic_error(
      "Target dataset was not filled! This indicates a logic error in the code!");
}
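The buffering scheme above copies floor(bufferSize / rowSize) rows per pass and a smaller remainder on the last pass. That pass planning can be sketched in isolation (illustrative names, not the real code; where the real function throws on a too-small buffer, this sketch returns an empty plan):

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Sketch of mergeDatasets' chunked copy loop: returns the number of
// rows copied on each pass through the buffer.
std::vector<std::size_t> planCopyPasses(std::size_t nSourceRows,
                                        std::size_t rowSize,
                                        std::size_t bufferSize) {
  std::size_t nRowsBuffer = bufferSize / rowSize;  // rows per buffer fill
  if (nRowsBuffer == 0)
    return {};  // the real code throws here: buffer smaller than one row
  std::vector<std::size_t> passes;
  for (std::size_t iRow = 0; iRow < nSourceRows; iRow += nRowsBuffer)
    passes.push_back(std::min(nSourceRows - iRow, nRowsBuffer));
  return passes;
}
```

For instance, 10 source rows of 192 bytes with a 1000-byte buffer fit 5 rows per pass, so the merge runs in two passes of 5 rows each; with a buffer that holds only 4 rows it would run as 4, 4, then 2.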

Variable Documentation

◆ CHUNK_SIZE

const size_t H5Utils::CHUNK_SIZE = 128

Definition at line 15 of file treeCopyOpts.h.
