ATLAS Offline Software
Loading...
Searching...
No Matches
hdf5-merge.cxx File Reference
#include "H5Cpp.h"
#include <HDF5Utils/DefaultMerger.h>
#include <boost/program_options.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <cstddef>
#include <iomanip>
#include <iostream>
#include <limits>
#include <string>
#include <vector>
Include dependency graph for hdf5-merge.cxx:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 A simple script to merge HDF5 files.

Function Documentation

◆ main()

int main ( int argc,
char * argv[] )

A simple script to merge HDF5 files.

This script is intended to read in a list of HDF5 files and create a new file with all datasets contained inside them concatenated along a particular axis.

Definition at line 22 of file hdf5-merge.cxx.

22 {
23 // The options
24 std::string outputFile = "merged.h5";
25 std::string inCSV = "";
26 std::vector<std::string> inputFiles;
27 hsize_t mergeAxis = 0;
28 int chunkSize = -1;
29 bool requireSameFormat = true;
30 std::size_t bufferSizeMB = 100;
31 std::size_t bufferSizeRows = -1;
32 bool overwrite = false;
33 bool inPlace = false;
34
35 namespace po = boost::program_options;
36 po::options_description desc("Allowed options");
37 desc.add_options()
38 ("output,o", po::value(&outputFile), "The output file.")
39 ("input,i", po::value(&inCSV), "A comma separated list of input files")
40 ("allowDifferentFormats", po::bool_switch(&requireSameFormat),
41 "Allow input files to have different formats.")
42 ("mergeAxis,a", po::value(&mergeAxis),
43 "The axis along which to merge datasets")
44 ("chunkSize,c", po::value(&chunkSize),
45 "The chunk size to use along the merge axis. If left negative uses the same chunks as the first input.")
46 ("bufferSizeMB,B", po::value(&bufferSizeMB),
47 "The size of the buffer to use in MB. Cannot be set with 'bufferSizeRows'")
48 ("bufferSizeRows,b", po::value(&bufferSizeRows),
49 "The size of the buffer to use in rows. Cannot be set with 'bufferSizeMB'")
50 ("overwrite,w", po::bool_switch(&overwrite),
51 "Overwrite the output file if it already exists. Cannot be set with 'in-place'")
52 ("in-place,p", po::bool_switch(&inPlace),
53 "The output file is modified in place. Cannot be set with 'overwrite'")
54 ("help,h", "Print this message and exit.");
55
56 po::options_description hidden;
57 hidden.add_options()
58 ("inputFiles", po::value(&inputFiles), "The input files");
59 po::positional_options_description positional;
60 positional.add("inputFiles", -1); //> All positional arguments are input files
61
62 po::variables_map vm;
63 po::options_description allOptions;
64 allOptions.add(desc).add(hidden);
65
66 po::store(
67 po::command_line_parser(argc, argv).
68 options(allOptions).
69 positional(positional).
70 run(),
71 vm);
72 // Do help before notify - notify will verify input arguments which we don't
73 // want to do with help
74 if (vm.count("help") ) {
75 std::cout << "Merge HDF5 files. Usage:" << std::endl << std::endl;
76 std::cout << "hdf5-merge [options] [--input input1,input2,... | input1 [input2 ...]]" << std::endl << std::endl;
77 std::cout << desc << std::endl;
78 return 0;
79 }
80 po::notify(vm);
81
82 if (inCSV.size() > 0) {
83 std::vector<std::string> splitCSV;
84 boost::algorithm::split(splitCSV, inCSV, boost::algorithm::is_any_of(",") );
85 for (const std::string& i : splitCSV)
86 inputFiles.push_back(boost::algorithm::trim_copy(i) );
87 }
88 if (inputFiles.size() == 0) {
89 std::cerr << "You must specify at least 1 input file!" << std::endl;
90 return 1;
91 }
92 if (overwrite && inPlace) {
93 std::cerr << "You cannot specify both overwrite and in-place!" << std::endl;
94 return 1;
95 }
96 if (vm.count("bufferSizeMB") && vm.count("bufferSizeRows") ) {
97 std::cerr << "You cannot specify both bufferSizeMB and bufferSizeRows!" << std::endl;
98 return 1;
99 }
100 std::size_t buffer;
101 bool bufferInRows;
102 if (vm.count("bufferSizeRows") ) {
103 buffer = bufferSizeRows;
104 bufferInRows = true;
105 }
106 else {
107 // Default used if neither was set or if bufferSizeMB is set
108 std::size_t MB = 1024*1024;
109 if (std::size_t(-1) / bufferSizeMB < MB)
110 throw std::overflow_error(
111 "Requested buffer size would overflow the register!");
112 buffer = bufferSizeMB * MB;
113 bufferInRows = false;
114 }
115
116 // Make the merger
118 mergeAxis, chunkSize, requireSameFormat, buffer, bufferInRows);
119
120 // Make the output file
121 H5::H5File fOut(outputFile,
122 overwrite ? H5F_ACC_TRUNC : (inPlace ? H5F_ACC_RDWR : H5F_ACC_EXCL) );
123 // Loop over the input files and merge them
124 for (const std::string& inName : inputFiles) {
125 std::cout << "Merging file " << inName << std::endl;
126 H5::H5File fIn(inName, H5F_ACC_RDONLY);
127 merger.merge(fOut, fIn);
128 }
129
130
131 return 0;
132}
Definition run.py:1