This script is intended to read in a list of HDF5 files and create a new file with all datasets contained inside them concatenated along a particular axis.
22 {
// Command-line driver: declares and parses the options, validates the
// combinations that are mutually exclusive, works out the buffer size, then
// merges every input file into the output file in the order given.
23
// Storage bound to the options declared below, with their default values.
25 std::string inCSV = "";
27 hsize_t mergeAxis = 0;
// Negative means "use the same chunks as the first input" (see the help text).
28 int chunkSize = -1;
29 bool requireSameFormat = true;
30 std::size_t bufferSizeMB = 100;
// -1 converted to std::size_t wraps around to the largest representable value.
31 std::size_t bufferSizeRows = -1;
32 bool overwrite = false;
33 bool inPlace = false;
34
35 namespace po = boost::program_options;
// Options listed in the --help output.
36 po::options_description
desc(
"Allowed options");
38 ("output,o", po::value(&outputFile), "The output file.")
39 ("input,i", po::value(&inCSV), "A comma separated list of input files")
// NOTE(review): bool_switch stores true in the bound variable when the flag is
// present (and, per the Boost documentation, false when it is absent). Bound to
// requireSameFormat, passing --allowDifferentFormats therefore appears to
// *require* identical formats, the opposite of the help text. Confirm and
// invert if so.
40 ("allowDifferentFormats", po::bool_switch(&requireSameFormat),
41 "Allow input files to have different formats.")
42 ("mergeAxis,a", po::value(&mergeAxis),
43 "The axis along which to merge datasets")
44 ("chunkSize,c", po::value(&chunkSize),
45 "The chunk size to use along the merge axis. If left negative uses the same chunks as the first input.")
46 ("bufferSizeMB,B", po::value(&bufferSizeMB),
47 "The size of the buffer to use in MB. Cannot be set with 'bufferSizeRows'")
48 ("bufferSizeRows,b", po::value(&bufferSizeRows),
49 "The size of the buffer to use in rows. Cannot be set with 'bufferSizeMB'")
50 ("overwrite,w", po::bool_switch(&overwrite),
51 "Overwrite the output file if it already exists. Cannot be set with 'in-place'")
52 ("in-place,p", po::bool_switch(&inPlace),
53 "The output file is modified in place. Cannot be set with 'overwrite'")
54 ("help,h", "Print this message and exit.");
55
// "inputFiles" lives in a separate description so that it is accepted by the
// parser (via allOptions) but is not printed with `desc` in the help output.
56 po::options_description hidden;
57 hidden.add_options()
58 ("inputFiles", po::value(&inputFiles), "The input files");
// Bare (positional) arguments are collected as "inputFiles"; a max count of -1
// places no limit on how many may be given.
59 po::positional_options_description positional;
60 positional.add("inputFiles", -1);
61
62 po::variables_map vm;
63 po::options_description allOptions;
64 allOptions.add(desc).add(hidden);
65
// Parse the command line into vm.
66 po::store(
67 po::command_line_parser(argc, argv).
69 positional(positional).
71 vm);
72
73
// --help is handled before po::notify(vm) is called: print the usage text and
// the visible options, then exit with status 0.
74 if (vm.count("help") ) {
75 std::cout << "Merge HDF5 files. Usage:" << std::endl << std::endl;
76 std::cout << "hdf5-merge [options] [--input input1,input2,... | input1 [input2 ...]]" << std::endl << std::endl;
77 std::cout <<
desc << std::endl;
78 return 0;
79 }
80 po::notify(vm);
81
// Files given through --input are split on commas, stripped of surrounding
// whitespace, and appended after any positional input files.
82 if (inCSV.size() > 0) {
83 std::vector<std::string> splitCSV;
84 boost::algorithm::split(splitCSV, inCSV, boost::algorithm::is_any_of(",") );
85 for (const std::string& i : splitCSV)
86 inputFiles.push_back(boost::algorithm::trim_copy(i) );
87 }
// Error path: report that no input file was supplied and exit with status 1.
89 std::cerr << "You must specify at least 1 input file!" << std::endl;
90 return 1;
91 }
// The remaining checks reject mutually exclusive option combinations.
92 if (overwrite && inPlace) {
93 std::cerr << "You cannot specify both overwrite and in-place!" << std::endl;
94 return 1;
95 }
// vm.count() is used (rather than the values) so that only options explicitly
// present in vm are treated as "set".
96 if (vm.count("bufferSizeMB") && vm.count("bufferSizeRows") ) {
97 std::cerr << "You cannot specify both bufferSizeMB and bufferSizeRows!" << std::endl;
98 return 1;
99 }
// bufferInRows records the unit of the buffer size: rows when
// --bufferSizeRows was given, bytes otherwise.
101 bool bufferInRows;
102 if (vm.count("bufferSizeRows") ) {
104 bufferInRows = true;
105 }
106 else {
107
// Convert MB to bytes. The guard rejects values for which bufferSizeMB * MB
// would exceed the largest std::size_t.
// NOTE(review): a user-supplied bufferSizeMB of 0 makes this guard divide by
// zero - confirm whether 0 should be rejected beforehand.
108 std::size_t MB = 1024*1024;
109 if (std::size_t(-1) / bufferSizeMB < MB)
110 throw std::overflow_error(
111 "Requested buffer size would overflow the register!");
112 buffer = bufferSizeMB * MB;
113 bufferInRows = false;
114 }
115
116
118 mergeAxis, chunkSize, requireSameFormat, buffer, bufferInRows);
119
120
// Output file access mode: truncate when overwrite is set, open read/write
// when inPlace is set, otherwise H5F_ACC_EXCL.
121 H5::H5File
fOut(outputFile,
122 overwrite ? H5F_ACC_TRUNC : (inPlace ? H5F_ACC_RDWR : H5F_ACC_EXCL) );
123
// Each input is announced on stdout, opened read-only and merged into fOut;
// fIn is scoped to a single loop iteration.
124 for (const std::string& inName : inputFiles) {
125 std::cout << "Merging file " << inName << std::endl;
126 H5::H5File
fIn(inName, H5F_ACC_RDONLY);
127 merger.merge(fOut, fIn);
128 }
129
130
131 return 0;
132}