22int main(
int argc,
char* argv[]) {
24 std::string outputFile =
"merged.h5";
25 std::string inCSV =
"";
26 std::vector<std::string> inputFiles;
27 hsize_t mergeAxis = 0;
29 bool requireSameFormat =
true;
30 std::size_t bufferSizeMB = 100;
31 std::size_t bufferSizeRows = -1;
32 bool overwrite =
false;
35 namespace po = boost::program_options;
36 po::options_description desc(
"Allowed options");
38 (
"output,o", po::value(&outputFile),
"The output file.")
39 (
"input,i", po::value(&inCSV),
"A comma separated list of input files")
40 (
"allowDifferentFormats", po::bool_switch(&requireSameFormat),
41 "Allow input files to have different formats.")
42 (
"mergeAxis,a", po::value(&mergeAxis),
43 "The axis along which to merge datasets")
44 (
"chunkSize,c", po::value(&chunkSize),
45 "The chunk size to use along the merge axis. If left negative uses the same chunks as the first input.")
46 (
"bufferSizeMB,B", po::value(&bufferSizeMB),
47 "The size of the buffer to use in MB. Cannot be set with 'bufferSizeRows'")
48 (
"bufferSizeRows,b", po::value(&bufferSizeRows),
49 "The size of the buffer to use in rows. Cannot be set with 'bufferSizeMB'")
50 (
"overwrite,w", po::bool_switch(&overwrite),
51 "Overwrite the output file if it already exists. Cannot be set with 'in-place'")
52 (
"in-place,p", po::bool_switch(&inPlace),
53 "The output file is modified in place. Cannot be set with 'overwrite'")
54 (
"help,h",
"Print this message and exit.");
56 po::options_description hidden;
58 (
"inputFiles", po::value(&inputFiles),
"The input files");
59 po::positional_options_description positional;
60 positional.add(
"inputFiles", -1);
63 po::options_description allOptions;
64 allOptions.add(desc).add(hidden);
67 po::command_line_parser(argc, argv).
69 positional(positional).
74 if (vm.count(
"help") ) {
75 std::cout <<
"Merge HDF5 files. Usage:" << std::endl << std::endl;
76 std::cout <<
"hdf5-merge [options] [--input input1,input2,... | input1 [input2 ...]]" << std::endl << std::endl;
77 std::cout << desc << std::endl;
82 if (inCSV.size() > 0) {
83 std::vector<std::string> splitCSV;
84 boost::algorithm::split(splitCSV, inCSV, boost::algorithm::is_any_of(
",") );
85 for (
const std::string& i : splitCSV)
86 inputFiles.push_back(boost::algorithm::trim_copy(i) );
88 if (inputFiles.size() == 0) {
89 std::cerr <<
"You must specify at least 1 input file!" << std::endl;
92 if (overwrite && inPlace) {
93 std::cerr <<
"You cannot specify both overwrite and in-place!" << std::endl;
96 if (vm.count(
"bufferSizeMB") && vm.count(
"bufferSizeRows") ) {
97 std::cerr <<
"You cannot specify both bufferSizeMB and bufferSizeRows!" << std::endl;
102 if (vm.count(
"bufferSizeRows") ) {
103 buffer = bufferSizeRows;
108 std::size_t MB = 1024*1024;
109 if (std::size_t(-1) / bufferSizeMB < MB)
110 throw std::overflow_error(
111 "Requested buffer size would overflow the register!");
112 buffer = bufferSizeMB * MB;
113 bufferInRows =
false;
118 mergeAxis, chunkSize, requireSameFormat, buffer, bufferInRows);
121 H5::H5File fOut(outputFile,
122 overwrite ? H5F_ACC_TRUNC : (inPlace ? H5F_ACC_RDWR : H5F_ACC_EXCL) );
124 for (
const std::string& inName : inputFiles) {
125 std::cout <<
"Merging file " << inName << std::endl;
126 H5::H5File fIn(inName, H5F_ACC_RDONLY);
127 merger.
merge(fOut, fIn);