ATLAS Offline Software
Functions
hdf5-merge.cxx File Reference
#include "H5Cpp.h"
#include <HDF5Utils/DefaultMerger.h>
#include <boost/program_options.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <iostream>
#include <iomanip>
Include dependency graph for hdf5-merge.cxx:

Go to the source code of this file.

Functions

int main (int argc, char *argv[])
 A simple script to merge HDF5 files. More...
 

Function Documentation

◆ main()

int main ( int  argc,
char *  argv[] 
)

A simple script to merge HDF5 files.

This script is intended to read in a list of HDF5 files and create a new file with all datasets contained inside them concatenated along a particular axis.

Definition at line 22 of file hdf5-merge.cxx.

22  {
23  // The options
24  std::string outputFile = "merged.h5";
25  std::string inCSV = "";
26  std::vector<std::string> inputFiles;
27  hsize_t mergeAxis = 0;
28  int chunkSize = -1;
29  bool requireSameFormat = true;
30  std::size_t bufferSizeMB = 100;
31  std::size_t bufferSizeRows = -1;
32  bool overwrite = false;
33  bool inPlace = false;
34 
35  namespace po = boost::program_options;
36  po::options_description desc("Allowed options");
37  desc.add_options()
38  ("output,o", po::value(&outputFile), "The output file.")
39  ("input,i", po::value(&inCSV), "A comma separated list of input files")
40  ("allowDifferentFormats", po::bool_switch(&requireSameFormat),
41  "Allow input files to have different formats.")
42  ("mergeAxis,a", po::value(&mergeAxis),
43  "The axis along which to merge datasets")
44  ("chunkSize,c", po::value(&chunkSize),
45  "The chunk size to use along the merge axis. If left negative uses the same chunks as the first input.")
46  ("bufferSizeMB,B", po::value(&bufferSizeMB),
47  "The size of the buffer to use in MB. Cannot be set with 'bufferSizeRows'")
48  ("bufferSizeRows,b", po::value(&bufferSizeRows),
49  "The size of the buffer to use in rows. Cannot be set with 'bufferSizeMB'")
50  ("overwrite,w", po::bool_switch(&overwrite),
51  "Overwrite the output file if it already exists. Cannot be set with 'in-place'")
52  ("in-place,p", po::bool_switch(&inPlace),
53  "The output file is modified in place. Cannot be set with 'overwrite'")
54  ("help,h", "Print this message and exit.");
55 
56  po::options_description hidden;
57  hidden.add_options()
58  ("inputFiles", po::value(&inputFiles), "The input files");
59  po::positional_options_description positional;
60  positional.add("inputFiles", -1); //> All positional arguments are input files
61 
62  po::variables_map vm;
63  po::options_description allOptions;
64  allOptions.add(desc).add(hidden);
65 
66  po::store(
67  po::command_line_parser(argc, argv).
68  options(allOptions).
69  positional(positional).
70  run(),
71  vm);
72  // Do help before notify - notify will verify input arguments which we don't
73  // want to do with help
74  if (vm.count("help") ) {
75  std::cout << "Merge HDF5 files. Usage:" << std::endl << std::endl;
76  std::cout << "hdf5-merge [options] [--input input1,input2,... | input1 [input2 ...]]" << std::endl << std::endl;
77  std::cout << desc << std::endl;
78  return 0;
79  }
80  po::notify(vm);
81 
82  if (inCSV.size() > 0) {
83  std::vector<std::string> splitCSV;
84  boost::algorithm::split(splitCSV, inCSV, boost::algorithm::is_any_of(",") );
85  for (const std::string& i : splitCSV)
86  inputFiles.push_back(boost::algorithm::trim_copy(i) );
87  }
88  if (inputFiles.size() == 0) {
89  std::cerr << "You must specify at least 1 input file!" << std::endl;
90  return 1;
91  }
92  if (overwrite && inPlace) {
93  std::cerr << "You cannot specify both overwrite and in-place!" << std::endl;
94  return 1;
95  }
96  if (vm.count("bufferSizeMB") && vm.count("bufferSizeRows") ) {
97  std::cerr << "You cannot specify both bufferSizeMB and bufferSizeRows!" << std::endl;
98  return 1;
99  }
100  std::size_t buffer;
101  bool bufferInRows;
102  if (vm.count("bufferSizeRows") ) {
103  buffer = bufferSizeRows;
104  bufferInRows = true;
105  }
106  else {
107  // Default used if neither was set or if bufferSizeMB is set
108  std::size_t MB = 1024*1024;
109  if (std::size_t(-1) / bufferSizeMB < MB)
110  throw std::overflow_error(
111  "Requested buffer size would overflow the register!");
112  buffer = bufferSizeMB * MB;
113  bufferInRows = false;
114  }
115 
116  // Make the merger
117  H5Utils::DefaultMerger merger(
118  mergeAxis, chunkSize, requireSameFormat, buffer, bufferInRows);
119 
120  // Make the output file
121  H5::H5File fOut(outputFile,
122  overwrite ? H5F_ACC_TRUNC : (inPlace ? H5F_ACC_RDWR : H5F_ACC_EXCL) );
123  // Loop over the input files and merge them
124  for (const std::string& inName : inputFiles) {
125  std::cout << "Merging file " << inName << std::endl;
126  H5::H5File fIn(inName, H5F_ACC_RDONLY);
127  merger.merge(fOut, fIn);
128  }
129 
130 
131  return 0;
132 }
SGTest::store
TestStore store
Definition: TestStore.cxx:23
Epos_Base_Fragment.inputFiles
string inputFiles
Definition: Epos_Base_Fragment.py:18
run
int run(int argc, char *argv[])
Definition: ttree2hdf5.cxx:28
athena.value
value
Definition: athena.py:124
AthExHiveOpts.chunkSize
chunkSize
Definition: AthExHiveOpts.py:101
compareGeometries.outputFile
string outputFile
Definition: compareGeometries.py:25
createCoolChannelIdFile.buffer
buffer
Definition: createCoolChannelIdFile.py:12
CaloCondBlobAlgs_fillNoiseFromASCII.desc
desc
Definition: CaloCondBlobAlgs_fillNoiseFromASCII.py:54
lumiFormat.i
int i
Definition: lumiFormat.py:85
LArCellNtuple.argv
argv
Definition: LArCellNtuple.py:152
makeTOC.fOut
fOut
Definition: makeTOC.py:37
python.AtlRunQueryLib.options
options
Definition: AtlRunQueryLib.py:379
DQHistogramMergeRegExp.argc
argc
Definition: DQHistogramMergeRegExp.py:20
H5Utils::DefaultMerger
Definition: DefaultMerger.h:21
MooRTT_summarizeCPU.fIn
fIn
Definition: MooRTT_summarizeCPU.py:11
Trk::split
@ split
Definition: LayerMaterialProperties.h:38