ATLAS Offline Software
MergeUtils.cxx
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration
3 */
4 
#include "HDF5Utils/MergeUtils.h"

#include <algorithm>
#include <cstdlib>
#include <new>
#include <stdexcept>
#include <string>
#include <vector>
9 
namespace {
  // Minimal owner of an untyped heap buffer obtained from malloc/realloc.
  //
  // The buffer is released by freeData() or by the destructor. Copying is
  // disabled because two copies would both try to free the same pointer.
  struct SmartMalloc {
    SmartMalloc() = default;
    ~SmartMalloc() { freeData(); }

    // This type owns a raw pointer: forbid copies (this also suppresses the
    // implicit move operations).
    SmartMalloc(const SmartMalloc&) = delete;
    SmartMalloc& operator=(const SmartMalloc&) = delete;

    // True while a buffer is held.
    explicit operator bool() const { return data != nullptr; }

    // (Re)size the held buffer to at least 'size' bytes and return it.
    // Throws std::bad_alloc on failure; the previously held buffer (if any)
    // stays valid and owned in that case.
    void* allocate(std::size_t size);

    // Release the held buffer (no-op if nothing is held).
    void freeData();

    // The held buffer, or nullptr when nothing is allocated.
    void* data = nullptr;
  };


  void* SmartMalloc::allocate(std::size_t size) {
    // Zero-byte requests are implementation-defined for both malloc and
    // realloc: they may legitimately return a null pointer, and realloc may
    // additionally free the old block. Either would be misread as an
    // allocation failure (and the latter would leave 'data' dangling), so
    // always request at least one byte.
    const std::size_t request = (size == 0) ? 1 : size;

    // realloc on a null pointer behaves like malloc, so a single call covers
    // both the first allocation and every later resize.
    void* resized = std::realloc(data, request);
    if (!resized)
      // 'data' is deliberately left untouched: on failure realloc keeps the
      // old block alive, so the caller may catch the exception and still use
      // it; the destructor will release it eventually.
      throw std::bad_alloc{};

    data = resized;
    return data;
  }

  void SmartMalloc::freeData() {
    // free(nullptr) is a no-op, so no check is required
    std::free(data);
    // Record that nothing is owned any more
    data = nullptr;
  }

}
51 
52 namespace H5Utils {
54  const H5::DataSet& target,
55  const H5::DataSet& source,
56  hsize_t mergeAxis)
57  {
58  std::string sink;
59  return checkDatasetsToMerge(target, source, mergeAxis, sink);
60  }
61 
63  const H5::DataSet& target,
64  const H5::DataSet& source,
65  hsize_t mergeAxis,
66  std::string& errMsg)
67  {
68  // Check that the datasets hold the same types
69  // Note that H5 *can* do type comparisons but this function assumes that we
70  // should only merge the same types
71  if (target.getDataType() != source.getDataType() ) {
72  errMsg = "Target and source datasets hold different types.";
73  return false;
74  }
75 
76  // Get the dataspaces
77  H5::DataSpace targetSpace = target.getSpace();
78  H5::DataSpace sourceSpace = source.getSpace();
79  if (!targetSpace.isSimple() || !sourceSpace.isSimple() ) {
80  errMsg = "Only simple dataspaces are understood.";
81  return false;
82  }
83 
84  // Make sure that the dataspaces have the same dimensions
85  int nDims = targetSpace.getSimpleExtentNdims();
86  if (nDims != sourceSpace.getSimpleExtentNdims() ) {
87  errMsg = "Target and source dataspaces have different dimensions, " +
88  std::to_string(nDims) + " and " +
89  std::to_string(sourceSpace.getSimpleExtentNdims() ) + " respectively";
90  return false;
91  }
92 
93  // Make sure that the merge axis fits in the dimension
94  if (nDims <= static_cast<int>(mergeAxis)) {
95  errMsg = "Dataset dimension " + std::to_string(nDims) +
96  " is not compatible with the merge axis " +
97  std::to_string(mergeAxis);
98  return false;
99  }
100 
101  // Now make sure that the extent matches
102  std::vector<hsize_t> targetDims(nDims, 0);
103  std::vector<hsize_t> maxTargetDims(nDims, 0);
104  targetSpace.getSimpleExtentDims(targetDims.data(), maxTargetDims.data() );
105  std::vector<hsize_t> sourceDims(nDims, 0);
106  sourceSpace.getSimpleExtentDims(sourceDims.data() );
107 
108  for (int ii = 0; ii < nDims; ++ii) {
109  // Skip the merge axis in this check
110  if (ii == static_cast<int>(mergeAxis) )
111  continue;
112  if (targetDims.at(ii) != sourceDims.at(ii) ) {
113  errMsg = "Target and source databases dimensions differ on axis " +
114  std::to_string(ii) + ", " + std::to_string(targetDims.at(ii) ) +
115  " and " + std::to_string(sourceDims.at(ii) ) + " respectively";
116  return false;
117  }
118  }
119 
120  // Check the maximum extent is sufficient
121  if (maxTargetDims.at(mergeAxis) < (
122  targetDims.at(mergeAxis) + sourceDims.at(mergeAxis) ) ) {
123  errMsg = "Merged dataset will not fit into target dataset";
124  return false;
125  }
126 
127  return true;
128  } //> end function checkDatasetsToMerge
129 
131  H5::DataSet& target,
132  const H5::DataSet& source,
133  hsize_t mergeAxis,
134  std::size_t bufferSize)
135  {
136  std::string errMsg;
137  if (!checkDatasetsToMerge(target, source, mergeAxis, errMsg) )
138  throw std::invalid_argument(errMsg);
139 
140  // Get information about the target and source datasets
141  H5::DataSpace targetSpace = target.getSpace();
142  H5::DataSpace sourceSpace = source.getSpace();
143  int nDims = targetSpace.getSimpleExtentNdims();
144 
145  // Now make sure that the extent matches
146  std::vector<hsize_t> targetDims(nDims, 0);
147  targetSpace.getSimpleExtentDims(targetDims.data() );
148  std::vector<hsize_t> sourceDims(nDims, 0);
149  sourceSpace.getSimpleExtentDims(sourceDims.data() );
150 
151  // Start by extending the target dataset
152  std::vector<hsize_t> newDims = targetDims;
153  newDims.at(mergeAxis) += sourceDims.at(mergeAxis);
154  target.extend(newDims.data() );
155  targetSpace.setExtentSimple(newDims.size(), newDims.data() );
156 
157  // Now we need to work out how far we need to subdivide the source dataset
158  // to fit it inside the buffer.
159  std::size_t rowSize = getRowSize(source, mergeAxis);
160  // How many rows can we fit into one buffer
161  std::size_t nRowsBuffer = bufferSize / rowSize;
162  if (nRowsBuffer == 0)
163  throw std::invalid_argument(
164  "Allocated buffer is smaller than a single row! Merging is impossible.");
165 
166  // We have to allocate an area in memory for the buffer. Unlike normally in
167  // C++ we aren't allocating a space for an object but a specific size. This
168  // means that we have to use malloc.
169  // Smart pointers require some annoying syntax to use with malloc, but we
170  // can implement the same pattern with a simple struct.
171  SmartMalloc buffer;
172 
173  // Keep track of the offset from the target dataset
174  std::vector<hsize_t> targetOffset(nDims, 0);
175  // Start it from its end point before we extended it
176  targetOffset.at(mergeAxis) = targetDims.at(mergeAxis);
177 
178  // Step through the source dataset in increments equal to the number of
179  // source rows that can fit into the buffer.
180  std::size_t nSourceRows = sourceDims.at(mergeAxis);
181  for (std::size_t iRow = 0; iRow < nSourceRows; iRow += nRowsBuffer) {
182  // Construct the size and offset of the source slab
183  std::vector<hsize_t> sourceOffset(nDims, 0);
184  sourceOffset.at(mergeAxis) = iRow;
185  // The number of rows to write
186  std::size_t nRowsToWrite = std::min(nSourceRows-iRow, nRowsBuffer);
187  std::vector<hsize_t> sourceSize(sourceDims);
188  sourceSize.at(mergeAxis) = nRowsToWrite;
189  // Create the source hyperslab
190  sourceSpace.selectNone();
191  sourceSpace.selectHyperslab(
192  H5S_SELECT_SET,
193  sourceSize.data(),
194  sourceOffset.data() );
195 
196  // Create the target hyperslab
197  targetSpace.selectNone();
198  targetSpace.selectHyperslab(
199  H5S_SELECT_SET,
200  sourceSize.data(),
201  targetOffset.data() );
202 
203  H5::DataSpace memorySpace(sourceSize.size(), sourceSize.data() );
204  memorySpace.selectAll();
205 
206  // Prepare the buffer
207  buffer.allocate(nRowsToWrite*rowSize);
208  // Read into it
209  source.read(buffer.data, source.getDataType(), memorySpace, sourceSpace);
210  // Write from it
211  target.write(buffer.data, target.getDataType(), memorySpace, targetSpace);
212  // Increment the target offset
213  targetOffset.at(mergeAxis) += nRowsToWrite;
214  }
215  // Sanity check - make sure that the final targetOffset is where we think it
216  // should be
217  if (targetOffset.at(mergeAxis) != newDims.at(mergeAxis) )
218  throw std::logic_error(
219  "Target dataset was not filled! This indicates a logic error in the code!");
220  }
221 
222  H5::DataSet createDataSet(
223  H5::H5Location& targetLocation,
224  const H5::DataSet& source,
225  hsize_t mergeAxis,
226  int chunkSize,
227  int mergeExtent)
228  {
229  H5::DataSpace sourceSpace = source.getSpace();
230  // Get the new extent
231  std::vector<hsize_t> DSExtent(sourceSpace.getSimpleExtentNdims(), 0);
232  sourceSpace.getSimpleExtentDims(DSExtent.data() );
233  // Set the merge axis to be 0 length to begin with
234  DSExtent.at(mergeAxis) = 0;
235  std::vector<hsize_t> maxDSExtent = DSExtent;
236  maxDSExtent.at(mergeAxis) = mergeExtent;
237 
238  // Get the existing dataset creation properties
239  H5::DSetCreatPropList cList = source.getCreatePlist();
240  if (chunkSize > 0) {
241  std::vector<hsize_t> chunks = DSExtent;
242  chunks.at(mergeAxis) = chunkSize;
243  cList.setChunk(chunks.size(), chunks.data() );
244  }
245 
246  // Create the new space
247  H5::DataSpace space(DSExtent.size(), DSExtent.data(), maxDSExtent.data());
248  // This does nothing with the acc property list because I don't know
249  // what it is
250  return targetLocation.createDataSet(
251  source.getObjName(), source.getDataType(), space, cList);
252  }
253 
254  std::size_t getRowSize(const H5::DataSet& ds, hsize_t axis) {
255  // The size of one element
256  std::size_t eleSize = ds.getDataType().getSize();
257 
258  // The dimensions of the space
259  H5::DataSpace space = ds.getSpace();
260  std::vector<hsize_t> spaceDims(space.getSimpleExtentNdims(), 0);
261  space.getSimpleExtentDims(spaceDims.data() );
262 
263  std::size_t nRowElements = 1;
264  for (std::size_t ii = 0; ii < spaceDims.size(); ++ii)
265  if (ii != axis)
266  nRowElements *= spaceDims.at(ii);
267 
268  // Double check that this fits. This is probably over cautious but fine...
269  if (std::size_t(-1) / nRowElements < eleSize)
270  throw std::overflow_error("The size of one row would overflow the register!");
271 
272  return eleSize * nRowElements;
273  }
274 } //> end namespace H5Utils
data
char data[hepevt_bytes_allocation_ATLAS]
Definition: HepEvt.cxx:11
checkxAOD.ds
ds
Definition: Tools/PyUtils/bin/checkxAOD.py:260
min
constexpr double min()
Definition: ap_fixedTest.cxx:26
yodamerge_tmp.axis
list axis
Definition: yodamerge_tmp.py:241
AthExHiveOpts.chunkSize
chunkSize
Definition: AthExHiveOpts.py:101
python.setupRTTAlg.size
int size
Definition: setupRTTAlg.py:39
createCoolChannelIdFile.buffer
buffer
Definition: createCoolChannelIdFile.py:12
TrigInDetValidation_Base.malloc
malloc
Definition: TrigInDetValidation_Base.py:132
H5Utils
HDF5 Tuple Writer.
Definition: common.h:20
CaloRecGPU::CUDA_Helpers::allocate
void * allocate(const size_t num)
Allocates and returns the address of num bytes from GPU memory.
MergeUtils.h
ActsTrk::to_string
std::string to_string(const DetectorType &type)
Definition: GeometryDefs.h:34
H5Utils::getRowSize
std::size_t getRowSize(const H5::DataSet &ds, hsize_t axis)
Calculate the size of a row of a dataset in bytes.
Definition: MergeUtils.cxx:254
H5Utils::mergeDatasets
void mergeDatasets(H5::DataSet &target, const H5::DataSet &source, hsize_t mergeAxis, std::size_t bufferSize=-1)
Merge two datasets.
Definition: MergeUtils.cxx:130
H5Utils::checkDatasetsToMerge
bool checkDatasetsToMerge(const H5::DataSet &target, const H5::DataSet &source, hsize_t mergeAxis)
Make sure that two datasets can be merged.
Definition: MergeUtils.cxx:53
copySelective.target
string target
Definition: copySelective.py:37
H5Utils::createDataSet
H5::DataSet createDataSet(H5::H5Location &targetLocation, const H5::DataSet &source, hsize_t mergeAxis, int chunkSize=-1, int mergeExtent=-1)
Make a new dataset using the properties of another.
Definition: MergeUtils.cxx:222
copySelective.source
string source
Definition: copySelective.py:32
xAOD::bool
setBGCode setTAP setLVL2ErrorBits bool
Definition: TrigDecision_v1.cxx:60