ATLAS Offline Software
Loading...
Searching...
No Matches
MergeUtils.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration
3*/
4
6
7#include <vector>
8#include <stdexcept>
9
namespace {
  // Minimal owner of a block of memory obtained from malloc/realloc.
  //
  // The block is released by freeData() or, at the latest, by the destructor.
  // 'data' is left public so that it can be handed straight to C-style APIs
  // that expect a void*.
  struct SmartMalloc {
    SmartMalloc() : data(nullptr) {}
    ~SmartMalloc() { this->freeData(); }

    // There is exactly one owner of the block: a copy would hold the same
    // pointer and both destructors would free it.
    SmartMalloc(const SmartMalloc&) = delete;
    SmartMalloc& operator=(const SmartMalloc&) = delete;

    // True if a block is currently held
    explicit operator bool() const { return data != nullptr; }

    // (Re)size the held block to at least 'size' bytes and return it.
    // Throws std::bad_alloc if the memory cannot be obtained.
    void* allocate(std::size_t size);
    // Release the held block (if any)
    void freeData();
    void* data;
  };


  void* SmartMalloc::allocate(std::size_t size) {
    // Never ask the C allocator for zero bytes. malloc(0) is allowed to
    // return a null pointer, and a zero-byte realloc may free the block and
    // return a null pointer; both would be mistaken for a failure, the latter
    // while 'data' still points at memory that has already been released.
    const std::size_t nBytes = (size == 0) ? 1 : size;

    // realloc behaves like malloc when handed a null pointer, so one call
    // covers both the first allocation and a later resize.
    void* newData = realloc(data, nBytes);
    if (!newData)
      // Note that we don't free 'data' here. That will still be taken care of
      // by the destructor. This means that a user can catch the exception if
      // they like and the old memory will still be available.
      throw std::bad_alloc{};

    data = newData;
    return data;
  }

  void SmartMalloc::freeData() {
    // free does nothing to the nullptr so it's safe to call without a check
    free(data);
    // Make sure we know that we don't own anything
    data = nullptr;
  }

}
51
52namespace H5Utils {
54 const H5::DataSet& target,
55 const H5::DataSet& source,
56 hsize_t mergeAxis)
57 {
58 std::string sink;
59 return checkDatasetsToMerge(target, source, mergeAxis, sink);
60 }
61
63 const H5::DataSet& target,
64 const H5::DataSet& source,
65 hsize_t mergeAxis,
66 std::string& errMsg)
67 {
68 // Check that the datasets hold the same types
69 // Note that H5 *can* do type comparisons but this function assumes that we
70 // should only merge the same types
71 if (target.getDataType() != source.getDataType() ) {
72 errMsg = "Target and source datasets hold different types.";
73 return false;
74 }
75
76 // Get the dataspaces
77 H5::DataSpace targetSpace = target.getSpace();
78 H5::DataSpace sourceSpace = source.getSpace();
79 if (!targetSpace.isSimple() || !sourceSpace.isSimple() ) {
80 errMsg = "Only simple dataspaces are understood.";
81 return false;
82 }
83
84 // Make sure that the dataspaces have the same dimensions
85 int nDims = targetSpace.getSimpleExtentNdims();
86 if (nDims != sourceSpace.getSimpleExtentNdims() ) {
87 errMsg = "Target and source dataspaces have different dimensions, " +
88 std::to_string(nDims) + " and " +
89 std::to_string(sourceSpace.getSimpleExtentNdims() ) + " respectively";
90 return false;
91 }
92
93 // Make sure that the merge axis fits in the dimension
94 if (nDims <= static_cast<int>(mergeAxis)) {
95 errMsg = "Dataset dimension " + std::to_string(nDims) +
96 " is not compatible with the merge axis " +
97 std::to_string(mergeAxis);
98 return false;
99 }
100
101 // Now make sure that the extent matches
102 std::vector<hsize_t> targetDims(nDims, 0);
103 std::vector<hsize_t> maxTargetDims(nDims, 0);
104 targetSpace.getSimpleExtentDims(targetDims.data(), maxTargetDims.data() );
105 std::vector<hsize_t> sourceDims(nDims, 0);
106 sourceSpace.getSimpleExtentDims(sourceDims.data() );
107
108 for (int ii = 0; ii < nDims; ++ii) {
109 // Skip the merge axis in this check
110 if (ii == static_cast<int>(mergeAxis) )
111 continue;
112 if (targetDims.at(ii) != sourceDims.at(ii) ) {
113 errMsg = "Target and source databases dimensions differ on axis " +
114 std::to_string(ii) + ", " + std::to_string(targetDims.at(ii) ) +
115 " and " + std::to_string(sourceDims.at(ii) ) + " respectively";
116 return false;
117 }
118 }
119
120 // Check the maximum extent is sufficient
121 if (maxTargetDims.at(mergeAxis) < (
122 targetDims.at(mergeAxis) + sourceDims.at(mergeAxis) ) ) {
123 errMsg = "Merged dataset will not fit into target dataset";
124 return false;
125 }
126
127 return true;
128 } //> end function checkDatasetsToMerge
129
131 H5::DataSet& target,
132 const H5::DataSet& source,
133 hsize_t mergeAxis,
134 std::size_t bufferSize)
135 {
136 std::string errMsg;
137 if (!checkDatasetsToMerge(target, source, mergeAxis, errMsg) )
138 throw std::invalid_argument(errMsg);
139
140 // Get information about the target and source datasets
141 H5::DataSpace targetSpace = target.getSpace();
142 H5::DataSpace sourceSpace = source.getSpace();
143 int nDims = targetSpace.getSimpleExtentNdims();
144
145 // Now make sure that the extent matches
146 std::vector<hsize_t> targetDims(nDims, 0);
147 targetSpace.getSimpleExtentDims(targetDims.data() );
148 std::vector<hsize_t> sourceDims(nDims, 0);
149 sourceSpace.getSimpleExtentDims(sourceDims.data() );
150
151 // Start by extending the target dataset
152 std::vector<hsize_t> newDims = targetDims;
153 newDims.at(mergeAxis) += sourceDims.at(mergeAxis);
154 target.extend(newDims.data() );
155 targetSpace.setExtentSimple(newDims.size(), newDims.data() );
156
157 // Now we need to work out how far we need to subdivide the source dataset
158 // to fit it inside the buffer.
159 std::size_t rowSize = getRowSize(source, mergeAxis);
160 // How many rows can we fit into one buffer
161 std::size_t nRowsBuffer = bufferSize / rowSize;
162 if (nRowsBuffer == 0)
163 throw std::invalid_argument(
164 "Allocated buffer is smaller than a single row! Merging is impossible.");
165
166 // We have to allocate an area in memory for the buffer. Unlike normally in
167 // C++ we aren't allocating a space for an object but a specific size. This
168 // means that we have to use malloc.
169 // Smart pointers require some annoying syntax to use with malloc, but we
170 // can implement the same pattern with a simple struct.
171 SmartMalloc buffer;
172
173 // Keep track of the offset from the target dataset
174 std::vector<hsize_t> targetOffset(nDims, 0);
175 // Start it from its end point before we extended it
176 targetOffset.at(mergeAxis) = targetDims.at(mergeAxis);
177
178 // Step through the source dataset in increments equal to the number of
179 // source rows that can fit into the buffer.
180 std::size_t nSourceRows = sourceDims.at(mergeAxis);
181 for (std::size_t iRow = 0; iRow < nSourceRows; iRow += nRowsBuffer) {
182 // Construct the size and offset of the source slab
183 std::vector<hsize_t> sourceOffset(nDims, 0);
184 sourceOffset.at(mergeAxis) = iRow;
185 // The number of rows to write
186 std::size_t nRowsToWrite = std::min(nSourceRows-iRow, nRowsBuffer);
187 std::vector<hsize_t> sourceSize(sourceDims);
188 sourceSize.at(mergeAxis) = nRowsToWrite;
189 // Create the source hyperslab
190 sourceSpace.selectNone();
191 sourceSpace.selectHyperslab(
192 H5S_SELECT_SET,
193 sourceSize.data(),
194 sourceOffset.data() );
195
196 // Create the target hyperslab
197 targetSpace.selectNone();
198 targetSpace.selectHyperslab(
199 H5S_SELECT_SET,
200 sourceSize.data(),
201 targetOffset.data() );
202
203 H5::DataSpace memorySpace(sourceSize.size(), sourceSize.data() );
204 memorySpace.selectAll();
205
206 // Prepare the buffer
207 buffer.allocate(nRowsToWrite*rowSize);
208 // Read into it
209 source.read(buffer.data, source.getDataType(), memorySpace, sourceSpace);
210 // Write from it
211 target.write(buffer.data, target.getDataType(), memorySpace, targetSpace);
212 // Increment the target offset
213 targetOffset.at(mergeAxis) += nRowsToWrite;
214 }
215 // Sanity check - make sure that the final targetOffset is where we think it
216 // should be
217 if (targetOffset.at(mergeAxis) != newDims.at(mergeAxis) )
218 throw std::logic_error(
219 "Target dataset was not filled! This indicates a logic error in the code!");
220 }
221
222 H5::DataSet createDataSet(
223 H5::H5Location& targetLocation,
224 const H5::DataSet& source,
225 hsize_t mergeAxis,
226 int chunkSize,
227 int mergeExtent)
228 {
229 H5::DataSpace sourceSpace = source.getSpace();
230 // Get the new extent
231 std::vector<hsize_t> DSExtent(sourceSpace.getSimpleExtentNdims(), 0);
232 sourceSpace.getSimpleExtentDims(DSExtent.data() );
233 // Set the merge axis to be 0 length to begin with
234 DSExtent.at(mergeAxis) = 0;
235 std::vector<hsize_t> maxDSExtent = DSExtent;
236 maxDSExtent.at(mergeAxis) = mergeExtent;
237
238 // Get the existing dataset creation properties
239 H5::DSetCreatPropList cList = source.getCreatePlist();
240 if (chunkSize > 0) {
241 std::vector<hsize_t> chunks = DSExtent;
242 chunks.at(mergeAxis) = chunkSize;
243 cList.setChunk(chunks.size(), chunks.data() );
244 }
245
246 // Create the new space
247 H5::DataSpace space(DSExtent.size(), DSExtent.data(), maxDSExtent.data());
248 // This does nothing with the acc property list because I don't know
249 // what it is
250 return targetLocation.createDataSet(
251 source.getObjName(), source.getDataType(), space, cList);
252 }
253
254 std::size_t getRowSize(const H5::DataSet& ds, hsize_t axis) {
255 // The size of one element
256 std::size_t eleSize = ds.getDataType().getSize();
257
258 // The dimensions of the space
259 H5::DataSpace space = ds.getSpace();
260 std::vector<hsize_t> spaceDims(space.getSimpleExtentNdims(), 0);
261 space.getSimpleExtentDims(spaceDims.data() );
262
263 std::size_t nRowElements = 1;
264 for (std::size_t ii = 0; ii < spaceDims.size(); ++ii)
265 if (ii != axis)
266 nRowElements *= spaceDims.at(ii);
267
268 // Double check that this fits. This is probably over cautious but fine...
269 if (std::size_t(-1) / nRowElements < eleSize)
270 throw std::overflow_error("The size of one row would overflow the register!");
271
272 return eleSize * nRowElements;
273 }
274} //> end namespace H5Utils
char data[hepevt_bytes_allocation_ATLAS]
Definition HepEvt.cxx:11
void * allocate(const size_t num)
Allocates and returns the address of num bytes from GPU memory.
bool checkDatasetsToMerge(const H5::DataSet &target, const H5::DataSet &source, hsize_t mergeAxis)
Make sure that two datasets can be merged.
std::size_t getRowSize(const H5::DataSet &ds, hsize_t axis)
Calculate the size of a row of a dataset in bytes.
H5::DataSet createDataSet(H5::H5Location &targetLocation, const H5::DataSet &source, hsize_t mergeAxis, int chunkSize=-1, int mergeExtent=-1)
Make a new dataset using the properties of another.
void mergeDatasets(H5::DataSet &target, const H5::DataSet &source, hsize_t mergeAxis, std::size_t bufferSize=-1)
Merge two datasets.
setBGCode setTAP setLVL2ErrorBits bool