def extract_buffers(events, classified):
    """Collect the flat numpy buffers backing an awkward array.

    The result covers container offsets, nested-vector offsets, and input
    data columns. Output column buffers are not included (allocate_outputs
    handles those).

    Parameters
    ----------
    events:
        An ak.Array (real or zero-length after typetracer conversion).
    classified:
        Output of classify_columns or resolve_optional_columns. Iterated as a
        mapping of container name -> info, where ``info["nested_offsets"]``
        maps a nested offset name to a dict with an ``"inputs"`` list and
        ``info["inputs"]`` lists the flat input columns. Each column is read
        through its ``name`` and ``offset_name`` attributes.

    Returns
    -------
    dict
        Mapping of column / offset name -> numpy array.

    Raises
    ------
    RuntimeError
        If the zipped columns of a group have a form that is neither a
        RecordForm nor a ListOffsetForm.
    """

    def span_offsets(stop):
        # Offsets describing one single range [0, stop).
        return np.array([0, stop], dtype=np.uint64)

    def offset_name_of(column):
        return column.offset_name

    buffers = {}
    num_events = int(ak.num(events, axis=0))

    for container_name, info in classified.items():
        nested_offsets = info["nested_offsets"]

        # Nested-vector columns: innermost offsets plus the matching content.
        for nested_name, nested_info in nested_offsets.items():
            for column in nested_info["inputs"]:
                branch = _branch_name_for_column(column.name)
                innermost = _inner_most_list_offset_array(events[branch])
                offsets = np.asarray(innermost.layout.offsets.data)
                first = int(offsets[0])
                last = int(offsets[-1])

                # Rebase the offsets to start at zero and cut the content to
                # the [first, last) window those offsets actually address.
                buffers[nested_name] = np.ascontiguousarray(
                    offsets - first, dtype=np.uint64
                )
                buffers[column.name] = np.ascontiguousarray(
                    innermost.layout.content.data[first:last]
                )

        flat_inputs = info["inputs"]

        if not flat_inputs:
            # Without flat inputs the container offsets are borrowed from the
            # first nested input column; if there is none at all, a single
            # range spanning every event is used instead.
            representative = next(
                itertools.chain.from_iterable(
                    nested_info["inputs"]
                    for nested_info in nested_offsets.values()
                ),
                None,
            )
            if representative is None:
                buffers[container_name] = span_offsets(num_events)
            else:
                branch = _branch_name_for_column(representative.name)
                buffers[container_name] = np.ascontiguousarray(
                    events[branch].layout.offsets.data, dtype=np.uint64
                )
            continue

        # groupby only merges adjacent items, hence the sort on the same key.
        ordered_inputs = sorted(flat_inputs, key=offset_name_of)

        for offset_name, group in itertools.groupby(
            ordered_inputs, key=offset_name_of
        ):
            columns = list(group)
            zipped = ak.zip(
                {column.name: events[column.name] for column in columns}
            )
            form, length, raw_buffers = ak.to_buffers(
                zipped, form_key=f"{offset_name}{{id}}"
            )

            if not isinstance(
                form, (ak.forms.RecordForm, ak.forms.ListOffsetForm)
            ):
                raise RuntimeError(
                    f"Cannot handle form {type(form)} for "
                    f"container {container_name}"
                )

            # NOTE: every group writes buffers[container_name]; when a
            # container has several offset groups the last one wins.
            if isinstance(form, ak.forms.RecordForm):
                # Top-level record: the columns carry no list structure, so
                # the container is one range over all `length` entries.
                buffers[container_name] = span_offsets(length)
                for column in columns:
                    buffers[column.name] = ak.to_numpy(events[column.name])
                continue

            # ListOffsetForm: take the first "-offsets" buffer as the
            # container offsets.
            offsets_key = next(
                key for key in raw_buffers if key.endswith("-offsets")
            )
            container_offsets = np.asarray(raw_buffers[offsets_key])
            buffers[container_name] = container_offsets.astype(np.uint64)

            # The list content is read as a record; each field's data buffer
            # is looked up through that field's form key.
            record_form = form.content
            for field in record_form.fields:
                data_key = f"{record_form.content(field).form_key}-data"
                buffers[field] = np.asarray(raw_buffers[data_key])

    return buffers
306
307