18 """Return the inner-most singly-jagged ListOffsetArray of ``array``.
20 For a ``var * var * T`` input (e.g. NumTrkPt500), returns a view whose
21 layout is a ``ListOffsetArray`` with ``NumpyArray`` content — i.e. the
22 per-particle list structure collapsed across the full event range.
23 Use ``result.layout.offsets.data`` for cumulative per-particle inner
24 offsets and ``result.layout.content.data`` for the flat numeric buffer.
26 Solution from Peter Fackeldey: traverse the layout with ``ak.transform``
27 and capture the deepest singly-jagged node.
30 layout_depth = layout.purelist_depth
32 is_branched, _ = layout.branch_depth
35 f
"{layout} has branching, cannot extract inner-most ListOffsetArray"
38 def _is_singly_jagged(lay):
40 isinstance(lay, ak.contents.ListOffsetArray)
41 and isinstance(lay.content, ak.contents.NumpyArray)
46 def _capture(lay, depth, **_kwargs):
48 if depth == (layout_depth - 1)
and _is_singly_jagged(lay):
49 found = lay.materialize()
51 ak.transform(_capture, layout, return_value=
"none")
55 "Did not find a singly-jagged ListOffsetArray at the inner-most depth"
57 return ak.Array(found)
66 """Group ColumnInfo objects by container and role.
71 Iterable of ColumnInfo objects as returned by PythonToolHandle.columns.
76 Keyed by container offset name (e.g. "EventInfo", "Muons"). Each value
79 - ``"offset"``: the ColumnInfo for this container's offset column
80 - ``"inputs"``: list of input ColumnInfo belonging to this container
81 - ``"outputs"``: list of output ColumnInfo belonging to this container
82 - ``"nested_offsets"``: dict of name -> ColumnInfo for offset columns
83 that are children of this container (e.g. "Muons.NumTrkPt500.offset")
87 Container offsets have ``is_offset=True`` and an ``offset_name`` of either
88 ``''`` (root, e.g. "EventInfo") or the name of another container offset
89 (e.g. "Muons" has ``offset_name="EventInfo"``). Nested-vector offsets also
90 have ``is_offset=True`` but their name contains a dot; they are stored under
91 ``"nested_offsets"`` of their parent container rather than as top-level keys.
94 offset_cols = {col.name: col
for col
in columns
if col.is_offset}
95 data_cols = [col
for col
in columns
if not col.is_offset]
104 container_offsets = {}
105 nested_offsets_by_container = {}
107 for name, col
in offset_cols.items():
108 parent = col.offset_name
109 if parent ==
"" or parent
in offset_cols:
112 nested_offsets_by_container.setdefault(parent, {})[name] = {
118 container_offsets[name] = col
121 container_offsets[name] = col
129 "nested_offsets": nested_offsets_by_container.get(name, {}),
131 for name, col
in container_offsets.items()
136 nested_to_container = {
137 nested_name: container
138 for container, nested_map
in nested_offsets_by_container.items()
139 for nested_name
in nested_map
143 for col
in data_cols:
144 target = col.offset_name
145 is_output = col.access_mode == ColumnAccessMode.output
147 if target
in classified:
148 bucket = classified[target]
149 elif target
in nested_to_container:
150 container = nested_to_container[target]
151 bucket = classified[container][
"nested_offsets"][target]
157 bucket[
"outputs"].append(col)
159 bucket[
"inputs"].append(col)
197 """Extract flat numpy buffers from an awkward array.
199 Returns a dict mapping column name -> numpy array, covering all container
200 offsets, nested-vector offsets, and input data columns. Output column
201 buffers are not included (allocate_outputs handles those).
206 An ak.Array (real or zero-length after typetracer conversion).
208 Output of classify_columns or resolve_optional_columns.
211 num_events = int(ak.num(events, axis=0))
213 for container_name, info
in classified.items():
214 nested_offsets = info[
"nested_offsets"]
218 for nested_offset_name, nested
in nested_offsets.items():
219 for col
in nested[
"inputs"]:
222 raw_offsets = np.asarray(inner.layout.offsets.data)
223 start = int(raw_offsets[0])
224 end = int(raw_offsets[-1])
227 buffers[nested_offset_name] = np.ascontiguousarray(
228 raw_offsets - start, dtype=np.uint64
230 buffers[col.name] = np.ascontiguousarray(
231 inner.layout.content.data[start:end]
234 flat_inputs = info[
"inputs"]
236 if not flat_inputs
and not nested_offsets:
238 buffers[container_name] = np.array([0, num_events], dtype=np.uint64)
244 any_nested_input = next(
245 (col
for nested
in nested_offsets.values()
for col
in nested[
"inputs"]),
248 if any_nested_input
is not None:
250 buffers[container_name] = np.ascontiguousarray(
251 events[base].layout.offsets.data, dtype=np.uint64
254 buffers[container_name] = np.array([0, num_events], dtype=np.uint64)
259 sorted_cols = sorted(flat_inputs, key=
lambda c: c.offset_name)
261 for offset_name, cols_iter
in itertools.groupby(
262 sorted_cols, key=
lambda c: c.offset_name
264 cols = list(cols_iter)
265 unzipped = {col.name: events[col.name]
for col
in cols}
266 zipped = ak.zip(unzipped)
269 form, length, raw_buffers = ak.to_buffers(
270 zipped, form_key=f
"{offset_name}{{id}}"
273 if isinstance(form, ak.forms.RecordForm):
278 buffers[container_name] = np.array(
279 [0, length], dtype=np.uint64
282 buffers[col.name] = ak.to_numpy(events[col.name])
283 elif isinstance(form, ak.forms.ListOffsetForm):
287 key
for key
in raw_buffers
if key.endswith(
"-offsets")
289 buffers[container_name] = np.asarray(
290 raw_buffers[offset_key]
295 for field
in inner.fields:
296 buffers[field] = np.asarray(
297 raw_buffers[f
"{inner.content(field).form_key}-data"]
301 f
"Cannot handle form {type(form)} for "
302 f
"container {container_name}"
309 """Allocate zero-filled numpy arrays for each output column.
311 Sizes each output array using ``offsets[-1]`` of the referenced offset
312 buffer. Arrays are added into ``buffer_dict`` in-place and also returned.
317 Output of ``classify_columns`` or ``resolve_optional_columns``.
319 Dict of column name -> numpy array, as returned by ``extract_buffers``.
320 Modified in-place to include the newly allocated output arrays.
325 Mapping of output column name -> zero-filled numpy array (same objects
326 also inserted into ``buffer_dict``).
328 nested_offset_names = {
330 for info
in classified.values()
331 for nested_name
in info[
"nested_offsets"]
334 for _container_name, info
in classified.items():
335 for col
in info[
"outputs"]:
336 if col.offset_name
in nested_offset_names:
337 raise NotImplementedError(
338 f
"Nested-vector output columns are not supported "
339 f
"(column '{col.name}' has nested offset '{col.offset_name}')"
341 offset_data = buffer_dict.get(col.offset_name)
342 if offset_data
is None:
344 f
"Cannot find offset buffer '{col.offset_name}' "
345 f
"needed for output column '{col.name}'"
347 raise RuntimeError(msg)
348 size = int(offset_data[-1])
349 arr = np.zeros(size, dtype=col.dtype)
350 output_buffers[col.name] = arr
351 buffer_dict[col.name] = arr
352 return output_buffers
356 """Build an awkward array from output column buffers.
361 Output of ``classify_columns`` or ``resolve_optional_columns``.
363 Dict of column name -> numpy array, containing both offset buffers and
364 the output arrays populated by ``allocate_outputs`` and ``call()``.
366 Number of events (outer axis length of the returned array).
371 Record array with one field per output column, each a variable-length
372 list of per-particle values (i.e. ``var * dtype``).
380 for _container_name, info
in classified.items():
381 for col
in info[
"outputs"]:
382 node_offset = f
"node{2 * node_index}"
383 node_data = f
"node{2 * node_index + 1}"
386 form_fields.append(col.name)
387 form_contents.append(
389 "class":
"ListOffsetArray",
392 "class":
"NumpyArray",
393 "primitive": col.dtype,
394 "form_key": node_data,
396 "form_key": node_offset,
400 out_buffers[f
"{node_data}-data"] = buffer_dict[col.name]
401 out_buffers[f
"{node_offset}-offsets"] = buffer_dict[col.offset_name]
404 "class":
"RecordArray",
405 "fields": form_fields,
406 "contents": form_contents,
410 return ak.from_buffers(form, num_events, out_buffers)