18 """Group ColumnInfo objects by container and role.
23 Iterable of ColumnInfo objects as returned by PythonToolHandle.columns.
28 Keyed by container offset name (e.g. "EventInfo", "Muons"). Each value
31 - ``"offset"``: the ColumnInfo for this container's offset column
32 - ``"inputs"``: list of input ColumnInfo belonging to this container
33 - ``"outputs"``: list of output ColumnInfo belonging to this container
34 - ``"nested_offsets"``: dict of name -> ColumnInfo for offset columns
35 that are children of this container (e.g. "Muons.NumTrkPt500.offset")
39 Container offsets have ``is_offset=True`` and an ``offset_name`` of either
40 ``''`` (root, e.g. "EventInfo") or the name of another container offset
41 (e.g. "Muons" has ``offset_name="EventInfo"``). Nested-vector offsets also
42 have ``is_offset=True`` but their name contains a dot; they are stored under
43 ``"nested_offsets"`` of their parent container rather than as top-level keys.
46 offset_cols = {col.name: col
for col
in columns
if col.is_offset}
47 data_cols = [col
for col
in columns
if not col.is_offset]
59 container_offsets = {}
60 nested_offsets_by_container = {}
62 for name, col
in offset_cols.items():
63 parent = col.offset_name
64 if parent ==
"" or parent
in offset_cols:
70 nested_offsets_by_container.setdefault(container, {})[name] = col
72 container_offsets[name] = col
75 container_offsets[name] = col
83 "nested_offsets": nested_offsets_by_container.get(name, {}),
85 for name, col
in container_offsets.items()
90 container = col.offset_name
91 if container
not in classified:
94 if col.access_mode == ColumnAccessMode.output:
95 classified[container][
"outputs"].append(col)
97 classified[container][
"inputs"].append(col)
135 """Extract flat numpy buffers from an awkward array.
137 Returns a dict mapping column name -> numpy array, covering all container
138 offsets, nested-vector offsets, and input data columns. Output column
139 buffers are not included (allocate_outputs handles those).
144 An ak.Array (real or zero-length after typetracer conversion).
146 Output of classify_columns or resolve_optional_columns.
149 num_events = int(ak.num(events, axis=0))
151 for container_name, info
in classified.items():
152 input_cols = info[
"inputs"]
156 buffers[container_name] = np.array(
157 [0, num_events], dtype=np.uint64
163 sorted_cols = sorted(input_cols, key=
lambda c: c.offset_name)
165 for offset_name, cols_iter
in itertools.groupby(
166 sorted_cols, key=
lambda c: c.offset_name
168 cols = list(cols_iter)
169 unzipped = {col.name: events[col.name]
for col
in cols}
170 zipped = ak.zip(unzipped)
173 form, length, raw_buffers = ak.to_buffers(
174 zipped, form_key=f
"{offset_name}{{id}}"
177 if isinstance(form, ak.forms.RecordForm):
182 buffers[container_name] = np.array(
183 [0, length], dtype=np.uint64
186 buffers[col.name] = ak.to_numpy(events[col.name])
187 elif isinstance(form, ak.forms.ListOffsetForm):
191 key
for key
in raw_buffers
if key.endswith(
"-offsets")
193 buffers[container_name] = np.asarray(
194 raw_buffers[offset_key]
199 for field
in inner.fields:
200 buffers[field] = np.asarray(
201 raw_buffers[f
"{inner.content(field).form_key}-data"]
205 f
"Cannot handle form {type(form)} for "
206 f
"container {container_name}"
215 """Allocate zero-filled numpy arrays for each output column.
217 Sizes each output array using ``offsets[-1]`` of the referenced offset
218 buffer. Arrays are added into ``buffer_dict`` in-place and also returned.
223 Output of ``classify_columns`` or ``resolve_optional_columns``.
225 Dict of column name -> numpy array, as returned by ``extract_buffers``.
226 Modified in-place to include the newly allocated output arrays.
231 Mapping of output column name -> zero-filled numpy array (same objects
232 also inserted into ``buffer_dict``).
235 for _container_name, info
in classified.items():
236 for col
in info[
"outputs"]:
237 offset_data = buffer_dict.get(col.offset_name)
238 if offset_data
is None:
240 f
"Cannot find offset buffer '{col.offset_name}' "
241 f
"needed for output column '{col.name}'"
243 raise RuntimeError(msg)
244 size = int(offset_data[-1])
245 arr = np.zeros(size, dtype=col.dtype)
246 output_buffers[col.name] = arr
247 buffer_dict[col.name] = arr
248 return output_buffers
252 """Build an awkward array from output column buffers.
257 Output of ``classify_columns`` or ``resolve_optional_columns``.
259 Dict of column name -> numpy array, containing both offset buffers and
260 the output arrays populated by ``allocate_outputs`` and ``call()``.
262 Number of events (outer axis length of the returned array).
267 Record array with one field per output column, each a variable-length
268 list of per-particle values (i.e. ``var * dtype``).
276 for _container_name, info
in classified.items():
277 for col
in info[
"outputs"]:
278 node_offset = f
"node{2 * node_index}"
279 node_data = f
"node{2 * node_index + 1}"
282 form_fields.append(col.name)
283 form_contents.append(
285 "class":
"ListOffsetArray",
288 "class":
"NumpyArray",
289 "primitive": col.dtype,
290 "form_key": node_data,
292 "form_key": node_offset,
296 out_buffers[f
"{node_data}-data"] = buffer_dict[col.name]
297 out_buffers[f
"{node_offset}-offsets"] = buffer_dict[col.offset_name]
300 "class":
"RecordArray",
301 "fields": form_fields,
302 "contents": form_contents,
306 return ak.from_buffers(form, num_events, out_buffers)