def extract_buffers(events, classified):
    """Flatten the input columns of an awkward array into numpy buffers.

    For every container in ``classified`` the result holds an offsets
    array stored under the container name and, for each input column, a
    data array stored under the column name. Buffers for output columns
    are not produced here (allocate_outputs handles those).

    Parameters
    ----------
    events:
        An ak.Array (real or zero-length after typetracer conversion).
    classified:
        Output of classify_columns or resolve_optional_columns. Each value
        is indexed with ``"inputs"`` to obtain column objects that expose
        ``name`` and ``offset_name``.

    Returns
    -------
    dict
        Mapping of container name / column name to a flat array.

    Raises
    ------
    RuntimeError
        If the zipped columns of an offset group are described by a form
        that is neither a RecordForm nor a ListOffsetForm.
    """

    def offset_of(col):
        # Shared key for both the sort and the groupby below.
        return col.offset_name

    def single_range(count):
        # Offsets describing one range [0, count], stored as uint64.
        return np.array([0, count], dtype=np.uint64)

    num_events = int(ak.num(events, axis=0))
    buffers = {}

    for container_name, info in classified.items():
        input_cols = info["inputs"]

        if not input_cols:
            # No input columns to read: fall back to offsets spanning
            # the event count.
            buffers[container_name] = single_range(num_events)
            continue

        # itertools.groupby only merges adjacent items, so the columns
        # have to be ordered by the grouping key first.
        ordered = list(input_cols)
        ordered.sort(key=offset_of)

        # NOTE(review): every group writes its offsets under
        # container_name, so with several offset groups only the last one
        # (in sorted order) survives -- confirm this is intended.
        for offset_name, group in itertools.groupby(ordered, key=offset_of):
            cols = list(group)
            zipped = ak.zip({col.name: events[col.name] for col in cols})

            # The "{id}" placeholder is left literal in the template so
            # that awkward fills in a node id for each form key.
            form, length, raw_buffers = ak.to_buffers(
                zipped, form_key=f"{offset_name}{{id}}"
            )

            if isinstance(form, ak.forms.RecordForm):
                # Top-level record: no list layer, so the offsets are
                # just [0, length] and each column is converted directly.
                buffers[container_name] = single_range(length)
                buffers.update(
                    {col.name: ak.to_numpy(events[col.name]) for col in cols}
                )
                continue

            if not isinstance(form, ak.forms.ListOffsetForm):
                raise RuntimeError(
                    f"Cannot handle form {type(form)} for "
                    f"container {container_name}"
                )

            # List of records: take the first buffer whose key ends in
            # "-offsets" and store it as uint64.
            offset_key = next(
                key for key in raw_buffers if key.endswith("-offsets")
            )
            offsets = np.asarray(raw_buffers[offset_key])
            buffers[container_name] = offsets.astype(np.uint64)

            # Each field's data buffer is looked up through the form key
            # of that field's content.
            record_form = form.content
            buffers.update(
                {
                    field: np.asarray(
                        raw_buffers[
                            f"{record_form.content(field).form_key}-data"
                        ]
                    )
                    for field in record_form.fields
                }
            )

    return buffers
212
213