ATLAS Offline Software
Loading...
Searching...
No Matches
F150KernelTesterAlg.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3*/
4
9
10#include <iostream>
11#include <fstream> // Required for std::ofstream
12
14{
15 std::string F150KernelTesterAlg::get_cu_name(const std::string& kernel_name, int cu) {
16 std::string full_cu_name = kernel_name + ":{" + kernel_name + "_" + std::to_string(cu) + "}";
17 ATH_MSG_DEBUG("LOADING " + full_cu_name);
18 return full_cu_name;
19 }
20
21 void F150KernelTesterAlg::dumpHexData(std::span<const uint64_t> data, const std::string& dataDescriptor, const EventContext &ctx) const {
22
23 if(!m_outputTextFile) return;
24 auto withEvt = [&](const std::string& fname) {
25 const auto evt = ctx.eventID().event_number(); // get current event number
26 const auto dot = fname.rfind('.');
27 if (dot == std::string::npos) {
28 return fname + "_" + std::to_string(evt);
29 }
30 return fname.substr(0, dot) + "_" + std::to_string(evt) + fname.substr(dot);
31 };
32
33
34 ATH_MSG_DEBUG("STARTING " << dataDescriptor << " words:");
35 std::ofstream outputFile(withEvt(dataDescriptor));
36
37 for (uint64_t d : data) {
38 outputFile << std::hex << std::setw(16) << std::setfill('0') << d << '\n';
39 }
40
41 // Write different data types
42 outputFile.close();
43 }
44
45 void F150KernelTesterAlg::dumpHexData(std::span<const uint32_t> data, const std::string& dataDescriptor, const EventContext &ctx) const {
46
47 if(!m_outputTextFile) return;
48 auto withEvt = [&](const std::string& fname) {
49 const auto evt = ctx.eventID().event_number(); // get current event number
50 const auto dot = fname.rfind('.');
51 if (dot == std::string::npos) {
52 return fname + "_" + std::to_string(evt);
53 }
54 return fname.substr(0, dot) + "_" + std::to_string(evt) + fname.substr(dot);
55 };
56
57
58 ATH_MSG_DEBUG("STARTING " << dataDescriptor << " words:");
59 std::ofstream outputFile(withEvt(dataDescriptor));
60
61 for (uint64_t d : data) {
62 outputFile << std::hex << std::setw(8) << std::setfill('0') << d << '\n';
63 }
64
65 // Write different data types
66 outputFile.close();
67 }
68
69
71 {
72 ATH_MSG_INFO("Running on the FPGA accelerator");
73 ATH_MSG_INFO("Testing Slicing Engine: " + m_runSE);
74 ATH_MSG_INFO("Testing Inside Out: " + m_runIO);
75 ATH_MSG_INFO("Testing Inside Out on Slicing Engine Output: " + m_runIOOnSE);
76
77 ATH_CHECK(m_chronoSvc.retrieve());
78
79 {
80 Athena::Chrono chrono("Platform and device initialize", m_chronoSvc.get());
82 }
83
84 {
85 Athena::Chrono chrono("CL::loadProgram", m_chronoSvc.get());
86 ATH_MSG_INFO("Loading Program: " + m_xclbin);
88 }
89
90 cl_int err = CL_SUCCESS;
91
92 int cu = 1;
93
94
95 // Pixel clustering
96 m_pixelClusteringKernel = cl::Kernel(m_program, get_cu_name(m_pixelClusterKernelName, cu).c_str(), &err);
97
98 // Strip clustering
99 m_stripClusteringKernel = cl::Kernel(m_program, get_cu_name(m_stripClusterKernelName, cu).c_str(), &err);
100
101 // Strip L2G
102 m_stripL2GKernel = cl::Kernel(m_program, get_cu_name(m_stripL2GKernelName, cu).c_str(), &err);
103
104 // EDM prep
105 m_pixelEdmPrepKernel = cl::Kernel(m_program, get_cu_name(m_pixelEdmKernelName, cu).c_str(), &err);
106 m_stripEdmPrepKernel = cl::Kernel(m_program, get_cu_name(m_stripEdmKernelName, cu).c_str(), &err);
107
108 // Slicing
109 m_slicingEngineInput = cl::Kernel(m_program, get_cu_name(m_slicingEngineInputName, cu).c_str(), &err);
110 m_slicingEngineOutput = cl::Kernel(m_program, get_cu_name(m_slicingEngineOutputName, cu).c_str(), &err);
111
112 // inside out
113 m_insideOutInput = cl::Kernel(m_program, get_cu_name(m_insideOutInputName, cu).c_str(), &err);
114 m_insideOutOutput = cl::Kernel(m_program, get_cu_name(m_insideOutOutputName, cu).c_str(), &err);
115
116
117 m_queue = cl::CommandQueue(m_context, m_accelerator, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
118
119 if (err != CL_SUCCESS) {
120 return StatusCode::FAILURE;
121 }
122
123 ATH_CHECK(m_xaodClusterMaker.retrieve());
124 ATH_CHECK(m_testVectorTool.retrieve());
127
128 // Initialize track sim keys
129 ATH_CHECK(m_FPGAHitKey.initialize());
130 ATH_CHECK(m_FPGASlicedHitKey.initialize());
131 ATH_CHECK(m_FPGATrackKey.initialize());
132
133 ATH_CHECK(m_FPGATrackOutput.initialize());
134
135 // Only needed if we are running the full F150
136 ATH_CHECK(m_FPGAStripRDO.initialize());
137 ATH_CHECK(m_FPGAPixelRDO.initialize());
142
143 return StatusCode::SUCCESS;
144 }
145
/// Per-event driver of the kernel tests.
///
/// Converts the simulated hits, sliced hits and tracks into FPGA data words,
/// optionally dumps them to text files (dumpHexData), allocates the OpenCL
/// buffers, and then runs every chain whose flag is set: slicing engine
/// (m_runSE), inside-out (m_runIO), inside-out on slicing-engine output
/// (m_runIOOnSE) and the full chain (m_runFull150). Kernel times are
/// accumulated in m_SE_kernelTime / m_IO_kernelTime.
///
/// @param ctx Event context, passed to the conversion tool, to SG::get and to dumpHexData.
/// @return StatusCode::SUCCESS; FAILURE through ATH_CHECK, or when the pixel
///         footer word is not found in the full chain.
///
/// NOTE(review): the declarations of outTrackCollection, hitCollectionHandle,
/// outhitCollectionHandle, FPGATrackOutput, FPGAPixelOutput and
/// FPGAStripOutput are not visible in this listing.
/// NOTE(review): `err` is handed to every cl::Buffer constructor below but is
/// never inspected in this function.
146 StatusCode F150KernelTesterAlg::execute(const EventContext &ctx) const
147 {
148 ATH_MSG_DEBUG("Executing F150KernelTesterAlg");
149
150
152 // If none of the inside-out chains runs on hardware, record the simulated tracks (converted to FPGA words) as the track output
153 if(!m_runIOOnSE && !m_runIO && !m_runFull150)
154 {
156 auto outputVec = std::make_unique<std::vector<uint64_t>>();
157
158 ATH_CHECK(m_FPGADataFormatTool->convertFPGATracksToFPGADataFormat(outTrackCollection.cptr(), *outputVec, ctx));
159 // Now record the filled vector
160 ATH_CHECK(FPGATrackOutput.record(std::move(outputVec)));
161 }
162
163 // Prepare the inputs for testing
164 ATH_MSG_DEBUG("Accessing SE In data.");
165 std::vector<uint64_t> pixelDataIN;
166 std::vector<uint64_t> stripDataIN;
    // Same hit collection converted twice; presumably the two booleans select pixel and strip hits respectively - TODO confirm against the tool
168 ATH_CHECK(m_FPGADataFormatTool->convertFPGAHitsToFPGADataFormat(hitCollectionHandle.cptr(), true, false, pixelDataIN, ctx));
169 ATH_CHECK(m_FPGADataFormatTool->convertFPGAHitsToFPGADataFormat(hitCollectionHandle.cptr(), false, true, stripDataIN, ctx));
170
    // Zero-pad the pixel words up to a multiple of padLength words.
    // inputIOLength keeps the size from BEFORE padding.
171 int padLength = 8;
172 int inputIOLength = pixelDataIN.size();
173 auto remainder = inputIOLength % padLength;
174 if (remainder != 0) {
175 size_t to_add = padLength - remainder;
176 pixelDataIN.insert(pixelDataIN.end(), to_add, 0); // append zeros
177 }
178
179 dumpHexData(pixelDataIN, "FPGATrackSim_slicingIn_pixel.txt", ctx);
180 dumpHexData(stripDataIN, "FPGATrackSim_slicingIn_strip.txt", ctx);
181
182 ATH_MSG_DEBUG("Accessing SE Out data.");
183 std::vector<uint64_t> dataPixelOut;
184 std::vector<uint64_t> dataStripOut;
186 ATH_CHECK(m_FPGADataFormatTool->convertFPGASliceToFPGADataFormat(outhitCollectionHandle.cptr(), true, false, dataPixelOut, ctx));
187 ATH_CHECK(m_FPGADataFormatTool->convertFPGASliceToFPGADataFormat(outhitCollectionHandle.cptr(), false, true, dataStripOut, ctx));
188 dumpHexData(dataPixelOut, "FPGATrackSim_slicingOut_pixel.txt", ctx);
189 dumpHexData(dataStripOut, "FPGATrackSim_slicingOut_strip.txt", ctx);
190
    // NOTE(review): the message below repeats "SE Out", but the data converted here are the simulated tracks (inside-out reference)
191 ATH_MSG_DEBUG("Accessing SE Out data.");
192 std::vector<uint64_t> dataInsideOut;
193 ATH_CHECK(m_FPGADataFormatTool->convertFPGATracksToFPGADataFormat(outTrackCollection.cptr(), dataInsideOut, ctx));
194 dumpHexData(dataInsideOut, "FPGATrackSim_insideOut.txt", ctx);
195
196
197 cl_int err = CL_SUCCESS;
198
199 // increment the event if there is data in this event (more than 6 pixel words; presumably 6 words is an empty event - TODO confirm)
200 if(pixelDataIN.size() > 6) m_numEvents++;
201
202 // initialize buffers (re-created on every call)
203 m_pixelClusterInputBuffer = cl::Buffer(m_context, CL_MEM_READ_ONLY, EFTrackingTransient::PIXEL_CONTAINER_INPUT_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
204 m_stripClusterInputBuffer = cl::Buffer(m_context, CL_MEM_READ_ONLY, EFTrackingTransient::STRIP_CONTAINER_INPUT_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
205
206 // Clustering
207 m_pixelClusterOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
208 m_stripClusterOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
209 m_pixelClusterEDMOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE,EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
210 m_stripClusterEDMOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
211
212 // L2G
213 m_stripL2GInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
214 m_stripL2GEDMInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
215
216 m_stripL2GOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
217 m_stripL2GEDMOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
218
219 // EDMPrep (outputs are 32-bit words, unlike the 64-bit buffers above)
220 m_edmPixelInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
221 m_edmStripInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
222 m_edmPixelOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE * sizeof(uint32_t), nullptr, &err);
223 m_edmStripOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE * sizeof(uint32_t), nullptr, &err);
224
225
    // Slicing-engine input: sized for a full pixel block in the full chain (it receives the clustering output), otherwise for the padded simulated pixel words
226 if(m_runFull150) m_slicingEngineInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
227 else m_slicingEngineInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, pixelDataIN.size() * sizeof(uint64_t), nullptr, &err);
228 m_slicingEngineOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
229
    // Inside-out input: sized to take a full copy of the slicing-engine output when chained, otherwise for the simulated sliced pixel words
230 if(m_runIOOnSE || m_runFull150) m_insideOutInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
231 else m_insideOutInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, dataPixelOut.size() * sizeof(uint64_t), nullptr, &err);
232 m_insideOutOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
233
234
    // ---- Slicing engine alone: simulated pixel words in, hardware output dumped to text only ----
235 if (m_runSE) {
236 // Events (write → kSEInput → kSEOutput → read)
237 cl::Event evtSEWriteIn;
238 cl::Event evtSEKInputDone;
239 cl::Event evtSEKOutputDone;
240 cl::Event evtSEReadOut;
241
    // Only argument 0 and 2 of the input kernel and argument 1 of the output kernel are set from the host here.
    // NOTE(review): the word count passed is the unpadded inputIOLength, while the m_runIOOnSE branch passes the padded pixelDataIN.size() - confirm which is intended
242 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
243 m_slicingEngineInput.setArg(2, static_cast<unsigned long long>(inputIOLength));
244 ATH_MSG_DEBUG("Setting NWords:" << static_cast<unsigned long long>(inputIOLength)<<" with size: "<<pixelDataIN.size());
245
246 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
247
248 ATH_MSG_DEBUG("Transferring SE data");
249 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixelDataIN.size() * sizeof(uint64_t), pixelDataIN.data(), nullptr, &evtSEWriteIn);
250 m_queue.finish();
251
252 // Execute
253 ATH_MSG_DEBUG("Executing SE Kernel");
254 std::vector<cl::Event> waitAfterSEWrite{evtSEWriteIn};
255 m_queue.enqueueTask(m_slicingEngineInput, &waitAfterSEWrite, &evtSEKInputDone);
256 m_queue.finish();
257 ATH_MSG_DEBUG("Executing SE output Kernel");
258 m_queue.enqueueTask(m_slicingEngineOutput, nullptr, &evtSEKOutputDone);
259 m_queue.finish();
260
261 // Read
262 ATH_MSG_DEBUG("Reading output data from kernel");
263 std::vector<uint64_t> out_data(EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE, 0);
264 std::vector<cl::Event> waitForSERead{evtSEKOutputDone};
265 m_queue.enqueueReadBuffer(m_slicingEngineOutputBuffer, /*blocking*/ CL_FALSE, 0, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t) ,out_data.data(),&waitForSERead, &evtSEReadOut);
266
267 // Required sync: the read above is enqueued non-blocking (CL_FALSE), so wait for it before using out_data
268 cl::Event::waitForEvents({evtSEReadOut});
269
270 dumpHexData(out_data, "HW_slicingOut_pixel.txt", ctx);
271
    // Kernel time = start of the input kernel to end of the output kernel (device profiling timestamps)
272 m_SE_kernelTime += evtSEKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtSEKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
273
274 }
    // ---- Inside-out alone: simulated sliced pixel words in, hardware tracks recorded as output ----
275 if (m_runIO)
276 {
277 cl::Event evtWriteIn;
278 cl::Event evtKInputDone;
279 cl::Event evtKOutputDone;
280 cl::Event evtReadOut;
281
282 ATH_MSG_DEBUG("Setting IO args");
283 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
284 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
285
286 ATH_MSG_DEBUG("Loading input data to IO input kernel");
    // Blocking write (CL_TRUE), unlike the other branches
287 m_queue.enqueueWriteBuffer(m_insideOutInputBuffer, CL_TRUE, 0, dataPixelOut.size() * sizeof(uint64_t), dataPixelOut.data(), nullptr, &evtWriteIn);
288 m_queue.finish();
289 // Execute
290 ATH_MSG_DEBUG("Executing IO Kernel");
291 std::vector<cl::Event> waitAfterWrite{evtWriteIn};
292 m_queue.enqueueTask(m_insideOutInput, &waitAfterWrite, &evtKInputDone);
293 m_queue.enqueueTask(m_insideOutOutput, nullptr, &evtKOutputDone);
294 m_queue.finish();
295
296 // Read
297 ATH_MSG_DEBUG("Reading output data from kernel");
298
299 // output handles: record a zero-filled vector first, then read the device buffer straight into it
301 ATH_CHECK(FPGATrackOutput.record(std::make_unique<std::vector<uint64_t> >(EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE, 0)));
302
303 std::vector<cl::Event> waitForRead{evtKOutputDone};
304 m_queue.enqueueReadBuffer( m_insideOutOutputBuffer, CL_FALSE, 0, sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &waitForRead, &evtReadOut);
305
306 // Ensure completion: the read above is non-blocking (CL_FALSE), so this wait is what makes the data safe to use
307 cl::Event::waitForEvents({evtReadOut});
308 dumpHexData((*FPGATrackOutput), "HW_insideOut.txt", ctx);
309
310 m_IO_kernelTime += evtKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
311 }
312
    // ---- Slicing engine followed by inside-out, chained through a device-side buffer copy ----
313 if (m_runIOOnSE)
314 {
315 cl::Event evtSEWriteIn;
316 cl::Event evtSEKInputDone;
317 cl::Event evtSEKOutputDone;
318
319 cl::Event evtBufferTransfer;
320
321
322 cl::Event evtKInputDone;
323 cl::Event evtKOutputDone;
324 cl::Event evtReadOut;
325
326 ATH_MSG_DEBUG("Allocating SE buffers");
327 const size_t pixel_size_bytesIN = pixelDataIN.size() * sizeof(uint64_t);
328
329 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
330 m_slicingEngineInput.setArg(2, static_cast<unsigned long long>(pixelDataIN.size()));
331
332 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
333
334 ATH_MSG_DEBUG("Setting IO args");
335 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
336 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
337 m_queue.finish();
338
339 ATH_MSG_DEBUG("Transferring SE data");
340 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixel_size_bytesIN, pixelDataIN.data(), nullptr, &evtSEWriteIn);
341 m_queue.finish();
342 // Execute
343 ATH_MSG_DEBUG("Executing SE Kernel");
344 std::vector<cl::Event> waitAfterSEWrite{evtSEWriteIn};
345 m_queue.enqueueTask(m_slicingEngineInput, &waitAfterSEWrite, &evtSEKInputDone);
346 m_queue.enqueueTask(m_slicingEngineOutput, nullptr, &evtSEKOutputDone);
347 m_queue.finish();
348 // Execute
349 ATH_MSG_DEBUG("Executing IO Kernel");
350 std::vector<cl::Event> waitAfterSE{evtSEKOutputDone};
351
    // Copy the whole slicing-engine output buffer into the inside-out input buffer
352 m_queue.enqueueCopyBuffer(m_slicingEngineOutputBuffer, m_insideOutInputBuffer, 0, 0, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), &waitAfterSE, &evtBufferTransfer);
353
354 std::vector<cl::Event> waitAfterTransfer{evtBufferTransfer};
355 m_queue.enqueueTask(m_insideOutInput, &waitAfterTransfer, &evtKInputDone);
356 m_queue.enqueueTask(m_insideOutOutput, NULL, &evtKOutputDone);
357 m_queue.finish();
358 // Read
359 ATH_MSG_DEBUG("Reading output data from kernel");
360 std::vector<cl::Event> waitForRead{evtKOutputDone};
361
362 // output handles: record a zero-filled vector first, then read the device buffer straight into it
364 ATH_CHECK(FPGATrackOutput.record(std::make_unique<std::vector<uint64_t> >(EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE, 0)));
365
366 m_queue.enqueueReadBuffer( m_insideOutOutputBuffer, CL_FALSE, 0, sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &waitForRead, &evtReadOut);
367
368 // Ensure completion: the read above is non-blocking (CL_FALSE), so this wait is what makes the data safe to use
369 cl::Event::waitForEvents({evtReadOut});
370 dumpHexData((*FPGATrackOutput), "HW_insideOut.txt", ctx);
371
372 m_SE_kernelTime += evtSEKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtSEKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
373 m_IO_kernelTime += evtKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
374 }
375
    // ---- Full chain: clustering -> strip L2G -> EDM prep, then slicing engine -> inside-out ----
376 if(m_runFull150)
377 {
378 // === INPUT FETCH ===
379 auto pixelInput = SG::get(m_FPGAPixelRDO, ctx);
380 auto stripInput = SG::get(m_FPGAStripRDO, ctx);
381
382 const int* pixelInputSize{nullptr}, *stripInputSize{nullptr};
383 ATH_CHECK(SG::get(pixelInputSize, m_FPGAPixelRDOSize, ctx));
384 ATH_CHECK(SG::get(stripInputSize, m_FPGAStripRDOSize, ctx));
385
386 // === KERNEL ARG SETUP ===
387 m_pixelClusteringKernel.setArg(0, m_pixelClusterInputBuffer);
388 m_pixelClusteringKernel.setArg(1, m_pixelClusterOutputBuffer);
389 m_pixelClusteringKernel.setArg(2, m_pixelClusterEDMOutputBuffer);
390
    // Only the strip clustering kernel receives an explicit input size; pixelInputSize is fetched above but not used in the visible code
391 m_stripClusteringKernel.setArg(0, m_stripClusterInputBuffer);
392 m_stripClusteringKernel.setArg(1, m_stripClusterOutputBuffer);
393 m_stripClusteringKernel.setArg(2, m_stripClusterEDMOutputBuffer);
394 m_stripClusteringKernel.setArg(3, static_cast<unsigned int>(*stripInputSize));
395
396 m_stripL2GKernel.setArg(0, m_stripL2GInputBuffer);
397 m_stripL2GKernel.setArg(1, m_stripL2GEDMInputBuffer);
398 m_stripL2GKernel.setArg(2, m_stripL2GOutputBuffer);
399 m_stripL2GKernel.setArg(3, m_stripL2GEDMOutputBuffer);
400
401 m_pixelEdmPrepKernel.setArg(0, m_edmPixelInputBuffer);
402 m_pixelEdmPrepKernel.setArg(1, m_edmPixelOutputBuffer);
403 m_stripEdmPrepKernel.setArg(0, m_edmStripInputBuffer);
404 m_stripEdmPrepKernel.setArg(1, m_edmStripOutputBuffer);
405
406 // === HOST->DEVICE INPUT WRITES ===
    // From here to the EDM readback nothing calls finish(); ordering is expressed through the event wait lists only
407 cl::Event evt_pixel_input_write, evt_strip_input_write;
408
409 m_queue.enqueueWriteBuffer(m_pixelClusterInputBuffer, CL_FALSE, 0, sizeof(uint64_t) * (*pixelInput).size(), (*pixelInput).data(), nullptr, &evt_pixel_input_write);
410 m_queue.enqueueWriteBuffer(m_stripClusterInputBuffer, CL_FALSE, 0, sizeof(uint64_t) * (*stripInput).size(), (*stripInput).data(), nullptr, &evt_strip_input_write);
411
412 std::vector<cl::Event> evts_pixel_input_write{evt_pixel_input_write};
413 std::vector<cl::Event> evts_strip_input_write{evt_strip_input_write};
414
415 // === CLUSTERING KERNELS ===
416 cl::Event evt_pixel_clustering_done, evt_strip_clustering_done;
417
418 m_queue.enqueueTask(m_pixelClusteringKernel, &evts_pixel_input_write, &evt_pixel_clustering_done);
419 m_queue.enqueueTask(m_stripClusteringKernel, &evts_strip_input_write, &evt_strip_clustering_done);
420
421 // === PHASE: COPY STRIP CLUSTERS -> L2G INPUTS ===
422 cl::Event evt_strip_l2g_input_copy_clusters, evt_strip_l2g_input_copy_edm;
423 std::vector<cl::Event> evts_strip_clustering_done{evt_strip_clustering_done};
424
425 m_queue.enqueueCopyBuffer(m_stripClusterOutputBuffer, m_stripL2GInputBuffer, 0, 0, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), &evts_strip_clustering_done, &evt_strip_l2g_input_copy_clusters);
426 m_queue.enqueueCopyBuffer(m_stripClusterEDMOutputBuffer, m_stripL2GEDMInputBuffer, 0, 0, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), &evts_strip_clustering_done, &evt_strip_l2g_input_copy_edm);
427
428 std::vector<cl::Event> evts_strip_l2g_input_copies{evt_strip_l2g_input_copy_clusters, evt_strip_l2g_input_copy_edm};
429
430 // === STRIP L2G KERNEL ===
431 cl::Event evt_strip_l2g_done;
432 m_queue.enqueueTask(m_stripL2GKernel, &evts_strip_l2g_input_copies, &evt_strip_l2g_done);
433
434 // === PHASE: COPY EDM INPUTS (PIXEL FROM CLUSTERING, STRIP FROM L2G) ===
435 cl::Event evt_pixel_edm_input_copy, evt_strip_edm_input_copy;
436 std::vector<cl::Event> evts_pixel_clustering_done{evt_pixel_clustering_done};
437 std::vector<cl::Event> evts_strip_l2g_done{evt_strip_l2g_done};
438
439 m_queue.enqueueCopyBuffer(m_pixelClusterEDMOutputBuffer, m_edmPixelInputBuffer, 0, 0, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), &evts_pixel_clustering_done, &evt_pixel_edm_input_copy);
440 m_queue.enqueueCopyBuffer(m_stripL2GEDMOutputBuffer, m_edmStripInputBuffer, 0, 0, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), &evts_strip_l2g_done, &evt_strip_edm_input_copy);
441
442 // === PHASE: EDM PREP KERNELS ===
443 cl::Event evt_pixel_edm_prep_done, evt_strip_edm_prep_done;
444 std::vector<cl::Event> evts_pixel_edm_input_copied{evt_pixel_edm_input_copy};
445 std::vector<cl::Event> evts_strip_edm_input_copied{evt_strip_edm_input_copy};
446
447 m_queue.enqueueTask(m_pixelEdmPrepKernel, &evts_pixel_edm_input_copied, &evt_pixel_edm_prep_done);
448 m_queue.enqueueTask(m_stripEdmPrepKernel, &evts_strip_edm_input_copied, &evt_strip_edm_prep_done);
449
450 // === PHASE: EDM READBACKS ===
    // Zero-filled output vectors are recorded first and then filled in place by the reads below
452 ATH_CHECK(FPGAPixelOutput.record(std::make_unique<std::vector<uint32_t>>(EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE, 0)));
453
455 ATH_CHECK(FPGAStripOutput.record(std::make_unique<std::vector<uint32_t>>(EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE, 0)));
456
457 cl::Event evt_pixel_edm_read_done, evt_strip_edm_read_done;
458 std::vector<cl::Event> evts_pixel_edm_prep_done{evt_pixel_edm_prep_done};
459 std::vector<cl::Event> evts_strip_edm_prep_done{evt_strip_edm_prep_done};
460
461 m_queue.enqueueReadBuffer(m_edmPixelOutputBuffer, CL_FALSE, 0, sizeof(uint32_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &evts_pixel_edm_prep_done, &evt_pixel_edm_read_done);
462 m_queue.enqueueReadBuffer(m_edmStripOutputBuffer, CL_FALSE, 0, sizeof(uint32_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &evts_strip_edm_prep_done, &evt_strip_edm_read_done);
463
464 cl::Event::waitForEvents(std::vector<cl::Event>{evt_pixel_edm_read_done, evt_strip_edm_read_done});
465
466 // === PHASE: POST-EDM GUARDS ===
    // An input of exactly 6 words forces the first output word to 0 (presumably a header/footer-only input with no clusters - TODO confirm)
467 if (pixelInput->size() == 6) (*FPGAPixelOutput)[0] = 0;
468 if (stripInput->size() == 6) (*FPGAStripOutput)[0] = 0;
469
470 m_queue.finish();
471 ATH_MSG_DEBUG("Done F100");
472
473 // === PHASE: PIXEL CLUSTER RAW READBACK FOR SE ===
    // The clustering output is brought back to the host so its length can be determined and the tail cleaned before it is sent to the slicing engine
474 cl::Event evt_pixel_cluster_output_read;
475 std::vector<uint64_t> pixelClusterOut(EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE, 0);
476
477 m_queue.enqueueReadBuffer(m_pixelClusterOutputBuffer, CL_FALSE, 0, sizeof(uint64_t) * pixelClusterOut.size(), pixelClusterOut.data(), &evts_pixel_clustering_done, &evt_pixel_cluster_output_read);
478 m_queue.finish();
479
480 // === PHASE: FOOTER SCAN & SANITIZE ===
    // Scan backwards for the last word equal to the footer marker; nWords ends as its index, or -1 if absent
481 int nWords = static_cast<int>(pixelClusterOut.size()) - 1;
482 for (; nWords >= 0; nWords--)
483 {
484 if (pixelClusterOut[nWords] == 0xcd00000000000000) break;
485 }
486 // If footer not found, bail out safely
487 if (nWords < 0)
488 {
489 ATH_MSG_ERROR("Footer 0xcd00000000000000 not found in pixelClusterOut; cannot determine nWords"); return StatusCode::FAILURE;
490 }
491
492 // We have 3 footer words, account for that (a footer found at index 0 leaves nWords at 0)
493 if (nWords > 0) nWords += 3;
494
495 //clean the next 8 words to account for the input buffer read
496 for (int i = 0; i < 8 && (nWords + i) < static_cast<int>(pixelClusterOut.size()); i++)
497 {
498 pixelClusterOut[nWords + i] = 0;
499 }
500 ATH_MSG_DEBUG("Got NWords:" << nWords);
501
502 // === KERNEL ARG SETUP ===
503 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
504 m_slicingEngineInput.setArg(2, static_cast<unsigned long long>(nWords));
505 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
506
507 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
508 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
509
510 // === PHASE: WRITE BUFFER FOR SE ===
    // The whole host vector is written, not just the first nWords words
511 cl::Event evt_se_input_write;
512
513 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixelClusterOut.size() * sizeof(uint64_t), pixelClusterOut.data(), nullptr, &evt_se_input_write);
514 m_queue.finish();
515
516
517 // === PHASE: SE RUNNING ===
518 cl::Event evt_se_kernel_input_done, evt_se_kernel_output_done;
519 std::vector<cl::Event> evts_after_se_input_write{evt_se_input_write};
520
521 m_queue.enqueueTask(m_slicingEngineInput, &evts_after_se_input_write, &evt_se_kernel_input_done);
522 m_queue.enqueueTask(m_slicingEngineOutput, nullptr, &evt_se_kernel_output_done);
523 m_queue.finish();
524
525 // === PHASE: SE->IO COPY ===
526 cl::Event evt_io_input_transfer;
527 std::vector<cl::Event> evts_after_se_output_done{evt_se_kernel_output_done};
528
529 m_queue.enqueueCopyBuffer(m_slicingEngineOutputBuffer, m_insideOutInputBuffer, 0, 0, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), &evts_after_se_output_done, &evt_io_input_transfer);
530 m_queue.finish();
531
532 // === PHASE: IO KERNELS ===
533 cl::Event evt_io_kernel_input_done, evt_io_kernel_output_done;
534 std::vector<cl::Event> evts_after_io_input_transfer{evt_io_input_transfer};
535
536 m_queue.enqueueTask(m_insideOutInput, &evts_after_io_input_transfer, &evt_io_kernel_input_done);
537 m_queue.enqueueTask(m_insideOutOutput, nullptr, &evt_io_kernel_output_done);
538 m_queue.finish();
539
540 // === PHASE: IO READBACK ===
542 ATH_CHECK(FPGATrackOutput.record(std::make_unique<std::vector<uint64_t>>(EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE, 0)));
543
544 cl::Event evt_io_output_read;
545 std::vector<cl::Event> evts_before_insideout_read{evt_io_kernel_output_done};
546 m_queue.enqueueReadBuffer(m_insideOutOutputBuffer, CL_FALSE, 0, sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &evts_before_insideout_read, &evt_io_output_read);
547
    // The read is non-blocking, so wait for it before the recorded vector is dumped
548 cl::Event::waitForEvents(std::vector<cl::Event>{evt_io_output_read});
549 dumpHexData((*FPGATrackOutput), "HW_insideOut.txt", ctx);
550
551 m_queue.finish();
552
553 // === PHASE: PROFILING ACCUMULATION ===
554 m_SE_kernelTime += evt_se_kernel_output_done.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_se_kernel_input_done.getProfilingInfo<CL_PROFILING_COMMAND_START>();
555 m_IO_kernelTime += evt_io_kernel_output_done.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_io_kernel_input_done.getProfilingInfo<CL_PROFILING_COMMAND_START>();
556
557
558 }
559 return StatusCode::SUCCESS;
560 }
561
563 {
564 ATH_MSG_INFO("Finalizing F150KernelTesterAlg");
565 ATH_MSG_INFO("Number of events: " << m_numEvents);
566
567 if(m_numEvents > 0){
568 ATH_MSG_INFO("Inside out ave time: " << m_IO_kernelTime / m_numEvents / 1e6 << " ms");
569 ATH_MSG_INFO("Slicing Engine ave time: " << m_SE_kernelTime / m_numEvents / 1e6 << " ms");
570 }
571
572 return StatusCode::SUCCESS;
573 }
574}
575
#define ATH_CHECK
Evaluate an expression and check for errors.
#define ATH_MSG_ERROR(x)
#define ATH_MSG_INFO(x)
#define ATH_MSG_DEBUG(x)
Exception-safe IChronoSvc caller.
char data[hepevt_bytes_allocation_ATLAS]
Definition HepEvt.cxx:11
Exception-safe IChronoSvc caller.
Definition Chrono.h:50
Gaudi::Property< bool > m_outputTextFile
Whether to dump the data words to text files.
Gaudi::Property< std::string > m_slicingEngineInputName
Gaudi::Property< std::string > m_pixelEdmKernelName
Name of the FPGA kernel.
Gaudi::Property< bool > m_runIO
Whether to run inside out or not.
virtual StatusCode execute(const EventContext &ctx) const override final
Should be overridden by derived classes to perform meaningful work.
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAPixelOutput
ToolHandle< FPGADataFormatTool > m_FPGADataFormatTool
Tool for formatting FPGA data.
Gaudi::Property< std::string > m_slicingEngineOutputName
SG::ReadHandleKey< FPGATrackSimHitCollection > m_FPGASlicedHitKey
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAStripRDO
SG::WriteHandleKey< std::vector< uint64_t > > m_FPGATrackOutput
Gaudi::Property< std::string > m_insideOutOutputName
std::atomic< cl_ulong > m_IO_kernelTime
Time for kernel execution.
Gaudi::Property< std::string > m_stripL2GKernelName
Name of the strip L2G kernel.
Gaudi::Property< bool > m_runFull150
Whether to run the Full F150 include F100 on hy.
Gaudi::Property< std::string > m_xclbin
Path and name of the xclbin file.
ToolHandle< xAODClusterMaker > m_xaodClusterMaker
Tool for creating xAOD containers.
virtual StatusCode initialize() override final
Detect the OpenCL devices and prepare OpenCL context.
std::atomic< cl_ulong > m_SE_kernelTime
Sum for the average time of the kernel execution.
Gaudi::Property< std::string > m_pixelClusterKernelName
Name of the pixel clustering kernel.
Gaudi::Property< std::string > m_insideOutInputName
std::atomic< ulonglong > m_numEvents
Number of events for the average time of the kernel execution.
ToolHandle< TestVectorTool > m_testVectorTool
Tool for preparing test vectors.
SG::ReadHandleKey< FPGATrackSimTrackCollection > m_FPGATrackKey
SG::ReadHandleKey< FPGATrackSimHitCollection > m_FPGAHitKey
ToolHandle< OutputConversionTool > m_outputConversionTool
Gaudi::Property< bool > m_runIOOnSE
Whether to run inside out on the output of slicing engine.
std::string get_cu_name(const std::string &kernel_name, int cu)
Gaudi::Property< bool > m_runSE
Whether to run SE or not.
ServiceHandle< IChronoSvc > m_chronoSvc
Service for timing the algorithm.
void dumpHexData(std::span< const uint64_t > data, const std::string &dataDescriptor, const EventContext &ctx) const
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAStripOutput
Gaudi::Property< std::string > m_stripClusterKernelName
Name of the strip clustering kernel.
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAPixelRDO
Gaudi::Property< std::string > m_stripEdmKernelName
Name of the FPGA kernel.
StatusCode loadProgram(const std::string &xclbin)
Find the xclbin file and load it into the OpenCL program object.
cl::Program m_program
Program object containing the kernel.
virtual StatusCode initialize() override
Detect the OpenCL devices and prepare OpenCL context.
cl::Context m_context
Context object for the application.
cl::Device m_accelerator
Device object for the accelerator card.
const_pointer_type cptr()
Dereference the pointer.
StatusCode record(std::unique_ptr< T > data)
Record a const object to the store.
std::vector< std::string > remainder(const std::vector< std::string > &v1, const std::vector< std::string > &v2)
The class for encoding RDO to FPGA format.
constexpr unsigned long PIXEL_CONTAINER_INPUT_BUF_SIZE
constexpr uint32_t STRIP_CONTAINER_BUF_SIZE
constexpr unsigned long STRIP_CONTAINER_INPUT_BUF_SIZE
constexpr uint32_t STRIP_BLOCK_BUF_SIZE
constexpr uint32_t PIXEL_BLOCK_BUF_SIZE
constexpr unsigned long TRACK_CONTAINER_BUF_SIZE
constexpr uint32_t PIXEL_CONTAINER_BUF_SIZE
const T * get(const ReadCondHandleKey< T > &key, const EventContext &ctx)
Convenience function to retrieve an object given a ReadCondHandleKey.
Definition dot.py:1