ATLAS Offline Software
Loading...
Searching...
No Matches
F150KernelTesterAlg.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3*/
4
8
9#include <iostream>
10#include <fstream> // Required for std::ofstream
11
13{
14 std::string F150KernelTesterAlg::get_cu_name(const std::string& kernel_name, int cu) {
15 std::string full_cu_name = kernel_name + ":{" + kernel_name + "_" + std::to_string(cu) + "}";
16 ATH_MSG_DEBUG("LOADING " + full_cu_name);
17 return full_cu_name;
18 }
19
20 void F150KernelTesterAlg::dumpHexData(std::span<const uint64_t> data, const std::string& dataDescriptor, const EventContext &ctx) const {
21
22 if(!m_outputTextFile) return;
23 auto withEvt = [&](const std::string& fname) {
24 const auto evt = ctx.eventID().event_number(); // get current event number
25 const auto dot = fname.rfind('.');
26 if (dot == std::string::npos) {
27 return fname + "_" + std::to_string(evt);
28 }
29 return fname.substr(0, dot) + "_" + std::to_string(evt) + fname.substr(dot);
30 };
31
32
33 ATH_MSG_DEBUG("STARTING " << dataDescriptor << " words:");
34 std::ofstream outputFile(withEvt(dataDescriptor));
35
36 for (uint64_t d : data) {
37 outputFile << std::hex << std::setw(16) << std::setfill('0') << d << '\n';
38 }
39
40 // Write different data types
41 outputFile.close();
42 }
43
44 void F150KernelTesterAlg::dumpHexData(std::span<const uint32_t> data, const std::string& dataDescriptor, const EventContext &ctx) const {
45
46 if(!m_outputTextFile) return;
47 auto withEvt = [&](const std::string& fname) {
48 const auto evt = ctx.eventID().event_number(); // get current event number
49 const auto dot = fname.rfind('.');
50 if (dot == std::string::npos) {
51 return fname + "_" + std::to_string(evt);
52 }
53 return fname.substr(0, dot) + "_" + std::to_string(evt) + fname.substr(dot);
54 };
55
56
57 ATH_MSG_DEBUG("STARTING " << dataDescriptor << " words:");
58 std::ofstream outputFile(withEvt(dataDescriptor));
59
60 for (uint64_t d : data) {
61 outputFile << std::hex << std::setw(8) << std::setfill('0') << d << '\n';
62 }
63
64 // Write different data types
65 outputFile.close();
66 }
67
68
70 {
71 ATH_MSG_INFO("Running on the FPGA accelerator");
72 ATH_MSG_INFO("Testing Slicing Engine: " + m_runSE);
73 ATH_MSG_INFO("Testing Inside Out: " + m_runIO);
74 ATH_MSG_INFO("Testing Inside Out on Slicing Engine Output: " + m_runIOOnSE);
75
76 ATH_CHECK(m_chronoSvc.retrieve());
77
78 {
79 Athena::Chrono chrono("Platform and device initialize", m_chronoSvc.get());
81 }
82
83 {
84 Athena::Chrono chrono("CL::loadProgram", m_chronoSvc.get());
85 ATH_MSG_INFO("Loading Program: " + m_xclbin);
87 }
88
89 cl_int err = CL_SUCCESS;
90
91 int cu = 1;
92
93
94 // Pixel clustering
95 m_pixelClusteringKernel = cl::Kernel(m_program, get_cu_name(m_pixelClusterKernelName, cu).c_str(), &err);
96
97 // Strip clustering
98 m_stripClusteringKernel = cl::Kernel(m_program, get_cu_name(m_stripClusterKernelName, cu).c_str(), &err);
99
100 // Strip L2G
101 m_stripL2GKernel = cl::Kernel(m_program, get_cu_name(m_stripL2GKernelName, cu).c_str(), &err);
102
103 // EDM prep
104 m_pixelEdmPrepKernel = cl::Kernel(m_program, get_cu_name(m_pixelEdmKernelName, cu).c_str(), &err);
105 m_stripEdmPrepKernel = cl::Kernel(m_program, get_cu_name(m_stripEdmKernelName, cu).c_str(), &err);
106
107 // Slicing
108 m_slicingEngineInput = cl::Kernel(m_program, get_cu_name(m_slicingEngineInputName, cu).c_str(), &err);
109 m_slicingEngineOutput = cl::Kernel(m_program, get_cu_name(m_slicingEngineOutputName, cu).c_str(), &err);
110
111 // inside out
112 m_insideOutInput = cl::Kernel(m_program, get_cu_name(m_insideOutInputName, cu).c_str(), &err);
113 m_insideOutOutput = cl::Kernel(m_program, get_cu_name(m_insideOutOutputName, cu).c_str(), &err);
114
115
116 m_queue = cl::CommandQueue(m_context, m_accelerator, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
117
118 if (err != CL_SUCCESS) {
119 return StatusCode::FAILURE;
120 }
121
122 ATH_CHECK(m_xaodClusterMaker.retrieve());
123 ATH_CHECK(m_testVectorTool.retrieve());
126
127 // Initialize track sim keys
128 ATH_CHECK(m_FPGAHitKey.initialize());
129 ATH_CHECK(m_FPGASlicedHitKey.initialize());
130 ATH_CHECK(m_FPGATrackKey.initialize());
131
132 ATH_CHECK(m_FPGATrackOutput.initialize());
133
134 // Only needed if we are running the full F150
135 ATH_CHECK(m_FPGAStripRDO.initialize());
136 ATH_CHECK(m_FPGAPixelRDO.initialize());
141
142 return StatusCode::SUCCESS;
143 }
144
// Per-event driver. Converts the simulated hits/tracks into FPGA data words,
// dumps them through dumpHexData, allocates the OpenCL buffers and then runs
// whichever kernel chains are enabled (m_runSE, m_runIO, m_runIOOnSE,
// m_runFull150), accumulating kernel times from the OpenCL profiling events.
// Returns StatusCode::SUCCESS unless an ATH_CHECK fails or, in the
// m_runFull150 path, the footer word is not found in the pixel cluster output.
//
// NOTE(review): outTrackCollection, hitCollectionHandle, outhitCollectionHandle,
// FPGATrackOutput, FPGAPixelOutput and FPGAStripOutput are used below without a
// visible declaration in this listing — presumably SG read/write handles built
// from the corresponding keys; confirm against the repository version.
145 StatusCode F150KernelTesterAlg::execute(const EventContext &ctx) const
146 {
147 ATH_MSG_DEBUG("Executing F150KernelTesterAlg");
148
149
151 // if not running the full IO, use the simulation to write the output to storegate
152 if(!m_runIOOnSE && !m_runIO && !m_runFull150)
153 {
155 auto outputVec = std::make_unique<std::vector<uint64_t>>();
156
157 ATH_CHECK(m_FPGADataFormatTool->convertFPGATracksToFPGADataFormat(outTrackCollection.cptr(), *outputVec, ctx));
158 // Now record the filled vector
159 ATH_CHECK(FPGATrackOutput.record(std::move(outputVec)));
160 }
161
162 // Prepare the inputs for testing
163 ATH_MSG_DEBUG("Accessing SE In data.");
164 std::vector<uint64_t> pixelDataIN;
165 std::vector<uint64_t> stripDataIN;
 // The two boolean arguments differ between the calls: (true, false) fills
 // pixelDataIN, (false, true) fills stripDataIN.
167 ATH_CHECK(m_FPGADataFormatTool->convertFPGAHitsToFPGADataFormat(hitCollectionHandle.cptr(), true, false, pixelDataIN, ctx));
168 ATH_CHECK(m_FPGADataFormatTool->convertFPGAHitsToFPGADataFormat(hitCollectionHandle.cptr(), false, true, stripDataIN, ctx));
169
 // Zero-pad the pixel words up to a multiple of padLength. inputIOLength keeps
 // the word count from before the padding.
170 int padLength = 8;
171 int inputIOLength = pixelDataIN.size();
172 auto remainder = inputIOLength % padLength;
173 if (remainder != 0) {
174 size_t to_add = padLength - remainder;
175 pixelDataIN.insert(pixelDataIN.end(), to_add, 0); // append zeros
176 }
177
178 dumpHexData(pixelDataIN, "FPGATrackSim_slicingIn_pixel.txt", ctx);
179 dumpHexData(stripDataIN, "FPGATrackSim_slicingIn_strip.txt", ctx);
180
181 ATH_MSG_DEBUG("Accessing SE Out data.");
182 std::vector<uint64_t> dataPixelOut;
183 std::vector<uint64_t> dataStripOut;
185 ATH_CHECK(m_FPGADataFormatTool->convertFPGASliceToFPGADataFormat(outhitCollectionHandle.cptr(), true, false, dataPixelOut, ctx));
186 ATH_CHECK(m_FPGADataFormatTool->convertFPGASliceToFPGADataFormat(outhitCollectionHandle.cptr(), false, true, dataStripOut, ctx));
187 dumpHexData(dataPixelOut, "FPGATrackSim_slicingOut_pixel.txt", ctx);
188 dumpHexData(dataStripOut, "FPGATrackSim_slicingOut_strip.txt", ctx);
189
 // Track words produced from outTrackCollection, dumped as the inside-out reference.
190 ATH_MSG_DEBUG("Accessing SE Out data.");
191 std::vector<uint64_t> dataInsideOut;
192 ATH_CHECK(m_FPGADataFormatTool->convertFPGATracksToFPGADataFormat(outTrackCollection.cptr(), dataInsideOut, ctx));
193 dumpHexData(dataInsideOut, "FPGATrackSim_insideOut.txt", ctx);
194
195
 // NOTE(review): err is passed to every cl::Buffer constructor below and is
 // overwritten each time; no visible line inspects it afterwards.
196 cl_int err = CL_SUCCESS;
197
198 // increment the event if there is data in this event
 // NOTE(review): pixelDataIN was padded to a multiple of 8 above, so every
 // non-empty input already has size() > 6 — confirm whether the unpadded
 // inputIOLength was meant here.
199 if(pixelDataIN.size() > 6) m_numEvents++;
200
201 // initialize buffers
202 m_pixelClusterInputBuffer = cl::Buffer(m_context, CL_MEM_READ_ONLY, EFTrackingTransient::PIXEL_CONTAINER_INPUT_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
203 m_stripClusterInputBuffer = cl::Buffer(m_context, CL_MEM_READ_ONLY, EFTrackingTransient::STRIP_CONTAINER_INPUT_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
204
205 // Clustering
206 m_pixelClusterOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
207 m_stripClusterOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
208 m_pixelClusterEDMOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE,EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
209 m_stripClusterEDMOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
210
211 // L2G
212 m_stripL2GInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
213 m_stripL2GEDMInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
214
215 m_stripL2GOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
216 m_stripL2GEDMOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
217
218 // EDMPrep
219 m_edmPixelInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
220 m_edmStripInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
221 m_edmPixelOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE * sizeof(uint32_t), nullptr, &err);
222 m_edmStripOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE * sizeof(uint32_t), nullptr, &err);
223
224
 // Slicing-engine input: sized for a full pixel block when its content comes
 // from the clustering output (m_runFull150), otherwise sized to the padded
 // pixelDataIN that is written into it.
225 if(m_runFull150) m_slicingEngineInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
226 else m_slicingEngineInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, pixelDataIN.size() * sizeof(uint64_t), nullptr, &err);
227 m_slicingEngineOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
228
 // Inside-out input: sized to TRACK_CONTAINER_BUF_SIZE words when it receives a
 // copy of the slicing-engine output buffer, otherwise sized to dataPixelOut.
229 if(m_runIOOnSE || m_runFull150) m_insideOutInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
230 else m_insideOutInputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, dataPixelOut.size() * sizeof(uint64_t), nullptr, &err);
231 m_insideOutOutputBuffer = cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), nullptr, &err);
232
233
 // --- Slicing engine alone: pixelDataIN -> SE kernels -> HW_slicingOut_pixel dump ---
234 if (m_runSE) {
235 // Events (write → kSEInput → kSEOutput → read)
236 cl::Event evtSEWriteIn;
237 cl::Event evtSEKInputDone;
238 cl::Event evtSEKOutputDone;
239 cl::Event evtSEReadOut;
240
 // Argument 2 is given the word count from before the zero padding.
241 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
242 m_slicingEngineInput.setArg(2, static_cast<unsigned long long>(inputIOLength));
243 ATH_MSG_DEBUG("Setting NWords:" << static_cast<unsigned long long>(inputIOLength)<<" with size: "<<pixelDataIN.size());
244
245 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
246
247 ATH_MSG_DEBUG("Transferring SE data");
248 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixelDataIN.size() * sizeof(uint64_t), pixelDataIN.data(), nullptr, &evtSEWriteIn);
249 m_queue.finish();
250
251 // Execute
252 ATH_MSG_DEBUG("Executing SE Kernel");
253 std::vector<cl::Event> waitAfterSEWrite{evtSEWriteIn};
254 m_queue.enqueueTask(m_slicingEngineInput, &waitAfterSEWrite, &evtSEKInputDone);
255 m_queue.finish();
256 ATH_MSG_DEBUG("Executing SE output Kernel");
257 m_queue.enqueueTask(m_slicingEngineOutput, nullptr, &evtSEKOutputDone);
258 m_queue.finish();
259
260 // Read
261 ATH_MSG_DEBUG("Reading output data from kernel");
262 std::vector<uint64_t> out_data(EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE, 0);
263 std::vector<cl::Event> waitForSERead{evtSEKOutputDone};
264 m_queue.enqueueReadBuffer(m_slicingEngineOutputBuffer, /*blocking*/ CL_FALSE, 0, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t) ,out_data.data(),&waitForSERead, &evtSEReadOut);
265
266 // Required sync: the read above is enqueued non-blocking (CL_FALSE), so wait for it before using out_data
267 cl::Event::waitForEvents({evtSEReadOut});
268
269 dumpHexData(out_data, "HW_slicingOut_pixel.txt", ctx);
270
 // Add the span from the start of the SE input kernel to the end of the SE
 // output kernel, taken from the OpenCL profiling timestamps.
271 m_SE_kernelTime += evtSEKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtSEKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
272
273 }
 // --- Inside-out alone: dataPixelOut -> IO kernels -> recorded FPGATrackOutput ---
274 if (m_runIO)
275 {
276 cl::Event evtWriteIn;
277 cl::Event evtKInputDone;
278 cl::Event evtKOutputDone;
279 cl::Event evtReadOut;
280
281 ATH_MSG_DEBUG("Setting IO args");
282 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
283 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
284
285 ATH_MSG_DEBUG("Loading input data to IO input kernel");
286 m_queue.enqueueWriteBuffer(m_insideOutInputBuffer, CL_TRUE, 0, dataPixelOut.size() * sizeof(uint64_t), dataPixelOut.data(), nullptr, &evtWriteIn);
287 m_queue.finish();
288 // Execute
289 ATH_MSG_DEBUG("Executing IO Kernel");
290 std::vector<cl::Event> waitAfterWrite{evtWriteIn};
291 m_queue.enqueueTask(m_insideOutInput, &waitAfterWrite, &evtKInputDone);
292 m_queue.enqueueTask(m_insideOutOutput, nullptr, &evtKOutputDone);
293 m_queue.finish();
294
295 // Read
296 ATH_MSG_DEBUG("Reading output data from kernel");
297
298 // output handles
 // Record a zero-filled vector first, then let the read below fill its storage.
300 ATH_CHECK(FPGATrackOutput.record(std::make_unique<std::vector<uint64_t> >(EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE, 0)));
301
302 std::vector<cl::Event> waitForRead{evtKOutputDone};
303 m_queue.enqueueReadBuffer( m_insideOutOutputBuffer, CL_FALSE, 0, sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &waitForRead, &evtReadOut);
304
305 // Ensure completion: the read above is non-blocking (CL_FALSE), so this wait is required
306 cl::Event::waitForEvents({evtReadOut});
307 dumpHexData((*FPGATrackOutput), "HW_insideOut.txt", ctx);
308
309 m_IO_kernelTime += evtKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
310 }
311
 // --- Inside-out on slicing-engine output: pixelDataIN -> SE -> device copy -> IO ---
312 if (m_runIOOnSE)
313 {
314 cl::Event evtSEWriteIn;
315 cl::Event evtSEKInputDone;
316 cl::Event evtSEKOutputDone;
317
318 cl::Event evtBufferTransfer;
319
320
321 cl::Event evtKInputDone;
322 cl::Event evtKOutputDone;
323 cl::Event evtReadOut;
324
325 ATH_MSG_DEBUG("Allocating SE buffers");
326 const size_t pixel_size_bytesIN = pixelDataIN.size() * sizeof(uint64_t);
327
 // NOTE(review): argument 2 is the padded pixelDataIN.size() here, while the
 // m_runSE branch passes the unpadded inputIOLength — confirm which is intended.
328 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
329 m_slicingEngineInput.setArg(2, static_cast<unsigned long long>(pixelDataIN.size()));
330
331 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
332
333 ATH_MSG_DEBUG("Setting IO args");
334 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
335 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
336 m_queue.finish();
337
338 ATH_MSG_DEBUG("Transferring SE data");
339 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixel_size_bytesIN, pixelDataIN.data(), nullptr, &evtSEWriteIn);
340 m_queue.finish();
341 // Execute
342 ATH_MSG_DEBUG("Executing SE Kernel");
343 std::vector<cl::Event> waitAfterSEWrite{evtSEWriteIn};
344 m_queue.enqueueTask(m_slicingEngineInput, &waitAfterSEWrite, &evtSEKInputDone);
345 m_queue.enqueueTask(m_slicingEngineOutput, nullptr, &evtSEKOutputDone);
346 m_queue.finish();
347 // Execute
348 ATH_MSG_DEBUG("Executing IO Kernel");
349 std::vector<cl::Event> waitAfterSE{evtSEKOutputDone};
350
 // Copy the slicing-engine output into the inside-out input buffer.
351 m_queue.enqueueCopyBuffer(m_slicingEngineOutputBuffer, m_insideOutInputBuffer, 0, 0, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), &waitAfterSE, &evtBufferTransfer);
352
353 std::vector<cl::Event> waitAfterTransfer{evtBufferTransfer};
354 m_queue.enqueueTask(m_insideOutInput, &waitAfterTransfer, &evtKInputDone);
355 m_queue.enqueueTask(m_insideOutOutput, NULL, &evtKOutputDone);
356 m_queue.finish();
357 // Read
358 ATH_MSG_DEBUG("Reading output data from kernel");
359 std::vector<cl::Event> waitForRead{evtKOutputDone};
360
361 // output handles
363 ATH_CHECK(FPGATrackOutput.record(std::make_unique<std::vector<uint64_t> >(EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE, 0)));
364
365 m_queue.enqueueReadBuffer( m_insideOutOutputBuffer, CL_FALSE, 0, sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &waitForRead, &evtReadOut);
366
367 // Ensure completion: the read above is non-blocking (CL_FALSE), so this wait is required
368 cl::Event::waitForEvents({evtReadOut});
369 dumpHexData((*FPGATrackOutput), "HW_insideOut.txt", ctx);
370
371 m_SE_kernelTime += evtSEKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtSEKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
372 m_IO_kernelTime += evtKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
373 }
374
 // --- Full chain: clustering -> strip L2G -> EDM prep, then SE and IO on the pixel clusters ---
375 if(m_runFull150)
376 {
377 // === INPUT FETCH ===
378 auto pixelInput = SG::get(m_FPGAPixelRDO, ctx);
379 auto stripInput = SG::get(m_FPGAStripRDO, ctx);
380
381 const int* pixelInputSize{nullptr}, *stripInputSize{nullptr};
382 ATH_CHECK(SG::get(pixelInputSize, m_FPGAPixelRDOSize, ctx));
383 ATH_CHECK(SG::get(stripInputSize, m_FPGAStripRDOSize, ctx));
384
385 // === KERNEL ARG SETUP ===
386 m_pixelClusteringKernel.setArg(0, m_pixelClusterInputBuffer);
387 m_pixelClusteringKernel.setArg(1, m_pixelClusterOutputBuffer);
388 m_pixelClusteringKernel.setArg(2, m_pixelClusterEDMOutputBuffer);
389
390 m_stripClusteringKernel.setArg(0, m_stripClusterInputBuffer);
391 m_stripClusteringKernel.setArg(1, m_stripClusterOutputBuffer);
392 m_stripClusteringKernel.setArg(2, m_stripClusterEDMOutputBuffer);
393 m_stripClusteringKernel.setArg(3, static_cast<unsigned int>(*stripInputSize));
394
395 m_stripL2GKernel.setArg(0, m_stripL2GInputBuffer);
396 m_stripL2GKernel.setArg(1, m_stripL2GEDMInputBuffer);
397 m_stripL2GKernel.setArg(2, m_stripL2GOutputBuffer);
398 m_stripL2GKernel.setArg(3, m_stripL2GEDMOutputBuffer);
399
400 m_pixelEdmPrepKernel.setArg(0, m_edmPixelInputBuffer);
401 m_pixelEdmPrepKernel.setArg(1, m_edmPixelOutputBuffer);
402 m_stripEdmPrepKernel.setArg(0, m_edmStripInputBuffer);
403 m_stripEdmPrepKernel.setArg(1, m_edmStripOutputBuffer);
404
405 // === HOST->DEVICE INPUT WRITES ===
406 cl::Event evt_pixel_input_write, evt_strip_input_write;
407
408 m_queue.enqueueWriteBuffer(m_pixelClusterInputBuffer, CL_FALSE, 0, sizeof(uint64_t) * (*pixelInput).size(), (*pixelInput).data(), nullptr, &evt_pixel_input_write);
409 m_queue.enqueueWriteBuffer(m_stripClusterInputBuffer, CL_FALSE, 0, sizeof(uint64_t) * (*stripInput).size(), (*stripInput).data(), nullptr, &evt_strip_input_write);
410
411 std::vector<cl::Event> evts_pixel_input_write{evt_pixel_input_write};
412 std::vector<cl::Event> evts_strip_input_write{evt_strip_input_write};
413
414 // === CLUSTERING KERNELS ===
 // From here to the EDM readbacks the ordering is expressed only through event
 // wait lists; there is no finish() call in between.
415 cl::Event evt_pixel_clustering_done, evt_strip_clustering_done;
416
417 m_queue.enqueueTask(m_pixelClusteringKernel, &evts_pixel_input_write, &evt_pixel_clustering_done);
418 m_queue.enqueueTask(m_stripClusteringKernel, &evts_strip_input_write, &evt_strip_clustering_done);
419
420 // === COPY STRIP CLUSTERS -> L2G INPUTS ===
421 cl::Event evt_strip_l2g_input_copy_clusters, evt_strip_l2g_input_copy_edm;
422 std::vector<cl::Event> evts_strip_clustering_done{evt_strip_clustering_done};
423
424 m_queue.enqueueCopyBuffer(m_stripClusterOutputBuffer, m_stripL2GInputBuffer, 0, 0, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), &evts_strip_clustering_done, &evt_strip_l2g_input_copy_clusters);
425 m_queue.enqueueCopyBuffer(m_stripClusterEDMOutputBuffer, m_stripL2GEDMInputBuffer, 0, 0, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), &evts_strip_clustering_done, &evt_strip_l2g_input_copy_edm);
426
427 std::vector<cl::Event> evts_strip_l2g_input_copies{evt_strip_l2g_input_copy_clusters, evt_strip_l2g_input_copy_edm};
428
429 // === STRIP L2G KERNEL ===
430 cl::Event evt_strip_l2g_done;
431 m_queue.enqueueTask(m_stripL2GKernel, &evts_strip_l2g_input_copies, &evt_strip_l2g_done);
432
433 // === PHASE: COPY EDM INPUTS (PIXEL FROM CLUSTERING, STRIP FROM L2G) ===
434 cl::Event evt_pixel_edm_input_copy, evt_strip_edm_input_copy;
435 std::vector<cl::Event> evts_pixel_clustering_done{evt_pixel_clustering_done};
436 std::vector<cl::Event> evts_strip_l2g_done{evt_strip_l2g_done};
437
438 m_queue.enqueueCopyBuffer(m_pixelClusterEDMOutputBuffer, m_edmPixelInputBuffer, 0, 0, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), &evts_pixel_clustering_done, &evt_pixel_edm_input_copy);
439 m_queue.enqueueCopyBuffer(m_stripL2GEDMOutputBuffer, m_edmStripInputBuffer, 0, 0, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), &evts_strip_l2g_done, &evt_strip_edm_input_copy);
440
441 // === PHASE: EDM PREP KERNELS ===
442 cl::Event evt_pixel_edm_prep_done, evt_strip_edm_prep_done;
443 std::vector<cl::Event> evts_pixel_edm_input_copied{evt_pixel_edm_input_copy};
444 std::vector<cl::Event> evts_strip_edm_input_copied{evt_strip_edm_input_copy};
445
446 m_queue.enqueueTask(m_pixelEdmPrepKernel, &evts_pixel_edm_input_copied, &evt_pixel_edm_prep_done);
447 m_queue.enqueueTask(m_stripEdmPrepKernel, &evts_strip_edm_input_copied, &evt_strip_edm_prep_done);
448
449 // === PHASE: EDM READBACKS ===
 // Zero-filled output vectors are recorded first; the reads below fill their storage.
451 ATH_CHECK(FPGAPixelOutput.record(std::make_unique<std::vector<uint32_t>>(EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE, 0)));
452
454 ATH_CHECK(FPGAStripOutput.record(std::make_unique<std::vector<uint32_t>>(EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE, 0)));
455
456 cl::Event evt_pixel_edm_read_done, evt_strip_edm_read_done;
457 std::vector<cl::Event> evts_pixel_edm_prep_done{evt_pixel_edm_prep_done};
458 std::vector<cl::Event> evts_strip_edm_prep_done{evt_strip_edm_prep_done};
459
460 m_queue.enqueueReadBuffer(m_edmPixelOutputBuffer, CL_FALSE, 0, sizeof(uint32_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &evts_pixel_edm_prep_done, &evt_pixel_edm_read_done);
461 m_queue.enqueueReadBuffer(m_edmStripOutputBuffer, CL_FALSE, 0, sizeof(uint32_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &evts_strip_edm_prep_done, &evt_strip_edm_read_done);
462
463 cl::Event::waitForEvents(std::vector<cl::Event>{evt_pixel_edm_read_done, evt_strip_edm_read_done});
464
465 // === PHASE: POST-EDM GUARDS ===
 // NOTE(review): presumably an input of exactly 6 words carries no hits, so the
 // first output word is forced to 0 — confirm.
466 if (pixelInput->size() == 6) (*FPGAPixelOutput)[0] = 0;
467 if (stripInput->size() == 6) (*FPGAStripOutput)[0] = 0;
468
469 m_queue.finish();
470 ATH_MSG_DEBUG("Done F100");
471
472 // === PHASE: PIXEL CLUSTER RAW READBACK FOR SE ===
473 cl::Event evt_pixel_cluster_output_read;
474 std::vector<uint64_t> pixelClusterOut(EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE, 0);
475
476 m_queue.enqueueReadBuffer(m_pixelClusterOutputBuffer, CL_FALSE, 0, sizeof(uint64_t) * pixelClusterOut.size(), pixelClusterOut.data(), &evts_pixel_clustering_done, &evt_pixel_cluster_output_read);
477 m_queue.finish();
478
479 // === PHASE: FOOTER SCAN & SANITIZE ===
 // Search backwards from the end of the buffer for the last word equal to
 // 0xcd00000000000000.
480 int nWords = static_cast<int>(pixelClusterOut.size()) - 1;
481 for (; nWords >= 0; nWords--)
482 {
483 if (pixelClusterOut[nWords] == 0xcd00000000000000) break;
484 }
485 // If footer not found, bail out safely
486 if (nWords < 0)
487 {
488 ATH_MSG_ERROR("Footer 0xcd00000000000000 not found in pixelClusterOut; cannot determine nWords"); return StatusCode::FAILURE;
489 }
490
491 // We have 3 footer words, account for that
492 if (nWords > 0) nWords += 3;
493
494 //clean the next 8 words to account for the input buffer read
495 for (int i = 0; i < 8 && (nWords + i) < static_cast<int>(pixelClusterOut.size()); i++)
496 {
497 pixelClusterOut[nWords + i] = 0;
498 }
499 ATH_MSG_DEBUG("Got NWords:" << nWords);
500
501 // === KERNEL ARG SETUP ===
502 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
503 m_slicingEngineInput.setArg(2, static_cast<unsigned long long>(nWords));
504 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
505
506 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
507 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
508
509 // === PHASE: WRITE BUFFER FOR SE ===
510 cl::Event evt_se_input_write;
511
512 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixelClusterOut.size() * sizeof(uint64_t), pixelClusterOut.data(), nullptr, &evt_se_input_write);
513 m_queue.finish();
514
515
516 // === PHASE: SE RUNNING ===
517 cl::Event evt_se_kernel_input_done, evt_se_kernel_output_done;
518 std::vector<cl::Event> evts_after_se_input_write{evt_se_input_write};
519
520 m_queue.enqueueTask(m_slicingEngineInput, &evts_after_se_input_write, &evt_se_kernel_input_done);
521 m_queue.enqueueTask(m_slicingEngineOutput, nullptr, &evt_se_kernel_output_done);
522 m_queue.finish();
523
524 // === PHASE: SE->IO COPY ===
525 cl::Event evt_io_input_transfer;
526 std::vector<cl::Event> evts_after_se_output_done{evt_se_kernel_output_done};
527
528 m_queue.enqueueCopyBuffer(m_slicingEngineOutputBuffer, m_insideOutInputBuffer, 0, 0, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), &evts_after_se_output_done, &evt_io_input_transfer);
529 m_queue.finish();
530
531 // === PHASE: IO KERNELS ===
532 cl::Event evt_io_kernel_input_done, evt_io_kernel_output_done;
533 std::vector<cl::Event> evts_after_io_input_transfer{evt_io_input_transfer};
534
535 m_queue.enqueueTask(m_insideOutInput, &evts_after_io_input_transfer, &evt_io_kernel_input_done);
536 m_queue.enqueueTask(m_insideOutOutput, nullptr, &evt_io_kernel_output_done);
537 m_queue.finish();
538
539 // === PHASE: IO READBACK ===
541 ATH_CHECK(FPGATrackOutput.record(std::make_unique<std::vector<uint64_t>>(EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE, 0)));
542
543 cl::Event evt_io_output_read;
544 std::vector<cl::Event> evts_before_insideout_read{evt_io_kernel_output_done};
545 m_queue.enqueueReadBuffer(m_insideOutOutputBuffer, CL_FALSE, 0, sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &evts_before_insideout_read, &evt_io_output_read);
546
 // The read above is non-blocking (CL_FALSE); wait for it before dumping the words.
547 cl::Event::waitForEvents(std::vector<cl::Event>{evt_io_output_read});
548 dumpHexData((*FPGATrackOutput), "HW_insideOut.txt", ctx);
549
550 m_queue.finish();
551
552 // === PHASE: PROFILING ACCUMULATION ===
553 m_SE_kernelTime += evt_se_kernel_output_done.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_se_kernel_input_done.getProfilingInfo<CL_PROFILING_COMMAND_START>();
554 m_IO_kernelTime += evt_io_kernel_output_done.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_io_kernel_input_done.getProfilingInfo<CL_PROFILING_COMMAND_START>();
555
556
557 }
558 return StatusCode::SUCCESS;
559 }
560
562 {
563 ATH_MSG_INFO("Finalizing F150KernelTesterAlg");
564 ATH_MSG_INFO("Number of events: " << m_numEvents);
565
566 if(m_numEvents > 0){
567 ATH_MSG_INFO("Inside out ave time: " << m_IO_kernelTime / m_numEvents / 1e6 << " ms");
568 ATH_MSG_INFO("Slicing Engine ave time: " << m_SE_kernelTime / m_numEvents / 1e6 << " ms");
569 }
570
571 return StatusCode::SUCCESS;
572 }
573}
#define ATH_CHECK
Evaluate an expression and check for errors.
#define ATH_MSG_ERROR(x)
#define ATH_MSG_INFO(x)
#define ATH_MSG_DEBUG(x)
Exception-safe IChronoSvc caller.
char data[hepevt_bytes_allocation_ATLAS]
Definition HepEvt.cxx:11
Exception-safe IChronoSvc caller.
Definition Chrono.h:50
Gaudi::Property< bool > m_outputTextFile
Whether to dump the data words to text files.
Gaudi::Property< std::string > m_slicingEngineInputName
Gaudi::Property< std::string > m_pixelEdmKernelName
Name of the FPGA kernel.
Gaudi::Property< bool > m_runIO
Whether to run inside out or not.
virtual StatusCode execute(const EventContext &ctx) const override final
Should be overridden by derived classes to perform meaningful work.
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAPixelOutput
ToolHandle< FPGADataFormatTool > m_FPGADataFormatTool
Tool for formatting FPGA data.
Gaudi::Property< std::string > m_slicingEngineOutputName
SG::ReadHandleKey< FPGATrackSimHitCollection > m_FPGASlicedHitKey
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAStripRDO
SG::WriteHandleKey< std::vector< uint64_t > > m_FPGATrackOutput
Gaudi::Property< std::string > m_insideOutOutputName
std::atomic< cl_ulong > m_IO_kernelTime
Time for kernel execution.
Gaudi::Property< std::string > m_stripL2GKernelName
Name of the strip L2G kernel.
Gaudi::Property< bool > m_runFull150
Whether to run the Full F150 include F100 on hy.
Gaudi::Property< std::string > m_xclbin
Path and name of the xclbin file.
ToolHandle< xAODClusterMaker > m_xaodClusterMaker
Tool for creating xAOD containers.
virtual StatusCode initialize() override final
Detect the OpenCL devices and prepare OpenCL context.
std::atomic< cl_ulong > m_SE_kernelTime
Sum for the average time of the kernel execution.
Gaudi::Property< std::string > m_pixelClusterKernelName
Name of the pixel clustering kernel.
Gaudi::Property< std::string > m_insideOutInputName
std::atomic< ulonglong > m_numEvents
Number of events for the average time of the kernel execution.
ToolHandle< TestVectorTool > m_testVectorTool
Tool for preparing test vectors.
SG::ReadHandleKey< FPGATrackSimTrackCollection > m_FPGATrackKey
SG::ReadHandleKey< FPGATrackSimHitCollection > m_FPGAHitKey
ToolHandle< OutputConversionTool > m_outputConversionTool
Gaudi::Property< bool > m_runIOOnSE
Whether to run inside out on the output of slicing engine.
std::string get_cu_name(const std::string &kernel_name, int cu)
Gaudi::Property< bool > m_runSE
Whether to run SE or not.
ServiceHandle< IChronoSvc > m_chronoSvc
Service for timing the algorithm.
void dumpHexData(std::span< const uint64_t > data, const std::string &dataDescriptor, const EventContext &ctx) const
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAStripOutput
Gaudi::Property< std::string > m_stripClusterKernelName
Name of the strip clustering kernel.
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAPixelRDO
Gaudi::Property< std::string > m_stripEdmKernelName
Name of the FPGA kernel.
StatusCode loadProgram(const std::string &xclbin)
Find the xclbin file and load it into the OpenCL program object.
cl::Program m_program
Program object containing the kernel.
virtual StatusCode initialize() override
Detect the OpenCL devices and prepare OpenCL context.
cl::Context m_context
Context object for the application.
cl::Device m_accelerator
Device object for the accelerator card.
const_pointer_type cptr()
Dereference the pointer.
StatusCode record(std::unique_ptr< T > data)
Record a const object to the store.
std::vector< std::string > remainder(const std::vector< std::string > &v1, const std::vector< std::string > &v2)
The class for encoding RDO to FPGA format.
constexpr unsigned long PIXEL_CONTAINER_INPUT_BUF_SIZE
constexpr uint32_t STRIP_CONTAINER_BUF_SIZE
constexpr unsigned long STRIP_CONTAINER_INPUT_BUF_SIZE
constexpr uint32_t STRIP_BLOCK_BUF_SIZE
constexpr uint32_t PIXEL_BLOCK_BUF_SIZE
constexpr unsigned long TRACK_CONTAINER_BUF_SIZE
constexpr uint32_t PIXEL_CONTAINER_BUF_SIZE
const T * get(const ReadCondHandleKey< T > &key, const EventContext &ctx)
Convenience function to retrieve an object given a ReadCondHandleKey.
Definition dot.py:1