Should be overridden by derived classes to perform meaningful work.
147 {
149
150
151 SG::ReadHandle<FPGATrackSimTrackCollection> outTrackCollection(
m_FPGATrackKey, ctx);
152
154 {
156 auto outputVec = std::make_unique<std::vector<uint64_t>>();
157
159
160 ATH_CHECK(FPGATrackOutput.record(std::move(outputVec)));
161 }
162
163
165 std::vector<uint64_t> pixelDataIN;
166 std::vector<uint64_t> stripDataIN;
167 SG::ReadHandle<FPGATrackSimHitCollection> hitCollectionHandle(
m_FPGAHitKey, ctx);
170
171 int padLength = 8;
172 int inputIOLength = pixelDataIN.size();
173 auto remainder = inputIOLength % padLength;
176 pixelDataIN.insert(pixelDataIN.end(), to_add, 0);
177 }
178
179 dumpHexData(pixelDataIN,
"FPGATrackSim_slicingIn_pixel.txt", ctx);
180 dumpHexData(stripDataIN,
"FPGATrackSim_slicingIn_strip.txt", ctx);
181
183 std::vector<uint64_t> dataPixelOut;
184 std::vector<uint64_t> dataStripOut;
185 SG::ReadHandle<FPGATrackSimHitCollection> outhitCollectionHandle(
m_FPGASlicedHitKey, ctx);
188 dumpHexData(dataPixelOut,
"FPGATrackSim_slicingOut_pixel.txt", ctx);
189 dumpHexData(dataStripOut,
"FPGATrackSim_slicingOut_strip.txt", ctx);
190
192 std::vector<uint64_t> dataInsideOut;
194 dumpHexData(dataInsideOut,
"FPGATrackSim_insideOut.txt", ctx);
195
196
197 cl_int
err = CL_SUCCESS;
198
199
201
202
205
206
211
212
215
218
219
224
225
227 else m_slicingEngineInputBuffer = cl::Buffer(
m_context, CL_MEM_READ_WRITE, pixelDataIN.size() *
sizeof(uint64_t),
nullptr, &err);
229
231 else m_insideOutInputBuffer = cl::Buffer(
m_context, CL_MEM_READ_WRITE, dataPixelOut.size() *
sizeof(uint64_t),
nullptr, &err);
233
234
236
237 cl::Event evtSEWriteIn;
238 cl::Event evtSEKInputDone;
239 cl::Event evtSEKOutputDone;
240 cl::Event evtSEReadOut;
241
242 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
243 m_slicingEngineInput.setArg(2, static_cast<unsigned long long>(inputIOLength));
244 ATH_MSG_DEBUG(
"Setting NWords:" <<
static_cast<unsigned long long>(inputIOLength)<<
" with size: "<<pixelDataIN.size());
245
246 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
247
249 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixelDataIN.size() *
sizeof(uint64_t), pixelDataIN.data(),
nullptr, &evtSEWriteIn);
251
252
254 std::vector<cl::Event> waitAfterSEWrite{evtSEWriteIn};
255 m_queue.enqueueTask(m_slicingEngineInput, &waitAfterSEWrite, &evtSEKInputDone);
258 m_queue.enqueueTask(m_slicingEngineOutput,
nullptr, &evtSEKOutputDone);
260
261
264 std::vector<cl::Event> waitForSERead{evtSEKOutputDone};
266
267
268 cl::Event::waitForEvents({evtSEReadOut});
269
270 dumpHexData(out_data,
"HW_slicingOut_pixel.txt", ctx);
271
272 m_SE_kernelTime += evtSEKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtSEKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
273
274 }
276 {
277 cl::Event evtWriteIn;
278 cl::Event evtKInputDone;
279 cl::Event evtKOutputDone;
280 cl::Event evtReadOut;
281
283 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
284 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
285
287 m_queue.enqueueWriteBuffer(m_insideOutInputBuffer, CL_TRUE, 0, dataPixelOut.size() *
sizeof(uint64_t), dataPixelOut.data(),
nullptr, &evtWriteIn);
289
291 std::vector<cl::Event> waitAfterWrite{evtWriteIn};
292 m_queue.enqueueTask(m_insideOutInput, &waitAfterWrite, &evtKInputDone);
293 m_queue.enqueueTask(m_insideOutOutput,
nullptr, &evtKOutputDone);
295
296
298
299
302
303 std::vector<cl::Event> waitForRead{evtKOutputDone};
304 m_queue.enqueueReadBuffer( m_insideOutOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &waitForRead, &evtReadOut);
305
306
307 cl::Event::waitForEvents({evtReadOut});
308 dumpHexData((*FPGATrackOutput),
"HW_insideOut.txt", ctx);
309
310 m_IO_kernelTime += evtKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
311 }
312
314 {
315 cl::Event evtSEWriteIn;
316 cl::Event evtSEKInputDone;
317 cl::Event evtSEKOutputDone;
318
319 cl::Event evtBufferTransfer;
320
321
322 cl::Event evtKInputDone;
323 cl::Event evtKOutputDone;
324 cl::Event evtReadOut;
325
327 const size_t pixel_size_bytesIN = pixelDataIN.size() *
sizeof(
uint64_t);
328
329 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
330 m_slicingEngineInput.setArg(2, static_cast<unsigned long long>(pixelDataIN.size()));
331
332 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
333
335 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
336 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
338
340 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixel_size_bytesIN, pixelDataIN.data(),
nullptr, &evtSEWriteIn);
342
344 std::vector<cl::Event> waitAfterSEWrite{evtSEWriteIn};
345 m_queue.enqueueTask(m_slicingEngineInput, &waitAfterSEWrite, &evtSEKInputDone);
346 m_queue.enqueueTask(m_slicingEngineOutput,
nullptr, &evtSEKOutputDone);
348
350 std::vector<cl::Event> waitAfterSE{evtSEKOutputDone};
351
353
354 std::vector<cl::Event> waitAfterTransfer{evtBufferTransfer};
355 m_queue.enqueueTask(m_insideOutInput, &waitAfterTransfer, &evtKInputDone);
356 m_queue.enqueueTask(m_insideOutOutput, NULL, &evtKOutputDone);
358
360 std::vector<cl::Event> waitForRead{evtKOutputDone};
361
362
365
366 m_queue.enqueueReadBuffer( m_insideOutOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &waitForRead, &evtReadOut);
367
368
369 cl::Event::waitForEvents({evtReadOut});
370 dumpHexData((*FPGATrackOutput),
"HW_insideOut.txt", ctx);
371
372 m_SE_kernelTime += evtSEKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtSEKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
373 m_IO_kernelTime += evtKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
374 }
375
377 {
378
381
382 const int* pixelInputSize{nullptr}, *stripInputSize{nullptr};
385
386
387 m_pixelClusteringKernel.setArg(0, m_pixelClusterInputBuffer);
388 m_pixelClusteringKernel.setArg(1, m_pixelClusterOutputBuffer);
389 m_pixelClusteringKernel.setArg(2, m_pixelClusterEDMOutputBuffer);
390
391 m_stripClusteringKernel.setArg(0, m_stripClusterInputBuffer);
392 m_stripClusteringKernel.setArg(1, m_stripClusterOutputBuffer);
393 m_stripClusteringKernel.setArg(2, m_stripClusterEDMOutputBuffer);
394 m_stripClusteringKernel.setArg(3, static_cast<unsigned int>(*stripInputSize));
395
396 m_stripL2GKernel.setArg(0, m_stripL2GInputBuffer);
397 m_stripL2GKernel.setArg(1, m_stripL2GEDMInputBuffer);
398 m_stripL2GKernel.setArg(2, m_stripL2GOutputBuffer);
399 m_stripL2GKernel.setArg(3, m_stripL2GEDMOutputBuffer);
400
401 m_pixelEdmPrepKernel.setArg(0, m_edmPixelInputBuffer);
402 m_pixelEdmPrepKernel.setArg(1, m_edmPixelOutputBuffer);
403 m_stripEdmPrepKernel.setArg(0, m_edmStripInputBuffer);
404 m_stripEdmPrepKernel.setArg(1, m_edmStripOutputBuffer);
405
406
407 cl::Event evt_pixel_input_write, evt_strip_input_write;
408
409 m_queue.enqueueWriteBuffer(m_pixelClusterInputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*pixelInput).size(), (*pixelInput).data(),
nullptr, &evt_pixel_input_write);
410 m_queue.enqueueWriteBuffer(m_stripClusterInputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*stripInput).size(), (*stripInput).data(),
nullptr, &evt_strip_input_write);
411
412 std::vector<cl::Event> evts_pixel_input_write{evt_pixel_input_write};
413 std::vector<cl::Event> evts_strip_input_write{evt_strip_input_write};
414
415
416 cl::Event evt_pixel_clustering_done, evt_strip_clustering_done;
417
418 m_queue.enqueueTask(m_pixelClusteringKernel, &evts_pixel_input_write, &evt_pixel_clustering_done);
419 m_queue.enqueueTask(m_stripClusteringKernel, &evts_strip_input_write, &evt_strip_clustering_done);
420
421
422 cl::Event evt_strip_l2g_input_copy_clusters, evt_strip_l2g_input_copy_edm;
423 std::vector<cl::Event> evts_strip_clustering_done{evt_strip_clustering_done};
424
427
428 std::vector<cl::Event> evts_strip_l2g_input_copies{evt_strip_l2g_input_copy_clusters, evt_strip_l2g_input_copy_edm};
429
430
431 cl::Event evt_strip_l2g_done;
432 m_queue.enqueueTask(m_stripL2GKernel, &evts_strip_l2g_input_copies, &evt_strip_l2g_done);
433
434
435 cl::Event evt_pixel_edm_input_copy, evt_strip_edm_input_copy;
436 std::vector<cl::Event> evts_pixel_clustering_done{evt_pixel_clustering_done};
437 std::vector<cl::Event> evts_strip_l2g_done{evt_strip_l2g_done};
438
441
442
443 cl::Event evt_pixel_edm_prep_done, evt_strip_edm_prep_done;
444 std::vector<cl::Event> evts_pixel_edm_input_copied{evt_pixel_edm_input_copy};
445 std::vector<cl::Event> evts_strip_edm_input_copied{evt_strip_edm_input_copy};
446
447 m_queue.enqueueTask(m_pixelEdmPrepKernel, &evts_pixel_edm_input_copied, &evt_pixel_edm_prep_done);
448 m_queue.enqueueTask(m_stripEdmPrepKernel, &evts_strip_edm_input_copied, &evt_strip_edm_prep_done);
449
450
453
456
457 cl::Event evt_pixel_edm_read_done, evt_strip_edm_read_done;
458 std::vector<cl::Event> evts_pixel_edm_prep_done{evt_pixel_edm_prep_done};
459 std::vector<cl::Event> evts_strip_edm_prep_done{evt_strip_edm_prep_done};
460
461 m_queue.enqueueReadBuffer(m_edmPixelOutputBuffer, CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &evts_pixel_edm_prep_done, &evt_pixel_edm_read_done);
462 m_queue.enqueueReadBuffer(m_edmStripOutputBuffer, CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &evts_strip_edm_prep_done, &evt_strip_edm_read_done);
463
464 cl::Event::waitForEvents(std::vector<cl::Event>{evt_pixel_edm_read_done, evt_strip_edm_read_done});
465
466
467 if (pixelInput->size() == 6) (*FPGAPixelOutput)[0] = 0;
468 if (stripInput->size() == 6) (*FPGAStripOutput)[0] = 0;
469
472
473
474 cl::Event evt_pixel_cluster_output_read;
476
477 m_queue.enqueueReadBuffer(m_pixelClusterOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * pixelClusterOut.size(), pixelClusterOut.data(), &evts_pixel_clustering_done, &evt_pixel_cluster_output_read);
479
480
481 int nWords = static_cast<int>(pixelClusterOut.size()) - 1;
482 for (; nWords >= 0; nWords--)
483 {
484 if (pixelClusterOut[nWords] == 0xcd00000000000000) break;
485 }
486
487 if (nWords < 0)
488 {
489 ATH_MSG_ERROR(
"Footer 0xcd00000000000000 not found in pixelClusterOut; cannot determine nWords");
return StatusCode::FAILURE;
490 }
491
492
493 if (nWords > 0) nWords += 3;
494
495
496 for (
int i = 0; i < 8 && (nWords + i) < static_cast<int>(pixelClusterOut.size());
i++)
497 {
498 pixelClusterOut[nWords +
i] = 0;
499 }
501
502
503 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
504 m_slicingEngineInput.setArg(2, static_cast<unsigned long long>(nWords));
505 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
506
507 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
508 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
509
510
511 cl::Event evt_se_input_write;
512
513 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixelClusterOut.size() *
sizeof(uint64_t), pixelClusterOut.data(),
nullptr, &evt_se_input_write);
515
516
517
518 cl::Event evt_se_kernel_input_done, evt_se_kernel_output_done;
519 std::vector<cl::Event> evts_after_se_input_write{evt_se_input_write};
520
521 m_queue.enqueueTask(m_slicingEngineInput, &evts_after_se_input_write, &evt_se_kernel_input_done);
522 m_queue.enqueueTask(m_slicingEngineOutput,
nullptr, &evt_se_kernel_output_done);
524
525
526 cl::Event evt_io_input_transfer;
527 std::vector<cl::Event> evts_after_se_output_done{evt_se_kernel_output_done};
528
531
532
533 cl::Event evt_io_kernel_input_done, evt_io_kernel_output_done;
534 std::vector<cl::Event> evts_after_io_input_transfer{evt_io_input_transfer};
535
536 m_queue.enqueueTask(m_insideOutInput, &evts_after_io_input_transfer, &evt_io_kernel_input_done);
537 m_queue.enqueueTask(m_insideOutOutput,
nullptr, &evt_io_kernel_output_done);
539
540
543
544 cl::Event evt_io_output_read;
545 std::vector<cl::Event> evts_before_insideout_read{evt_io_kernel_output_done};
546 m_queue.enqueueReadBuffer(m_insideOutOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &evts_before_insideout_read, &evt_io_output_read);
547
548 cl::Event::waitForEvents(std::vector<cl::Event>{evt_io_output_read});
549 dumpHexData((*FPGATrackOutput),
"HW_insideOut.txt", ctx);
550
552
553
554 m_SE_kernelTime += evt_se_kernel_output_done.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_se_kernel_input_done.getProfilingInfo<CL_PROFILING_COMMAND_START>();
555 m_IO_kernelTime += evt_io_kernel_output_done.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_io_kernel_input_done.getProfilingInfo<CL_PROFILING_COMMAND_START>();
556
557
558 }
559 return StatusCode::SUCCESS;
560 }
#define ATH_CHECK
Evaluate an expression and check for errors.
Gaudi::Property< bool > m_runIO
Whether to run inside out or not.
SG::ReadHandleKey< int > m_FPGAStripRDOSize
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAPixelOutput
ToolHandle< FPGADataFormatTool > m_FPGADataFormatTool
Tool for formatting FPGA data.
SG::ReadHandleKey< FPGATrackSimHitCollection > m_FPGASlicedHitKey
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAStripRDO
SG::WriteHandleKey< std::vector< uint64_t > > m_FPGATrackOutput
std::atomic< cl_ulong > m_IO_kernelTime
Time for kernel execution.
Gaudi::Property< bool > m_runFull150
Whether to run the Full F150 include F100 on hy.
std::atomic< cl_ulong > m_SE_kernelTime
Sum for the average time of the kernel execution.
std::atomic< ulonglong > m_numEvents
Number of events for the average time of the kernel execution.
SG::ReadHandleKey< FPGATrackSimTrackCollection > m_FPGATrackKey
SG::ReadHandleKey< FPGATrackSimHitCollection > m_FPGAHitKey
Gaudi::Property< bool > m_runIOOnSE
Whether to run inside out on the output of slicing engine.
Gaudi::Property< bool > m_runSE
Whether to run SE or not.
void dumpHexData(std::span< const uint64_t > data, const std::string &dataDescriptor, const EventContext &ctx) const
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAStripOutput
SG::ReadHandleKey< int > m_FPGAPixelRDOSize
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAPixelRDO
cl::Context m_context
Context object for the application.
std::vector< std::string > remainder(const std::vector< std::string > &v1, const std::vector< std::string > &v2)
constexpr unsigned long PIXEL_CONTAINER_INPUT_BUF_SIZE
constexpr uint32_t STRIP_CONTAINER_BUF_SIZE
constexpr unsigned long STRIP_CONTAINER_INPUT_BUF_SIZE
constexpr uint32_t STRIP_BLOCK_BUF_SIZE
constexpr uint32_t PIXEL_BLOCK_BUF_SIZE
constexpr unsigned long TRACK_CONTAINER_BUF_SIZE
constexpr uint32_t PIXEL_CONTAINER_BUF_SIZE
const T * get(const ReadCondHandleKey< T > &key, const EventContext &ctx)
Convenience function to retrieve an object given a ReadCondHandleKey.