Should be overridden by derived classes to perform meaningful work.
146 {
148
149
150 SG::ReadHandle<FPGATrackSimTrackCollection> outTrackCollection(
m_FPGATrackKey, ctx);
151
153 {
155 auto outputVec = std::make_unique<std::vector<uint64_t>>();
156
158
159 ATH_CHECK(FPGATrackOutput.record(std::move(outputVec)));
160 }
161
162
164 std::vector<uint64_t> pixelDataIN;
165 std::vector<uint64_t> stripDataIN;
166 SG::ReadHandle<FPGATrackSimHitCollection> hitCollectionHandle(
m_FPGAHitKey, ctx);
169
170 int padLength = 8;
171 int inputIOLength = pixelDataIN.size();
172 auto remainder = inputIOLength % padLength;
175 pixelDataIN.insert(pixelDataIN.end(), to_add, 0);
176 }
177
178 dumpHexData(pixelDataIN,
"FPGATrackSim_slicingIn_pixel.txt", ctx);
179 dumpHexData(stripDataIN,
"FPGATrackSim_slicingIn_strip.txt", ctx);
180
182 std::vector<uint64_t> dataPixelOut;
183 std::vector<uint64_t> dataStripOut;
184 SG::ReadHandle<FPGATrackSimHitCollection> outhitCollectionHandle(
m_FPGASlicedHitKey, ctx);
187 dumpHexData(dataPixelOut,
"FPGATrackSim_slicingOut_pixel.txt", ctx);
188 dumpHexData(dataStripOut,
"FPGATrackSim_slicingOut_strip.txt", ctx);
189
191 std::vector<uint64_t> dataInsideOut;
193 dumpHexData(dataInsideOut,
"FPGATrackSim_insideOut.txt", ctx);
194
195
196 cl_int
err = CL_SUCCESS;
197
198
200
201
204
205
210
211
214
217
218
223
224
226 else m_slicingEngineInputBuffer = cl::Buffer(
m_context, CL_MEM_READ_WRITE, pixelDataIN.size() *
sizeof(uint64_t),
nullptr, &err);
228
230 else m_insideOutInputBuffer = cl::Buffer(
m_context, CL_MEM_READ_WRITE, dataPixelOut.size() *
sizeof(uint64_t),
nullptr, &err);
232
233
235
236 cl::Event evtSEWriteIn;
237 cl::Event evtSEKInputDone;
238 cl::Event evtSEKOutputDone;
239 cl::Event evtSEReadOut;
240
241 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
242 m_slicingEngineInput.setArg(2, static_cast<unsigned long long>(inputIOLength));
243 ATH_MSG_DEBUG(
"Setting NWords:" <<
static_cast<unsigned long long>(inputIOLength)<<
" with size: "<<pixelDataIN.size());
244
245 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
246
248 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixelDataIN.size() *
sizeof(uint64_t), pixelDataIN.data(),
nullptr, &evtSEWriteIn);
250
251
253 std::vector<cl::Event> waitAfterSEWrite{evtSEWriteIn};
254 m_queue.enqueueTask(m_slicingEngineInput, &waitAfterSEWrite, &evtSEKInputDone);
257 m_queue.enqueueTask(m_slicingEngineOutput,
nullptr, &evtSEKOutputDone);
259
260
263 std::vector<cl::Event> waitForSERead{evtSEKOutputDone};
265
266
267 cl::Event::waitForEvents({evtSEReadOut});
268
269 dumpHexData(out_data,
"HW_slicingOut_pixel.txt", ctx);
270
271 m_SE_kernelTime += evtSEKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtSEKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
272
273 }
275 {
276 cl::Event evtWriteIn;
277 cl::Event evtKInputDone;
278 cl::Event evtKOutputDone;
279 cl::Event evtReadOut;
280
282 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
283 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
284
286 m_queue.enqueueWriteBuffer(m_insideOutInputBuffer, CL_TRUE, 0, dataPixelOut.size() *
sizeof(uint64_t), dataPixelOut.data(),
nullptr, &evtWriteIn);
288
290 std::vector<cl::Event> waitAfterWrite{evtWriteIn};
291 m_queue.enqueueTask(m_insideOutInput, &waitAfterWrite, &evtKInputDone);
292 m_queue.enqueueTask(m_insideOutOutput,
nullptr, &evtKOutputDone);
294
295
297
298
301
302 std::vector<cl::Event> waitForRead{evtKOutputDone};
303 m_queue.enqueueReadBuffer( m_insideOutOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &waitForRead, &evtReadOut);
304
305
306 cl::Event::waitForEvents({evtReadOut});
307 dumpHexData((*FPGATrackOutput),
"HW_insideOut.txt", ctx);
308
309 m_IO_kernelTime += evtKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
310 }
311
313 {
314 cl::Event evtSEWriteIn;
315 cl::Event evtSEKInputDone;
316 cl::Event evtSEKOutputDone;
317
318 cl::Event evtBufferTransfer;
319
320
321 cl::Event evtKInputDone;
322 cl::Event evtKOutputDone;
323 cl::Event evtReadOut;
324
326 const size_t pixel_size_bytesIN = pixelDataIN.size() *
sizeof(
uint64_t);
327
328 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
329 m_slicingEngineInput.setArg(2, static_cast<unsigned long long>(pixelDataIN.size()));
330
331 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
332
334 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
335 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
337
339 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixel_size_bytesIN, pixelDataIN.data(),
nullptr, &evtSEWriteIn);
341
343 std::vector<cl::Event> waitAfterSEWrite{evtSEWriteIn};
344 m_queue.enqueueTask(m_slicingEngineInput, &waitAfterSEWrite, &evtSEKInputDone);
345 m_queue.enqueueTask(m_slicingEngineOutput,
nullptr, &evtSEKOutputDone);
347
349 std::vector<cl::Event> waitAfterSE{evtSEKOutputDone};
350
352
353 std::vector<cl::Event> waitAfterTransfer{evtBufferTransfer};
354 m_queue.enqueueTask(m_insideOutInput, &waitAfterTransfer, &evtKInputDone);
355 m_queue.enqueueTask(m_insideOutOutput, NULL, &evtKOutputDone);
357
359 std::vector<cl::Event> waitForRead{evtKOutputDone};
360
361
364
365 m_queue.enqueueReadBuffer( m_insideOutOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &waitForRead, &evtReadOut);
366
367
368 cl::Event::waitForEvents({evtReadOut});
369 dumpHexData((*FPGATrackOutput),
"HW_insideOut.txt", ctx);
370
371 m_SE_kernelTime += evtSEKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtSEKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
372 m_IO_kernelTime += evtKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
373 }
374
376 {
377
380
381 const int* pixelInputSize{nullptr}, *stripInputSize{nullptr};
384
385
386 m_pixelClusteringKernel.setArg(0, m_pixelClusterInputBuffer);
387 m_pixelClusteringKernel.setArg(1, m_pixelClusterOutputBuffer);
388 m_pixelClusteringKernel.setArg(2, m_pixelClusterEDMOutputBuffer);
389
390 m_stripClusteringKernel.setArg(0, m_stripClusterInputBuffer);
391 m_stripClusteringKernel.setArg(1, m_stripClusterOutputBuffer);
392 m_stripClusteringKernel.setArg(2, m_stripClusterEDMOutputBuffer);
393 m_stripClusteringKernel.setArg(3, static_cast<unsigned int>(*stripInputSize));
394
395 m_stripL2GKernel.setArg(0, m_stripL2GInputBuffer);
396 m_stripL2GKernel.setArg(1, m_stripL2GEDMInputBuffer);
397 m_stripL2GKernel.setArg(2, m_stripL2GOutputBuffer);
398 m_stripL2GKernel.setArg(3, m_stripL2GEDMOutputBuffer);
399
400 m_pixelEdmPrepKernel.setArg(0, m_edmPixelInputBuffer);
401 m_pixelEdmPrepKernel.setArg(1, m_edmPixelOutputBuffer);
402 m_stripEdmPrepKernel.setArg(0, m_edmStripInputBuffer);
403 m_stripEdmPrepKernel.setArg(1, m_edmStripOutputBuffer);
404
405
406 cl::Event evt_pixel_input_write, evt_strip_input_write;
407
408 m_queue.enqueueWriteBuffer(m_pixelClusterInputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*pixelInput).size(), (*pixelInput).data(),
nullptr, &evt_pixel_input_write);
409 m_queue.enqueueWriteBuffer(m_stripClusterInputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*stripInput).size(), (*stripInput).data(),
nullptr, &evt_strip_input_write);
410
411 std::vector<cl::Event> evts_pixel_input_write{evt_pixel_input_write};
412 std::vector<cl::Event> evts_strip_input_write{evt_strip_input_write};
413
414
415 cl::Event evt_pixel_clustering_done, evt_strip_clustering_done;
416
417 m_queue.enqueueTask(m_pixelClusteringKernel, &evts_pixel_input_write, &evt_pixel_clustering_done);
418 m_queue.enqueueTask(m_stripClusteringKernel, &evts_strip_input_write, &evt_strip_clustering_done);
419
420
421 cl::Event evt_strip_l2g_input_copy_clusters, evt_strip_l2g_input_copy_edm;
422 std::vector<cl::Event> evts_strip_clustering_done{evt_strip_clustering_done};
423
426
427 std::vector<cl::Event> evts_strip_l2g_input_copies{evt_strip_l2g_input_copy_clusters, evt_strip_l2g_input_copy_edm};
428
429
430 cl::Event evt_strip_l2g_done;
431 m_queue.enqueueTask(m_stripL2GKernel, &evts_strip_l2g_input_copies, &evt_strip_l2g_done);
432
433
434 cl::Event evt_pixel_edm_input_copy, evt_strip_edm_input_copy;
435 std::vector<cl::Event> evts_pixel_clustering_done{evt_pixel_clustering_done};
436 std::vector<cl::Event> evts_strip_l2g_done{evt_strip_l2g_done};
437
440
441
442 cl::Event evt_pixel_edm_prep_done, evt_strip_edm_prep_done;
443 std::vector<cl::Event> evts_pixel_edm_input_copied{evt_pixel_edm_input_copy};
444 std::vector<cl::Event> evts_strip_edm_input_copied{evt_strip_edm_input_copy};
445
446 m_queue.enqueueTask(m_pixelEdmPrepKernel, &evts_pixel_edm_input_copied, &evt_pixel_edm_prep_done);
447 m_queue.enqueueTask(m_stripEdmPrepKernel, &evts_strip_edm_input_copied, &evt_strip_edm_prep_done);
448
449
452
455
456 cl::Event evt_pixel_edm_read_done, evt_strip_edm_read_done;
457 std::vector<cl::Event> evts_pixel_edm_prep_done{evt_pixel_edm_prep_done};
458 std::vector<cl::Event> evts_strip_edm_prep_done{evt_strip_edm_prep_done};
459
460 m_queue.enqueueReadBuffer(m_edmPixelOutputBuffer, CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &evts_pixel_edm_prep_done, &evt_pixel_edm_read_done);
461 m_queue.enqueueReadBuffer(m_edmStripOutputBuffer, CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &evts_strip_edm_prep_done, &evt_strip_edm_read_done);
462
463 cl::Event::waitForEvents(std::vector<cl::Event>{evt_pixel_edm_read_done, evt_strip_edm_read_done});
464
465
466 if (pixelInput->size() == 6) (*FPGAPixelOutput)[0] = 0;
467 if (stripInput->size() == 6) (*FPGAStripOutput)[0] = 0;
468
471
472
473 cl::Event evt_pixel_cluster_output_read;
475
476 m_queue.enqueueReadBuffer(m_pixelClusterOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * pixelClusterOut.size(), pixelClusterOut.data(), &evts_pixel_clustering_done, &evt_pixel_cluster_output_read);
478
479
480 int nWords = static_cast<int>(pixelClusterOut.size()) - 1;
481 for (; nWords >= 0; nWords--)
482 {
483 if (pixelClusterOut[nWords] == 0xcd00000000000000) break;
484 }
485
486 if (nWords < 0)
487 {
488 ATH_MSG_ERROR(
"Footer 0xcd00000000000000 not found in pixelClusterOut; cannot determine nWords");
return StatusCode::FAILURE;
489 }
490
491
492 if (nWords > 0) nWords += 3;
493
494
495 for (
int i = 0; i < 8 && (nWords + i) < static_cast<int>(pixelClusterOut.size());
i++)
496 {
497 pixelClusterOut[nWords +
i] = 0;
498 }
500
501
502 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
503 m_slicingEngineInput.setArg(2, static_cast<unsigned long long>(nWords));
504 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
505
506 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
507 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
508
509
510 cl::Event evt_se_input_write;
511
512 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixelClusterOut.size() *
sizeof(uint64_t), pixelClusterOut.data(),
nullptr, &evt_se_input_write);
514
515
516
517 cl::Event evt_se_kernel_input_done, evt_se_kernel_output_done;
518 std::vector<cl::Event> evts_after_se_input_write{evt_se_input_write};
519
520 m_queue.enqueueTask(m_slicingEngineInput, &evts_after_se_input_write, &evt_se_kernel_input_done);
521 m_queue.enqueueTask(m_slicingEngineOutput,
nullptr, &evt_se_kernel_output_done);
523
524
525 cl::Event evt_io_input_transfer;
526 std::vector<cl::Event> evts_after_se_output_done{evt_se_kernel_output_done};
527
530
531
532 cl::Event evt_io_kernel_input_done, evt_io_kernel_output_done;
533 std::vector<cl::Event> evts_after_io_input_transfer{evt_io_input_transfer};
534
535 m_queue.enqueueTask(m_insideOutInput, &evts_after_io_input_transfer, &evt_io_kernel_input_done);
536 m_queue.enqueueTask(m_insideOutOutput,
nullptr, &evt_io_kernel_output_done);
538
539
542
543 cl::Event evt_io_output_read;
544 std::vector<cl::Event> evts_before_insideout_read{evt_io_kernel_output_done};
545 m_queue.enqueueReadBuffer(m_insideOutOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &evts_before_insideout_read, &evt_io_output_read);
546
547 cl::Event::waitForEvents(std::vector<cl::Event>{evt_io_output_read});
548 dumpHexData((*FPGATrackOutput),
"HW_insideOut.txt", ctx);
549
551
552
553 m_SE_kernelTime += evt_se_kernel_output_done.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_se_kernel_input_done.getProfilingInfo<CL_PROFILING_COMMAND_START>();
554 m_IO_kernelTime += evt_io_kernel_output_done.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_io_kernel_input_done.getProfilingInfo<CL_PROFILING_COMMAND_START>();
555
556
557 }
558 return StatusCode::SUCCESS;
559 }
#define ATH_CHECK
Evaluate an expression and check for errors.
Gaudi::Property< bool > m_runIO
Whether to run inside out or not.
SG::ReadHandleKey< int > m_FPGAStripRDOSize
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAPixelOutput
ToolHandle< FPGADataFormatTool > m_FPGADataFormatTool
Tool for formatting FPGA data.
SG::ReadHandleKey< FPGATrackSimHitCollection > m_FPGASlicedHitKey
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAStripRDO
SG::WriteHandleKey< std::vector< uint64_t > > m_FPGATrackOutput
std::atomic< cl_ulong > m_IO_kernelTime
Time for kernel execution.
Gaudi::Property< bool > m_runFull150
Whether to run the Full F150 include F100 on hy.
std::atomic< cl_ulong > m_SE_kernelTime
Sum for the average time of the kernel execution.
std::atomic< ulonglong > m_numEvents
Number of events for the average time of the kernel execution.
SG::ReadHandleKey< FPGATrackSimTrackCollection > m_FPGATrackKey
SG::ReadHandleKey< FPGATrackSimHitCollection > m_FPGAHitKey
Gaudi::Property< bool > m_runIOOnSE
Whether to run inside out on the output of slicing engine.
Gaudi::Property< bool > m_runSE
Whether to run SE or not.
void dumpHexData(std::span< const uint64_t > data, const std::string &dataDescriptor, const EventContext &ctx) const
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAStripOutput
SG::ReadHandleKey< int > m_FPGAPixelRDOSize
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAPixelRDO
cl::Context m_context
Context object for the application.
std::vector< std::string > remainder(const std::vector< std::string > &v1, const std::vector< std::string > &v2)
constexpr unsigned long PIXEL_CONTAINER_INPUT_BUF_SIZE
constexpr uint32_t STRIP_CONTAINER_BUF_SIZE
constexpr unsigned long STRIP_CONTAINER_INPUT_BUF_SIZE
constexpr uint32_t STRIP_BLOCK_BUF_SIZE
constexpr uint32_t PIXEL_BLOCK_BUF_SIZE
constexpr unsigned long TRACK_CONTAINER_BUF_SIZE
constexpr uint32_t PIXEL_CONTAINER_BUF_SIZE
const T * get(const ReadCondHandleKey< T > &key, const EventContext &ctx)
Convenience function to retrieve an object given a ReadCondHandleKey.