// Host-side staging containers of 64-bit words: one heap-allocated output vector and
// the pixel/strip input vectors.
// NOTE(review): this listing omits intermediate source lines, so the code that fills
// these vectors is not visible here.
156 auto outputVec = std::make_unique<std::vector<uint64_t>>();
165 std::vector<uint64_t> pixelDataIN;
166 std::vector<uint64_t> stripDataIN;
// Record the pixel word count (size_t narrowed to int) and its remainder modulo padLength.
// As far as this excerpt shows, inputIOLength is taken before the padding insert below,
// so it holds the unpadded count.
172 int inputIOLength = pixelDataIN.size();
173 auto remainder = inputIOLength % padLength;
// Append to_add zero-valued words to the end of pixelDataIN. to_add is computed on a
// line not shown (presumably padLength - remainder — TODO confirm).
176 pixelDataIN.insert(pixelDataIN.end(), to_add, 0);
// Hand the pixel and strip input words to dumpHexData together with a file name and ctx
// (presumably writes a hex dump of the vector — confirm against dumpHexData).
179 dumpHexData(pixelDataIN,
"FPGATrackSim_slicingIn_pixel.txt", ctx);
180 dumpHexData(stripDataIN,
"FPGATrackSim_slicingIn_strip.txt", ctx);
// Pixel/strip word vectors dumped under the "slicingOut" file names; how they are
// filled is not visible in this excerpt.
183 std::vector<uint64_t> dataPixelOut;
184 std::vector<uint64_t> dataStripOut;
188 dumpHexData(dataPixelOut,
"FPGATrackSim_slicingOut_pixel.txt", ctx);
189 dumpHexData(dataStripOut,
"FPGATrackSim_slicingOut_strip.txt", ctx);
// Word vector dumped under the "insideOut" file name; its producer is likewise not shown.
192 std::vector<uint64_t> dataInsideOut;
194 dumpHexData(dataInsideOut,
"FPGATrackSim_insideOut.txt", ctx);
// OpenCL status variable; its address is passed to the cl::Buffer constructors below so
// they can report an error code.
// NOTE(review): no check of err is visible in this excerpt — confirm it is tested after
// each allocation.
197 cl_int err = CL_SUCCESS;
// else-branch (the matching `if` is not visible here): create the slicing-engine input
// buffer as a read/write device buffer with no host pointer, sized to hold every word
// of pixelDataIN.
227 else m_slicingEngineInputBuffer = cl::Buffer(
m_context, CL_MEM_READ_WRITE, pixelDataIN.size() *
sizeof(uint64_t),
nullptr, &err);
// else-branch (the matching `if` is not visible here): create the inside-out input
// buffer the same way, sized to hold every word of dataPixelOut.
231 else m_insideOutInputBuffer = cl::Buffer(
m_context, CL_MEM_READ_WRITE, dataPixelOut.size() *
sizeof(uint64_t),
nullptr, &err);
// Sequence driving only the m_slicingEngineInput / m_slicingEngineOutput kernels.
// Events for the input write, the two kernels and the output read-back.
237 cl::Event evtSEWriteIn;
238 cl::Event evtSEKInputDone;
239 cl::Event evtSEKOutputDone;
240 cl::Event evtSEReadOut;
// Bind the device input buffer (arg 0) and the word count (arg 2) on the input kernel.
// NOTE(review): inputIOLength is assigned from pixelDataIN.size() before the zero-padding
// insert shown earlier in this listing, so the count passed here can be smaller than the
// pixelDataIN.size() printed in the debug message and used for the transfer below.
// Another sequence in this listing passes pixelDataIN.size() as arg 2 instead — confirm
// which value the kernel expects.
242 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
243 m_slicingEngineInput.setArg(2,
static_cast<unsigned long long>(inputIOLength));
244 ATH_MSG_DEBUG(
"Setting NWords:" <<
static_cast<unsigned long long>(inputIOLength)<<
" with size: "<<pixelDataIN.size());
// Bind the device output buffer as arg 1 of the output kernel.
246 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
// Non-blocking (CL_FALSE) host-to-device copy of all of pixelDataIN, starting at offset 0;
// completion is signalled through evtSEWriteIn.
249 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixelDataIN.size() *
sizeof(uint64_t), pixelDataIN.data(),
nullptr, &evtSEWriteIn);
// The input kernel is enqueued to wait for the write event; the output kernel is
// enqueued with no wait list.
254 std::vector<cl::Event> waitAfterSEWrite{evtSEWriteIn};
255 m_queue.enqueueTask(m_slicingEngineInput, &waitAfterSEWrite, &evtSEKInputDone);
258 m_queue.enqueueTask(m_slicingEngineOutput,
nullptr, &evtSEKOutputDone);
// Wait list holding the output-kernel event. The read enqueue that would consume it and
// signal evtSEReadOut is on lines not shown in this excerpt.
264 std::vector<cl::Event> waitForSERead{evtSEKOutputDone};
// Block the host until evtSEReadOut has completed.
268 cl::Event::waitForEvents({evtSEReadOut});
// out_data is declared on a line not shown here (presumably the read-back destination — confirm).
270 dumpHexData(out_data,
"HW_slicingOut_pixel.txt", ctx);
// Accumulate the span from the input kernel's START timestamp to the output kernel's END
// timestamp into m_SE_kernelTime.
// NOTE(review): profiling queries assume m_queue was created with profiling enabled — confirm.
272 m_SE_kernelTime += evtSEKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtSEKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
// Sequence driving only the m_insideOutInput / m_insideOutOutput kernels.
// Events for the input write, the two kernels and the output read-back.
277 cl::Event evtWriteIn;
278 cl::Event evtKInputDone;
279 cl::Event evtKOutputDone;
280 cl::Event evtReadOut;
// Bind the device input/output buffers as arg 0 of the respective kernels.
283 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
284 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
// Blocking (CL_TRUE) host-to-device copy of all of dataPixelOut, starting at offset 0.
// NOTE(review): the other buffer writes in this listing are non-blocking (CL_FALSE) —
// confirm the blocking write here is intentional.
287 m_queue.enqueueWriteBuffer(m_insideOutInputBuffer, CL_TRUE, 0, dataPixelOut.size() *
sizeof(uint64_t), dataPixelOut.data(),
nullptr, &evtWriteIn);
// The input kernel is enqueued to wait for the write event; the output kernel is
// enqueued with no wait list.
291 std::vector<cl::Event> waitAfterWrite{evtWriteIn};
292 m_queue.enqueueTask(m_insideOutInput, &waitAfterWrite, &evtKInputDone);
293 m_queue.enqueueTask(m_insideOutOutput,
nullptr, &evtKOutputDone);
// Non-blocking device-to-host read into *FPGATrackOutput, enqueued to wait for the output
// kernel. The byte count is taken from the vector's current size, so the vector must
// already be sized by the caller or by lines not shown.
303 std::vector<cl::Event> waitForRead{evtKOutputDone};
304 m_queue.enqueueReadBuffer( m_insideOutOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &waitForRead, &evtReadOut);
// Block the host until the read has completed, then pass the result to dumpHexData.
307 cl::Event::waitForEvents({evtReadOut});
308 dumpHexData((*FPGATrackOutput),
"HW_insideOut.txt", ctx);
// Accumulate the span from the input kernel's START timestamp to the output kernel's END
// timestamp into m_IO_kernelTime.
310 m_IO_kernelTime += evtKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
// Chained sequence: the m_slicingEngineInput / m_slicingEngineOutput kernels followed by
// the m_insideOutInput / m_insideOutOutput kernels.
// Events for the slicing-engine input write and its two kernels.
315 cl::Event evtSEWriteIn;
316 cl::Event evtSEKInputDone;
317 cl::Event evtSEKOutputDone;
// Event for the transfer between the two stages; the enqueue that signals it is on
// lines not shown in this excerpt.
319 cl::Event evtBufferTransfer;
// Events for the inside-out kernels and the final read-back.
322 cl::Event evtKInputDone;
323 cl::Event evtKOutputDone;
324 cl::Event evtReadOut;
// Size in bytes of the full pixelDataIN vector.
327 const size_t pixel_size_bytesIN = pixelDataIN.size() *
sizeof(uint64_t);
// Bind the slicing-engine buffers; arg 2 of the input kernel is the current word count
// of pixelDataIN.
329 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
330 m_slicingEngineInput.setArg(2,
static_cast<unsigned long long>(pixelDataIN.size()));
332 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
// Bind the inside-out buffers as arg 0 of the respective kernels.
335 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
336 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
// Non-blocking (CL_FALSE) host-to-device copy of pixelDataIN, starting at offset 0.
340 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixel_size_bytesIN, pixelDataIN.data(),
nullptr, &evtSEWriteIn);
// The slicing-engine input kernel is enqueued to wait for the write event; the output
// kernel is enqueued with no wait list.
344 std::vector<cl::Event> waitAfterSEWrite{evtSEWriteIn};
345 m_queue.enqueueTask(m_slicingEngineInput, &waitAfterSEWrite, &evtSEKInputDone);
346 m_queue.enqueueTask(m_slicingEngineOutput,
nullptr, &evtSEKOutputDone);
// Wait list holding the slicing-engine output event; its consumer is not visible here
// (presumably the inter-stage transfer — confirm).
350 std::vector<cl::Event> waitAfterSE{evtSEKOutputDone};
// The inside-out input kernel is enqueued to wait for the inter-stage transfer event;
// the output kernel is enqueued with no wait list (nullptr, matching the other
// enqueueTask calls in this listing).
354 std::vector<cl::Event> waitAfterTransfer{evtBufferTransfer};
355 m_queue.enqueueTask(m_insideOutInput, &waitAfterTransfer, &evtKInputDone);
356 m_queue.enqueueTask(m_insideOutOutput, nullptr, &evtKOutputDone);
// Non-blocking device-to-host read into *FPGATrackOutput, enqueued to wait for the
// inside-out output kernel. The byte count is taken from the vector's current size.
360 std::vector<cl::Event> waitForRead{evtKOutputDone};
366 m_queue.enqueueReadBuffer( m_insideOutOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &waitForRead, &evtReadOut);
// Block the host until the read has completed, then pass the result to dumpHexData.
369 cl::Event::waitForEvents({evtReadOut});
370 dumpHexData((*FPGATrackOutput),
"HW_insideOut.txt", ctx);
// Accumulate, per stage, the span from the input kernel's START timestamp to the output
// kernel's END timestamp.
372 m_SE_kernelTime += evtSEKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtSEKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
373 m_IO_kernelTime += evtKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
// Sequence driving the pixel/strip clustering, strip L2G and pixel/strip EDM-prep kernels.
// Pointers to the pixel/strip input sizes, initialised to null.
// NOTE(review): *stripInputSize is dereferenced below; the assignment that makes it
// non-null is on lines not shown in this excerpt — confirm it always happens first.
// pixelInputSize is not used in any line visible here.
382 const int* pixelInputSize{
nullptr}, *stripInputSize{
nullptr};
// Pixel clustering kernel: input buffer, cluster output buffer, EDM output buffer.
387 m_pixelClusteringKernel.setArg(0, m_pixelClusterInputBuffer);
388 m_pixelClusteringKernel.setArg(1, m_pixelClusterOutputBuffer);
389 m_pixelClusteringKernel.setArg(2, m_pixelClusterEDMOutputBuffer);
// Strip clustering kernel: the same three buffer roles plus the strip input size as arg 3.
391 m_stripClusteringKernel.setArg(0, m_stripClusterInputBuffer);
392 m_stripClusteringKernel.setArg(1, m_stripClusterOutputBuffer);
393 m_stripClusteringKernel.setArg(2, m_stripClusterEDMOutputBuffer);
394 m_stripClusteringKernel.setArg(3,
static_cast<unsigned int>(*stripInputSize));
// Strip L2G kernel: two input buffers (args 0-1) and two output buffers (args 2-3).
396 m_stripL2GKernel.setArg(0, m_stripL2GInputBuffer);
397 m_stripL2GKernel.setArg(1, m_stripL2GEDMInputBuffer);
398 m_stripL2GKernel.setArg(2, m_stripL2GOutputBuffer);
399 m_stripL2GKernel.setArg(3, m_stripL2GEDMOutputBuffer);
// EDM-prep kernels: one input and one output buffer each.
401 m_pixelEdmPrepKernel.setArg(0, m_edmPixelInputBuffer);
402 m_pixelEdmPrepKernel.setArg(1, m_edmPixelOutputBuffer);
403 m_stripEdmPrepKernel.setArg(0, m_edmStripInputBuffer);
404 m_stripEdmPrepKernel.setArg(1, m_edmStripOutputBuffer);
// Non-blocking (CL_FALSE) host-to-device copies of the full *pixelInput and *stripInput
// vectors, each with its own completion event.
407 cl::Event evt_pixel_input_write, evt_strip_input_write;
409 m_queue.enqueueWriteBuffer(m_pixelClusterInputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*pixelInput).size(), (*pixelInput).data(),
nullptr, &evt_pixel_input_write);
410 m_queue.enqueueWriteBuffer(m_stripClusterInputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*stripInput).size(), (*stripInput).data(),
nullptr, &evt_strip_input_write);
412 std::vector<cl::Event> evts_pixel_input_write{evt_pixel_input_write};
413 std::vector<cl::Event> evts_strip_input_write{evt_strip_input_write};
// Each clustering kernel is enqueued to wait for its own input write.
416 cl::Event evt_pixel_clustering_done, evt_strip_clustering_done;
418 m_queue.enqueueTask(m_pixelClusteringKernel, &evts_pixel_input_write, &evt_pixel_clustering_done);
419 m_queue.enqueueTask(m_stripClusteringKernel, &evts_strip_input_write, &evt_strip_clustering_done);
// Events for the two copies into the strip L2G inputs. The copy enqueues themselves are
// on lines not shown (presumably gated on evts_strip_clustering_done — confirm).
422 cl::Event evt_strip_l2g_input_copy_clusters, evt_strip_l2g_input_copy_edm;
423 std::vector<cl::Event> evts_strip_clustering_done{evt_strip_clustering_done};
428 std::vector<cl::Event> evts_strip_l2g_input_copies{evt_strip_l2g_input_copy_clusters, evt_strip_l2g_input_copy_edm};
// The strip L2G kernel is enqueued to wait for both input-copy events.
431 cl::Event evt_strip_l2g_done;
432 m_queue.enqueueTask(m_stripL2GKernel, &evts_strip_l2g_input_copies, &evt_strip_l2g_done);
// Events for the copies into the EDM-prep inputs and the wait lists built from the
// pixel-clustering and strip-L2G events. The copy enqueues are on lines not shown.
435 cl::Event evt_pixel_edm_input_copy, evt_strip_edm_input_copy;
436 std::vector<cl::Event> evts_pixel_clustering_done{evt_pixel_clustering_done};
437 std::vector<cl::Event> evts_strip_l2g_done{evt_strip_l2g_done};
// Each EDM-prep kernel is enqueued to wait for its own input copy.
443 cl::Event evt_pixel_edm_prep_done, evt_strip_edm_prep_done;
444 std::vector<cl::Event> evts_pixel_edm_input_copied{evt_pixel_edm_input_copy};
445 std::vector<cl::Event> evts_strip_edm_input_copied{evt_strip_edm_input_copy};
447 m_queue.enqueueTask(m_pixelEdmPrepKernel, &evts_pixel_edm_input_copied, &evt_pixel_edm_prep_done);
448 m_queue.enqueueTask(m_stripEdmPrepKernel, &evts_strip_edm_input_copied, &evt_strip_edm_prep_done);
// Non-blocking device-to-host reads of the EDM-prep outputs, each enqueued to wait for
// its EDM-prep kernel. Byte counts are sizeof(uint32_t) times the destination vector's
// current size (assumes the output vectors hold 32-bit elements and are already sized
// — TODO confirm).
457 cl::Event evt_pixel_edm_read_done, evt_strip_edm_read_done;
458 std::vector<cl::Event> evts_pixel_edm_prep_done{evt_pixel_edm_prep_done};
459 std::vector<cl::Event> evts_strip_edm_prep_done{evt_strip_edm_prep_done};
461 m_queue.enqueueReadBuffer(m_edmPixelOutputBuffer, CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &evts_pixel_edm_prep_done, &evt_pixel_edm_read_done);
462 m_queue.enqueueReadBuffer(m_edmStripOutputBuffer, CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &evts_strip_edm_prep_done, &evt_strip_edm_read_done);
// Block the host until both reads have completed.
464 cl::Event::waitForEvents(std::vector<cl::Event>{evt_pixel_edm_read_done, evt_strip_edm_read_done});
// When an input vector holds exactly 6 words, overwrite the first word of the matching
// output with 0.
// NOTE(review): 6 presumably marks an input with no hit payload (framing words only) —
// confirm and consider a named constant.
467 if (pixelInput->size() == 6) (*FPGAPixelOutput)[0] = 0;
468 if (stripInput->size() == 6) (*FPGAStripOutput)[0] = 0;
// Non-blocking device-to-host read of the pixel cluster output into pixelClusterOut,
// enqueued to wait for the pixel clustering kernel.
// NOTE(review): the read is non-blocking and no wait on evt_pixel_cluster_output_read is
// visible in this excerpt; it must complete before the host scans pixelClusterOut below
// — confirm the wait exists on the lines not shown.
474 cl::Event evt_pixel_cluster_output_read;
477 m_queue.enqueueReadBuffer(m_pixelClusterOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * pixelClusterOut.size(), pixelClusterOut.data(), &evts_pixel_clustering_done, &evt_pixel_cluster_output_read);
// Scan pixelClusterOut backwards from its last element for the word 0xcd00000000000000.
// On a match nWords is left at that word's index; with no match the loop ends at -1.
481 int nWords =
static_cast<int>(pixelClusterOut.size()) - 1;
482 for (; nWords >= 0; nWords--)
484 if (pixelClusterOut[nWords] == 0xcd00000000000000)
break;
// Failure path reporting that the footer word was not found. The guarding condition is
// on a line not shown (presumably nWords < 0 — confirm).
489 ATH_MSG_ERROR(
"Footer 0xcd00000000000000 not found in pixelClusterOut; cannot determine nWords");
return StatusCode::FAILURE;
// A strictly positive index is advanced by 3; an index of 0 is left unchanged.
// NOTE(review): the meaning of the +3 offset is not established by the visible code — confirm.
493 if (nWords > 0) nWords += 3;
// Zero up to 8 words starting at index nWords, stopping early at the end of the vector.
496 for (
int i = 0; i < 8 && (nWords + i) < static_cast<int>(pixelClusterOut.size()); i++)
498 pixelClusterOut[nWords + i] = 0;
// Chained slicing-engine and inside-out kernel sequence fed from pixelClusterOut.
// Bind the slicing-engine buffers; arg 2 of the input kernel is nWords as computed above.
503 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
504 m_slicingEngineInput.setArg(2,
static_cast<unsigned long long>(nWords));
505 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
// Bind the inside-out buffers as arg 0 of the respective kernels.
507 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
508 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
// Non-blocking (CL_FALSE) host-to-device copy of the whole pixelClusterOut vector (not
// just nWords words) into the slicing-engine input buffer.
// NOTE(review): confirm m_slicingEngineInputBuffer is at least pixelClusterOut.size()
// words large on this path; its allocation for this path is not visible here.
511 cl::Event evt_se_input_write;
513 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixelClusterOut.size() *
sizeof(uint64_t), pixelClusterOut.data(),
nullptr, &evt_se_input_write);
// The slicing-engine input kernel is enqueued to wait for the write event; the output
// kernel is enqueued with no wait list.
518 cl::Event evt_se_kernel_input_done, evt_se_kernel_output_done;
519 std::vector<cl::Event> evts_after_se_input_write{evt_se_input_write};
521 m_queue.enqueueTask(m_slicingEngineInput, &evts_after_se_input_write, &evt_se_kernel_input_done);
522 m_queue.enqueueTask(m_slicingEngineOutput,
nullptr, &evt_se_kernel_output_done);
// Event for the transfer into the inside-out input and the wait list built from the
// slicing-engine output event. The transfer enqueue itself is on lines not shown.
526 cl::Event evt_io_input_transfer;
527 std::vector<cl::Event> evts_after_se_output_done{evt_se_kernel_output_done};
// The inside-out input kernel is enqueued to wait for the transfer event; the output
// kernel is enqueued with no wait list.
533 cl::Event evt_io_kernel_input_done, evt_io_kernel_output_done;
534 std::vector<cl::Event> evts_after_io_input_transfer{evt_io_input_transfer};
536 m_queue.enqueueTask(m_insideOutInput, &evts_after_io_input_transfer, &evt_io_kernel_input_done);
537 m_queue.enqueueTask(m_insideOutOutput,
nullptr, &evt_io_kernel_output_done);
// Non-blocking device-to-host read into *FPGATrackOutput, enqueued to wait for the
// inside-out output kernel. The byte count is taken from the vector's current size.
544 cl::Event evt_io_output_read;
545 std::vector<cl::Event> evts_before_insideout_read{evt_io_kernel_output_done};
546 m_queue.enqueueReadBuffer(m_insideOutOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &evts_before_insideout_read, &evt_io_output_read);
// Block the host until the read has completed, then pass the result to dumpHexData.
548 cl::Event::waitForEvents(std::vector<cl::Event>{evt_io_output_read});
549 dumpHexData((*FPGATrackOutput),
"HW_insideOut.txt", ctx);
// Accumulate, per stage, the span from the input kernel's START timestamp to the output
// kernel's END timestamp.
554 m_SE_kernelTime += evt_se_kernel_output_done.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_se_kernel_input_done.getProfilingInfo<CL_PROFILING_COMMAND_START>();
555 m_IO_kernelTime += evt_io_kernel_output_done.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_io_kernel_input_done.getProfilingInfo<CL_PROFILING_COMMAND_START>();
// Report success to the caller.
559 return StatusCode::SUCCESS;