155 auto outputVec = std::make_unique<std::vector<uint64_t>>();
164 std::vector<uint64_t> pixelDataIN;
165 std::vector<uint64_t> stripDataIN;
171 int inputIOLength = pixelDataIN.size();
172 auto remainder = inputIOLength % padLength;
175 pixelDataIN.insert(pixelDataIN.end(), to_add, 0);
178 dumpHexData(pixelDataIN,
"FPGATrackSim_slicingIn_pixel.txt", ctx);
179 dumpHexData(stripDataIN,
"FPGATrackSim_slicingIn_strip.txt", ctx);
182 std::vector<uint64_t> dataPixelOut;
183 std::vector<uint64_t> dataStripOut;
187 dumpHexData(dataPixelOut,
"FPGATrackSim_slicingOut_pixel.txt", ctx);
188 dumpHexData(dataStripOut,
"FPGATrackSim_slicingOut_strip.txt", ctx);
191 std::vector<uint64_t> dataInsideOut;
193 dumpHexData(dataInsideOut,
"FPGATrackSim_insideOut.txt", ctx);
196 cl_int err = CL_SUCCESS;
226 else m_slicingEngineInputBuffer = cl::Buffer(
m_context, CL_MEM_READ_WRITE, pixelDataIN.size() *
sizeof(uint64_t),
nullptr, &err);
230 else m_insideOutInputBuffer = cl::Buffer(
m_context, CL_MEM_READ_WRITE, dataPixelOut.size() *
sizeof(uint64_t),
nullptr, &err);
236 cl::Event evtSEWriteIn;
237 cl::Event evtSEKInputDone;
238 cl::Event evtSEKOutputDone;
239 cl::Event evtSEReadOut;
241 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
242 m_slicingEngineInput.setArg(2,
static_cast<unsigned long long>(inputIOLength));
243 ATH_MSG_DEBUG(
"Setting NWords:" <<
static_cast<unsigned long long>(inputIOLength)<<
" with size: "<<pixelDataIN.size());
245 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
248 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixelDataIN.size() *
sizeof(uint64_t), pixelDataIN.data(),
nullptr, &evtSEWriteIn);
253 std::vector<cl::Event> waitAfterSEWrite{evtSEWriteIn};
254 m_queue.enqueueTask(m_slicingEngineInput, &waitAfterSEWrite, &evtSEKInputDone);
257 m_queue.enqueueTask(m_slicingEngineOutput,
nullptr, &evtSEKOutputDone);
263 std::vector<cl::Event> waitForSERead{evtSEKOutputDone};
267 cl::Event::waitForEvents({evtSEReadOut});
269 dumpHexData(out_data,
"HW_slicingOut_pixel.txt", ctx);
271 m_SE_kernelTime += evtSEKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtSEKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
276 cl::Event evtWriteIn;
277 cl::Event evtKInputDone;
278 cl::Event evtKOutputDone;
279 cl::Event evtReadOut;
282 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
283 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
286 m_queue.enqueueWriteBuffer(m_insideOutInputBuffer, CL_TRUE, 0, dataPixelOut.size() *
sizeof(uint64_t), dataPixelOut.data(),
nullptr, &evtWriteIn);
290 std::vector<cl::Event> waitAfterWrite{evtWriteIn};
291 m_queue.enqueueTask(m_insideOutInput, &waitAfterWrite, &evtKInputDone);
292 m_queue.enqueueTask(m_insideOutOutput,
nullptr, &evtKOutputDone);
302 std::vector<cl::Event> waitForRead{evtKOutputDone};
303 m_queue.enqueueReadBuffer( m_insideOutOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &waitForRead, &evtReadOut);
306 cl::Event::waitForEvents({evtReadOut});
307 dumpHexData((*FPGATrackOutput),
"HW_insideOut.txt", ctx);
309 m_IO_kernelTime += evtKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
314 cl::Event evtSEWriteIn;
315 cl::Event evtSEKInputDone;
316 cl::Event evtSEKOutputDone;
318 cl::Event evtBufferTransfer;
321 cl::Event evtKInputDone;
322 cl::Event evtKOutputDone;
323 cl::Event evtReadOut;
326 const size_t pixel_size_bytesIN = pixelDataIN.size() *
sizeof(uint64_t);
328 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
329 m_slicingEngineInput.setArg(2,
static_cast<unsigned long long>(pixelDataIN.size()));
331 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
334 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
335 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
339 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixel_size_bytesIN, pixelDataIN.data(),
nullptr, &evtSEWriteIn);
343 std::vector<cl::Event> waitAfterSEWrite{evtSEWriteIn};
344 m_queue.enqueueTask(m_slicingEngineInput, &waitAfterSEWrite, &evtSEKInputDone);
345 m_queue.enqueueTask(m_slicingEngineOutput,
nullptr, &evtSEKOutputDone);
349 std::vector<cl::Event> waitAfterSE{evtSEKOutputDone};
353 std::vector<cl::Event> waitAfterTransfer{evtBufferTransfer};
354 m_queue.enqueueTask(m_insideOutInput, &waitAfterTransfer, &evtKInputDone);
355 m_queue.enqueueTask(m_insideOutOutput, NULL, &evtKOutputDone);
359 std::vector<cl::Event> waitForRead{evtKOutputDone};
365 m_queue.enqueueReadBuffer( m_insideOutOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &waitForRead, &evtReadOut);
368 cl::Event::waitForEvents({evtReadOut});
369 dumpHexData((*FPGATrackOutput),
"HW_insideOut.txt", ctx);
371 m_SE_kernelTime += evtSEKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtSEKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
372 m_IO_kernelTime += evtKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
381 const int* pixelInputSize{
nullptr}, *stripInputSize{
nullptr};
386 m_pixelClusteringKernel.setArg(0, m_pixelClusterInputBuffer);
387 m_pixelClusteringKernel.setArg(1, m_pixelClusterOutputBuffer);
388 m_pixelClusteringKernel.setArg(2, m_pixelClusterEDMOutputBuffer);
390 m_stripClusteringKernel.setArg(0, m_stripClusterInputBuffer);
391 m_stripClusteringKernel.setArg(1, m_stripClusterOutputBuffer);
392 m_stripClusteringKernel.setArg(2, m_stripClusterEDMOutputBuffer);
393 m_stripClusteringKernel.setArg(3,
static_cast<unsigned int>(*stripInputSize));
395 m_stripL2GKernel.setArg(0, m_stripL2GInputBuffer);
396 m_stripL2GKernel.setArg(1, m_stripL2GEDMInputBuffer);
397 m_stripL2GKernel.setArg(2, m_stripL2GOutputBuffer);
398 m_stripL2GKernel.setArg(3, m_stripL2GEDMOutputBuffer);
400 m_pixelEdmPrepKernel.setArg(0, m_edmPixelInputBuffer);
401 m_pixelEdmPrepKernel.setArg(1, m_edmPixelOutputBuffer);
402 m_stripEdmPrepKernel.setArg(0, m_edmStripInputBuffer);
403 m_stripEdmPrepKernel.setArg(1, m_edmStripOutputBuffer);
406 cl::Event evt_pixel_input_write, evt_strip_input_write;
408 m_queue.enqueueWriteBuffer(m_pixelClusterInputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*pixelInput).size(), (*pixelInput).data(),
nullptr, &evt_pixel_input_write);
409 m_queue.enqueueWriteBuffer(m_stripClusterInputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*stripInput).size(), (*stripInput).data(),
nullptr, &evt_strip_input_write);
411 std::vector<cl::Event> evts_pixel_input_write{evt_pixel_input_write};
412 std::vector<cl::Event> evts_strip_input_write{evt_strip_input_write};
415 cl::Event evt_pixel_clustering_done, evt_strip_clustering_done;
417 m_queue.enqueueTask(m_pixelClusteringKernel, &evts_pixel_input_write, &evt_pixel_clustering_done);
418 m_queue.enqueueTask(m_stripClusteringKernel, &evts_strip_input_write, &evt_strip_clustering_done);
421 cl::Event evt_strip_l2g_input_copy_clusters, evt_strip_l2g_input_copy_edm;
422 std::vector<cl::Event> evts_strip_clustering_done{evt_strip_clustering_done};
427 std::vector<cl::Event> evts_strip_l2g_input_copies{evt_strip_l2g_input_copy_clusters, evt_strip_l2g_input_copy_edm};
430 cl::Event evt_strip_l2g_done;
431 m_queue.enqueueTask(m_stripL2GKernel, &evts_strip_l2g_input_copies, &evt_strip_l2g_done);
434 cl::Event evt_pixel_edm_input_copy, evt_strip_edm_input_copy;
435 std::vector<cl::Event> evts_pixel_clustering_done{evt_pixel_clustering_done};
436 std::vector<cl::Event> evts_strip_l2g_done{evt_strip_l2g_done};
442 cl::Event evt_pixel_edm_prep_done, evt_strip_edm_prep_done;
443 std::vector<cl::Event> evts_pixel_edm_input_copied{evt_pixel_edm_input_copy};
444 std::vector<cl::Event> evts_strip_edm_input_copied{evt_strip_edm_input_copy};
446 m_queue.enqueueTask(m_pixelEdmPrepKernel, &evts_pixel_edm_input_copied, &evt_pixel_edm_prep_done);
447 m_queue.enqueueTask(m_stripEdmPrepKernel, &evts_strip_edm_input_copied, &evt_strip_edm_prep_done);
456 cl::Event evt_pixel_edm_read_done, evt_strip_edm_read_done;
457 std::vector<cl::Event> evts_pixel_edm_prep_done{evt_pixel_edm_prep_done};
458 std::vector<cl::Event> evts_strip_edm_prep_done{evt_strip_edm_prep_done};
460 m_queue.enqueueReadBuffer(m_edmPixelOutputBuffer, CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &evts_pixel_edm_prep_done, &evt_pixel_edm_read_done);
461 m_queue.enqueueReadBuffer(m_edmStripOutputBuffer, CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &evts_strip_edm_prep_done, &evt_strip_edm_read_done);
463 cl::Event::waitForEvents(std::vector<cl::Event>{evt_pixel_edm_read_done, evt_strip_edm_read_done});
466 if (pixelInput->size() == 6) (*FPGAPixelOutput)[0] = 0;
467 if (stripInput->size() == 6) (*FPGAStripOutput)[0] = 0;
473 cl::Event evt_pixel_cluster_output_read;
476 m_queue.enqueueReadBuffer(m_pixelClusterOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * pixelClusterOut.size(), pixelClusterOut.data(), &evts_pixel_clustering_done, &evt_pixel_cluster_output_read);
480 int nWords =
static_cast<int>(pixelClusterOut.size()) - 1;
481 for (; nWords >= 0; nWords--)
483 if (pixelClusterOut[nWords] == 0xcd00000000000000)
break;
488 ATH_MSG_ERROR(
"Footer 0xcd00000000000000 not found in pixelClusterOut; cannot determine nWords");
return StatusCode::FAILURE;
492 if (nWords > 0) nWords += 3;
495 for (
int i = 0; i < 8 && (nWords + i) < static_cast<int>(pixelClusterOut.size()); i++)
497 pixelClusterOut[nWords + i] = 0;
502 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
503 m_slicingEngineInput.setArg(2,
static_cast<unsigned long long>(nWords));
504 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
506 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
507 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
510 cl::Event evt_se_input_write;
512 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixelClusterOut.size() *
sizeof(uint64_t), pixelClusterOut.data(),
nullptr, &evt_se_input_write);
517 cl::Event evt_se_kernel_input_done, evt_se_kernel_output_done;
518 std::vector<cl::Event> evts_after_se_input_write{evt_se_input_write};
520 m_queue.enqueueTask(m_slicingEngineInput, &evts_after_se_input_write, &evt_se_kernel_input_done);
521 m_queue.enqueueTask(m_slicingEngineOutput,
nullptr, &evt_se_kernel_output_done);
525 cl::Event evt_io_input_transfer;
526 std::vector<cl::Event> evts_after_se_output_done{evt_se_kernel_output_done};
532 cl::Event evt_io_kernel_input_done, evt_io_kernel_output_done;
533 std::vector<cl::Event> evts_after_io_input_transfer{evt_io_input_transfer};
535 m_queue.enqueueTask(m_insideOutInput, &evts_after_io_input_transfer, &evt_io_kernel_input_done);
536 m_queue.enqueueTask(m_insideOutOutput,
nullptr, &evt_io_kernel_output_done);
543 cl::Event evt_io_output_read;
544 std::vector<cl::Event> evts_before_insideout_read{evt_io_kernel_output_done};
545 m_queue.enqueueReadBuffer(m_insideOutOutputBuffer, CL_FALSE, 0,
sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &evts_before_insideout_read, &evt_io_output_read);
547 cl::Event::waitForEvents(std::vector<cl::Event>{evt_io_output_read});
548 dumpHexData((*FPGATrackOutput),
"HW_insideOut.txt", ctx);
553 m_SE_kernelTime += evt_se_kernel_output_done.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_se_kernel_input_done.getProfilingInfo<CL_PROFILING_COMMAND_START>();
554 m_IO_kernelTime += evt_io_kernel_output_done.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_io_kernel_input_done.getProfilingInfo<CL_PROFILING_COMMAND_START>();
558 return StatusCode::SUCCESS;