// Compose the compute-unit-qualified kernel name in the form
//   <kernel_name>:{<kernel_name>_<cu>}
// NOTE(review): this looks like the "kernel:{compute_unit}" naming syntax used
// to select a specific CU when constructing a kernel -- confirm against the
// code that consumes full_cu_name (not visible in this excerpt).
16 std::string full_cu_name = kernel_name +
":{" + kernel_name +
"_" +
std::to_string(cu) +
"}";
// Local helper lambda (captures by reference) that takes a file name and,
// judging by its use below, yields the path that is actually opened.
// Only part of its body is visible in this excerpt.
24 auto withEvt = [&](
const std::string&
fname) {
// Event number read from the event context `ctx`.
25 const auto evt = ctx.eventID().event_number();
// Branch taken when `dot` equals npos, i.e. the preceding search (not visible
// here) found no match -- presumably a file name without an extension; confirm.
27 if (
dot == std::string::npos) {
// Open the output stream on the path produced by withEvt(dataDescriptor).
// NOTE(review): no check of the stream state is visible in this excerpt.
35 std::ofstream
outputFile(withEvt(dataDescriptor));
// Write one value per line as hexadecimal, right-aligned in a 16-character
// field padded with '0'. std::setw applies to the next insertion only, so it
// is re-applied for every value; std::hex and the fill character persist.
38 outputFile << std::hex << std::setw(16) << std::setfill(
'0') <<
d <<
'\n';
// OpenCL status code, initialised to success; the calls that update it are not
// visible in this excerpt.
65 cl_int
err = CL_SUCCESS;
// Abort with FAILURE as soon as an OpenCL call has reported an error.
94 if (
err != CL_SUCCESS) {
95 return StatusCode::FAILURE;
// Normal exit of the enclosing function (its signature is not visible here).
117 return StatusCode::SUCCESS;
// Heap-allocated, initially empty vector of 64-bit words.
// presumably the container later handed over as the algorithm output -- confirm.
130 auto outputVec = std::make_unique<std::vector<uint64_t>>();
// Input words for the pixel and strip streams. How they are filled is not
// visible in this excerpt.
139 std::vector<uint64_t> pixelDataIN;
140 std::vector<uint64_t> stripDataIN;
// Dump both input vectors to text files through dumpHexData, passing the
// event context along.
144 dumpHexData(pixelDataIN,
"FPGATrackSim_slicingIn_pixel.txt", ctx);
145 dumpHexData(stripDataIN,
"FPGATrackSim_slicingIn_strip.txt", ctx);
// Pixel/strip words associated with the "slicingOut" dump files below
// (filling code not visible here).
148 std::vector<uint64_t> dataPixelOut;
149 std::vector<uint64_t> dataStripOut;
153 dumpHexData(dataPixelOut,
"FPGATrackSim_slicingOut_pixel.txt", ctx);
154 dumpHexData(dataStripOut,
"FPGATrackSim_slicingOut_strip.txt", ctx);
// Words associated with the "insideOut" dump file (filling code not visible here).
157 std::vector<uint64_t> dataInsideOut;
159 dumpHexData(dataInsideOut,
"FPGATrackSim_insideOut.txt", ctx);
// OpenCL status code used by the buffer-creation calls that follow.
162 cl_int
err = CL_SUCCESS;
// Allocate the slicing-engine input buffer: read-only memory flag, one
// uint64_t per element of pixelDataIN, no host pointer, status written to err.
// NOTE(review): if pixelDataIN is empty the requested size is 0, which
// clCreateBuffer rejects (CL_INVALID_BUFFER_SIZE) -- confirm that an empty
// input is handled before this point.
185 m_slicingEngineInputBuffer = cl::Buffer(
m_context, CL_MEM_READ_ONLY, pixelDataIN.size() *
sizeof(
uint64_t),
nullptr, &
err);
// Allocate the inside-out input buffer: read-write memory flag, one uint64_t
// per element of dataPixelOut, no host pointer, status written to err.
// NOTE(review): err is overwritten by each creation call; the check between
// the two calls is not visible in this excerpt -- confirm each result is tested.
187 m_insideOutInputBuffer = cl::Buffer(
m_context, CL_MEM_READ_WRITE, dataPixelOut.size() *
sizeof(
uint64_t),
nullptr, &
err);
// --- Slicing-engine-only path (as far as visible in this excerpt) ---
// Bind kernel arguments: input kernel arg 0 is the input buffer and arg 2 is
// the number of input words; output kernel arg 1 is the output buffer.
198 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
199 m_slicingEngineInput.setArg(2,
static_cast<unsigned long long>(pixelDataIN.size()));
201 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
// Non-blocking host-to-device copy of pixelDataIN; completion is signalled
// through evtSEWriteIn.
// NOTE(review): the status codes returned by the enqueue calls in this
// section are not captured on the visible lines.
204 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixelDataIN.size() *
sizeof(
uint64_t), pixelDataIN.data(),
nullptr, &evtSEWriteIn);
// The input kernel waits for the write to finish. The output kernel is
// enqueued with no wait list.
// NOTE(review): presumably the two kernels are coupled on the device rather
// than through events -- confirm.
208 std::vector<cl::Event> waitAfterSEWrite{evtSEWriteIn};
209 m_queue.enqueueTask(m_slicingEngineInput, &waitAfterSEWrite, &evtSEKInputDone);
210 m_queue.enqueueTask(m_slicingEngineOutput,
nullptr, &evtSEKOutputDone);
// Wait list for the device-to-host read (the read call itself is not visible here).
215 std::vector<cl::Event> waitForSERead{evtSEKOutputDone};
// Block the host until the read-back event has completed.
219 cl::Event::waitForEvents({evtSEReadOut});
// Dump the data read back into out_data for inspection.
221 dumpHexData(out_data,
"HW_slicingOut_pixel.txt", ctx);
// Accumulate the span from the input kernel's start to the output kernel's
// end. OpenCL reports these profiling counters in nanoseconds and only when
// profiling is enabled on the command queue (queue creation not visible here).
223 m_SE_kernelTime += evtSEKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtSEKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
// --- Inside-out-only path (as far as visible in this excerpt) ---
// Bind arg 0 of the input and output kernels to their respective buffers.
234 m_insideOutInput.setArg(0, m_insideOutInputBuffer);
235 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
// Host-to-device copy of dataPixelOut.
// NOTE(review): this write is blocking (CL_TRUE) whereas the corresponding
// writes in the other paths use CL_FALSE -- confirm this is intentional; the
// event is still recorded and used as a dependency below.
238 m_queue.enqueueWriteBuffer(m_insideOutInputBuffer, CL_TRUE, 0, dataPixelOut.size() *
sizeof(
uint64_t), dataPixelOut.data(),
nullptr, &evtWriteIn);
// The input kernel waits on the write; the output kernel is enqueued with no
// wait list.
242 std::vector<cl::Event> waitAfterWrite{evtWriteIn};
243 m_queue.enqueueTask(m_insideOutInput, &waitAfterWrite, &evtKInputDone);
244 m_queue.enqueueTask(m_insideOutOutput,
nullptr, &evtKOutputDone);
// Non-blocking read-back into *FPGATrackOutput once the output kernel is done.
// The number of bytes read is taken from the current size of *FPGATrackOutput,
// so the vector must already have been sized (sizing code not visible here).
253 std::vector<cl::Event> waitForRead{evtKOutputDone};
254 m_queue.enqueueReadBuffer( m_insideOutOutputBuffer, CL_FALSE, 0,
sizeof(
uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &waitForRead, &evtReadOut);
// Block the host until the read has completed, then dump the result.
257 cl::Event::waitForEvents({evtReadOut});
258 dumpHexData((*FPGATrackOutput),
"HW_insideOut.txt", ctx);
// Accumulate the span from the input kernel's start to the output kernel's end.
260 m_IO_kernelTime += evtKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
// --- Chained path: slicing engine followed by inside-out ---
// Size in bytes of the pixel input payload.
274 const size_t pixel_size_bytesIN = pixelDataIN.size() *
sizeof(
uint64_t);
// Slicing-engine arguments: input buffer (arg 0), number of input words
// (arg 2), and the output buffer on the output kernel (arg 1).
276 m_slicingEngineInput.setArg(0, m_slicingEngineInputBuffer);
277 m_slicingEngineInput.setArg(2,
static_cast<unsigned long long>(pixelDataIN.size()));
279 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
// The inside-out input kernel reads directly from the slicing-engine output
// buffer, so no host round trip is needed between the two stages.
282 m_insideOutInput.setArg(0, m_slicingEngineOutputBuffer);
283 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
// Non-blocking host-to-device copy of the pixel input.
287 m_queue.enqueueWriteBuffer(m_slicingEngineInputBuffer, CL_FALSE, 0, pixel_size_bytesIN, pixelDataIN.data(),
nullptr, &evtSEWriteIn);
// Slicing-engine input kernel waits for the write; the output kernel has no
// wait list.
291 std::vector<cl::Event> waitAfterSEWrite{evtSEWriteIn};
292 m_queue.enqueueTask(m_slicingEngineInput, &waitAfterSEWrite, &evtSEKInputDone);
293 m_queue.enqueueTask(m_slicingEngineOutput,
nullptr, &evtSEKOutputDone);
// Inside-out input kernel waits for the slicing-engine output kernel; the
// inside-out output kernel has no wait list.
297 std::vector<cl::Event> waitAfterSE{evtSEKOutputDone};
298 m_queue.enqueueTask(m_insideOutInput, &waitAfterSE, &evtKInputDone);
299 m_queue.enqueueTask(m_insideOutOutput,
nullptr, &evtKOutputDone);
// Non-blocking read-back into *FPGATrackOutput after the inside-out output
// kernel; the byte count comes from the vector's current size.
303 std::vector<cl::Event> waitForRead{evtKOutputDone};
309 m_queue.enqueueReadBuffer( m_insideOutOutputBuffer, CL_FALSE, 0,
sizeof(
uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &waitForRead, &evtReadOut);
// Block the host until the read has completed, then dump the result.
312 cl::Event::waitForEvents({evtReadOut});
313 dumpHexData((*FPGATrackOutput),
"HW_insideOut.txt", ctx);
// Accumulate per-stage timings: start of each stage's input kernel to the end
// of its output kernel.
315 m_SE_kernelTime += evtSEKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtSEKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
316 m_IO_kernelTime += evtKOutputDone.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evtKInputDone.getProfilingInfo<CL_PROFILING_COMMAND_START>();
// --- Full pipeline: kernel argument binding ---
// Pixel clustering: input buffer -> pixel-cluster EDM output buffer.
326 m_pixelClusteringKernel.setArg(0, m_pixelClusterInputBuffer);
327 m_pixelClusteringKernel.setArg(1, m_pixelClusterEDMOutputBuffer);
// Strip clustering: input buffer, two output buffers, and the number of
// input words (arg 3).
329 m_stripClusteringKernel.setArg(0, m_stripClusterInputBuffer);
330 m_stripClusteringKernel.setArg(1, m_stripClusterOutputBuffer);
331 m_stripClusteringKernel.setArg(2, m_stripClusterEDMOutputBuffer);
332 m_stripClusteringKernel.setArg(3,
static_cast<unsigned int>((*stripInput).size()));
// Strip L2G kernel: takes both strip-clustering outputs and fills its own
// output and EDM output buffers.
334 m_stripL2GKernel.setArg(0, m_stripClusterOutputBuffer);
335 m_stripL2GKernel.setArg(1, m_stripClusterEDMOutputBuffer);
336 m_stripL2GKernel.setArg(2, m_stripL2GOutputBuffer);
337 m_stripL2GKernel.setArg(3, m_stripL2GEDMOutputBuffer);
// EDM preparation kernels: the pixel one reads the pixel-cluster EDM buffer,
// the strip one reads the L2G EDM buffer; each fills the buffer that is later
// read back to the host.
339 m_pixelEdmPrepKernel.setArg(0, m_pixelClusterEDMOutputBuffer);
340 m_pixelEdmPrepKernel.setArg(1, m_edmPixelOutputBuffer);
341 m_stripEdmPrepKernel.setArg(0, m_stripL2GEDMOutputBuffer);
342 m_stripEdmPrepKernel.setArg(1, m_edmStripOutputBuffer);
// Slicing engine reads the same buffer that feeds pixel clustering.
// NOTE(review): here the size is passed as arg 3 with type unsigned int,
// whereas the other call sites in this file pass it as arg 2 with type
// unsigned long long -- confirm which index/type the kernel signature expects.
344 m_slicingEngineInput.setArg(0, m_pixelClusterInputBuffer);
345 m_slicingEngineInput.setArg(3,
static_cast<unsigned int>((*pixelInput).size()));
347 m_slicingEngineOutput.setArg(1, m_slicingEngineOutputBuffer);
// Inside-out stage consumes the slicing-engine output buffer directly.
349 m_insideOutInput.setArg(0, m_slicingEngineOutputBuffer);
350 m_insideOutOutput.setArg(0, m_insideOutOutputBuffer);
// --- Full pipeline: transfers, kernel launches and read-back ---
// Non-blocking host-to-device copies of the pixel and strip inputs, each with
// its own completion event.
356 m_queue.enqueueWriteBuffer(m_pixelClusterInputBuffer, CL_FALSE, 0,
sizeof(
uint64_t) * (*pixelInput).size(), (*pixelInput).data(),
nullptr, &evt_write_pixel_input);
357 m_queue.enqueueWriteBuffer(m_stripClusterInputBuffer, CL_FALSE, 0,
sizeof(
uint64_t) * (*stripInput).size(), (*stripInput).data(),
nullptr, &evt_write_strip_input);
358 std::vector<cl::Event> evt_vec_pixel_input{evt_write_pixel_input};
359 std::vector<cl::Event> evt_vec_strip_input{evt_write_strip_input};
// Each clustering kernel waits only for its own input transfer.
375 m_queue.enqueueTask(m_pixelClusteringKernel, &evt_vec_pixel_input, &evt_pixel_clustering);
376 m_queue.enqueueTask(m_stripClusteringKernel, &evt_vec_strip_input, &evt_strip_clustering);
// Strip L2G runs after strip clustering.
378 std::vector<cl::Event> evt_vec_strip_clustering{evt_strip_clustering};
379 m_queue.enqueueTask(m_stripL2GKernel, &evt_vec_strip_clustering, &evt_strip_l2g);
// EDM preparation: pixel waits on pixel clustering, strip waits on L2G.
381 std::vector<cl::Event> evt_vec_pixelEDM {evt_pixel_clustering};
382 std::vector<cl::Event> evt_vec_strip_l2g{evt_strip_l2g};
383 m_queue.enqueueTask(m_pixelEdmPrepKernel, &evt_vec_pixelEDM, &evt_pixel_edm_prep);
384 m_queue.enqueueTask(m_stripEdmPrepKernel, &evt_vec_strip_l2g, &evt_strip_edm_prep);
// Slicing-engine input kernel also waits on pixel clustering; its output
// kernel is enqueued with no wait list.
387 std::vector<cl::Event> evt_vec_pixelClustering{evt_pixel_clustering};
388 m_queue.enqueueTask(m_slicingEngineInput, &evt_vec_pixelClustering, &evt_Slicing_InputDone);
389 m_queue.enqueueTask(m_slicingEngineOutput,
nullptr, &evt_Slicing_OutputDone);
// Inside-out input kernel waits on the slicing-engine output kernel; its
// output kernel is enqueued with no wait list.
392 std::vector<cl::Event> evt_vec_slicing{evt_Slicing_OutputDone};
393 m_queue.enqueueTask(m_insideOutInput, &evt_vec_slicing, &evt_Insideout_InputDone);
394 m_queue.enqueueTask(m_insideOutOutput,
nullptr, &evt_Insideout_OutputDone);
// Three non-blocking read-backs, each gated on the kernel that produces its
// buffer. The number of bytes read is taken from the current size of each
// destination vector, so those must be sized beforehand (not visible here).
406 std::vector<cl::Event> evt_vec_pixel_edm_prep{evt_pixel_edm_prep};
407 std::vector<cl::Event> evt_vec_strip_edm_prep{evt_strip_edm_prep};
408 std::vector<cl::Event> waitForInsideout{evt_Insideout_OutputDone};
409 m_queue.enqueueReadBuffer(m_edmPixelOutputBuffer, CL_FALSE, 0,
sizeof(
uint64_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &evt_vec_pixel_edm_prep, &evt_pixel_cluster_output);
410 m_queue.enqueueReadBuffer(m_edmStripOutputBuffer, CL_FALSE, 0,
sizeof(
uint64_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &evt_vec_strip_edm_prep, &evt_strip_cluster_output);
411 m_queue.enqueueReadBuffer( m_insideOutOutputBuffer, CL_FALSE, 0,
sizeof(
uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &waitForInsideout, &evt_track_output);
// Block the host until all three read-backs have completed.
// NOTE(review): the status codes returned by the enqueue/wait calls are not
// captured on the visible lines -- confirm failures are detected elsewhere.
413 std::vector<cl::Event> wait_for_reads = { evt_pixel_cluster_output, evt_strip_cluster_output, evt_track_output };
414 cl::Event::waitForEvents(wait_for_reads);
// Success returns of the enclosing function(s); the surrounding control flow
// is not visible in this excerpt.
418 return StatusCode::SUCCESS;
431 return StatusCode::SUCCESS;