43 return StatusCode::SUCCESS;
// Raw 64-bit word pointers into the stripOutput / pixelOutput storage.
65 uint64_t *stripClusters = stripOutput.data();
66 uint64_t *pixelClusters = pixelOutput.data();
// Word 0 of each output is taken as the cluster count; the uint64_t value is
// narrowed to unsigned int by these initialisations.
// NOTE(review): no emptiness check on either output is visible in this excerpt
// before the [0] reads -- confirm one exists in the elided lines.
68 unsigned int numStripClusters = stripClusters[0];
71 unsigned int numPixelClusters = pixelClusters[0];
// Metadata record allocated here and held by a unique_ptr.
74 std::unique_ptr<EFTrackingTransient::Metadata>
metadata =
75 std::make_unique<EFTrackingTransient::Metadata>();
// For each detector type the cluster count and the RDO-index size are both set
// from the same count.
77 metadata->numOfStripClusters = numStripClusters;
78 metadata->scRdoIndexSize = numStripClusters;
79 metadata->numOfPixelClusters = numPixelClusters;
80 metadata->pcRdoIndexSize = numPixelClusters;
// Four counted loops follow. Only their headers are part of this excerpt, so
// what each body does cannot be described from here. The first two run
// numStripClusters iterations, the last two numPixelClusters iterations.
93 for (
unsigned int i = 0;
i < numStripClusters;
i++)
147 for (
unsigned int i = 0;
i < numStripClusters;
i++)
165 for (
unsigned int i = 0;
i < numPixelClusters;
i++)
247 for (
unsigned int i = 0;
i < numPixelClusters;
i++)
267 return StatusCode::SUCCESS;
// Validity check on the strip container handle. The FAILURE return that follows
// is presumably its guarded branch (the braces and any log message are not part
// of this excerpt).
276 if (!scContainerHandle.
isValid())
279 return StatusCode::FAILURE;
// Same check for the pixel container handle.
283 if (!pcContainerHandle.
isValid())
286 return StatusCode::FAILURE;
// Vectors of 64-bit words, default-constructed (empty) at this point; how they
// are filled is not visible in this excerpt.
290 std::vector<uint64_t> encodedStripClusters;
291 std::vector<uint64_t> encodedPixelClusters;
// Kernel argument slots: 0 = pixel input, 1 = strip input,
// 2 = pixel output, 3 = strip output.
302 kernel.setArg<cl::Buffer>(0, inputPixelBuffer);
303 kernel.setArg<cl::Buffer>(1, inputStripBuffer);
304 kernel.setArg<cl::Buffer>(2, outputPixelBuffer);
305 kernel.setArg<cl::Buffer>(3, outputStripBuffer);
// Host-to-device copies of the encoded cluster words. The blocking flag is
// CL_FALSE, so these calls only enqueue the copy; no wait list and no event are
// passed (both NULL).
309 acc_queue.enqueueWriteBuffer(inputPixelBuffer, CL_FALSE, 0,
sizeof(
uint64_t) * encodedPixelClusters.size(), encodedPixelClusters.data(), NULL, NULL);
310 acc_queue.enqueueWriteBuffer(inputStripBuffer, CL_FALSE, 0,
sizeof(
uint64_t) * encodedStripClusters.size(), encodedStripClusters.data(), NULL, NULL);
// Enqueue the kernel as a single task.
316 acc_queue.enqueueTask(kernel);
// Device-to-host copies into pixelChainOutput / stripChainOutput, sized by the
// current size of each destination vector. These are also non-blocking.
// NOTE(review): the destination vectors are only safe to read once the queue has
// drained; no finish()/wait is visible in this excerpt before the return --
// confirm one exists in the elided lines.
323 acc_queue.enqueueReadBuffer(outputPixelBuffer, CL_FALSE, 0,
sizeof(
uint64_t) * pixelChainOutput.size(), pixelChainOutput.data(), NULL, NULL);
324 acc_queue.enqueueReadBuffer(outputStripBuffer, CL_FALSE, 0,
sizeof(
uint64_t) * stripChainOutput.size(), stripChainOutput.data(), NULL, NULL);
328 return StatusCode::SUCCESS;
// Vectors of 64-bit encoded RDO words, default-constructed here; the code that
// fills them is not part of this excerpt.
341 std::vector<uint64_t> encodedPixelRDO;
342 std::vector<uint64_t> encodedStripRDO;
// Debug dump: one message per encoded pixel word, printed in hexadecimal
// (the stream is switched back to decimal after each value).
348 for (
unsigned int i = 0;
i < encodedPixelRDO.size();
i++)
350 ATH_MSG_DEBUG(
"Pixel RDO[" <<
i <<
"]: " << std::hex << encodedPixelRDO[
i] << std::dec);
// Same dump for the encoded strip words.
352 for (
unsigned int i = 0;
i < encodedStripRDO.size();
i++)
354 ATH_MSG_DEBUG(
"Strip RDO[" <<
i <<
"]: " << std::hex << encodedStripRDO[
i] << std::dec);
// Device input buffers created with CL_MEM_READ_ONLY in m_context, sized to hold
// exactly the encoded RDO words; no host pointer is supplied and the creation
// status is written to err.
// NOTE(review): both constructions write to the same err and no check between
// them is visible in this excerpt, so a failure of the first could be masked --
// confirm. A zero-length RDO vector would also request a zero-byte buffer.
359 cl::Buffer pixelClusterInputBuffer(
m_context, CL_MEM_READ_ONLY,
sizeof(
uint64_t) * encodedPixelRDO.size(), NULL, &
err);
360 cl::Buffer stripClusterInputBuffer(
m_context, CL_MEM_READ_ONLY,
sizeof(
uint64_t) * encodedStripRDO.size(), NULL, &
err);
// Pixel clustering: arg 0 = encoded input, arg 1 = cluster output,
// arg 2 = cluster EDM output.
378 pixelClusteringKernel.setArg<cl::Buffer>(0, pixelClusterInputBuffer);
379 pixelClusteringKernel.setArg<cl::Buffer>(1, pixelClusterOutputBuffer);
380 pixelClusteringKernel.setArg<cl::Buffer>(2, pixelClusterEDMOutputBuffer);
// Strip clustering: same three buffer slots, plus arg 3 = number of encoded
// strip words (the size_t from size() is passed as unsigned int).
383 stripClusteringKernel.setArg<cl::Buffer>(0, stripClusterInputBuffer);
384 stripClusteringKernel.setArg<cl::Buffer>(1, stripClusterOutputBuffer);
385 stripClusteringKernel.setArg<cl::Buffer>(2, stripClusterEDMOutputBuffer);
386 stripClusteringKernel.setArg<
unsigned int>(3, encodedStripRDO.size());
// Pixel L2G: args 0-1 are the two pixel clustering outputs, args 2-3 are the
// L2G output and L2G EDM output buffers.
390 pixelL2GKernel.setArg<cl::Buffer>(0, pixelClusterOutputBuffer);
391 pixelL2GKernel.setArg<cl::Buffer>(1, pixelClusterEDMOutputBuffer);
392 pixelL2GKernel.setArg<cl::Buffer>(2, pixelL2GOutputBuffer);
393 pixelL2GKernel.setArg<cl::Buffer>(3, pixelL2GEDMOutputBuffer);
// Strip L2G: same layout as the pixel L2G kernel, using the strip buffers.
396 stripL2GKernel.setArg<cl::Buffer>(0, stripClusterOutputBuffer);
397 stripL2GKernel.setArg<cl::Buffer>(1, stripClusterEDMOutputBuffer);
398 stripL2GKernel.setArg<cl::Buffer>(2, stripL2GOutputBuffer);
399 stripL2GKernel.setArg<cl::Buffer>(3, stripL2GEDMOutputBuffer);
// EDM preparation: args 0-1 are the pixel and strip L2G EDM outputs,
// args 2-3 are the final pixel and strip EDM output buffers.
403 edmPrepKernel.setArg<cl::Buffer>(0, pixelL2GEDMOutputBuffer);
404 edmPrepKernel.setArg<cl::Buffer>(1, stripL2GEDMOutputBuffer);
405 edmPrepKernel.setArg<cl::Buffer>(2, edmPixelOutputBuffer);
406 edmPrepKernel.setArg<cl::Buffer>(3, edmStripOutputBuffer);
// Non-blocking (CL_FALSE) host-to-device copies of the encoded RDO words. Each
// copy records a completion event that gates the matching clustering kernel.
412 acc_queue.enqueueWriteBuffer(pixelClusterInputBuffer, CL_FALSE, 0,
sizeof(
uint64_t) * encodedPixelRDO.size(), encodedPixelRDO.data(), NULL, &cl_evt_write_pixel_input);
413 acc_queue.enqueueWriteBuffer(stripClusterInputBuffer, CL_FALSE, 0,
sizeof(
uint64_t) * encodedStripRDO.size(), encodedStripRDO.data(), NULL, &cl_evt_write_strip_input);
// Single-event wait lists built from the two write events.
414 std::vector<cl::Event> cl_evt_vec_pixel_input{cl_evt_write_pixel_input};
415 std::vector<cl::Event> cl_evt_vec_strip_input{cl_evt_write_strip_input};
// Clustering kernels: each waits on its own input write and records its own event.
427 acc_queue.enqueueTask(pixelClusteringKernel, &cl_evt_vec_pixel_input, &cl_evt_pixel_clustering);
428 acc_queue.enqueueTask(stripClusteringKernel, &cl_evt_vec_strip_input, &cl_evt_strip_clustering);
// L2G kernels: each waits on the clustering event of the same detector type.
430 std::vector<cl::Event> cl_evt_vec_pixel_clustering{cl_evt_pixel_clustering};
431 std::vector<cl::Event> cl_evt_vec_strip_clustering{cl_evt_strip_clustering};
432 acc_queue.enqueueTask(pixelL2GKernel, &cl_evt_vec_pixel_clustering, &cl_evt_pixel_l2g);
433 acc_queue.enqueueTask(stripL2GKernel, &cl_evt_vec_strip_clustering, &cl_evt_strip_l2g);
// EDM preparation waits on both L2G events.
434 std::vector<cl::Event> cl_evt_vec_l2g{cl_evt_pixel_l2g, cl_evt_strip_l2g};
436 acc_queue.enqueueTask(edmPrepKernel, &cl_evt_vec_l2g, &cl_evt_edm_prep);
// Non-blocking device-to-host copies of the final EDM buffers, sized by the
// current size of each destination vector.
// NOTE(review): no wait list is passed (NULL), so these reads are ordered after
// edmPrepKernel only if acc_queue is an in-order queue -- confirm how the queue
// is created.
444 acc_queue.enqueueReadBuffer(edmPixelOutputBuffer, CL_FALSE, 0,
sizeof(
uint64_t) * pixelChainOutput.size(), pixelChainOutput.data(), NULL, &cl_evt_pixel_cluster_output);
445 acc_queue.enqueueReadBuffer(edmStripOutputBuffer, CL_FALSE, 0,
sizeof(
uint64_t) * stripChainOutput.size(), stripChainOutput.data(), NULL, &cl_evt_strip_cluster_output);
// Per-stage timing readout from the OpenCL events. Each stage takes END minus
// START and logs it divided by 1e6 with an "ms" label (OpenCL profiling counters
// are specified in nanoseconds).
// NOTE(review): profiling queries need events that have completed and a queue
// created with profiling enabled; the preceding reads are non-blocking and no
// wait/finish is visible in this excerpt -- confirm one exists in the elided lines.
// Pixel input write.
450 cl_ulong pixel_input_start = cl_evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
451 cl_ulong pixel_input_end = cl_evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_END>();
452 cl_ulong pixel_input_time = pixel_input_end - pixel_input_start;
454 ATH_MSG_DEBUG(
"Pixel input buffer write time: " << pixel_input_time / 1e6 <<
" ms");
// Strip input write.
457 cl_ulong strip_input_start = cl_evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
458 cl_ulong strip_input_end = cl_evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_END>();
459 cl_ulong strip_input_time = strip_input_end - strip_input_start;
461 ATH_MSG_DEBUG(
"Strip input buffer write time: " << strip_input_time / 1e6 <<
" ms");
// Pixel clustering kernel.
464 cl_ulong pixel_clustering_start = cl_evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
465 cl_ulong pixel_clustering_end = cl_evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>();
466 cl_ulong pixel_clustering_time = pixel_clustering_end - pixel_clustering_start;
468 ATH_MSG_DEBUG(
"Pixel clustering time: " << pixel_clustering_time / 1e6 <<
" ms");
// Strip clustering kernel.
471 cl_ulong strip_clustering_start = cl_evt_strip_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
472 cl_ulong strip_clustering_end = cl_evt_strip_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>();
473 cl_ulong strip_clustering_time = strip_clustering_end - strip_clustering_start;
475 ATH_MSG_DEBUG(
"Strip clustering time: " << strip_clustering_time / 1e6 <<
" ms");
// Pixel L2G kernel.
478 cl_ulong pixel_l2g_start = cl_evt_pixel_l2g.getProfilingInfo<CL_PROFILING_COMMAND_START>();
479 cl_ulong pixel_l2g_end = cl_evt_pixel_l2g.getProfilingInfo<CL_PROFILING_COMMAND_END>();
480 cl_ulong pixel_l2g_time = pixel_l2g_end - pixel_l2g_start;
482 ATH_MSG_DEBUG(
"Pixel L2G time: " << pixel_l2g_time / 1e6 <<
" ms");
// Strip L2G kernel.
485 cl_ulong strip_l2g_start = cl_evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_START>();
486 cl_ulong strip_l2g_end = cl_evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_END>();
487 cl_ulong strip_l2g_time = strip_l2g_end - strip_l2g_start;
489 ATH_MSG_DEBUG(
"Strip L2G time: " << strip_l2g_time / 1e6 <<
" ms");
// EDM preparation kernel.
492 cl_ulong edm_prep_start = cl_evt_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_START>();
493 cl_ulong edm_prep_end = cl_evt_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>();
494 cl_ulong edm_prep_time = edm_prep_end - edm_prep_start;
496 ATH_MSG_DEBUG(
"EDMPrep time: " << edm_prep_time / 1e6 <<
" ms");
// Overall span: from the moment the pixel clustering command was QUEUED to the
// END of EDM preparation. Because the start stamp is QUEUED rather than START,
// the figure includes time spent waiting in the queue although the log text
// calls it "execution time". The strip clustering event is not consulted for
// the start stamp.
499 cl_ulong kernel_start = cl_evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();
500 cl_ulong kernel_end = cl_evt_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>();
501 cl_ulong kernel_time = kernel_end - kernel_start;
503 ATH_MSG_DEBUG(
"Kernel execution time: " << kernel_time / 1e6 <<
" ms");
// Pixel output read-back.
506 cl_ulong pixel_output_start = cl_evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
507 cl_ulong pixel_output_end = cl_evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>();
508 cl_ulong pixel_output_time = pixel_output_end - pixel_output_start;
510 ATH_MSG_DEBUG(
"Pixel output buffer read time: " << pixel_output_time / 1e6 <<
" ms");
// Strip output read-back.
513 cl_ulong strip_output_start = cl_evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
514 cl_ulong strip_output_end = cl_evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>();
515 cl_ulong strip_output_time = strip_output_end - strip_output_start;
517 ATH_MSG_DEBUG(
"Strip output buffer read time: " << strip_output_time / 1e6 <<
" ms");
519 return StatusCode::SUCCESS;
540 return StatusCode::SUCCESS;