103 const std::vector<uint64_t>* pixelInput{
nullptr}, *stripInput{
nullptr};
107 const int* pixelInputSize{
nullptr}, *stripInputSize{
nullptr};
118 size_t bufferIndex = ctx.slot() % nthreads;
121 size_t pixelStartClusterIndex = ctx.slot() % m_pixelStartClusteringKernels.size();
122 size_t pixelEndClusterIndex = ctx.slot() % m_pixelEndClusteringKernels.size();
123 size_t stripStartClusterIndex = ctx.slot() % m_stripStartClusteringKernels.size();
124 size_t stripEndClusterIndex = ctx.slot() % m_stripEndClusteringKernels.size();
127 const cl::CommandQueue &acc_queue =
m_acc_queues[bufferIndex];
129 ATH_MSG_INFO(
"Thread number "<<ctx.slot()<<
" running on buffer "<<bufferIndex<<
" pixelStartClusterIndex: "<< pixelStartClusterIndex<<
" stripStartClusterIndex: "<< stripStartClusterIndex<<
" stripEndClusterIndex: "<< stripEndClusterIndex);
131 cl::Kernel &pixelStartClusteringKernel = m_pixelStartClusteringKernels[pixelStartClusterIndex];
132 cl::Kernel &pixelEndClusteringKernel = m_pixelEndClusteringKernels[pixelEndClusterIndex];
133 cl::Kernel &stripStartClusteringKernel = m_stripStartClusteringKernels[stripStartClusterIndex];
134 cl::Kernel &stripEndClusteringKernel = m_stripEndClusteringKernels[stripEndClusterIndex];
138 pixelStartClusteringKernel.setArg(2,
static_cast<unsigned long long>(*pixelInputSize));
144 stripStartClusteringKernel.setArg(2,
static_cast<unsigned long long>(*stripInputSize));
149 cl::Event evt_write_pixel_input;
150 cl::Event evt_write_strip_input;
152 acc_queue.enqueueWriteBuffer(
m_pixelClusterInputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint64_t) * (*pixelInput).size(), (*pixelInput).data(), NULL, &evt_write_pixel_input);
153 acc_queue.enqueueWriteBuffer(
m_stripClusterInputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint64_t) * (*stripInput).size(), (*stripInput).data(), NULL, &evt_write_strip_input);
154 std::vector<cl::Event> evt_vec_pixel_input{evt_write_pixel_input};
155 std::vector<cl::Event> evt_vec_strip_input{evt_write_strip_input};
158 cl::Event evt_pixel_start_clustering;
159 cl::Event evt_pixel_end_clustering;
160 cl::Event evt_strip_start_clustering;
161 cl::Event evt_strip_end_clustering;
166 acc_queue.enqueueTask(pixelStartClusteringKernel, &evt_vec_pixel_input, &evt_pixel_start_clustering);
167 acc_queue.enqueueTask(pixelEndClusteringKernel, NULL , &evt_pixel_end_clustering);
169 acc_queue.enqueueTask(stripStartClusteringKernel, &evt_vec_strip_input, &evt_strip_start_clustering);
170 acc_queue.enqueueTask(stripEndClusteringKernel, NULL, &evt_strip_end_clustering);
174 cl::Event evt_pixel_cluster_output;
175 cl::Event evt_strip_cluster_output;
177 std::vector<cl::Event> evt_vec_pixel_done{evt_pixel_end_clustering};
178 std::vector<cl::Event> evt_vec_strip_done{evt_strip_end_clustering};
189 acc_queue.enqueueReadBuffer(
m_edmPixelOutputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &evt_vec_pixel_done, &evt_pixel_cluster_output);
190 acc_queue.enqueueReadBuffer(
m_edmStripOutputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &evt_vec_strip_done, &evt_strip_cluster_output);
192 std::vector<cl::Event> wait_for_reads = { evt_pixel_cluster_output, evt_strip_cluster_output};
193 cl::Event::waitForEvents(wait_for_reads);
196 if(*pixelInputSize == 6) (*FPGAPixelOutput)[0] = 0;
197 if(*stripInputSize == 6) (*FPGAStripOutput)[0] = 0;
202 cl_ulong pixel_input_time = evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
206 cl_ulong strip_input_time = evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
210 cl_ulong pixel_clustering_time = evt_pixel_end_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_start_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
214 cl_ulong strip_clustering_time = evt_strip_end_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_start_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
218 cl_ulong pixel_output_time = evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
222 cl_ulong strip_output_time = evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
225 return StatusCode::SUCCESS;