8 #include <xrt/xrt_bo.h>
9 #include <xrt/xrt_device.h>
10 #include <xrt/xrt_kernel.h>
11 #include <xrt/xrt_uuid.h>
// NOTE: only scattered lines of the enclosing routine are visible in this excerpt;
// the loop bodies and the code that fills the container are not shown.
// Container of strings; its elements are bound as `cuName` in the range-for below
// (presumably compute-unit names -- confirm against the code that fills it).
45 std::vector<std::string> listofCUs;
// Counted loop: i takes the values 0 .. nthreads-1.
58 for(
unsigned int i = 0;
i < nthreads;
i++)
// Visits every entry of listofCUs by const reference (no per-iteration string copy).
72 for (
const auto& cuName: listofCUs)
// Reports success to the caller.
94 return StatusCode::SUCCESS;
// Read-only handles to the encoded pixel / strip input words.
// Both start out null; they are expected to be pointed at real data before use
// (the assignment is not visible in this excerpt).
103 const std::vector<uint64_t>* pixelInput{nullptr};
const std::vector<uint64_t>* stripInput{nullptr};
// Read-only handles to the matching input sizes; also null until assigned.
107 const int* pixelInputSize{nullptr};
const int* stripInputSize{nullptr};
// Map the event slot onto one of the per-thread buffer sets (round-robin via modulo).
// NOTE(review): `% nthreads` is undefined behaviour if nthreads is 0 -- confirm it is
// validated before this point.
118 size_t bufferIndex = ctx.slot() % nthreads;
// Pick one kernel object from each of the four kernel lists, again by slot modulo
// list size, so slots are spread over the available entries.
// NOTE(review): each modulo is undefined if the corresponding list is empty --
// confirm that setup guarantees at least one entry per list.
121 size_t pixelStartClusterIndex = ctx.slot() % m_pixelStartClusteringKernels.size();
122 size_t pixelEndClusterIndex = ctx.slot() % m_pixelEndClusteringKernels.size();
123 size_t stripStartClusterIndex = ctx.slot() % m_stripStartClusteringKernels.size();
124 size_t stripEndClusterIndex = ctx.slot() % m_stripEndClusteringKernels.size();
// Command queue paired with the selected buffer set; bound by const reference.
127 const cl::CommandQueue &acc_queue =
m_acc_queues[bufferIndex];
// Trace the slot-to-resource mapping for this call: the buffer index and all four
// kernel indices (pixel start/end, strip start/end), so the log fully identifies
// which kernel objects were selected.
129 ATH_MSG_INFO(
"Thread number "<<ctx.slot()<<
" running on buffer "<<bufferIndex<<
" pixelStartClusterIndex: "<< pixelStartClusterIndex<<
" pixelEndClusterIndex: "<< pixelEndClusterIndex<<
" stripStartClusterIndex: "<< stripStartClusterIndex<<
" stripEndClusterIndex: "<< stripEndClusterIndex);
// Bind mutable references to the four kernel objects chosen by the indices above.
// They are non-const because setArg() below modifies kernel state.
131 cl::Kernel &pixelStartClusteringKernel = m_pixelStartClusteringKernels[pixelStartClusterIndex];
132 cl::Kernel &pixelEndClusteringKernel = m_pixelEndClusteringKernels[pixelEndClusterIndex];
133 cl::Kernel &stripStartClusteringKernel = m_stripStartClusteringKernels[stripStartClusterIndex];
134 cl::Kernel &stripEndClusteringKernel = m_stripEndClusteringKernels[stripEndClusterIndex];
// Argument 2 of each "start" kernel receives the input size, widened from int to
// unsigned long long.
// NOTE(review): the size pointers are dereferenced here, so they must be non-null by
// now; a negative int would wrap to a huge unsigned value -- confirm sizes are >= 0.
138 pixelStartClusteringKernel.setArg(2,
static_cast<unsigned long long>(*pixelInputSize));
144 stripStartClusteringKernel.setArg(2,
static_cast<unsigned long long>(*stripInputSize));
// Wait lists for the "start" kernels: each must not run before its input write
// (evt_write_pixel_input / evt_write_strip_input) has completed.
154 std::vector<cl::Event> evt_vec_pixel_input{evt_write_pixel_input};
155 std::vector<cl::Event> evt_vec_strip_input{evt_write_strip_input};
// Launch the pixel kernels. The start kernel is gated on the input write; the end
// kernel is enqueued with no wait list (nullptr) and only records its completion event.
// NOTE(review): the end kernel does not wait on the start kernel's event -- presumably
// the two are coupled on the device side; confirm this is intended.
166 acc_queue.enqueueTask(pixelStartClusteringKernel, &evt_vec_pixel_input, &evt_pixel_start_clustering);
167 acc_queue.enqueueTask(pixelEndClusteringKernel, nullptr, &evt_pixel_end_clustering);
// Same launch pattern for the strip kernels.
169 acc_queue.enqueueTask(stripStartClusteringKernel, &evt_vec_strip_input, &evt_strip_start_clustering);
170 acc_queue.enqueueTask(stripEndClusteringKernel, nullptr, &evt_strip_end_clustering);
// Wait lists used by the output read-back: completion of each "end" kernel.
177 std::vector<cl::Event> evt_vec_pixel_done{evt_pixel_end_clustering};
178 std::vector<cl::Event> evt_vec_strip_done{evt_strip_end_clustering};
// Non-blocking (CL_FALSE) read of the pixel output buffer for this buffer set, starting
// at offset 0. The byte count is taken from the host container's current size(), so the
// container must already be sized to the amount of data expected. The read is gated on
// the pixel "end" kernel event and signals evt_pixel_cluster_output when done.
189 acc_queue.enqueueReadBuffer(
m_edmPixelOutputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(
uint32_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &evt_vec_pixel_done, &evt_pixel_cluster_output);
// Same read-back for the strip output, gated on the strip "end" kernel event.
190 acc_queue.enqueueReadBuffer(
m_edmStripOutputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(
uint32_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &evt_vec_strip_done, &evt_strip_cluster_output);
// Block the calling thread until both read-backs have completed; the host containers
// are only safe to inspect after this point.
192 std::vector<cl::Event> wait_for_reads = { evt_pixel_cluster_output, evt_strip_cluster_output};
193 cl::Event::waitForEvents(wait_for_reads);
// When an input size is exactly 6, the first output word is forced to 0.
// NOTE(review): 6 looks like a sentinel for "no real payload" -- confirm its meaning
// and consider naming the constant. Indexing [0] also assumes a non-empty output container.
196 if(*pixelInputSize == 6) (*FPGAPixelOutput)[0] = 0;
197 if(*stripInputSize == 6) (*FPGAStripOutput)[0] = 0;
// Durations derived from OpenCL event profiling counters (END minus START of a command).
// Per the OpenCL specification these counters are in nanoseconds.
// NOTE(review): profiling queries require the queue to have been created with profiling
// enabled -- queue creation is not visible here, confirm.
// Input transfer times (host -> device writes).
202 cl_ulong pixel_input_time = evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
206 cl_ulong strip_input_time = evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
// Clustering times span two commands: from START of the "start" kernel to END of the
// "end" kernel.
210 cl_ulong pixel_clustering_time = evt_pixel_end_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_start_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
214 cl_ulong strip_clustering_time = evt_strip_end_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_start_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
// Output transfer times (device -> host reads).
218 cl_ulong pixel_output_time = evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
222 cl_ulong strip_output_time = evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
// Reports success to the caller.
225 return StatusCode::SUCCESS;
// Reports success to the caller; the enclosing routine is not visible in this excerpt.
243 return StatusCode::SUCCESS;
// Build an XRT xclbin object from the value of the m_xclbin property.
248 xrt::xclbin xrt_xclbin(
m_xclbin.value());
// Log identifying information read from the xclbin: target device name and UUID.
251 ATH_MSG_INFO(
"fpga name: "<<xrt_xclbin.get_fpga_device_name());
252 ATH_MSG_INFO(
"uuid: "<<xrt_xclbin.get_uuid().to_string());
// Walk every kernel described by the xclbin.
// NOTE: the inner loop that declares `computeUnit` and the closing braces are not
// visible in this excerpt.
254 for (
const xrt::xclbin::kernel &kernel : xrt_xclbin.get_kernels()) {
255 const std::string& kernelName = kernel.get_name();
261 const std::string& computeUnitName = computeUnit.get_name();
// Drop the first kernelName.size() + 1 characters of the compute-unit name.
// NOTE(review): this assumes the name has the form "<kernelName><sep><cu>"; substr()
// throws std::out_of_range if the name is shorter than that offset -- confirm.
262 const std::string computeUnitIsolatedName = computeUnitName.substr(kernelName.size() + 1);
// Re-assemble the name as "<kernelName>:{<cu>}".
264 const std::string computeUnitUsableName = kernelName +
":{" + computeUnitIsolatedName +
"}";
// Collect the formatted name for the caller.
267 cuNames.push_back(computeUnitUsableName);