ATLAS Offline Software
F100StreamIntegrationAlg.cxx
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3  */
4 
6 #include "AthenaKernel/Chrono.h"
8 #include <xrt/xrt_bo.h>
9 #include <xrt/xrt_device.h>
10 #include <xrt/xrt_kernel.h>
11 #include <xrt/xrt_uuid.h>
12 
14 {
// Body of the initialisation step: retrieves the chrono service, lists the compute units (CUs)
// of the xclbin, creates one command queue plus one set of device buffers per FPGA thread, and
// creates one OpenCL kernel object per recognised CU.
// NOTE(review): this listing is incomplete — the signature line and several statements are absent
// (the embedded line numbers jump 18->20, 24->26, 29->31 and 34->37->40), so both Chrono scopes
// below appear empty here. Confirm against the original file before editing this function.
16  {
17  ATH_MSG_INFO("Running on the FPGA accelerator");
18 
20 
21  ATH_CHECK(m_chronoSvc.retrieve());
22 
23  {
24  Athena::Chrono chrono("Platform and device initlize", m_chronoSvc.get());
26  }
27 
28  {
29  Athena::Chrono chrono("CL::loadProgram", m_chronoSvc.get());
31  }
32  ATH_MSG_INFO("loading "<<m_xclbin);
33 
34 
37 
40 
// Names of all compute units found in the xclbin, filled by getListofCUs().
41  std::vector<std::string> listofCUs;
42 
43  getListofCUs(listofCUs);
44 
// NOTE(review): err receives the status of every queue/buffer construction below but is never
// read in this block, so a failed allocation goes unnoticed here.
45  cl_int err = 0;
46 
// Number of queue/buffer sets: the FPGAThreads property, or the number of event slots when the
// property is smaller than 1.
47  unsigned int nthreads = m_FPGAThreads.value();
48 
49  if(m_FPGAThreads.value() < 1){
50  nthreads = SG::getNSlots();
51  }
52 
53  // create the buffers
// One iteration appends exactly one command queue and one buffer to every buffer list, so all
// lists end up with nthreads entries and can share a single index.
54  for(unsigned int i = 0; i < nthreads; i++)
55  {
56  m_acc_queues.emplace_back(m_context, m_accelerator, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
57 
58  // Input
59  m_pixelClusterInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_ONLY, EFTrackingTransient::PIXEL_CONTAINER_INPUT_BUF_SIZE * sizeof(uint64_t), NULL, &err));
60  m_stripClusterInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_ONLY, EFTrackingTransient::STRIP_CONTAINER_INPUT_BUF_SIZE * sizeof(uint64_t), NULL, &err));
61 
62  // Clustering
63  m_pixelClusterOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE,EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err));
64  m_stripClusterOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err));
65  m_pixelClusterEDMOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE,EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err));
66  m_stripClusterEDMOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err));
67 
68  // L2G
69  m_pixelL2GOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err));
70  m_pixelL2GEDMOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err));
71  m_stripL2GOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err));
72  m_stripL2GEDMOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err));
73 
74  // EDMPrep
75  m_edmPixelOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE * sizeof(uint64_t), NULL, &err));
76  m_edmStripOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE * sizeof(uint64_t), NULL, &err));
77  }
78 
79  // Create kernels for each one of CUs that is inside device
// Each CU name is matched by substring against the configured kernel names; the first matching
// branch wins, so the order of the tests matters.
// NOTE(review): the pixel "end cluster" name is tested before the pixel "end cluster EDM" name;
// if the former is a substring of the latter the EDM CUs would land in the first list — confirm
// against the configured property values.
80  for (const auto& cuName: listofCUs)
81  {
82  // Pixel clustering
83  if(cuName.find(m_pixelEndClusterKernelName.value()) != std::string::npos) m_pixelEndClusteringClusterKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
84  else if(cuName.find(m_pixelEndClusterEdmKernelName.value()) != std::string::npos) m_pixelEndClusteringEDMKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
85  else if(cuName.find(m_pixelStartClusterKernelName.value()) != std::string::npos) m_pixelStartClusteringKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
86 
87  // Strip clustering
88  else if(cuName.find(m_stripEndClusterKernelName.value()) != std::string::npos) m_stripEndClusteringKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
89  else if(cuName.find(m_stripStartClusterKernelName.value()) != std::string::npos) m_stripStartClusteringKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
90 
91  // Pixel L2G
92  else if(cuName.find(m_pixelL2GKernelName.value()) != std::string::npos) m_pixelL2GKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
93 
94  // Strip L2G
95  else if(cuName.find(m_stripL2GKernelName.value()) != std::string::npos) m_stripL2GKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
96 
97  // EDM prep
98  else if(cuName.find(m_pixelEdmKernelName.value()) != std::string::npos) m_pixelEdmPrepKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
99 
100  else if(cuName.find(m_stripEdmKernelName.value()) != std::string::npos) m_stripEdmPrepKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
101  else
102  {
103  ATH_MSG_WARNING("Do not recognize kernel name: "<<cuName);
104  }
105  }
106 
// Report how many CUs were found per kernel family.
// NOTE(review): there is no line for m_pixelL2GKernels, unlike every other family.
// NOTE(review): nothing here rejects an empty kernel list, while execute() takes ctx.slot()
// modulo these sizes.
107  ATH_MSG_INFO(m_pixelStartClusterKernelName.value()<<" size: "<<m_pixelStartClusteringKernels.size());
108  ATH_MSG_INFO(m_pixelEndClusterKernelName.value()<<" size: "<<m_pixelEndClusteringClusterKernels.size());
109  ATH_MSG_INFO(m_pixelEndClusterEdmKernelName.value()<<" size: "<<m_pixelEndClusteringEDMKernels.size());
110  ATH_MSG_INFO(m_stripStartClusterKernelName.value()<<" size: "<<m_stripStartClusteringKernels.size());
111  ATH_MSG_INFO(m_stripEndClusterKernelName.value()<<" size: "<<m_stripEndClusteringKernels.size());
112  ATH_MSG_INFO(m_stripL2GKernelName.value()<<" size: "<<m_stripL2GKernels.size());
113  ATH_MSG_INFO(m_pixelEdmKernelName.value()<<" size: "<<m_pixelEdmPrepKernels.size());
114  ATH_MSG_INFO(m_stripEdmKernelName.value()<<" size: "<<m_stripEdmPrepKernels.size());
115 
116 
117  return StatusCode::SUCCESS;
118  }
119 
120  StatusCode F100StreamIntegrationAlg::execute(const EventContext &ctx) const
121  {
122  ATH_MSG_DEBUG("Executing F100StreamIntegrationAlg");
123  m_numEvents++;
124 
126  const std::vector<uint64_t>* pixelInput{nullptr}, *stripInput{nullptr};
127  ATH_CHECK(SG::get(pixelInput, m_FPGAPixelRDO, ctx));
128  ATH_CHECK(SG::get(stripInput, m_FPGAStripRDO, ctx));
129 
130 
131  // logic
132  unsigned int nthreads = m_FPGAThreads.value();
133 
134  if(m_FPGAThreads.value() < 1){
135  nthreads = SG::getNSlots();
136  }
137 
138  size_t bufferIndex = ctx.slot() % nthreads;
139 
140  // Get index for each of the kernels
141  size_t pixelStartClusterIndex = ctx.slot() % m_pixelStartClusteringKernels.size();
142  size_t pixelEndClusterIndex = ctx.slot() % m_pixelEndClusteringClusterKernels.size();
143  size_t pixelEndClusterEDMIndex = ctx.slot() % m_pixelEndClusteringEDMKernels.size();
144  size_t stripStartClusterIndex = ctx.slot() % m_stripStartClusteringKernels.size();
145  size_t stripEndClusterIndex = ctx.slot() % m_stripEndClusteringKernels.size();
146  size_t pixelL2GIndex = m_pixelL2GKernels.size() ? ctx.slot() % m_pixelL2GKernels.size() : 0;
147  size_t stripL2GIndex = ctx.slot() % m_stripL2GKernels.size();
148  size_t pixelEDMIndex = m_pixelEdmPrepKernels.size() ? ctx.slot() % m_pixelEdmPrepKernels.size() : 0;
149  size_t stripEDMIndex = m_stripEdmPrepKernels.size() ? ctx.slot() % m_stripEdmPrepKernels.size() : 0;
150 
151  const cl::CommandQueue &acc_queue = m_acc_queues[bufferIndex];
152 
153  ATH_MSG_INFO("Thread number "<<ctx.slot()<<" running on buffer "<<bufferIndex<<" pixelStartClusterIndex: "<< pixelStartClusterIndex<<" stripStartClusterIndex: "<< stripStartClusterIndex<<" stripEndClusterIndex: "<< stripEndClusterIndex<<" stripL2GIndex: "<< stripL2GIndex<<" pixelEDMIndex: "<< pixelEDMIndex<<" stripEDMIndex: "<< stripEDMIndex);
154 
155  cl::Kernel &pixelStartClusteringKernel = m_pixelStartClusteringKernels[pixelStartClusterIndex];
156  cl::Kernel &pixelEndClusteringClusterKernel = m_pixelEndClusteringClusterKernels[pixelEndClusterIndex];
157  cl::Kernel &pixelEndClusteringEDMKernel = m_pixelEndClusteringEDMKernels[pixelEndClusterEDMIndex];
158  cl::Kernel &stripStartClusteringKernel = m_stripStartClusteringKernels[stripStartClusterIndex];
159  cl::Kernel &stripEndClusteringKernel = m_stripEndClusteringKernels[stripEndClusterIndex];
160  cl::Kernel &stripL2GKernel = m_stripL2GKernels[stripL2GIndex];
161  cl::Kernel &pixelL2GKernel = m_pixelL2GKernels[pixelL2GIndex];
162 
163  cl::Kernel &pixelEdmPrepKernel = m_pixelEdmPrepKernels[pixelEDMIndex];
164  cl::Kernel &stripEdmPrepKernel = m_stripEdmPrepKernels[stripEDMIndex];
165 
166 
167  // Set kernel arguments
168  pixelStartClusteringKernel.setArg(0, m_pixelClusterInputBufferList[bufferIndex]);
169  pixelStartClusteringKernel.setArg(2, static_cast<unsigned long long>((*pixelInput).size()));
170 
171  pixelEndClusteringClusterKernel.setArg(1, m_pixelClusterOutputBufferList[bufferIndex]);
172  pixelEndClusteringEDMKernel.setArg(1, m_pixelClusterEDMOutputBufferList[bufferIndex]);
173 
174  stripStartClusteringKernel.setArg(0, m_stripClusterInputBufferList[bufferIndex]);
175  stripStartClusteringKernel.setArg(2, static_cast<unsigned long long>((*stripInput).size()));
176 
177  stripEndClusteringKernel.setArg(1, m_stripClusterOutputBufferList[bufferIndex]);
178 
179  pixelL2GKernel.setArg(0, m_pixelClusterOutputBufferList[bufferIndex]);
180  pixelL2GKernel.setArg(1, m_pixelClusterEDMOutputBufferList[bufferIndex]);
181  pixelL2GKernel.setArg(2, m_pixelL2GOutputBufferList[bufferIndex]);
182  pixelL2GKernel.setArg(3, m_pixelL2GEDMOutputBufferList[bufferIndex]);
183 
184 
185  stripL2GKernel.setArg(0, m_stripClusterOutputBufferList[bufferIndex]);
186  stripL2GKernel.setArg(1, m_stripClusterEDMOutputBufferList[bufferIndex]);
187  stripL2GKernel.setArg(2, m_stripL2GOutputBufferList[bufferIndex]);
188  stripL2GKernel.setArg(3, m_stripL2GEDMOutputBufferList[bufferIndex]);
189 
190  pixelEdmPrepKernel.setArg(0, m_pixelL2GEDMOutputBufferList[bufferIndex]);
191  pixelEdmPrepKernel.setArg(1, m_edmPixelOutputBufferList[bufferIndex]);
192  stripEdmPrepKernel.setArg(0, m_stripL2GEDMOutputBufferList[bufferIndex]);
193  stripEdmPrepKernel.setArg(1, m_edmStripOutputBufferList[bufferIndex]);
194 
195 
196 
197  // Start the transfers
198  cl::Event evt_write_pixel_input;
199  cl::Event evt_write_strip_input;
200 
201  acc_queue.enqueueWriteBuffer(m_pixelClusterInputBufferList[bufferIndex], CL_FALSE, 0, sizeof(uint64_t) * (*pixelInput).size(), (*pixelInput).data(), NULL, &evt_write_pixel_input);
202  acc_queue.enqueueWriteBuffer(m_stripClusterInputBufferList[bufferIndex], CL_FALSE, 0, sizeof(uint64_t) * (*stripInput).size(), (*stripInput).data(), NULL, &evt_write_strip_input);
203  std::vector<cl::Event> evt_vec_pixel_input{evt_write_pixel_input};
204  std::vector<cl::Event> evt_vec_strip_input{evt_write_strip_input};
205 
206 
207  cl::Event evt_pixel_start_clustering;
208  cl::Event evt_pixel_end_clustering_cluster;
209  cl::Event evt_pixel_end_clustering_edm;
210  cl::Event evt_strip_start_clustering;
211  cl::Event evt_strip_end_clustering;
212  cl::Event evt_strip_l2g;
213  cl::Event evt_pixel_l2g;
214  cl::Event evt_edm_prep;
215  cl::Event evt_pixel_edm_prep;
216  cl::Event evt_strip_edm_prep;
217  {
218  Athena::Chrono chrono("Kernel execution", m_chronoSvc.get());
219  acc_queue.enqueueTask(pixelStartClusteringKernel, &evt_vec_pixel_input, &evt_pixel_start_clustering);
220  acc_queue.enqueueTask(pixelEndClusteringClusterKernel, NULL , &evt_pixel_end_clustering_cluster);
221  acc_queue.enqueueTask(pixelEndClusteringEDMKernel, NULL , &evt_pixel_end_clustering_edm);
222  acc_queue.enqueueTask(stripStartClusteringKernel, &evt_vec_strip_input, &evt_strip_start_clustering);
223  acc_queue.enqueueTask(stripEndClusteringKernel, NULL, &evt_strip_end_clustering);
224 
225  std::vector<cl::Event> evt_vec_pixel_clustering{
226  evt_pixel_end_clustering_cluster,
227  evt_pixel_end_clustering_edm,
228  };
229 
230  acc_queue.enqueueTask(pixelL2GKernel, &evt_vec_pixel_clustering, &evt_pixel_l2g);
231 
232  std::vector<cl::Event> evt_vec_strip_clustering{evt_strip_end_clustering};
233  acc_queue.enqueueTask(stripL2GKernel, &evt_vec_strip_clustering, &evt_strip_l2g);
234 
235  std::vector<cl::Event> evt_vec_pixel_l2g{evt_pixel_l2g};
236  acc_queue.enqueueTask(pixelEdmPrepKernel, &evt_vec_pixel_l2g, &evt_pixel_edm_prep);
237 
238  std::vector<cl::Event> evt_vec_strip_l2g{evt_strip_l2g};
239  acc_queue.enqueueTask(stripEdmPrepKernel, &evt_vec_strip_l2g, &evt_strip_edm_prep);
240 
241  }
242 
243  cl::Event evt_pixel_cluster_output;
244  cl::Event evt_strip_cluster_output;
245 
246  std::vector<cl::Event> evt_vec_pixel_edm_prep;
247  std::vector<cl::Event> evt_vec_strip_edm_prep;
248 
249  evt_vec_pixel_edm_prep.push_back(evt_pixel_edm_prep);
250  evt_vec_strip_edm_prep.push_back(evt_strip_edm_prep);
251 
252 
253  // output handles
254 
256  ATH_CHECK(FPGAPixelOutput.record(std::make_unique<std::vector<uint64_t> >(EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE, 0)));
257 
259  ATH_CHECK(FPGAStripOutput.record(std::make_unique<std::vector<uint64_t> >(EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE, 0)));
260 
261  acc_queue.enqueueReadBuffer(m_edmPixelOutputBufferList[bufferIndex], CL_FALSE, 0, sizeof(uint64_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &evt_vec_pixel_edm_prep, &evt_pixel_cluster_output);
262  acc_queue.enqueueReadBuffer(m_edmStripOutputBufferList[bufferIndex], CL_FALSE, 0, sizeof(uint64_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &evt_vec_strip_edm_prep, &evt_strip_cluster_output);
263 
264  std::vector<cl::Event> wait_for_reads = { evt_pixel_cluster_output, evt_strip_cluster_output };
265  cl::Event::waitForEvents(wait_for_reads);
266 
267 
268  if(pixelInput->size() == 6) (*FPGAPixelOutput)[0] = 0; // if no pixel input, set the first element to 0
269  if(stripInput->size() == 6) (*FPGAStripOutput)[0] = 0; // if no strip input, set the first element to 0
270 
271 
272  // calculate the time for the kernel execution
273  // get the time of writing pixel input buffer
274  cl_ulong pixel_input_time = evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
275  m_pixelInputTime += pixel_input_time;
276  ATH_MSG_DEBUG("Pixel input buffer write time: " << pixel_input_time / 1e6 << " ms");
277 
278  // get the time of writing strip input buffer
279  cl_ulong strip_input_time = evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
280  m_stripInputTime += strip_input_time;
281  ATH_MSG_DEBUG("Strip input buffer write time: " << strip_input_time / 1e6 << " ms");
282 
283  // get the time of pixel clustering_cluster
284  cl_ulong pixel_clustering_cluster_time = evt_pixel_end_clustering_cluster.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_start_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
285  m_pixelClusteringTime += pixel_clustering_cluster_time;
286  ATH_MSG_DEBUG("Pixel clustering_cluster time: " << pixel_clustering_cluster_time / 1e6 << " ms");
287 
288  cl_ulong pixel_clustering_edm_time = evt_pixel_end_clustering_edm.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_start_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
289  m_pixelClusteringTime += pixel_clustering_edm_time;
290  ATH_MSG_DEBUG("Pixel clustering_edm time: " << pixel_clustering_edm_time / 1e6 << " ms");
291 
292  // get the time of strip clustering
293  cl_ulong strip_clustering_time = evt_strip_end_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_start_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
294  m_stripClusteringTime += strip_clustering_time;
295  ATH_MSG_DEBUG("Strip clustering time: " << strip_clustering_time / 1e6 << " ms");
296 
297  // get the time of pixel L2G
298  cl_ulong pixel_l2g_time = evt_pixel_l2g.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_l2g.getProfilingInfo<CL_PROFILING_COMMAND_START>();
299  m_pixelL2GTime += pixel_l2g_time;
300  ATH_MSG_DEBUG("Pixel L2G time: " << pixel_l2g_time / 1e6 << " ms");
301 
302  // get the time of strip L2G
303  cl_ulong strip_l2g_time = evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_START>();
304  m_stripL2GTime += strip_l2g_time;
305  ATH_MSG_DEBUG("Strip L2G time: " << strip_l2g_time / 1e6 << " ms");
306 
307  // get the time of EDMPrep
308  cl_ulong pixel_edm_prep_time = evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_START>();
309  cl_ulong strip_edm_prep_time = evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_START>();
310 
311  m_pixelEdmPrepTime += pixel_edm_prep_time;
312  ATH_MSG_DEBUG("PixelEDMPrep time: " << pixel_edm_prep_time / 1e6 << " ms");
313 
314  m_stripEdmPrepTime += strip_edm_prep_time;
315  ATH_MSG_DEBUG("StripEDMPrep time: " << strip_edm_prep_time / 1e6 << " ms");
316 
317 
318  // get the time of the whole kernel execution
319  cl_ulong kernel_start = evt_pixel_start_clustering.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();
320  cl_ulong kernel_end = std::max(evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>(), evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>());
321  m_kernelTime += (kernel_end - kernel_start);
322  ATH_MSG_DEBUG("Kernel execution time: " << (kernel_end - kernel_start) / 1e6 << " ms");
323 
324  // get the time of reading pixel output buffer
325  cl_ulong pixel_output_time = evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
326  m_pixelOutputTime += pixel_output_time;
327  ATH_MSG_DEBUG("Pixel output buffer read time: " << pixel_output_time / 1e6 << " ms");
328 
329  // get the time of reading strip output buffer
330  cl_ulong strip_output_time = evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
331  m_stripOutputTime += strip_output_time;
332  ATH_MSG_DEBUG("Strip output buffer read time: " << strip_output_time / 1e6 << " ms");
333 
334  return StatusCode::SUCCESS;
335  }
336 
338  {
339 
340  ATH_MSG_INFO("Finalizing F100StreamIntegrationAlg");
341  ATH_MSG_INFO("Number of events: " << m_numEvents);
342 
343  if(m_numEvents > 0){
344  ATH_MSG_INFO("Pixel input ave time: " << m_pixelInputTime / m_numEvents / 1e6 << " ms");
345  ATH_MSG_INFO("Strip input ave time: " << m_stripInputTime / m_numEvents / 1e6 << " ms");
346  ATH_MSG_INFO("Pixel clustering ave time: " << m_pixelClusteringTime / m_numEvents / 1e6 << " ms");
347  ATH_MSG_INFO("Strip clustering ave time: " << m_stripClusteringTime / m_numEvents / 1e6 << " ms");
348  ATH_MSG_INFO("Pixel L2G ave time: " << m_pixelL2GTime / m_numEvents / 1e6 << " ms");
349  ATH_MSG_INFO("Strip L2G ave time: " << m_stripL2GTime / m_numEvents / 1e6 << " ms");
350  ATH_MSG_INFO("PixelEDMPrep ave time: " << m_pixelEdmPrepTime / m_numEvents / 1e6 << " ms");
351  ATH_MSG_INFO("StripEDMPrep ave time: " << m_stripEdmPrepTime / m_numEvents / 1e6 << " ms");
352  ATH_MSG_INFO("Kernel execution ave time: " << m_kernelTime / m_numEvents / 1e6 << " ms");
353  ATH_MSG_INFO("Pixel output ave time: " << m_pixelOutputTime / m_numEvents / 1e6 << " ms");
354  ATH_MSG_INFO("Strip output ave time: " << m_stripOutputTime / m_numEvents / 1e6 << " ms");
355  }
356 
357  return StatusCode::SUCCESS;
358  }
359 
360  void F100StreamIntegrationAlg::getListofCUs(std::vector<std::string>& cuNames)
361  {
362  xrt::xclbin xrt_xclbin(m_xclbin);
363 
364  ATH_MSG_INFO("xsa name: "<<xrt_xclbin.get_xsa_name());
365  ATH_MSG_INFO("fpga name: "<<xrt_xclbin.get_fpga_device_name());
366  ATH_MSG_INFO("uuid: "<<xrt_xclbin.get_uuid().to_string());
367 
368  for (const xrt::xclbin::kernel &kernel : xrt_xclbin.get_kernels()) {
369  const std::string& kernelName = kernel.get_name();
370 
371  ATH_MSG_INFO("kernelName: "<<kernelName);
372 
373 
374  for (const xrt::xclbin::ip &computeUnit : kernel.get_cus()) {
375  const std::string& computeUnitName = computeUnit.get_name();
376  const std::string computeUnitIsolatedName = computeUnitName.substr(kernelName.size() + 1);
377 
378  const std::string computeUnitUsableName = kernelName + ":{" + computeUnitIsolatedName + "}";
379 
380  ATH_MSG_INFO("CU name: "<<computeUnitUsableName);
381  cuNames.push_back(computeUnitUsableName);
382  }
383  }
384  }
385 
386 } // namespace EFTrackingFPGAIntegration
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_FPGAStripRDO
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAStripRDO
Definition: F100StreamIntegrationAlg.h:45
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_FPGAStripOutput
SG::WriteHandleKey< std::vector< uint64_t > > m_FPGAStripOutput
Definition: F100StreamIntegrationAlg.h:48
IntegrationBase::m_accelerator
cl::Device m_accelerator
Device object for the accelerator card.
Definition: IntegrationBase.h:66
IntegrationBase::initialize
virtual StatusCode initialize() override
Detect the OpenCL devices and prepare OpenCL context.
Definition: IntegrationBase.cxx:16
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_stripL2GEDMOutputBufferList
std::vector< cl::Buffer > m_stripL2GEDMOutputBufferList
Definition: F100StreamIntegrationAlg.h:113
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_stripClusteringTime
std::atomic< cl_ulong > m_stripClusteringTime
Time for strip clustering.
Definition: F100StreamIntegrationAlg.h:76
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_stripClusterInputBufferList
std::vector< cl::Buffer > m_stripClusterInputBufferList
Definition: F100StreamIntegrationAlg.h:103
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_stripClusterOutputBufferList
std::vector< cl::Buffer > m_stripClusterOutputBufferList
Definition: F100StreamIntegrationAlg.h:106
EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE
constexpr unsigned long PIXEL_CONTAINER_BUF_SIZE
Definition: EFTrackingTransient.h:36
ATH_MSG_INFO
#define ATH_MSG_INFO(x)
Definition: AthMsgStreamMacros.h:31
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_pixelEdmKernelName
Gaudi::Property< std::string > m_pixelEdmKernelName
Name of the FPGA kernel.
Definition: F100StreamIntegrationAlg.h:55
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_pixelL2GTime
std::atomic< cl_ulong > m_pixelL2GTime
Time for pixel L2G.
Definition: F100StreamIntegrationAlg.h:77
make_unique
std::unique_ptr< T > make_unique(Args &&... args)
Definition: SkimmingToolEXOT5.cxx:23
max
constexpr double max()
Definition: ap_fixedTest.cxx:33
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_stripOutputTime
std::atomic< cl_ulong > m_stripOutputTime
Time for strip output buffer read.
Definition: F100StreamIntegrationAlg.h:82
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_pixelClusterOutputBufferList
std::vector< cl::Buffer > m_pixelClusterOutputBufferList
Definition: F100StreamIntegrationAlg.h:105
JiveXML::Event
struct Event_t Event
Definition: ONCRPCServer.h:65
IntegrationBase::m_context
cl::Context m_context
Context object for the application.
Definition: IntegrationBase.h:67
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_pixelL2GKernelName
Gaudi::Property< std::string > m_pixelL2GKernelName
Name of the pixel L2G kernel.
Definition: F100StreamIntegrationAlg.h:67
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_edmStripOutputBufferList
std::vector< cl::Buffer > m_edmStripOutputBufferList
Definition: F100StreamIntegrationAlg.h:116
Chrono.h
Exception-safe IChronoSvc caller.
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::getListofCUs
void getListofCUs(std::vector< std::string > &cuNames)
Definition: F100StreamIntegrationAlg.cxx:360
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_xclbin
Gaudi::Property< std::string > m_xclbin
Path and name of the xclbin file.
Definition: F100StreamIntegrationAlg.h:52
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_numEvents
std::atomic< ulonglong > m_numEvents
Number of events processed.
Definition: F100StreamIntegrationAlg.h:72
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_FPGAPixelOutput
SG::WriteHandleKey< std::vector< uint64_t > > m_FPGAPixelOutput
Definition: F100StreamIntegrationAlg.h:47
EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE
constexpr unsigned long PIXEL_BLOCK_BUF_SIZE
Definition: EFTrackingTransient.h:34
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_stripL2GOutputBufferList
std::vector< cl::Buffer > m_stripL2GOutputBufferList
Definition: F100StreamIntegrationAlg.h:111
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::execute
virtual StatusCode execute(const EventContext &ctx) const override final
Should be overridden by derived classes to perform meaningful work.
Definition: F100StreamIntegrationAlg.cxx:120
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_kernelTime
std::atomic< cl_ulong > m_kernelTime
Time for kernel execution.
Definition: F100StreamIntegrationAlg.h:83
EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE
constexpr unsigned long STRIP_CONTAINER_BUF_SIZE
Definition: EFTrackingTransient.h:37
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_pixelInputTime
std::atomic< cl_ulong > m_pixelInputTime
Time for pixel input buffer write.
Definition: F100StreamIntegrationAlg.h:73
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_pixelEndClusterEdmKernelName
Gaudi::Property< std::string > m_pixelEndClusterEdmKernelName
Name of the pixel clustering kernel start.
Definition: F100StreamIntegrationAlg.h:61
Athena::Chrono
Exception-safe IChronoSvc caller.
Definition: Chrono.h:50
dqt_zlumi_pandas.err
err
Definition: dqt_zlumi_pandas.py:183
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_pixelStartClusterKernelName
Gaudi::Property< std::string > m_pixelStartClusterKernelName
Name of the pixel clustering kernel start.
Definition: F100StreamIntegrationAlg.h:59
lumiFormat.i
int i
Definition: lumiFormat.py:85
SG::get
const T * get(const ReadCondHandleKey< T > &key, const EventContext &ctx)
Convenience function to retrieve an object given a ReadCondHandleKey.
Definition: ReadCondHandle.h:287
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
ATH_MSG_DEBUG
#define ATH_MSG_DEBUG(x)
Definition: AthMsgStreamMacros.h:29
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_FPGAPixelRDO
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAPixelRDO
Definition: F100StreamIntegrationAlg.h:44
EFTrackingTransient::PIXEL_CONTAINER_INPUT_BUF_SIZE
constexpr unsigned long PIXEL_CONTAINER_INPUT_BUF_SIZE
Definition: EFTrackingTransient.h:42
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_chronoSvc
ServiceHandle< IChronoSvc > m_chronoSvc
Service for timing the algorithm.
Definition: F100StreamIntegrationAlg.h:42
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_pixelClusterEDMOutputBufferList
std::vector< cl::Buffer > m_pixelClusterEDMOutputBufferList
Definition: F100StreamIntegrationAlg.h:107
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_acc_queues
std::vector< cl::CommandQueue > m_acc_queues
Definition: F100StreamIntegrationAlg.h:119
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::finalize
virtual StatusCode finalize() override final
Definition: F100StreamIntegrationAlg.cxx:337
find_tgc_unfilled_channelids.ip
ip
Definition: find_tgc_unfilled_channelids.py:3
xAOD::uint64_t
uint64_t
Definition: EventInfo_v1.cxx:123
ATH_CHECK
#define ATH_CHECK
Definition: AthCheckMacros.h:40
IntegrationBase::precheck
StatusCode precheck(const std::vector< Gaudi::Property< std::string >> &inputs) const
Check if the desired Gaudi properties are set.
Definition: IntegrationBase.cxx:154
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::initialize
virtual StatusCode initialize() override final
Detect the OpenCL devices and prepare OpenCL context.
Definition: F100StreamIntegrationAlg.cxx:15
SG::VarHandleKey::initialize
StatusCode initialize(bool used=true)
If this object is used as a property, then this should be called during the initialize phase.
Definition: AthToolSupport/AsgDataHandles/Root/VarHandleKey.cxx:103
EFTrackingFPGAIntegration
The class for encoding RDO to FPGA format.
Definition: BenchmarkAlg.h:28
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_stripStartClusterKernelName
Gaudi::Property< std::string > m_stripStartClusterKernelName
Name of the strip clustering kernel start.
Definition: F100StreamIntegrationAlg.h:63
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_edmPixelOutputBufferList
std::vector< cl::Buffer > m_edmPixelOutputBufferList
Definition: F100StreamIntegrationAlg.h:115
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_stripEdmKernelName
Gaudi::Property< std::string > m_stripEdmKernelName
Name of the FPGA kernel.
Definition: F100StreamIntegrationAlg.h:57
IntegrationBase::loadProgram
StatusCode loadProgram(const std::string &xclbin)
Find the xclbin file and load it into the OpenCL program object.
Definition: IntegrationBase.cxx:115
EFTrackingTransient::STRIP_CONTAINER_INPUT_BUF_SIZE
constexpr unsigned long STRIP_CONTAINER_INPUT_BUF_SIZE
Definition: EFTrackingTransient.h:43
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_stripClusterEDMOutputBufferList
std::vector< cl::Buffer > m_stripClusterEDMOutputBufferList
Definition: F100StreamIntegrationAlg.h:108
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_pixelClusterInputBufferList
std::vector< cl::Buffer > m_pixelClusterInputBufferList
Definition: F100StreamIntegrationAlg.h:102
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_FPGAThreads
Gaudi::Property< int > m_FPGAThreads
Definition: F100StreamIntegrationAlg.h:50
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_pixelOutputTime
std::atomic< cl_ulong > m_pixelOutputTime
Time for pixel output buffer read.
Definition: F100StreamIntegrationAlg.h:81
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_pixelEdmPrepTime
std::atomic< cl_ulong > m_pixelEdmPrepTime
Time for pixel EDM preparation.
Definition: F100StreamIntegrationAlg.h:79
EFTrackingTransient::STRIP_BLOCK_BUF_SIZE
constexpr unsigned long STRIP_BLOCK_BUF_SIZE
Definition: EFTrackingTransient.h:35
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_stripInputTime
std::atomic< cl_ulong > m_stripInputTime
Time for strip input buffer write.
Definition: F100StreamIntegrationAlg.h:74
SG::WriteHandle
Definition: StoreGate/StoreGate/WriteHandle.h:73
SG::WriteHandle::record
StatusCode record(std::unique_ptr< T > data)
Record a const object to the store.
ATH_MSG_WARNING
#define ATH_MSG_WARNING(x)
Definition: AthMsgStreamMacros.h:32
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_pixelClusteringTime
std::atomic< cl_ulong > m_pixelClusteringTime
Time for pixel clustering.
Definition: F100StreamIntegrationAlg.h:75
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_pixelL2GEDMOutputBufferList
std::vector< cl::Buffer > m_pixelL2GEDMOutputBufferList
Definition: F100StreamIntegrationAlg.h:112
SG::getNSlots
size_t getNSlots()
Return the number of event slots.
Definition: SlotSpecificObj.cxx:64
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_pixelEndClusterKernelName
Gaudi::Property< std::string > m_pixelEndClusterKernelName
Name of the pixel clustering kernel end.
Definition: F100StreamIntegrationAlg.h:60
SlotSpecificObj.h
Maintain a set of objects, one per slot.
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_pixelL2GOutputBufferList
std::vector< cl::Buffer > m_pixelL2GOutputBufferList
Definition: F100StreamIntegrationAlg.h:110
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_stripL2GKernelName
Gaudi::Property< std::string > m_stripL2GKernelName
Name of the strip L2G kernels.
Definition: F100StreamIntegrationAlg.h:68
IntegrationBase::m_program
cl::Program m_program
Program object containing the kernel.
Definition: IntegrationBase.h:68
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_stripEdmPrepTime
std::atomic< cl_ulong > m_stripEdmPrepTime
Time for strip EDM preparation.
Definition: F100StreamIntegrationAlg.h:80
F100StreamIntegrationAlg.h
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_stripL2GTime
std::atomic< cl_ulong > m_stripL2GTime
Time for strip L2G.
Definition: F100StreamIntegrationAlg.h:78
EFTrackingFPGAIntegration::F100StreamIntegrationAlg::m_stripEndClusterKernelName
Gaudi::Property< std::string > m_stripEndClusterKernelName
Name of the strip clustering kernel end.
Definition: F100StreamIntegrationAlg.h:64