ATLAS Offline Software
F150IntegrationAlg.cxx
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3  */
4 
6 #include "AthenaKernel/Chrono.h"
8 #include <xrt/xrt_bo.h>
9 #include <xrt/xrt_device.h>
10 #include <xrt/xrt_kernel.h>
11 #include <xrt/xrt_uuid.h>
12 #include <fstream>
13 
15 {
// Body of F150IntegrationAlg::initialize().
//
// NOTE(review): the signature line and several statements of this function are
// absent from this listing (its embedded line numbers skip 16, 20, 26, 31,
// 36-37 and 39-41), so the comments below describe only what is visible.
//
// Visible steps:
//   1. retrieve the chrono service,
//   2. collect the compute-unit (CU) names from the xclbin via getListofCUs(),
//   3. create one command queue and one set of device buffers per FPGA thread,
//   4. create one cl::Kernel per recognised CU name,
//   5. fail if no pixel clustering kernel was created,
//   6. retrieve the monitoring tool when one is configured.
// Returns StatusCode::FAILURE in case 5 (or when an ATH_CHECK fails),
// StatusCode::SUCCESS otherwise.
17  {
18  ATH_MSG_INFO("Running on the FPGA accelerator");
19 
21 
22  ATH_CHECK(m_chronoSvc.retrieve());
23 
// NOTE(review): the statements timed by the two Chrono scopes below are not
// present in this listing.
24  {
25  Athena::Chrono chrono("Platform and device initlize", m_chronoSvc.get());
27  }
28 
29  {
30  Athena::Chrono chrono("CL::loadProgram", m_chronoSvc.get());
32  }
33  ATH_MSG_INFO("loading "<<m_xclbin);
34 
35 
38 
42 
43  std::vector<std::string> listofCUs;
44 
45  getListofCUs(listofCUs);
46 
// NOTE(review): err is handed to every cl::CommandQueue / cl::Buffer
// constructor below but is never inspected in the visible code, so a failed
// allocation would go unnoticed here — confirm.
47  cl_int err = 0;
48 
// Number of queue/buffer sets to create: the m_FPGAThreads property, or the
// number of event slots when the property is < 1. execute() applies the same
// rule when it computes its buffer index.
49  unsigned int nthreads = m_FPGAThreads.value();
50 
51  if(m_FPGAThreads.value() < 1){
52  nthreads = SG::getNSlots();
53  }
54 
55  // create the buffers
56  for(unsigned int i = 0; i < nthreads; i++)
57  {
// One profiling-enabled, out-of-order command queue per thread.
58  m_acc_queues.emplace_back(m_context, m_accelerator, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
59 
60  // Input
61  m_pixelClusterInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_ONLY, EFTrackingTransient::PIXEL_CONTAINER_INPUT_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
62  m_stripClusterInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_ONLY, EFTrackingTransient::STRIP_CONTAINER_INPUT_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
63 
64 
// Clustering outputs (cluster words and their EDM counterparts).
// NOTE(review): m_pixelClusterOutputBufferList is filled here but is not
// referenced in the visible part of execute() — confirm it is still needed.
65  m_pixelClusterOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
66  m_stripClusterOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
67  m_pixelClusterEDMOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE,EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
68  m_stripClusterEDMOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
69 
// Strip L2G outputs.
70  m_stripL2GOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
71  m_stripL2GEDMOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
72  // EDMPrep
73  m_edmPixelOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
74  m_edmStripOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
75 
76 
// Slicing engine and inside-out outputs.
77  m_slicingEngineOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
78  m_insideOutOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
79  }
80 
// Each CU name is matched by substring against the configured kernel names;
// the first matching branch wins, so the order of the chain matters when one
// configured name is a substring of another.
81  // Create kernels for each one of CUs that is inside device
82  for (const auto& cuName: listofCUs)
83  {
84  // Pixel clustering
85  if(cuName.find(m_pixelClusterKernelName.value()) != std::string::npos) m_pixelClusteringKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
86 
87  // Strip clustering
88  else if(cuName.find(m_stripClusterKernelName.value()) != std::string::npos) m_stripClusteringKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
89  // Strip L2G
90  else if(cuName.find(m_stripL2GKernelName.value()) != std::string::npos) m_stripL2GKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
91 
92  // EDM prep
93  else if(cuName.find(m_pixelEdmKernelName.value()) != std::string::npos) m_pixelEdmPrepKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
94  else if(cuName.find(m_stripEdmKernelName.value()) != std::string::npos) m_stripEdmPrepKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
95  // Slicing
96  else if(cuName.find(m_slicingEngineInputName.value()) != std::string::npos) m_slicingEngineInputKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
97  else if(cuName.find(m_slicingEngineOutputName.value()) != std::string::npos) m_slicingEngineOutputKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
98  // IO
99  else if(cuName.find(m_insideOutInputName.value()) != std::string::npos) m_insideOutInputKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
100  else if(cuName.find(m_insideOutOutputName.value()) != std::string::npos) m_insideOutOutputKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
101  else
102  {
103  ATH_MSG_WARNING("Do not recognize kernel name: "<<cuName);
104  }
105  }
106 
// Report how many kernels were created for each configured name.
107  ATH_MSG_INFO(m_pixelClusterKernelName.value()<<" size: "<<m_pixelClusteringKernels.size());
108  ATH_MSG_INFO(m_stripClusterKernelName.value()<<" size: "<<m_stripClusteringKernels.size());
109  ATH_MSG_INFO(m_stripL2GKernelName.value()<<" size: "<<m_stripL2GKernels.size());
110  ATH_MSG_INFO(m_pixelEdmKernelName.value()<<" size: "<<m_pixelEdmPrepKernels.size());
111  ATH_MSG_INFO(m_stripEdmKernelName.value()<<" size: "<<m_stripEdmPrepKernels.size());
112  ATH_MSG_INFO(m_slicingEngineInputName.value()<<" size: "<<m_slicingEngineInputKernels.size());
113  ATH_MSG_INFO(m_slicingEngineOutputName.value()<<" size: "<<m_slicingEngineOutputKernels.size());
114  ATH_MSG_INFO(m_insideOutInputName.value()<<" size: "<<m_insideOutInputKernels.size());
115  ATH_MSG_INFO(m_insideOutOutputName.value()<<" size: "<<m_insideOutOutputKernels.size());
116 
// NOTE(review): only the pixel clustering kernels are validated. execute()
// also computes `ctx.slot() % size()` for the strip clustering, strip L2G,
// pixel/strip EDM-prep, slicing-engine-output and inside-out kernel vectors;
// if any of those is empty that is a modulo by zero — consider checking them
// here as well.
117  if(m_pixelClusteringKernels.size()==0){
118  ATH_MSG_FATAL("No m_pixelClusteringKernels constructed");
119  return StatusCode::FAILURE;
120  }
121 
122  // monitoring
123  if ( !m_monTool.empty() ) {
124  ATH_CHECK(m_monTool.retrieve() );
125  }
126  else {
127  ATH_MSG_INFO("Monitoring tool is empty");
128  }
129 
130  return StatusCode::SUCCESS;
131  }
132 
133  void F150IntegrationAlg::dumpHexData(std::span<const uint64_t> data, const std::string& dataDescriptor, const EventContext &ctx) const {
134 
135  if(!m_outputTextFile) return;
136  auto withEvt = [&](const std::string& fname) {
137  const auto evt = ctx.eventID().event_number(); // get current event number
138  const auto dot = fname.rfind('.');
139  if (dot == std::string::npos) {
140  return fname + "_" + std::to_string(evt);
141  }
142  return fname.substr(0, dot) + "_" + std::to_string(evt) + fname.substr(dot);
143  };
144 
145 
146  ATH_MSG_DEBUG("STARTING " << dataDescriptor << " words:");
147  std::ofstream outputFile(withEvt(dataDescriptor));
148 
149  for (uint64_t d : data) {
150  outputFile << std::hex << std::setw(16) << std::setfill('0') << d << '\n';
151  }
152 
153  // Write different data types
154  outputFile.close();
155  }
156 
157 
// Process one event on the accelerator.
//
// Visible flow: fetch the pixel and strip input word vectors, select the
// per-slot command queue, buffers and kernels, bind the kernel arguments,
// enqueue non-blocking input writes, enqueue the kernel chain with event
// dependencies, record the three output containers and read them back, read
// back the intermediate device buffers, hand everything to dumpHexData(), and
// finally accumulate the OpenCL profiling times into the member counters.
//
// NOTE(review): none of the OpenCL setArg/enqueue calls in this function has
// its return code checked.
// NOTE(review): the declarations of FPGAPixelOutput, FPGAStripOutput and
// FPGATrackOutput are not present in this listing (its embedded line numbers
// skip 284, 287 and 290).
//
// @param ctx  Event context; its slot number selects queue, buffers and kernels.
// @return StatusCode::SUCCESS, or the failure propagated by an ATH_CHECK.
158  StatusCode F150IntegrationAlg::execute(const EventContext &ctx) const
159  {
160  ATH_MSG_DEBUG("Executing F150IntegrationAlg");
161  auto mnt_timer_Total = Monitored::Timer<std::chrono::milliseconds>("TIME_Total");
162  auto monTime = Monitored::Group(m_monTool, mnt_timer_Total);
163 
164  mnt_timer_Total.start();
165 
166  m_numEvents++;
167 
169  const std::vector<uint64_t>* pixelInput{nullptr}, *stripInput{nullptr};
170  ATH_CHECK(SG::get(pixelInput, m_FPGAPixelRDO, ctx));
171  ATH_CHECK(SG::get(stripInput, m_FPGAStripRDO, ctx));
172 
173 
// Same thread-count rule as in initialize(), so bufferIndex stays within the
// queue/buffer lists that were created there.
174  // logic
175  unsigned int nthreads = m_FPGAThreads.value();
176 
177  if(m_FPGAThreads.value() < 1){
178  nthreads = SG::getNSlots();
179  }
180 
181  size_t bufferIndex = ctx.slot() % nthreads;
182 
// NOTE(review): every modulo below divides by the size of a kernel vector.
// initialize() only rejects an empty m_pixelClusteringKernels; any other empty
// vector makes the corresponding line a modulo by zero — confirm the xclbin
// always provides all of these kernels.
183  // Get index for each of the kernels
184  size_t pixelClusterIndex = ctx.slot() % m_pixelClusteringKernels.size();
185  size_t stripClusterIndex = ctx.slot() % m_stripClusteringKernels.size();
186  size_t stripL2GIndex = ctx.slot() % m_stripL2GKernels.size();
187  size_t pixelEDMIndex = ctx.slot() % m_pixelEdmPrepKernels.size();
188  size_t stripEDMIndex = ctx.slot() % m_stripEdmPrepKernels.size();
189  size_t slicingOutIndex = ctx.slot() % m_slicingEngineOutputKernels.size();
190  size_t insideOutInputIndex = ctx.slot() % m_insideOutInputKernels.size();
191  size_t insideOutOutputIndex = ctx.slot() % m_insideOutOutputKernels.size();
192 
193  const cl::CommandQueue &acc_queue = m_acc_queues[bufferIndex];
194 
// NOTE(review): non-const kernel references are taken inside a const method —
// presumably the kernel vectors are declared mutable. Slots whose index maps
// onto the same cl::Kernel share a single object whose arguments are rewritten
// below; confirm two such slots can never run this function concurrently.
195  cl::Kernel &pixelClusteringKernel = m_pixelClusteringKernels[pixelClusterIndex];
196  cl::Kernel &stripClusteringKernel = m_stripClusteringKernels[stripClusterIndex];
197  cl::Kernel &stripL2GKernel = m_stripL2GKernels[stripL2GIndex];
198  cl::Kernel &pixelEdmPrepKernel = m_pixelEdmPrepKernels[pixelEDMIndex];
199  cl::Kernel &stripEdmPrepKernel = m_stripEdmPrepKernels[stripEDMIndex];
200  cl::Kernel &slicingEngineOutputKernel = m_slicingEngineOutputKernels[slicingOutIndex];
201  cl::Kernel &insideOutInputKernel = m_insideOutInputKernels[insideOutInputIndex];
202  cl::Kernel &insideOutOutputKernel = m_insideOutOutputKernels[insideOutOutputIndex];
203 
204 
// NOTE(review): pixel clustering binds arguments 0 and 2 only, whereas strip
// clustering binds 0-3 (argument 3 being the number of input words). The
// slicing and inside-out kernels each get a single argument. Confirm that the
// arguments not bound here are intentionally left unset.
205  // Set kernel arguments
206  pixelClusteringKernel.setArg(0, m_pixelClusterInputBufferList[bufferIndex]);
207  pixelClusteringKernel.setArg(2, m_pixelClusterEDMOutputBufferList[bufferIndex]);
208 
209  stripClusteringKernel.setArg(0, m_stripClusterInputBufferList[bufferIndex]);
210  stripClusteringKernel.setArg(1, m_stripClusterOutputBufferList[bufferIndex]);
211  stripClusteringKernel.setArg(2, m_stripClusterEDMOutputBufferList[bufferIndex]);
212  stripClusteringKernel.setArg(3, static_cast<unsigned int>((*stripInput).size()));
213 
214  stripL2GKernel.setArg(0, m_stripClusterOutputBufferList[bufferIndex]);
215  stripL2GKernel.setArg(1, m_stripClusterEDMOutputBufferList[bufferIndex]);
216  stripL2GKernel.setArg(2, m_stripL2GOutputBufferList[bufferIndex]);
217  stripL2GKernel.setArg(3, m_stripL2GEDMOutputBufferList[bufferIndex]);
218 
219  pixelEdmPrepKernel.setArg(0, m_pixelClusterEDMOutputBufferList[bufferIndex]);
220  pixelEdmPrepKernel.setArg(1, m_edmPixelOutputBufferList[bufferIndex]);
221  stripEdmPrepKernel.setArg(0, m_stripL2GEDMOutputBufferList[bufferIndex]);
222  stripEdmPrepKernel.setArg(1, m_edmStripOutputBufferList[bufferIndex]);
223 
224  slicingEngineOutputKernel.setArg(1, m_slicingEngineOutputBufferList[bufferIndex]);
225 
226  insideOutInputKernel.setArg(0, m_slicingEngineOutputBufferList[bufferIndex]);
227  insideOutOutputKernel.setArg(0, m_insideOutOutputBufferList[bufferIndex]);
228 
229 
230  // Start the transfers
231  cl::Event evt_write_pixel_input;
232  cl::Event evt_write_strip_input;
233 
// Non-blocking (CL_FALSE) host-to-device copies; their events gate the two
// clustering kernels below.
// NOTE(review): the copy length is the input vector size; there is no visible
// check against the device buffer sizes allocated in initialize().
234  acc_queue.enqueueWriteBuffer(m_pixelClusterInputBufferList[bufferIndex], CL_FALSE, 0, sizeof(uint64_t) * (*pixelInput).size(), (*pixelInput).data(), nullptr, &evt_write_pixel_input);
235  acc_queue.enqueueWriteBuffer(m_stripClusterInputBufferList[bufferIndex], CL_FALSE, 0, sizeof(uint64_t) * (*stripInput).size(), (*stripInput).data(), nullptr, &evt_write_strip_input);
236  std::vector<cl::Event> evt_vec_pixel_input{evt_write_pixel_input};
237  std::vector<cl::Event> evt_vec_strip_input{evt_write_strip_input};
238 
239 
// NOTE(review): evt_pixel_l2g and evt_edm_prep are declared but not used in
// the visible code.
240  cl::Event evt_pixel_clustering;
241  cl::Event evt_strip_clustering;
242  cl::Event evt_strip_l2g;
243  cl::Event evt_pixel_l2g;
244  cl::Event evt_edm_prep;
245  cl::Event evt_pixel_edm_prep;
246  cl::Event evt_strip_edm_prep;
247  cl::Event evt_slicing_done;
248  cl::Event evt_insideoutInput_done;
249  cl::Event evt_insideoutOutput_done;
// Event dependency chain enqueued in this scope:
//   pixel: input write -> pixel clustering -> pixel EDM prep
//   strip: input write -> strip clustering -> strip L2G -> strip EDM prep
//   slicing-engine output (no wait list) -> inside-out input
//   inside-out output (no wait list)
// NOTE(review): the chrono scope closes before any wait call, so it presumably
// times only the enqueue calls, not the kernel execution — confirm.
250  {
251  Athena::Chrono chrono("Kernel execution", m_chronoSvc.get());
252  acc_queue.enqueueTask(pixelClusteringKernel, &evt_vec_pixel_input, &evt_pixel_clustering);
253  acc_queue.enqueueTask(stripClusteringKernel, &evt_vec_strip_input, &evt_strip_clustering);
254 
255  std::vector<cl::Event> evt_vec_strip_clustering{evt_strip_clustering};
256  acc_queue.enqueueTask(stripL2GKernel, &evt_vec_strip_clustering, &evt_strip_l2g);
257 
258  std::vector<cl::Event> evt_vec_pixelEDM{evt_pixel_clustering};
259  std::vector<cl::Event> evt_vec_strip_l2g{evt_strip_l2g};
260  acc_queue.enqueueTask(pixelEdmPrepKernel, &evt_vec_pixelEDM, &evt_pixel_edm_prep);
261  acc_queue.enqueueTask(stripEdmPrepKernel, &evt_vec_strip_l2g, &evt_strip_edm_prep);
262 
263  acc_queue.enqueueTask(slicingEngineOutputKernel, nullptr, &evt_slicing_done);
264 
265  // Execute
266  ATH_MSG_DEBUG("Executing IO Kernel");
267  std::vector<cl::Event> evt_vec_slicing{evt_slicing_done};
268  acc_queue.enqueueTask(insideOutInputKernel, &evt_vec_slicing, &evt_insideoutInput_done);
269  acc_queue.enqueueTask(insideOutOutputKernel, nullptr, &evt_insideoutOutput_done);
270 
271  }
272 
273  cl::Event evt_pixel_cluster_output;
274  cl::Event evt_strip_cluster_output;
275  cl::Event evt_track_output;
276 
// Wait lists for the final read-backs: each read waits on the kernel that
// produces the buffer it reads.
277  std::vector<cl::Event> evt_vec_pixel_edm_prep {evt_pixel_edm_prep};
278  std::vector<cl::Event> evt_vec_strip_edm_prep {evt_strip_edm_prep};
279  std::vector<cl::Event> evt_vec_insideout_output {evt_insideoutOutput_done};
280 
281 
282 
// The output containers are recorded zero-filled at full container size and
// are then filled in place by the non-blocking reads below.
283  // output handles
285  ATH_CHECK(FPGAPixelOutput.record(std::make_unique<std::vector<uint64_t> >(EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE, 0)));
286 
288  ATH_CHECK(FPGAStripOutput.record(std::make_unique<std::vector<uint64_t> >(EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE, 0)));
289 
291  ATH_CHECK(FPGATrackOutput.record(std::make_unique<std::vector<uint64_t> >(EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE, 0)));
292 
293  acc_queue.enqueueReadBuffer(m_edmPixelOutputBufferList[bufferIndex], CL_FALSE, 0, sizeof(uint64_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &evt_vec_pixel_edm_prep, &evt_pixel_cluster_output);
294  acc_queue.enqueueReadBuffer(m_edmStripOutputBufferList[bufferIndex], CL_FALSE, 0, sizeof(uint64_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &evt_vec_strip_edm_prep, &evt_strip_cluster_output);
295  acc_queue.enqueueReadBuffer(m_insideOutOutputBufferList[bufferIndex], CL_FALSE, 0, sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &evt_vec_insideout_output, &evt_track_output);
296 
// Block until the three final outputs have been copied to the host.
297  std::vector<cl::Event> wait_for_reads = { evt_pixel_cluster_output, evt_strip_cluster_output, evt_track_output };
298  cl::Event::waitForEvents(wait_for_reads);
299 
300 
301  dumpHexData((*FPGATrackOutput), "HW_F150i_Stream_insideOut.txt", ctx);
302 
303 
// NOTE(review): everything from here to the dumpHexData() calls only feeds the
// text dumps, yet the host allocations and device reads run for every event,
// while dumpHexData() returns immediately when m_outputTextFile is false.
// Consider guarding this section with the same property.
304  // ---------- Read remaining device buffers & dump to hex ----------
305 
306  // Helper to query buffer size (bytes) -> element count
307  auto bufferElemCount = [](const cl::Buffer& b) -> size_t {
308  size_t bytes = 0;
309  b.getInfo(CL_MEM_SIZE, &bytes);
310  return bytes / sizeof(uint64_t);
311  };
312 
313  // Host scratch areas (sized dynamically from device buffers)
314  std::vector<uint64_t> pixelClusterEDMOut(bufferElemCount(m_pixelClusterEDMOutputBufferList[bufferIndex]), 0);
315  std::vector<uint64_t> stripClusterOut (bufferElemCount(m_stripClusterOutputBufferList[bufferIndex]), 0);
316  std::vector<uint64_t> stripClusterEDMOut (bufferElemCount(m_stripClusterEDMOutputBufferList[bufferIndex]), 0);
317  std::vector<uint64_t> stripL2GOut (bufferElemCount(m_stripL2GOutputBufferList[bufferIndex]), 0);
318  std::vector<uint64_t> stripL2GEDMOut (bufferElemCount(m_stripL2GEDMOutputBufferList[bufferIndex]), 0);
319  std::vector<uint64_t> slicingEngineOut (bufferElemCount(m_slicingEngineOutputBufferList[bufferIndex]), 0);
320 
321  // Read events for the above
322  cl::Event evt_read_pixel_cluster_edm;
323  cl::Event evt_read_strip_cluster;
324  cl::Event evt_read_strip_cluster_edm;
325  cl::Event evt_read_strip_l2g;
326  cl::Event evt_read_strip_l2g_edm;
327  cl::Event evt_read_slicing_out;
328 
329  // Dependencies: make each read wait on the kernel that produced the buffer
330  std::vector<cl::Event> deps_pixel_clust_edm { evt_pixel_clustering }; // pixelClusteringKernel -> pixelClusterEDM
331  std::vector<cl::Event> deps_strip_clust { evt_strip_clustering }; // stripClusteringKernel -> strip cluster buffers
332  std::vector<cl::Event> deps_strip_l2g { evt_strip_l2g }; // stripL2GKernel -> strip L2G buffers
333  std::vector<cl::Event> deps_slicing { evt_slicing_done }; // slicingEngineOutputKernel -> slicing buffer
334 
335  // Enqueue reads of intermediate / remaining buffers
336  acc_queue.enqueueReadBuffer(
337  m_pixelClusterEDMOutputBufferList[bufferIndex], CL_FALSE, 0,
338  sizeof(uint64_t) * pixelClusterEDMOut.size(), pixelClusterEDMOut.data(),
339  &deps_pixel_clust_edm, &evt_read_pixel_cluster_edm);
340 
341  acc_queue.enqueueReadBuffer(
342  m_stripClusterOutputBufferList[bufferIndex], CL_FALSE, 0,
343  sizeof(uint64_t) * stripClusterOut.size(), stripClusterOut.data(),
344  &deps_strip_clust, &evt_read_strip_cluster);
345 
346  acc_queue.enqueueReadBuffer(
347  m_stripClusterEDMOutputBufferList[bufferIndex], CL_FALSE, 0,
348  sizeof(uint64_t) * stripClusterEDMOut.size(), stripClusterEDMOut.data(),
349  &deps_strip_clust, &evt_read_strip_cluster_edm);
350 
351  acc_queue.enqueueReadBuffer(
352  m_stripL2GOutputBufferList[bufferIndex], CL_FALSE, 0,
353  sizeof(uint64_t) * stripL2GOut.size(), stripL2GOut.data(),
354  &deps_strip_l2g, &evt_read_strip_l2g);
355 
356  acc_queue.enqueueReadBuffer(
357  m_stripL2GEDMOutputBufferList[bufferIndex], CL_FALSE, 0,
358  sizeof(uint64_t) * stripL2GEDMOut.size(), stripL2GEDMOut.data(),
359  &deps_strip_l2g, &evt_read_strip_l2g_edm);
360 
361  acc_queue.enqueueReadBuffer(
362  m_slicingEngineOutputBufferList[bufferIndex], CL_FALSE, 0,
363  sizeof(uint64_t) * slicingEngineOut.size(), slicingEngineOut.data(),
364  &deps_slicing, &evt_read_slicing_out);
365 
// The first three events were already waited on above; they are listed again
// together with the six intermediate reads.
366  // Wait for *all* reads (existing finals + new intermediates)
367  std::vector<cl::Event> all_reads = {
368  // existing final outputs:
369  evt_pixel_cluster_output, evt_strip_cluster_output, evt_track_output,
370  // new reads:
371  evt_read_pixel_cluster_edm,
372  evt_read_strip_cluster, evt_read_strip_cluster_edm,
373  evt_read_strip_l2g, evt_read_strip_l2g_edm,
374  evt_read_slicing_out
375  };
376  cl::Event::waitForEvents(all_reads);
377 
378  // Dump everything to hex files (include inputs for completeness)
379  dumpHexData((*pixelInput), "HW_F150i_Stream_pixelInput_event.txt", ctx);
380  dumpHexData((*stripInput), "HW_F150i_Stream_stripInput_event.txt", ctx);
381 
382  // Final container outputs (already read into WriteHandles)
383  dumpHexData((*FPGAPixelOutput), "HW_F150i_Stream_pixelEDM_event.txt", ctx);
384  dumpHexData((*FPGAStripOutput), "HW_F150i_Stream_stripEDM_event.txt", ctx);
385 
386  // Intermediates
387  dumpHexData(pixelClusterEDMOut, "HW_F150i_Stream_pixelClusterEDM_event.txt", ctx);
388  dumpHexData(stripClusterOut, "HW_F150i_Stream_stripCluster_event.txt", ctx);
389  dumpHexData(stripClusterEDMOut, "HW_F150i_Stream_stripClusterEDM_event.txt", ctx);
390  dumpHexData(stripL2GOut, "HW_F150i_Stream_stripL2G_event.txt", ctx);
391  dumpHexData(stripL2GEDMOut, "HW_F150i_Stream_stripL2GEDM_event.txt", ctx);
392  dumpHexData(slicingEngineOut, "HW_F150i_Stream_slicingEngineOut_event.txt", ctx);
393 
394 
395 
// The monitored total time ends here; the bookkeeping below is not included.
396  mnt_timer_Total.stop();
397 
// NOTE(review): 6 is a magic number. Per the trailing comments, an input of
// exactly 6 words is treated as "no input" — presumably header/footer words
// only; confirm and consider a named constant.
398  if(pixelInput->size() == 6) (*FPGAPixelOutput)[0] = 0; // if no pixel input, set the first element to 0
399  if(stripInput->size() == 6) (*FPGAStripOutput)[0] = 0; // if no strip input, set the first element to 0
400 
401 
// Each duration below is the END minus START profiling counter of one event,
// added to the matching member accumulator; counter / 1e6 is logged as ms.
402  // calculate the time for the kernel execution
403  // get the time of writing pixel input buffer
404  cl_ulong pixel_input_time = evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
405  m_pixelInputTime += pixel_input_time;
406  ATH_MSG_DEBUG("Pixel input buffer write time: " << pixel_input_time / 1e6 << " ms");
407 
408  // get the time of writing strip input buffer
409  cl_ulong strip_input_time = evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
410  m_stripInputTime += strip_input_time;
411  ATH_MSG_DEBUG("Strip input buffer write time: " << strip_input_time / 1e6 << " ms");
412 
413  // get the time of pixel clustering
414  cl_ulong pixel_clustering_time = evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
415  m_pixelClusteringTime += pixel_clustering_time;
416  ATH_MSG_DEBUG("Pixel clustering time: " << pixel_clustering_time / 1e6 << " ms");
417 
418  // get the time of strip clustering
419  cl_ulong strip_clustering_time = evt_strip_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
420  m_stripClusteringTime += strip_clustering_time;
421  ATH_MSG_DEBUG("Strip clustering time: " << strip_clustering_time / 1e6 << " ms");
422 
423  // get the time of strip L2G
424  cl_ulong strip_l2g_time = evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_START>();
425  m_stripL2GTime += strip_l2g_time;
426  ATH_MSG_DEBUG("Strip L2G time: " << strip_l2g_time / 1e6 << " ms");
427 
// Time spent in the pixel and strip EDM-prep kernels.
428  cl_ulong pixel_edm_prep_time = evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_START>();
429  cl_ulong strip_edm_prep_time = evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_START>();
430 
431  m_pixelEdmPrepTime += pixel_edm_prep_time;
432  ATH_MSG_DEBUG("PixelEDMPrep time: " << pixel_edm_prep_time / 1e6 << " ms");
433 
434  m_stripEdmPrepTime += strip_edm_prep_time;
435  ATH_MSG_DEBUG("StripEDMPrep time: " << strip_edm_prep_time / 1e6 << " ms");
436 
437 
// Spans from the QUEUED timestamp of pixel clustering to the later END of the
// two EDM-prep kernels; the slicing-engine and inside-out kernels are not part
// of this figure.
438  // get the time of the whole kernel execution
439  cl_ulong kernel_start = evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();
440  cl_ulong kernel_end = std::max(evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>(), evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>());
441  m_kernelTime += (kernel_end - kernel_start);
442  ATH_MSG_DEBUG("Kernel execution time: " << (kernel_end - kernel_start) / 1e6 << " ms");
443 
444  // get the time of reading pixel output buffer
445  cl_ulong pixel_output_time = evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
446  m_pixelOutputTime += pixel_output_time;
447  ATH_MSG_DEBUG("Pixel output buffer read time: " << pixel_output_time / 1e6 << " ms");
448 
449  // get the time of reading strip output buffer
450  cl_ulong strip_output_time = evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
451  m_stripOutputTime += strip_output_time;
452  ATH_MSG_DEBUG("Strip output buffer read time: " << strip_output_time / 1e6 << " ms");
453 
454  return StatusCode::SUCCESS;
455  }
456 
// Body of F150IntegrationAlg::finalize(); the signature line (embedded line
// number 457) is not present in this listing.
//
// Logs the number of processed events and, when at least one event was
// processed, the per-event average of every timing counter accumulated in
// execute(). Always returns StatusCode::SUCCESS.
458  {
459 
460  ATH_MSG_INFO("Finalizing F150IntegrationAlg");
461  ATH_MSG_INFO("Number of events: " << m_numEvents);
462 
// The guard avoids a division by zero when no event was processed.
// Each average is accumulated counter / number of events, then / 1e6 to be
// printed as ms. The first division is between two integer atomics, so the
// per-event average is truncated to a whole counter unit before the
// floating-point conversion.
463  if(m_numEvents > 0){
464  ATH_MSG_INFO("Pixel input ave time: " << m_pixelInputTime / m_numEvents / 1e6 << " ms");
465  ATH_MSG_INFO("Strip input ave time: " << m_stripInputTime / m_numEvents / 1e6 << " ms");
466  ATH_MSG_INFO("Pixel clustering ave time: " << m_pixelClusteringTime / m_numEvents / 1e6 << " ms");
467  ATH_MSG_INFO("Strip clustering ave time: " << m_stripClusteringTime / m_numEvents / 1e6 << " ms");
468  ATH_MSG_INFO("Strip L2G ave time: " << m_stripL2GTime / m_numEvents / 1e6 << " ms");
469  ATH_MSG_INFO("PixelEDMPrep ave time: " << m_pixelEdmPrepTime / m_numEvents / 1e6 << " ms");
470  ATH_MSG_INFO("StripEDMPrep ave time: " << m_stripEdmPrepTime / m_numEvents / 1e6 << " ms");
471  ATH_MSG_INFO("Kernel execution ave time: " << m_kernelTime / m_numEvents / 1e6 << " ms");
472  ATH_MSG_INFO("Pixel output ave time: " << m_pixelOutputTime / m_numEvents / 1e6 << " ms");
473  ATH_MSG_INFO("Strip output ave time: " << m_stripOutputTime / m_numEvents / 1e6 << " ms");
474  }
475 
476  return StatusCode::SUCCESS;
477  }
478 
479  void F150IntegrationAlg::getListofCUs(std::vector<std::string>& cuNames)
480  {
481  xrt::xclbin xrt_xclbin(m_xclbin.value());
482 
483  ATH_MSG_INFO("xsa name: "<<xrt_xclbin.get_xsa_name());
484  ATH_MSG_INFO("fpga name: "<<xrt_xclbin.get_fpga_device_name());
485  ATH_MSG_INFO("uuid: "<<xrt_xclbin.get_uuid().to_string());
486 
487  for (const xrt::xclbin::kernel &kernel : xrt_xclbin.get_kernels()) {
488  const std::string& kernelName = kernel.get_name();
489 
490  ATH_MSG_INFO("kernelName: "<<kernelName);
491 
492 
493  for (const xrt::xclbin::ip &computeUnit : kernel.get_cus()) {
494  const std::string& computeUnitName = computeUnit.get_name();
495  const std::string computeUnitIsolatedName = computeUnitName.substr(kernelName.size() + 1);
496 
497  const std::string computeUnitUsableName = kernelName + ":{" + computeUnitIsolatedName + "}";
498 
499  ATH_MSG_INFO("CU name: "<<computeUnitUsableName);
500  cuNames.push_back(computeUnitUsableName);
501  }
502  }
503  }
504 
505 } // namespace EFTrackingFPGAIntegration
EFTrackingFPGAIntegration::F150IntegrationAlg::m_stripL2GKernelName
Gaudi::Property< std::string > m_stripL2GKernelName
Name of the strip L2G kernels.
Definition: F150IntegrationAlg.h:62
IntegrationBase::m_accelerator
cl::Device m_accelerator
Device object for the accelerator card.
Definition: IntegrationBase.h:66
data
char data[hepevt_bytes_allocation_ATLAS]
Definition: HepEvt.cxx:11
F150IntegrationAlg.h
IntegrationBase::initialize
virtual StatusCode initialize() override
Detect the OpenCL devices and prepare OpenCL context.
Definition: IntegrationBase.cxx:16
ATH_MSG_FATAL
#define ATH_MSG_FATAL(x)
Definition: AthMsgStreamMacros.h:34
TrigDefs::Group
Group
Properties of a chain group.
Definition: GroupProperties.h:13
EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE
constexpr unsigned long PIXEL_CONTAINER_BUF_SIZE
Definition: EFTrackingTransient.h:38
EFTrackingFPGAIntegration::F150IntegrationAlg::m_FPGATrackOutput
SG::WriteHandleKey< std::vector< uint64_t > > m_FPGATrackOutput
Definition: F150IntegrationAlg.h:51
EFTrackingFPGAIntegration::F150IntegrationAlg::m_stripOutputTime
std::atomic< cl_ulong > m_stripOutputTime
Time for strip output buffer read.
Definition: F150IntegrationAlg.h:79
EFTrackingFPGAIntegration::F150IntegrationAlg::m_numEvents
std::atomic< ulonglong > m_numEvents
Number of events processed.
Definition: F150IntegrationAlg.h:70
ATH_MSG_INFO
#define ATH_MSG_INFO(x)
Definition: AthMsgStreamMacros.h:31
EFTrackingFPGAIntegration::F150IntegrationAlg::m_pixelClusteringTime
std::atomic< cl_ulong > m_pixelClusteringTime
Time for pixel clustering.
Definition: F150IntegrationAlg.h:73
EFTrackingFPGAIntegration::F150IntegrationAlg::finalize
virtual StatusCode finalize() override final
Definition: F150IntegrationAlg.cxx:457
EFTrackingFPGAIntegration::F150IntegrationAlg::m_outputTextFile
Gaudi::Property< bool > m_outputTextFile
Whether to dump the data buffers to text files.
Definition: F150IntegrationAlg.h:54
hist_file_dump.d
d
Definition: hist_file_dump.py:142
max
constexpr double max()
Definition: ap_fixedTest.cxx:33
EFTrackingFPGAIntegration::F150IntegrationAlg::m_stripClusterKernelName
Gaudi::Property< std::string > m_stripClusterKernelName
Name of the strip clustering kernel.
Definition: F150IntegrationAlg.h:61
EFTrackingFPGAIntegration::F150IntegrationAlg::m_xclbin
Gaudi::Property< std::string > m_xclbin
Path and name of the xclbin file.
Definition: F150IntegrationAlg.h:56
EFTrackingFPGAIntegration::F150IntegrationAlg::m_monTool
ToolHandle< GenericMonitoringTool > m_monTool
Definition: F150IntegrationAlg.h:68
EFTrackingFPGAIntegration::F150IntegrationAlg::m_pixelClusterOutputBufferList
std::vector< cl::Buffer > m_pixelClusterOutputBufferList
Definition: F150IntegrationAlg.h:103
JiveXML::Event
struct Event_t Event
Definition: ONCRPCServer.h:65
IntegrationBase::m_context
cl::Context m_context
Context object for the application.
Definition: IntegrationBase.h:67
EFTrackingFPGAIntegration::F150IntegrationAlg::m_slicingEngineInputName
Gaudi::Property< std::string > m_slicingEngineInputName
Definition: F150IntegrationAlg.h:63
EFTrackingFPGAIntegration::F150IntegrationAlg::m_pixelEdmKernelName
Gaudi::Property< std::string > m_pixelEdmKernelName
Name of the FPGA kernel.
Definition: F150IntegrationAlg.h:58
LArG4FSStartPointFilter.evt
evt
Definition: LArG4FSStartPointFilter.py:42
EFTrackingFPGAIntegration::F150IntegrationAlg::execute
virtual StatusCode execute(const EventContext &ctx) const override final
Should be overridden by derived classes to perform meaningful work.
Definition: F150IntegrationAlg.cxx:158
Chrono.h
Exception-safe IChronoSvc caller.
EFTrackingFPGAIntegration::F150IntegrationAlg::m_FPGAStripOutput
SG::WriteHandleKey< std::vector< uint64_t > > m_FPGAStripOutput
Definition: F150IntegrationAlg.h:50
MuonR4::to_string
std::string to_string(const SectorProjector proj)
Definition: MsTrackSeeder.cxx:66
EFTrackingFPGAIntegration::F150IntegrationAlg::m_FPGAPixelRDO
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAPixelRDO
Definition: F150IntegrationAlg.h:46
EFTrackingFPGAIntegration::F150IntegrationAlg::m_stripL2GEDMOutputBufferList
std::vector< cl::Buffer > m_stripL2GEDMOutputBufferList
Definition: F150IntegrationAlg.h:109
EFTrackingFPGAIntegration::F150IntegrationAlg::m_stripClusterInputBufferList
std::vector< cl::Buffer > m_stripClusterInputBufferList
Definition: F150IntegrationAlg.h:101
EFTrackingFPGAIntegration::F150IntegrationAlg::m_pixelInputTime
std::atomic< cl_ulong > m_pixelInputTime
Time for pixel input buffer write.
Definition: F150IntegrationAlg.h:71
EFTrackingFPGAIntegration::F150IntegrationAlg::getListofCUs
void getListofCUs(std::vector< std::string > &cuNames)
Definition: F150IntegrationAlg.cxx:479
EFTrackingFPGAIntegration::F150IntegrationAlg::m_insideOutOutputName
Gaudi::Property< std::string > m_insideOutOutputName
Definition: F150IntegrationAlg.h:66
EFTrackingFPGAIntegration::F150IntegrationAlg::m_stripClusteringTime
std::atomic< cl_ulong > m_stripClusteringTime
Time for strip clustering.
Definition: F150IntegrationAlg.h:74
EFTrackingFPGAIntegration::F150IntegrationAlg::m_FPGAStripRDO
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAStripRDO
Definition: F150IntegrationAlg.h:47
compareGeometries.outputFile
string outputFile
Definition: compareGeometries.py:25
EFTrackingFPGAIntegration::F150IntegrationAlg::m_stripL2GTime
std::atomic< cl_ulong > m_stripL2GTime
Time for strip L2G.
Definition: F150IntegrationAlg.h:75
EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE
constexpr unsigned long PIXEL_BLOCK_BUF_SIZE
Definition: EFTrackingTransient.h:36
EFTrackingFPGAIntegration::F150IntegrationAlg::m_stripL2GOutputBufferList
std::vector< cl::Buffer > m_stripL2GOutputBufferList
Definition: F150IntegrationAlg.h:108
EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE
constexpr unsigned long STRIP_CONTAINER_BUF_SIZE
Definition: EFTrackingTransient.h:39
EFTrackingFPGAIntegration::F150IntegrationAlg::m_edmPixelOutputBufferList
std::vector< cl::Buffer > m_edmPixelOutputBufferList
Definition: F150IntegrationAlg.h:110
EFTrackingFPGAIntegration::F150IntegrationAlg::initialize
virtual StatusCode initialize() override final
Detect the OpenCL devices and prepare OpenCL context.
Definition: F150IntegrationAlg.cxx:16
EFTrackingFPGAIntegration::F150IntegrationAlg::m_slicingEngineOutputBufferList
std::vector< cl::Buffer > m_slicingEngineOutputBufferList
Definition: F150IntegrationAlg.h:113
EFTrackingFPGAIntegration::F150IntegrationAlg::m_stripClusterEDMOutputBufferList
std::vector< cl::Buffer > m_stripClusterEDMOutputBufferList
Definition: F150IntegrationAlg.h:106
Athena::Chrono
Exception-safe IChronoSvc caller.
Definition: Chrono.h:50
dqt_zlumi_pandas.err
err
Definition: dqt_zlumi_pandas.py:183
lumiFormat.i
int i
Definition: lumiFormat.py:85
SG::get
const T * get(const ReadCondHandleKey< T > &key, const EventContext &ctx)
Convenience function to retrieve an object given a ReadCondHandleKey.
Definition: ReadCondHandle.h:283
EFTrackingFPGAIntegration::F150IntegrationAlg::m_insideOutOutputBufferList
std::vector< cl::Buffer > m_insideOutOutputBufferList
Definition: F150IntegrationAlg.h:114
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
ATH_MSG_DEBUG
#define ATH_MSG_DEBUG(x)
Definition: AthMsgStreamMacros.h:29
EFTrackingTransient::PIXEL_CONTAINER_INPUT_BUF_SIZE
constexpr unsigned long PIXEL_CONTAINER_INPUT_BUF_SIZE
Definition: EFTrackingTransient.h:47
EFTrackingFPGAIntegration::F150IntegrationAlg::m_pixelClusterEDMOutputBufferList
std::vector< cl::Buffer > m_pixelClusterEDMOutputBufferList
Definition: F150IntegrationAlg.h:105
EFTrackingFPGAIntegration::F150IntegrationAlg::m_stripEdmPrepTime
std::atomic< cl_ulong > m_stripEdmPrepTime
Time for strip EDM preparation.
Definition: F150IntegrationAlg.h:77
EFTrackingFPGAIntegration::F150IntegrationAlg::m_stripInputTime
std::atomic< cl_ulong > m_stripInputTime
Time for strip input buffer write.
Definition: F150IntegrationAlg.h:72
find_tgc_unfilled_channelids.ip
ip
Definition: find_tgc_unfilled_channelids.py:3
EFTrackingFPGAIntegration::F150IntegrationAlg::m_insideOutInputName
Gaudi::Property< std::string > m_insideOutInputName
Definition: F150IntegrationAlg.h:65
xAOD::uint64_t
uint64_t
Definition: EventInfo_v1.cxx:123
ATH_CHECK
#define ATH_CHECK
Definition: AthCheckMacros.h:40
EFTrackingFPGAIntegration::F150IntegrationAlg::m_pixelClusterInputBufferList
std::vector< cl::Buffer > m_pixelClusterInputBufferList
Definition: F150IntegrationAlg.h:100
IntegrationBase::precheck
StatusCode precheck(const std::vector< Gaudi::Property< std::string >> &inputs) const
Check if the desired Gaudi properties are set.
Definition: IntegrationBase.cxx:154
SG::VarHandleKey::initialize
StatusCode initialize(bool used=true)
If this object is used as a property, then this should be called during the initialize phase.
Definition: AthToolSupport/AsgDataHandles/Root/VarHandleKey.cxx:103
EFTrackingFPGAIntegration
The class for encoding RDO to FPGA format.
Definition: BenchmarkAlg.h:28
EFTrackingFPGAIntegration::F150IntegrationAlg::m_pixelEdmPrepTime
std::atomic< cl_ulong > m_pixelEdmPrepTime
Time for pixel EDM preparation.
Definition: F150IntegrationAlg.h:76
IntegrationBase::loadProgram
StatusCode loadProgram(const std::string &xclbin)
Find the xclbin file and load it into the OpenCL program object.
Definition: IntegrationBase.cxx:115
EFTrackingTransient::STRIP_CONTAINER_INPUT_BUF_SIZE
constexpr unsigned long STRIP_CONTAINER_INPUT_BUF_SIZE
Definition: EFTrackingTransient.h:48
EFTrackingFPGAIntegration::F150IntegrationAlg::m_acc_queues
std::vector< cl::CommandQueue > m_acc_queues
Definition: F150IntegrationAlg.h:117
EFTrackingFPGAIntegration::F150IntegrationAlg::m_slicingEngineOutputName
Gaudi::Property< std::string > m_slicingEngineOutputName
Definition: F150IntegrationAlg.h:64
EFTrackingFPGAIntegration::F150IntegrationAlg::m_kernelTime
std::atomic< cl_ulong > m_kernelTime
Time for kernel execution.
Definition: F150IntegrationAlg.h:80
EFTrackingFPGAIntegration::F150IntegrationAlg::m_FPGAPixelOutput
SG::WriteHandleKey< std::vector< uint64_t > > m_FPGAPixelOutput
Definition: F150IntegrationAlg.h:49
plotBeamSpotMon.b
b
Definition: plotBeamSpotMon.py:76
EFTrackingFPGAIntegration::F150IntegrationAlg::m_edmStripOutputBufferList
std::vector< cl::Buffer > m_edmStripOutputBufferList
Definition: F150IntegrationAlg.h:111
EFTrackingFPGAIntegration::F150IntegrationAlg::m_chronoSvc
ServiceHandle< IChronoSvc > m_chronoSvc
Service for timing the algorithm.
Definition: F150IntegrationAlg.h:44
EFTrackingTransient::STRIP_BLOCK_BUF_SIZE
constexpr unsigned long STRIP_BLOCK_BUF_SIZE
Definition: EFTrackingTransient.h:37
python.AthDsoLogger.fname
string fname
Definition: AthDsoLogger.py:66
SG::WriteHandle
Definition: StoreGate/StoreGate/WriteHandle.h:73
EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE
constexpr unsigned long TRACK_CONTAINER_BUF_SIZE
Definition: EFTrackingTransient.h:41
EFTrackingFPGAIntegration::F150IntegrationAlg::m_FPGAThreads
Gaudi::Property< int > m_FPGAThreads
Definition: F150IntegrationAlg.h:53
SG::WriteHandle::record
StatusCode record(std::unique_ptr< T > data)
Record a const object to the store.
ATH_MSG_WARNING
#define ATH_MSG_WARNING(x)
Definition: AthMsgStreamMacros.h:32
EFTrackingFPGAIntegration::F150IntegrationAlg::m_stripClusterOutputBufferList
std::vector< cl::Buffer > m_stripClusterOutputBufferList
Definition: F150IntegrationAlg.h:104
SG::getNSlots
size_t getNSlots()
Return the number of event slots.
Definition: SlotSpecificObj.cxx:64
SlotSpecificObj.h
Maintain a set of objects, one per slot.
dot
Definition: dot.py:1
EFTrackingFPGAIntegration::F150IntegrationAlg::m_pixelClusterKernelName
Gaudi::Property< std::string > m_pixelClusterKernelName
Name of the pixel clustering kernel.
Definition: F150IntegrationAlg.h:60
EFTrackingFPGAIntegration::F150IntegrationAlg::m_pixelOutputTime
std::atomic< cl_ulong > m_pixelOutputTime
Time for pixel output buffer read.
Definition: F150IntegrationAlg.h:78
IntegrationBase::m_program
cl::Program m_program
Program object containing the kernel.
Definition: IntegrationBase.h:68
EFTrackingFPGAIntegration::F150IntegrationAlg::m_stripEdmKernelName
Gaudi::Property< std::string > m_stripEdmKernelName
Name of the FPGA kernel.
Definition: F150IntegrationAlg.h:59
Monitored::Timer
A monitored timer.
Definition: MonitoredTimer.h:32
EFTrackingFPGAIntegration::F150IntegrationAlg::dumpHexData
void dumpHexData(std::span< const uint64_t > data, const std::string &dataDescriptor, const EventContext &ctx) const
Definition: F150IntegrationAlg.cxx:133