ATLAS Offline Software
Loading...
Searching...
No Matches
F110IntegrationAlg.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3 */
4
8#include <xrt/xrt_bo.h>
9#include <xrt/xrt_device.h>
10#include <xrt/xrt_kernel.h>
11#include <xrt/xrt_uuid.h>
12
14{
16 {
// Set-up body: retrieves the chrono service, initialises the StoreGate keys, creates one
// cl::Kernel per compute unit found in the xclbin, allocates the per-slot device buffers and
// creates the command queue.
// NOTE(review): the listing numbers embedded in this block skip 15, 19, 25 and 30, so the
// function signature and some statements are not visible here — confirm against the original file.
17 ATH_MSG_INFO("Running on the FPGA accelerator");
18
20
21 ATH_CHECK(m_chronoSvc.retrieve());
22
// NOTE(review): no statement is visible inside this timing scope (listing line 25 is absent).
23 {
24 Athena::Chrono chrono("Platform and device initlize", m_chronoSvc.get());
26 }
27
// NOTE(review): no statement is visible inside this timing scope (listing line 30 is absent).
28 {
29 Athena::Chrono chrono("CL::loadProgram", m_chronoSvc.get());
31 }
32 ATH_MSG_INFO("loading "<<m_xclbin);
33
34
// Initialise the input, input-size and output handle keys.
35 ATH_CHECK(m_FPGAStripRDO.initialize());
36 ATH_CHECK(m_FPGAPixelRDO.initialize());
37
38 ATH_CHECK(m_FPGAPixelRDOSize.initialize());
39 ATH_CHECK(m_FPGAStripRDOSize.initialize());
40
41 ATH_CHECK(m_FPGAStripOutput.initialize());
42 ATH_CHECK(m_FPGAPixelOutput.initialize());
43
// Status slot passed to the cl::Buffer and cl::CommandQueue constructors below.
44 cl_int err = 0;
45
46 // Get the list of CUs
47 std::vector<std::string> listofCUs;
48 getListofCUs(listofCUs);
49
50 // Create kernels for each one of CUs that is inside device
// Each CU name is matched by substring against the configured kernel names; the first
// branch that matches wins, and unmatched names only produce a warning.
51 for (const auto& cuName: listofCUs)
52 {
53 // Pixel clustering
54 if(cuName.find(m_pixelClusterKernelName.value()) != std::string::npos) m_pixelClusterKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
55
56 // Strip clustering
57 else if(cuName.find(m_stripClusterKernelName.value()) != std::string::npos) m_stripClusterKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
58
59 // Strip L2G
60 else if(cuName.find(m_stripL2GKernelName.value()) != std::string::npos) m_stripL2GKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
61
62 // EDM prep
63 else if(cuName.find(m_pixelEdmKernelName.value()) != std::string::npos) m_pixelEDMKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
64
65 else if(cuName.find(m_stripEdmKernelName.value()) != std::string::npos) m_stripEDMKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
66
67 else
68 {
69 ATH_MSG_WARNING("Do not recognize kernel name: "<<cuName);
70 }
71
72 }
73
// Report how many compute units were found for each kernel name.
// NOTE(review): nothing here rejects a count of zero, yet execute() takes `% size()` of each
// of these kernel lists — confirm an empty list cannot occur.
74 ATH_MSG_INFO(m_pixelClusterKernelName.value()<<" size: "<<m_pixelClusterKernels.size());
75 ATH_MSG_INFO(m_stripClusterKernelName.value()<<" size: "<<m_stripClusterKernels.size());
76 ATH_MSG_INFO(m_stripL2GKernelName.value()<<" size: "<<m_stripL2GKernels.size());
77 ATH_MSG_INFO(m_pixelEdmKernelName.value()<<" size: "<<m_pixelEDMKernels.size());
78 ATH_MSG_INFO(m_stripEdmKernelName.value()<<" size: "<<m_stripEDMKernels.size());
79
80
81 // Strip
82 // Set vector size to be = to # of CUs
// Each end-event vector gets one default-constructed cl::Event per compute unit of its own kernel type.
83 m_stripClusterEndEvents.resize(m_stripClusterKernels.size());
84 m_stripL2GEndEvents.resize(m_stripL2GKernels.size());
85 m_stripEDMEndEvents.resize(m_stripEDMKernels.size());
86
87 // Pixel
88 m_pixelClusterEndEvents.resize(m_pixelClusterKernels.size());
89 m_pixelEDMEndEvents.resize(m_pixelEDMKernels.size());
90
// Number of buffer sets to allocate: the FPGAThreads property, or the number of event slots
// when that property is below 1.
91 unsigned int nthreads = m_FPGAThreads.value();
92
93 if(m_FPGAThreads.value() < 1){
94 nthreads = SG::getNSlots();
95 }
96
97 // create the buffers
// NOTE(review): every constructor below writes its status into the same `err`, which is only
// tested once after the command queue is created; a failed allocation is therefore
// overwritten by any later successful call.
98 for(unsigned int i = 0; i < nthreads; i++)
99 {
100 // Input
101 m_pixelClusterInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_ONLY, EFTrackingTransient::PIXEL_CONTAINER_INPUT_BUF_SIZE * sizeof(uint64_t), NULL, &err));
102 m_stripClusterInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_ONLY, EFTrackingTransient::STRIP_CONTAINER_INPUT_BUF_SIZE * sizeof(uint64_t), NULL, &err));
103
// Intermediate buffers handed from one kernel stage to the next in execute().
104 m_stripClusterOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err));
105 m_pixelClusterEDMOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE,EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err));
106 m_stripClusterEDMOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err));
107
108 m_stripL2GOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err));
109 m_stripL2GEDMOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err));
110 // EDMPrep
// These two are sized in uint32_t words and are the buffers execute() reads back to the host.
111 m_edmPixelOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE * sizeof(uint32_t), NULL, &err));
112 m_edmStripOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE * sizeof(uint32_t), NULL, &err));
113 }
114
115
116
// Single command queue used for all enqueues in execute(). Profiling is enabled because
// execute() reads profiling info from its events; out-of-order execution is requested, so
// command ordering relies on the explicit event wait lists built in execute().
117 m_acc_queue = cl::CommandQueue(m_context, m_accelerator, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
118
// Only the status left in `err` by the command-queue constructor is examined here.
119 if (err != 0) return StatusCode::FAILURE;
120 return StatusCode::SUCCESS;
121 }
122
123 std::vector<cl::Event> F110IntegrationAlg::getDepVector(std::vector<cl::Event> &endEvents, size_t cu) const {
124 std::vector<cl::Event> deps;
125
126 cl::Event event = endEvents[cu];
127
128 if (event() != NULL)
129 {
130 // Event exists
131 deps.push_back(event);
132 }
133
134 return deps;
135 }
136
137
// Per-event processing: picks a buffer set and one compute unit per kernel type from the
// event slot, sets the kernel arguments, enqueues input writes, the kernel chain and the
// output reads on m_acc_queue under m_fpgaHandleMtx, then waits for the reads outside the
// lock and accumulates OpenCL profiling counters.
// NOTE(review): the listing numbers embedded in this block skip 143, 278 and 281, so some
// statements are not visible here — confirm against the original file.
138 StatusCode F110IntegrationAlg::execute(const EventContext &ctx) const
139 {
140 ATH_MSG_DEBUG("Executing F110IntegrationAlg");
141 m_numEvents++;
142
// NOTE(review): pixelInput / stripInput are dereferenced further down without a visible
// null check — confirm SG::get cannot return null here.
144 auto pixelInput = SG::get(m_FPGAPixelRDO, ctx);
145 auto stripInput = SG::get(m_FPGAStripRDO, ctx);
146
147 const int* pixelInputSize{nullptr}, *stripInputSize{nullptr};
148 ATH_CHECK(SG::get(pixelInputSize, m_FPGAPixelRDOSize, ctx));
149 ATH_CHECK(SG::get(stripInputSize, m_FPGAStripRDOSize, ctx));
150
151 // logic
// Same rule as in the buffer allocation: the FPGAThreads property, or the number of event
// slots when that property is below 1.
152 unsigned int nthreads = m_FPGAThreads.value();
153
154 if(m_FPGAThreads.value() < 1){
155 nthreads = SG::getNSlots();
156 }
157
158 size_t bufferIndex = ctx.slot() % nthreads;
159
160 // Get index for each of the kernels
// NOTE(review): each of these is a modulo by the kernel-list size; an empty list would make
// it a modulo by zero — confirm every list is non-empty after initialisation.
161 size_t pixelClusterIndex = ctx.slot() % m_pixelClusterKernels.size();
162 size_t stripClusterIndex = ctx.slot() % m_stripClusterKernels.size();
163 size_t stripL2GIndex = ctx.slot() % m_stripL2GKernels.size();
164 size_t pixelEDMIndex = ctx.slot() % m_pixelEDMKernels.size();
165 size_t stripEDMIndex = ctx.slot() % m_stripEDMKernels.size();
166
167
168 // Explicit mutex needed so we don't block multithreading from functioning properly but end the execute function in accordance with FPGA hardware resource utilization
// Held from here until lock.unlock() below, i.e. across setArg and every enqueue call.
169 std::unique_lock lock(m_fpgaHandleMtx);
170
171
172 //ATH_MSG_DEBUG("F100 Thread number "<<ctx.slot()<<" running on buffer "<<bufferIndex<<" pixelClusterIndex: "<< pixelClusterIndex<<" stripClusterIndex: "<< stripClusterIndex<<" stripL2GIndex: "<< stripL2GIndex<<" pixelEDMIndex: "<< pixelEDMIndex<<" stripEDMIndex: "<< stripEDMIndex);
173
174
175 // Grab buffers
176 cl::Buffer pixelClusterInputBuffer = m_pixelClusterInputBufferList[bufferIndex];
177 cl::Buffer stripClusterInputBuffer = m_stripClusterInputBufferList[bufferIndex];
178 cl::Buffer stripClusterOutputBuffer = m_stripClusterOutputBufferList[bufferIndex];
179 cl::Buffer pixelClusterEDMOutputBuffer = m_pixelClusterEDMOutputBufferList[bufferIndex];
180 cl::Buffer stripClusterEDMOutputBuffer = m_stripClusterEDMOutputBufferList[bufferIndex];
181 cl::Buffer stripL2GOutputBuffer = m_stripL2GOutputBufferList[bufferIndex];
182 cl::Buffer stripL2GEDMOutputBuffer = m_stripL2GEDMOutputBufferList[bufferIndex];
183 cl::Buffer edmPixelOutputBuffer = m_edmPixelOutputBufferList[bufferIndex];
184 cl::Buffer edmStripOutputBuffer = m_edmStripOutputBufferList[bufferIndex];
185
186
187 // Grab kernels
// References to the shared kernel objects; the setArg calls below modify them in place.
188 cl::Kernel &pixelClusteringKernel = m_pixelClusterKernels[pixelClusterIndex];
189 cl::Kernel &pixelEdmPrepKernel = m_pixelEDMKernels[pixelEDMIndex];
190
191 cl::Kernel &stripClusteringKernel = m_stripClusterKernels[stripClusterIndex];
192 cl::Kernel &stripL2GKernel = m_stripL2GKernels[stripL2GIndex];
193 cl::Kernel &stripEdmPrepKernel = m_stripEDMKernels[stripEDMIndex];
194
195
196 // Set kernel args
// Data flow set up here:
//   pixel: input -> pixel clustering -> pixelClusterEDMOutput -> pixel EDM prep -> edmPixelOutput
//   strip: input -> strip clustering -> stripClusterOutput/EDMOutput -> L2G -> stripL2GEDMOutput -> strip EDM prep -> edmStripOutput
197 pixelClusteringKernel.setArg<cl::Buffer>(0, pixelClusterInputBuffer);
198 pixelClusteringKernel.setArg<cl::Buffer>(1, pixelClusterEDMOutputBuffer);
199
200 stripClusteringKernel.setArg<cl::Buffer>(0, stripClusterInputBuffer);
201 stripClusteringKernel.setArg<cl::Buffer>(1, stripClusterOutputBuffer);
202 stripClusteringKernel.setArg<cl::Buffer>(2, stripClusterEDMOutputBuffer);
203 stripClusteringKernel.setArg<unsigned int>(3, *stripInputSize);
204
205 stripL2GKernel.setArg<cl::Buffer>(0, stripClusterOutputBuffer);
206 stripL2GKernel.setArg<cl::Buffer>(1, stripClusterEDMOutputBuffer);
207 stripL2GKernel.setArg<cl::Buffer>(2, stripL2GOutputBuffer);
208 stripL2GKernel.setArg<cl::Buffer>(3, stripL2GEDMOutputBuffer);
209
210 pixelEdmPrepKernel.setArg<cl::Buffer>(0, pixelClusterEDMOutputBuffer);
211 pixelEdmPrepKernel.setArg<cl::Buffer>(1, edmPixelOutputBuffer);
212 stripEdmPrepKernel.setArg<cl::Buffer>(0, stripL2GEDMOutputBuffer);
213 stripEdmPrepKernel.setArg<cl::Buffer>(1, edmStripOutputBuffer);
214
215
216 // Start memory transfers while respecting event deps
217 // Note that no explicit mutex is needed anymore due to the m_fpgaHandleMtx mutex
// The input writes wait on the previous clustering event recorded for the same compute unit.
218 std::vector<cl::Event> writePixelInputDeps = getDepVector(m_pixelClusterEndEvents, pixelClusterIndex);
219 std::vector<cl::Event> writeStripInputDeps = getDepVector(m_stripClusterEndEvents, stripClusterIndex);
220
221 cl::Event writePixelInputEvt;
222 cl::Event writeStripInputEvt;
// Non-blocking writes (CL_FALSE) of the whole input vectors.
// NOTE(review): the byte count comes from the input vector size and is not compared here
// with the device buffer capacity; the enqueue return codes are not checked either.
223 m_acc_queue.enqueueWriteBuffer(pixelClusterInputBuffer, CL_FALSE, 0, sizeof(uint64_t) * (*pixelInput).size(), (*pixelInput).data(), &writePixelInputDeps, &writePixelInputEvt);
224 m_acc_queue.enqueueWriteBuffer(stripClusterInputBuffer, CL_FALSE, 0, sizeof(uint64_t) * (*stripInput).size(), (*stripInput).data(), &writeStripInputDeps, &writeStripInputEvt);
225
226 std::vector<cl::Event> pixelClusteringDeps = { writePixelInputEvt };
227 std::vector<cl::Event> stripClusteringDeps = { writeStripInputEvt };
228
// NOTE(review): pixelL2GEvt and edmPrepEvt are not used anywhere in the visible code.
229 cl::Event pixelClusteringEvt;
230 cl::Event stripClusteringEvt;
231 cl::Event pixelL2GEvt;
232 cl::Event stripL2GEvt;
233 cl::Event edmPrepEvt;
234 cl::Event pixelEdmPrepEvt;
235 cl::Event stripEdmPrepEvt;
236
237 {
// The chrono scope covers the enqueue calls only; nothing in it waits for the kernels to finish.
238 Athena::Chrono chrono("Kernel execution", m_chronoSvc.get());
239
240 // CLUSTERING
241 m_acc_queue.enqueueTask(pixelClusteringKernel, &pixelClusteringDeps, &pixelClusteringEvt);
242 m_acc_queue.enqueueTask(stripClusteringKernel, &stripClusteringDeps, &stripClusteringEvt);
243
244 // Track the clustering end events
245 m_pixelClusterEndEvents[pixelClusterIndex] = pixelClusteringEvt;
246 m_stripClusterEndEvents[stripClusterIndex] = stripClusteringEvt;
247
// NOTE(review): m_stripL2GEndEvents is sized to the number of L2G kernels and the kernel is
// chosen with stripL2GIndex, but the vector is indexed with stripClusterIndex here and at
// the assignment below — confirm the two kernel counts always match.
248 std::vector<cl::Event> stripL2GDeps = getDepVector(m_stripL2GEndEvents, stripClusterIndex);
249 stripL2GDeps.push_back(stripClusteringEvt);
250
251 m_acc_queue.enqueueTask(stripL2GKernel, &stripL2GDeps, &stripL2GEvt);
252
253 m_stripL2GEndEvents[stripClusterIndex] = stripL2GEvt;
254
255 // EDM PREP
// NOTE(review): the EDM end-event vectors are indexed with the clustering indices rather
// than pixelEDMIndex / stripEDMIndex, and no assignment to m_pixelEDMEndEvents or
// m_stripEDMEndEvents is visible in this block, so these lookups appear to see only
// default-constructed events — confirm.
256 std::vector<cl::Event> pixelEdmPrepDeps = getDepVector(m_pixelEDMEndEvents, pixelClusterIndex);
257 pixelEdmPrepDeps.push_back(pixelClusteringEvt);
258
259 // Run discrete EDM prep kernels for F110
260 std::vector<cl::Event> stripEdmPrepDeps = getDepVector(m_stripEDMEndEvents, stripClusterIndex);
261 stripEdmPrepDeps.push_back(stripL2GEvt);
262
263 m_acc_queue.enqueueTask(stripEdmPrepKernel, &stripEdmPrepDeps, &stripEdmPrepEvt);
264 m_acc_queue.enqueueTask(pixelEdmPrepKernel, &pixelEdmPrepDeps, &pixelEdmPrepEvt);
265
266 }
267
268 // READ OUTPUTS
269 cl::Event readPixelOutputEvt;
270 cl::Event readStripOutputEvt;
271 std::vector<cl::Event> readPixelOutputDeps;
272 std::vector<cl::Event> readStripOutputDeps;
273
// Each read waits for the matching EDM prep kernel.
274 readPixelOutputDeps.push_back(pixelEdmPrepEvt);
275 readStripOutputDeps.push_back(stripEdmPrepEvt);
276
277
// Record zero-filled output vectors of the full container size; the reads below fill them.
// NOTE(review): FPGAPixelOutput and FPGAStripOutput are not declared in the visible lines
// (listing lines 278 and 281 are absent).
279 ATH_CHECK(FPGAPixelOutput.record(std::make_unique<std::vector<uint32_t> >(EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE, 0)));
280
282 ATH_CHECK(FPGAStripOutput.record(std::make_unique<std::vector<uint32_t> >(EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE, 0)));
283
// Non-blocking reads straight into the recorded vectors.
284 m_acc_queue.enqueueReadBuffer(edmPixelOutputBuffer, CL_FALSE, 0, sizeof(uint32_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &readPixelOutputDeps, &readPixelOutputEvt);
285 m_acc_queue.enqueueReadBuffer(edmStripOutputBuffer, CL_FALSE, 0, sizeof(uint32_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &readStripOutputDeps, &readStripOutputEvt);
286
287
288 // Unlocks mutex so other events can handle their FPGA interactions while this event is waiting
289 lock.unlock();
290
291
292 // Wait for the reading to finish before terminating the event
293 std::vector<cl::Event> terminationDeps = { readPixelOutputEvt, readStripOutputEvt };
294 cl::Event::waitForEvents(terminationDeps);
295
// NOTE(review): 6 is an unexplained constant; per the trailing comments it marks "no input",
// presumably the size of an empty encoded container — confirm.
296 if(*pixelInputSize == 6) (*FPGAPixelOutput)[0] = 0; // if no pixel input, set the first element to 0
297 if(*stripInputSize == 6) (*FPGAStripOutput)[0] = 0; // if no strip input, set the first element to 0
298
299
// Profiling: each stage time is COMMAND_END minus COMMAND_START of its event, added to the
// matching accumulator; the debug print divides the raw difference by 1e6 and labels it ms.
300 // calculate the time for the kernel execution
301 // get the time of writing pixel input buffer
302 cl_ulong pixel_input_time = writePixelInputEvt.getProfilingInfo<CL_PROFILING_COMMAND_END>() - writePixelInputEvt.getProfilingInfo<CL_PROFILING_COMMAND_START>();
303 m_pixelInputTime += pixel_input_time;
304 ATH_MSG_DEBUG("Pixel input buffer write time: " << pixel_input_time / 1e6 << " ms");
305
306 // get the time of writing strip input buffer
307 cl_ulong strip_input_time = writeStripInputEvt.getProfilingInfo<CL_PROFILING_COMMAND_END>() - writeStripInputEvt.getProfilingInfo<CL_PROFILING_COMMAND_START>();
308 m_stripInputTime += strip_input_time;
309 ATH_MSG_DEBUG("Strip input buffer write time: " << strip_input_time / 1e6 << " ms");
310
311 // get the time of pixel clustering
312 cl_ulong pixel_clustering_time = pixelClusteringEvt.getProfilingInfo<CL_PROFILING_COMMAND_END>() - pixelClusteringEvt.getProfilingInfo<CL_PROFILING_COMMAND_START>();
313 m_pixelClusteringTime += pixel_clustering_time;
314 ATH_MSG_DEBUG("Pixel clustering time: " << pixel_clustering_time / 1e6 << " ms");
315
316 // get the time of strip clustering
317 cl_ulong strip_clustering_time = stripClusteringEvt.getProfilingInfo<CL_PROFILING_COMMAND_END>() - stripClusteringEvt.getProfilingInfo<CL_PROFILING_COMMAND_START>();
318 m_stripClusteringTime += strip_clustering_time;
319 ATH_MSG_DEBUG("Strip clustering time: " << strip_clustering_time / 1e6 << " ms");
320
321
322 // get the time of strip L2G
323 cl_ulong strip_l2g_time = stripL2GEvt.getProfilingInfo<CL_PROFILING_COMMAND_END>() - stripL2GEvt.getProfilingInfo<CL_PROFILING_COMMAND_START>();
324 m_stripL2GTime += strip_l2g_time;
325 ATH_MSG_DEBUG("Strip L2G time: " << strip_l2g_time / 1e6 << " ms");
326
327 // get the time of EDMPrep
328
329 cl_ulong pixel_edm_prep_time = pixelEdmPrepEvt.getProfilingInfo<CL_PROFILING_COMMAND_END>() - pixelEdmPrepEvt.getProfilingInfo<CL_PROFILING_COMMAND_START>();
330 cl_ulong strip_edm_prep_time = stripEdmPrepEvt.getProfilingInfo<CL_PROFILING_COMMAND_END>() - stripEdmPrepEvt.getProfilingInfo<CL_PROFILING_COMMAND_START>();
331
332 m_pixelEdmPrepTime += pixel_edm_prep_time;
333 ATH_MSG_DEBUG("PixelEDMPrep time: " << pixel_edm_prep_time / 1e6 << " ms");
334
335 m_stripEdmPrepTime += strip_edm_prep_time;
336 ATH_MSG_DEBUG("StripEDMPrep time: " << strip_edm_prep_time / 1e6 << " ms");
337
338 // get the time of the whole kernel execution
// Measured from the moment the pixel clustering kernel was queued to the later of the two
// EDM prep end times, so it includes queue wait time and not only execution time.
339 cl_ulong kernel_start = pixelClusteringEvt.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();
340 cl_ulong kernel_end = std::max(pixelEdmPrepEvt.getProfilingInfo<CL_PROFILING_COMMAND_END>(), stripEdmPrepEvt.getProfilingInfo<CL_PROFILING_COMMAND_END>());
341 m_kernelTime += (kernel_end - kernel_start);
342 ATH_MSG_DEBUG("Kernel execution time: " << (kernel_end - kernel_start) / 1e6 << " ms");
343
344 // get the time of reading pixel output buffer
345 cl_ulong pixel_output_time = readPixelOutputEvt.getProfilingInfo<CL_PROFILING_COMMAND_END>() - readPixelOutputEvt.getProfilingInfo<CL_PROFILING_COMMAND_START>();
346 m_pixelOutputTime += pixel_output_time;
347 ATH_MSG_DEBUG("Pixel output buffer read time: " << pixel_output_time / 1e6 << " ms");
348
349 // get the time of reading strip output buffer
350 cl_ulong strip_output_time = readStripOutputEvt.getProfilingInfo<CL_PROFILING_COMMAND_END>() - readStripOutputEvt.getProfilingInfo<CL_PROFILING_COMMAND_START>();
351 m_stripOutputTime += strip_output_time;
352 ATH_MSG_DEBUG("Strip output buffer read time: " << strip_output_time / 1e6 << " ms");
353
354
355 return StatusCode::SUCCESS;
356 }
357
359 {
360
361 ATH_MSG_INFO("Finalizing F110IntegrationAlg");
362 ATH_MSG_INFO("Number of events: " << m_numEvents);
363
364 if(m_numEvents > 0){
365 ATH_MSG_INFO("Pixel input ave time: " << m_pixelInputTime / m_numEvents / 1e6 << " ms");
366 ATH_MSG_INFO("Strip input ave time: " << m_stripInputTime / m_numEvents / 1e6 << " ms");
367 ATH_MSG_INFO("Pixel clustering ave time: " << m_pixelClusteringTime / m_numEvents / 1e6 << " ms");
368 ATH_MSG_INFO("Strip clustering ave time: " << m_stripClusteringTime / m_numEvents / 1e6 << " ms");
369 ATH_MSG_INFO("Strip L2G ave time: " << m_stripL2GTime / m_numEvents / 1e6 << " ms");
370 ATH_MSG_INFO("PixelEDMPrep ave time: " << m_pixelEdmPrepTime / m_numEvents / 1e6 << " ms");
371 ATH_MSG_INFO("StripEDMPrep ave time: " << m_stripEdmPrepTime / m_numEvents / 1e6 << " ms");
372 ATH_MSG_INFO("Kernel execution ave time: " << m_kernelTime / m_numEvents / 1e6 << " ms");
373 ATH_MSG_INFO("Pixel output ave time: " << m_pixelOutputTime / m_numEvents / 1e6 << " ms");
374 ATH_MSG_INFO("Strip output ave time: " << m_stripOutputTime / m_numEvents / 1e6 << " ms");
375 }
376
377 return StatusCode::SUCCESS;
378 }
379
380 void F110IntegrationAlg::getListofCUs(std::vector<std::string>& cuNames)
381 {
382 xrt::xclbin xrt_xclbin(m_xclbin.value());
383
384 ATH_MSG_INFO("xsa name: "<<xrt_xclbin.get_xsa_name());
385 ATH_MSG_INFO("fpga name: "<<xrt_xclbin.get_fpga_device_name());
386 ATH_MSG_INFO("uuid: "<<xrt_xclbin.get_uuid().to_string());
387
388 for (const xrt::xclbin::kernel &kernel : xrt_xclbin.get_kernels()) {
389 const std::string& kernelName = kernel.get_name();
390
391 ATH_MSG_INFO("kernelName: "<<kernelName);
392
393
394 for (const xrt::xclbin::ip &computeUnit : kernel.get_cus()) {
395 const std::string& computeUnitName = computeUnit.get_name();
396 const std::string computeUnitIsolatedName = computeUnitName.substr(kernelName.size() + 1);
397
398 const std::string computeUnitUsableName = kernelName + ":{" + computeUnitIsolatedName + "}";
399
400 ATH_MSG_INFO("CU name: "<<computeUnitUsableName);
401 cuNames.push_back(computeUnitUsableName);
402 }
403 }
404
405 }
406
407} // namespace EFTrackingFPGAIntegration
#define ATH_CHECK
Evaluate an expression and check for errors.
#define ATH_MSG_INFO(x)
#define ATH_MSG_WARNING(x)
#define ATH_MSG_DEBUG(x)
Maintain a set of objects, one per slot.
Exception-safe IChronoSvc caller.
Exception-safe IChronoSvc caller.
Definition Chrono.h:50
std::vector< cl::Event > getDepVector(std::vector< cl::Event > &endEvents, size_t cu) const
Gaudi::Property< std::string > m_stripL2GKernelName
Name of the strip L2G kernel.
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAPixelRDO
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAPixelOutput
Gaudi::Property< std::string > m_pixelEdmKernelName
Name of the pixel EDM preparation kernel.
std::atomic< cl_ulong > m_pixelInputTime
Time for pixel input buffer write.
virtual StatusCode initialize() override final
Detect the OpenCL devices and prepare OpenCL context.
std::atomic< cl_ulong > m_stripOutputTime
Time for strip output buffer read.
ServiceHandle< IChronoSvc > m_chronoSvc
Service for timing the algorithm.
Gaudi::Property< std::string > m_xclbin
Path and name of the xclbin file.
std::atomic< cl_ulong > m_pixelClusteringTime
Time for pixel clustering.
std::atomic< cl_ulong > m_kernelTime
Time for kernel execution.
std::atomic< cl_ulong > m_stripInputTime
Time for strip input buffer write.
virtual StatusCode finalize() override final
std::atomic< cl_ulong > m_pixelOutputTime
Time for pixel output buffer read.
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAStripOutput
Gaudi::Property< std::string > m_stripEdmKernelName
Name of the strip EDM preparation kernel.
std::atomic< cl_ulong > m_stripClusteringTime
Time for strip clustering.
Gaudi::Property< std::string > m_pixelClusterKernelName
Name of the pixel clustering kernel.
Gaudi::Property< std::string > m_stripClusterKernelName
Name of the strip clustering kernel.
std::atomic< ulonglong > m_numEvents
Number of events processed.
void getListofCUs(std::vector< std::string > &cuNames)
virtual StatusCode execute(const EventContext &ctx) const override final
Should be overridden by derived classes to perform meaningful work.
std::atomic< cl_ulong > m_pixelEdmPrepTime
Time for pixel EDM preparation.
std::atomic< cl_ulong > m_stripL2GTime
Time for strip L2G.
std::atomic< cl_ulong > m_stripEdmPrepTime
Time for strip EDM preparation.
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAStripRDO
StatusCode loadProgram(const std::string &xclbin)
Find the xclbin file and load it into the OpenCL program object.
cl::Program m_program
Program object containing the kernel.
virtual StatusCode initialize() override
Detect the OpenCL devices and prepare OpenCL context.
cl::Context m_context
Context object for the application.
StatusCode precheck(const std::vector< Gaudi::Property< std::string > > &inputs) const
Check if the desired Gaudi properties are set.
cl::Device m_accelerator
Device object for the accelerator card.
StatusCode record(std::unique_ptr< T > data)
Record a const object to the store.
The class for encoding RDO to FPGA format.
constexpr unsigned long PIXEL_CONTAINER_INPUT_BUF_SIZE
constexpr uint32_t STRIP_CONTAINER_BUF_SIZE
constexpr unsigned long STRIP_CONTAINER_INPUT_BUF_SIZE
constexpr uint32_t STRIP_BLOCK_BUF_SIZE
constexpr uint32_t PIXEL_BLOCK_BUF_SIZE
constexpr uint32_t PIXEL_CONTAINER_BUF_SIZE
size_t getNSlots()
Return the number of event slots.
const T * get(const ReadCondHandleKey< T > &key, const EventContext &ctx)
Convenience function to retrieve an object given a ReadCondHandleKey.