ATLAS Offline Software
Loading...
Searching...
No Matches
F150IntegrationAlg.cxx
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3 */
4
8#include <xrt/xrt_bo.h>
9#include <xrt/xrt_device.h>
10#include <xrt/xrt_kernel.h>
11#include <xrt/xrt_uuid.h>
12#include <fstream>
13
15{
// Body of the algorithm's initialisation step (presumably F150IntegrationAlg::initialize();
// the signature line is not visible here — TODO confirm).
// Visible work: retrieve the chrono service, initialise the StoreGate handle keys,
// create one command queue plus one set of device buffers per FPGA "thread",
// build a cl::Kernel for every compute unit reported by getListofCUs(), and
// retrieve the optional monitoring tool.
// Returns StatusCode::FAILURE if any ATH_CHECK fails or if no pixel clustering
// kernel was constructed; StatusCode::SUCCESS otherwise.
17 {
18 ATH_MSG_INFO("Running on the FPGA accelerator");
19
21
// Timing service handed to the Athena::Chrono scopes below.
22 ATH_CHECK(m_chronoSvc.retrieve());
23
24 {
// NOTE(review): no statement is visible inside this timed scope — confirm what is being timed.
25 Athena::Chrono chrono("Platform and device initlize", m_chronoSvc.get());
27 }
28
29 {
// NOTE(review): likewise, no timed statement is visible inside this scope.
30 Athena::Chrono chrono("CL::loadProgram", m_chronoSvc.get());
32 }
33 ATH_MSG_INFO("loading "<<m_xclbin);
34
35
// Input handle keys (strip and pixel words).
36 ATH_CHECK(m_FPGAStripRDO.initialize());
37 ATH_CHECK(m_FPGAPixelRDO.initialize());
38
// Output handle keys (strip, pixel and track containers).
39 ATH_CHECK(m_FPGAStripOutput.initialize());
40 ATH_CHECK(m_FPGAPixelOutput.initialize());
41 ATH_CHECK(m_FPGATrackOutput.initialize());
42
43 std::vector<std::string> listofCUs;
44
// Fills listofCUs with one "<kernel>:{<instance>}" string per compute unit.
45 getListofCUs(listofCUs);
46
// NOTE(review): err receives the status of every queue/buffer creation below
// but is never inspected, so a failed allocation goes unnoticed here.
47 cl_int err = 0;
48
// Number of queue/buffer sets: the configured value, or the number of event
// slots when the property is below 1.
49 unsigned int nthreads = m_FPGAThreads.value();
50
51 if(m_FPGAThreads.value() < 1){
52 nthreads = SG::getNSlots();
53 }
54
55 // create the buffers
56 for(unsigned int i = 0; i < nthreads; i++)
57 {
// One queue per set, with profiling and out-of-order execution enabled.
58 m_acc_queues.emplace_back(m_context, m_accelerator, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
59
60 // Input
61 m_pixelClusterInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_ONLY, EFTrackingTransient::PIXEL_CONTAINER_INPUT_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
62 m_stripClusterInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_ONLY, EFTrackingTransient::STRIP_CONTAINER_INPUT_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
63
// Clustering outputs: a raw cluster buffer and an EDM buffer for each of pixel and strip.
64 m_pixelClusterOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
65 m_stripClusterOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
66 m_pixelClusterEDMOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE,EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
67 m_stripClusterEDMOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
68
// Strip L2G inputs and outputs (cluster and EDM variants).
69 m_stripL2GInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
70 m_stripL2GEDMInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
71 m_stripL2GOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
72 m_stripL2GEDMOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
73
74 // EDMPrep
75 m_edmPixelInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
76 m_edmStripInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
77
// EDM prep outputs are sized in 32-bit words, unlike the 64-bit buffers above.
78 m_edmPixelOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE * sizeof(uint32_t), nullptr, &err));
79 m_edmStripOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE * sizeof(uint32_t), nullptr, &err));
80
81
// Slicing engine input/output buffers.
82 m_slicingEngineInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
83 m_slicingEngineOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
84
// Inside-out input/output buffers.
85 m_insideOutInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE,EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
86 m_insideOutOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), nullptr, &err));
87 }
88
89 // Create kernels for each one of CUs that is inside device
// A compute unit is assigned to the first kernel family whose configured name
// occurs as a substring of the CU name, so the order of this else-if chain
// matters if one configured name is contained in another.
90 for (const auto& cuName: listofCUs)
91 {
92 // Pixel clustering
93 if(cuName.find(m_pixelClusterKernelName.value()) != std::string::npos) m_pixelClusteringKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
94
95 // Strip clustering
96 else if(cuName.find(m_stripClusterKernelName.value()) != std::string::npos) m_stripClusteringKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
97 // Strip L2G
98 else if(cuName.find(m_stripL2GKernelName.value()) != std::string::npos) m_stripL2GKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
99
100 // EDM prep
101 else if(cuName.find(m_pixelEdmKernelName.value()) != std::string::npos) m_pixelEdmPrepKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
102 else if(cuName.find(m_stripEdmKernelName.value()) != std::string::npos) m_stripEdmPrepKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
103 // Slicing
104 else if(cuName.find(m_slicingEngineInputName.value()) != std::string::npos) m_slicingEngineInputKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
105 else if(cuName.find(m_slicingEngineOutputName.value()) != std::string::npos) m_slicingEngineOutputKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
106 // IO
107 else if(cuName.find(m_insideOutInputName.value()) != std::string::npos) m_insideOutInputKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
108 else if(cuName.find(m_insideOutOutputName.value()) != std::string::npos) m_insideOutOutputKernels.emplace_back(cl::Kernel(m_program, cuName.c_str()));
109 else
110 {
111 ATH_MSG_WARNING("Do not recognize kernel name: "<<cuName);
112 }
113 }
114
// Report how many kernels were created for each configured kernel name.
115 ATH_MSG_INFO(m_pixelClusterKernelName.value()<<" size: "<<m_pixelClusteringKernels.size());
116 ATH_MSG_INFO(m_stripClusterKernelName.value()<<" size: "<<m_stripClusteringKernels.size());
117 ATH_MSG_INFO(m_stripL2GKernelName.value()<<" size: "<<m_stripL2GKernels.size());
118 ATH_MSG_INFO(m_pixelEdmKernelName.value()<<" size: "<<m_pixelEdmPrepKernels.size());
119 ATH_MSG_INFO(m_stripEdmKernelName.value()<<" size: "<<m_stripEdmPrepKernels.size());
120 ATH_MSG_INFO(m_slicingEngineInputName.value()<<" size: "<<m_slicingEngineInputKernels.size());
121 ATH_MSG_INFO(m_slicingEngineOutputName.value()<<" size: "<<m_slicingEngineOutputKernels.size());
122 ATH_MSG_INFO(m_insideOutInputName.value()<<" size: "<<m_insideOutInputKernels.size());
123 ATH_MSG_INFO(m_insideOutOutputName.value()<<" size: "<<m_insideOutOutputKernels.size());
124
// NOTE(review): only the pixel clustering list is verified to be non-empty;
// the other eight kernel lists are not checked here.
125 if(m_pixelClusteringKernels.size()==0){
126 ATH_MSG_FATAL("No m_pixelClusteringKernels constructed");
127 return StatusCode::FAILURE;
128 }
129
130 // monitoring
// The monitoring tool is optional: it is retrieved only when configured.
131 if ( !m_monTool.empty() ) {
132 ATH_CHECK(m_monTool.retrieve() );
133 }
134 else {
135 ATH_MSG_INFO("Monitoring tool is empty");
136 }
137
138 return StatusCode::SUCCESS;
139 }
140
141 void F150IntegrationAlg::dumpHexData(std::span<const uint64_t> data, const std::string& dataDescriptor, const EventContext &ctx) const {
142
143 if(!m_outputTextFile) return;
144 auto withEvt = [&](const std::string& fname) {
145 const auto evt = ctx.eventID().event_number(); // get current event number
146 const auto dot = fname.rfind('.');
147 if (dot == std::string::npos) {
148 return fname + "_" + std::to_string(evt);
149 }
150 return fname.substr(0, dot) + "_" + std::to_string(evt) + fname.substr(dot);
151 };
152
153
154 ATH_MSG_DEBUG("STARTING " << dataDescriptor << " words:");
155 std::ofstream outputFile(withEvt(dataDescriptor));
156
157 for (uint64_t d : data) {
158 outputFile << std::hex << std::setw(16) << std::setfill('0') << d << '\n';
159 }
160
161 // Write different data types
162 outputFile.close();
163 }
164
165
166 StatusCode F150IntegrationAlg::execute(const EventContext &ctx) const
167 {
168 ATH_MSG_DEBUG("Executing F150IntegrationAlg");
169 auto mnt_timer_Total = Monitored::Timer<std::chrono::milliseconds>("TIME_Total");
170 auto monTime = Monitored::Group(m_monTool, mnt_timer_Total);
171
172 mnt_timer_Total.start();
173
174 m_numEvents++;
175
177 const std::vector<uint64_t>* pixelInput{nullptr}, *stripInput{nullptr};
178 ATH_CHECK(SG::get(pixelInput, m_FPGAPixelRDO, ctx));
179 ATH_CHECK(SG::get(stripInput, m_FPGAStripRDO, ctx));
180
181
182 // logic
183 unsigned int nthreads = m_FPGAThreads.value();
184
185 if(m_FPGAThreads.value() < 1){
186 nthreads = SG::getNSlots();
187 }
188
189 size_t bufferIndex = ctx.slot() % nthreads;
190
191 // Get index for each of the kernels
192 size_t pixelClusterIndex = ctx.slot() % m_pixelClusteringKernels.size();
193 size_t stripClusterIndex = ctx.slot() % m_stripClusteringKernels.size();
194 size_t stripL2GIndex = ctx.slot() % m_stripL2GKernels.size();
195 size_t pixelEDMIndex = ctx.slot() % m_pixelEdmPrepKernels.size();
196 size_t stripEDMIndex = ctx.slot() % m_stripEdmPrepKernels.size();
197 size_t slicingInIndex = ctx.slot() % m_slicingEngineInputKernels.size();
198 size_t slicingOutIndex = ctx.slot() % m_slicingEngineOutputKernels.size();
199 size_t insideOutInputIndex = ctx.slot() % m_insideOutInputKernels.size();
200 size_t insideOutOutputIndex = ctx.slot() % m_insideOutOutputKernels.size();
201
202 const cl::CommandQueue &acc_queue = m_acc_queues[bufferIndex];
203
204 cl::Kernel &pixelClusteringKernel = m_pixelClusteringKernels[pixelClusterIndex];
205 cl::Kernel &stripClusteringKernel = m_stripClusteringKernels[stripClusterIndex];
206 cl::Kernel &stripL2GKernel = m_stripL2GKernels[stripL2GIndex];
207 cl::Kernel &pixelEdmPrepKernel = m_pixelEdmPrepKernels[pixelEDMIndex];
208 cl::Kernel &stripEdmPrepKernel = m_stripEdmPrepKernels[stripEDMIndex];
209 cl::Kernel &slicingEngineInputKernel = m_slicingEngineInputKernels[slicingInIndex];
210 cl::Kernel &slicingEngineOutputKernel = m_slicingEngineOutputKernels[slicingOutIndex];
211 cl::Kernel &insideOutInputKernel = m_insideOutInputKernels[insideOutInputIndex];
212 cl::Kernel &insideOutOutputKernel = m_insideOutOutputKernels[insideOutOutputIndex];
213
214
215 // Set kernel arguments
216 // Pixel clustering: (0=input, 1=raw out, 2=EDM out)
217 pixelClusteringKernel.setArg(0, m_pixelClusterInputBufferList[bufferIndex]);
218 pixelClusteringKernel.setArg(1, m_pixelClusterOutputBufferList[bufferIndex]);
219 pixelClusteringKernel.setArg(2, m_pixelClusterEDMOutputBufferList[bufferIndex]);
220
221 // Strip clustering: (0=input, 1=raw out, 2=EDM out, 3=size)
222 stripClusteringKernel.setArg(0, m_stripClusterInputBufferList[bufferIndex]);
223 stripClusteringKernel.setArg(1, m_stripClusterOutputBufferList[bufferIndex]);
224 stripClusteringKernel.setArg(2, m_stripClusterEDMOutputBufferList[bufferIndex]);
225 stripClusteringKernel.setArg(3, static_cast<unsigned int>((*stripInput).size()));
226
227 // Strip L2G: (0=clusters in, 1=EDM in, 2=clusters out, 3=EDM out)
228 stripL2GKernel.setArg(0, m_stripL2GInputBufferList[bufferIndex]);
229 stripL2GKernel.setArg(1, m_stripL2GEDMInputBufferList[bufferIndex]);
230 stripL2GKernel.setArg(2, m_stripL2GOutputBufferList[bufferIndex]);
231 stripL2GKernel.setArg(3, m_stripL2GEDMOutputBufferList[bufferIndex]);
232
233 // EDM prep: (0=in, 1=out)
234 pixelEdmPrepKernel.setArg(0, m_edmPixelInputBufferList[bufferIndex]);
235 pixelEdmPrepKernel.setArg(1, m_edmPixelOutputBufferList[bufferIndex]);
236
237 stripEdmPrepKernel.setArg(0, m_edmStripInputBufferList[bufferIndex]);
238 stripEdmPrepKernel.setArg(1, m_edmStripOutputBufferList[bufferIndex]);
239
240 // SE: input + output
241 // input: (0=in buffer, 2=NWords), output: (1=out buffer)
242 slicingEngineInputKernel.setArg(0, m_slicingEngineInputBufferList[bufferIndex]);
243 slicingEngineOutputKernel.setArg(1, m_slicingEngineOutputBufferList[bufferIndex]);
244 // Arg 2 (NWords) is set later after we compute it.
245
246 // IO: input + output
247 insideOutInputKernel.setArg(0, m_insideOutInputBufferList[bufferIndex]);
248 insideOutOutputKernel.setArg(0, m_insideOutOutputBufferList[bufferIndex]);
249
250
251 // Start the transfers
252 cl::Event evt_write_pixel_input;
253 cl::Event evt_write_strip_input;
254
255 acc_queue.enqueueWriteBuffer(m_pixelClusterInputBufferList[bufferIndex], CL_FALSE, 0, sizeof(uint64_t) * (*pixelInput).size(), (*pixelInput).data(), nullptr, &evt_write_pixel_input);
256 acc_queue.enqueueWriteBuffer(m_stripClusterInputBufferList[bufferIndex], CL_FALSE, 0, sizeof(uint64_t) * (*stripInput).size(), (*stripInput).data(), nullptr, &evt_write_strip_input);
257 std::vector<cl::Event> evt_vec_pixel_input{evt_write_pixel_input};
258 std::vector<cl::Event> evt_vec_strip_input{evt_write_strip_input};
259
260
261 cl::Event evt_pixel_clustering, evt_strip_clustering;
262 cl::Event evt_strip_l2g;
263 cl::Event evt_pixel_edm_prep, evt_strip_edm_prep;
264 cl::Event evt_copy_strip_clusters_to_l2g_in, evt_copy_strip_edm_to_l2g_in;
265 cl::Event evt_copy_pix_edm_in, evt_copy_str_edm_in;
266
267 {
268 Athena::Chrono chrono("Kernel execution", m_chronoSvc.get());
269 acc_queue.enqueueTask(pixelClusteringKernel, &evt_vec_pixel_input, &evt_pixel_clustering);
270 acc_queue.enqueueTask(stripClusteringKernel, &evt_vec_strip_input, &evt_strip_clustering);
271
272 std::vector<cl::Event> after_strip_cluster { evt_strip_clustering };
273 acc_queue.enqueueCopyBuffer(m_stripClusterOutputBufferList[bufferIndex], m_stripL2GInputBufferList[bufferIndex], 0, 0, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), &after_strip_cluster, &evt_copy_strip_clusters_to_l2g_in);
274 acc_queue.enqueueCopyBuffer(m_stripClusterEDMOutputBufferList[bufferIndex], m_stripL2GEDMInputBufferList[bufferIndex],0, 0, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t),&after_strip_cluster, &evt_copy_strip_edm_to_l2g_in);
275
276 std::vector<cl::Event> l2g_inputs {evt_copy_strip_clusters_to_l2g_in, evt_copy_strip_edm_to_l2g_in};
277 acc_queue.enqueueTask(stripL2GKernel, &l2g_inputs, &evt_strip_l2g);
278
279 std::vector<cl::Event> after_pix_cluster { evt_pixel_clustering };
280 acc_queue.enqueueCopyBuffer( m_pixelClusterEDMOutputBufferList[bufferIndex], m_edmPixelInputBufferList[bufferIndex], 0, 0, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), &after_pix_cluster, &evt_copy_pix_edm_in);
281
282 std::vector<cl::Event> after_l2g { evt_strip_l2g };
283 acc_queue.enqueueCopyBuffer(m_stripL2GEDMOutputBufferList[bufferIndex], m_edmStripInputBufferList[bufferIndex], 0, 0, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), &after_l2g, &evt_copy_str_edm_in);
284
285 std::vector<cl::Event> after_pix_edm_in { evt_copy_pix_edm_in };
286 std::vector<cl::Event> after_str_edm_in { evt_copy_str_edm_in };
287 acc_queue.enqueueTask(pixelEdmPrepKernel, &after_pix_edm_in, &evt_pixel_edm_prep);
288 acc_queue.enqueueTask(stripEdmPrepKernel, &after_str_edm_in, &evt_strip_edm_prep);
289
290 }
291 cl::Event evt_pixel_cluster_output;
292 cl::Event evt_strip_cluster_output;
293
294 std::vector<cl::Event> evt_vec_pixel_edm_prep {evt_pixel_edm_prep};
295 std::vector<cl::Event> evt_vec_strip_edm_prep {evt_strip_edm_prep};
296
297 // output handles
299 ATH_CHECK(FPGAPixelOutput.record(std::make_unique<std::vector<uint32_t> >(EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE, 0)));
300
302 ATH_CHECK(FPGAStripOutput.record(std::make_unique<std::vector<uint32_t> >(EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE, 0)));
303
304 acc_queue.enqueueReadBuffer(m_edmPixelOutputBufferList[bufferIndex], CL_FALSE, 0, sizeof(uint32_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &evt_vec_pixel_edm_prep, &evt_pixel_cluster_output);
305 acc_queue.enqueueReadBuffer(m_edmStripOutputBufferList[bufferIndex], CL_FALSE, 0, sizeof(uint32_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &evt_vec_strip_edm_prep, &evt_strip_cluster_output);
306
307 // Read the clusters out for now
308 cl::Event evt_read_pixel_cluster_raw;
309 std::vector<uint64_t> pixelClusterOut(EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE, 0);
310 std::vector<cl::Event> after_pix_cluster { evt_pixel_clustering };
311 acc_queue.enqueueReadBuffer(m_pixelClusterOutputBufferList[bufferIndex], CL_FALSE, 0, sizeof(uint64_t) * pixelClusterOut.size(), pixelClusterOut.data(), &after_pix_cluster, &evt_read_pixel_cluster_raw);
312
313 std::vector<cl::Event> wait_for_reads = { evt_pixel_cluster_output, evt_read_pixel_cluster_raw };
314 cl::Event::waitForEvents(wait_for_reads);
315
316 mnt_timer_Total.stop();
317
318 if(pixelInput->size() == 6) (*FPGAPixelOutput)[0] = 0; // if no pixel input, set the first element to 0
319 if(stripInput->size() == 6) (*FPGAStripOutput)[0] = 0; // if no strip input, set the first element to 0
320
321
322 // Scan the pixel clustering words to see where the footer is
323 // scan footer
324 int nWords = static_cast<int>(pixelClusterOut.size()) - 1;
325 for (; nWords >= 0; --nWords)
326 {
327 if (pixelClusterOut[nWords] == 0xcd00000000000000) break;
328 }
329 if (nWords < 0)
330 {
331 ATH_MSG_ERROR("Footer 0xcd00000000000000 not found in pixelClusterOut");
332 return StatusCode::FAILURE;
333 }
334 if (nWords > 0) nWords += 3; // account for 3-word footer
335
336 // Padd the output with zero to the next 8th word
337 for (int i = 0; i < 8 && (nWords + i) < static_cast<int>(pixelClusterOut.size()); ++i)
338 {
339 pixelClusterOut[nWords + i] = 0;
340 }
341
342 cl::Event evt_write_se_in;
343 cl::Event evt_se_input_done, evt_se_output_done;
344 cl::Event evt_insideoutInput_done, evt_insideoutOutput_done;
345 cl::Event evt_track_output;
346
347 {
348 // set NWords (arg 2) for SE input kernel
349 slicingEngineInputKernel.setArg(2, static_cast<unsigned long long>(nWords));
350
351 // write SE input buffer
352 acc_queue.enqueueWriteBuffer(m_slicingEngineInputBufferList[bufferIndex], CL_FALSE, 0, pixelClusterOut.size() * sizeof(uint64_t), pixelClusterOut.data(), nullptr, &evt_write_se_in);
353
354 // run SE kernels (input then output)
355
356 std::vector<cl::Event> after_se_write { evt_write_se_in };
357 acc_queue.enqueueTask(slicingEngineInputKernel, &after_se_write, &evt_se_input_done);
358 acc_queue.enqueueTask(slicingEngineOutputKernel, nullptr, &evt_se_output_done);
359
360 // copy SE out → IO input
361 cl::Event evt_copy_se_to_io_in;
362
363 std::vector<cl::Event> after_se_out { evt_se_output_done };
364 acc_queue.enqueueCopyBuffer(m_slicingEngineOutputBufferList[bufferIndex], m_insideOutInputBufferList[bufferIndex], 0, 0, EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE * sizeof(uint64_t), &after_se_out, &evt_copy_se_to_io_in);
365
366 std::vector<cl::Event> after_io_in { evt_copy_se_to_io_in };
367 acc_queue.enqueueTask(insideOutInputKernel, &after_io_in, &evt_insideoutInput_done);
368 acc_queue.enqueueTask(insideOutOutputKernel, nullptr, &evt_insideoutOutput_done);
369 }
370
371
373 ATH_CHECK(FPGATrackOutput.record(std::make_unique<std::vector<uint64_t> >(EFTrackingTransient::TRACK_CONTAINER_BUF_SIZE, 0)));
374
375 // read back tracks (you already do this—just make it depend on IO out)
376 std::vector<cl::Event> evt_vec_insideout_output { evt_insideoutOutput_done };
377 acc_queue.enqueueReadBuffer(m_insideOutOutputBufferList[bufferIndex], CL_FALSE, 0, sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &evt_vec_insideout_output, &evt_track_output);
378
379 std::vector<cl::Event> wait_for_Trackreads = { evt_track_output };
380 cl::Event::waitForEvents(wait_for_Trackreads);
381
382 // calculate the time for the kernel execution
383 // get the time of writing pixel input buffer
384 cl_ulong pixel_input_time = evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
385 m_pixelInputTime += pixel_input_time;
386 ATH_MSG_DEBUG("Pixel input buffer write time: " << pixel_input_time / 1e6 << " ms");
387
388 // get the time of writing strip input buffer
389 cl_ulong strip_input_time = evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
390 m_stripInputTime += strip_input_time;
391 ATH_MSG_DEBUG("Strip input buffer write time: " << strip_input_time / 1e6 << " ms");
392
393 // get the time of pixel clustering
394 cl_ulong pixel_clustering_time = evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
395 m_pixelClusteringTime += pixel_clustering_time;
396 ATH_MSG_DEBUG("Pixel clustering time: " << pixel_clustering_time / 1e6 << " ms");
397
398 // get the time of strip clustering
399 cl_ulong strip_clustering_time = evt_strip_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
400 m_stripClusteringTime += strip_clustering_time;
401 ATH_MSG_DEBUG("Strip clustering time: " << strip_clustering_time / 1e6 << " ms");
402
403 // get the time of strip L2G
404 cl_ulong strip_l2g_time = evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_START>();
405 m_stripL2GTime += strip_l2g_time;
406 ATH_MSG_DEBUG("Strip L2G time: " << strip_l2g_time / 1e6 << " ms");
407
408 cl_ulong pixel_edm_prep_time = evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_START>();
409 cl_ulong strip_edm_prep_time = evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_START>();
410
411 m_pixelEdmPrepTime += pixel_edm_prep_time;
412 ATH_MSG_DEBUG("PixelEDMPrep time: " << pixel_edm_prep_time / 1e6 << " ms");
413
414 m_stripEdmPrepTime += strip_edm_prep_time;
415 ATH_MSG_DEBUG("StripEDMPrep time: " << strip_edm_prep_time / 1e6 << " ms");
416
417
418 // get the time of the whole kernel execution
419 cl_ulong kernel_start = evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();
420 cl_ulong kernel_end = std::max(evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>(), evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>());
421 m_kernelTime += (kernel_end - kernel_start);
422 ATH_MSG_DEBUG("Kernel execution time: " << (kernel_end - kernel_start) / 1e6 << " ms");
423
424 // get the time of reading pixel output buffer
425 cl_ulong pixel_output_time = evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
426 m_pixelOutputTime += pixel_output_time;
427 ATH_MSG_DEBUG("Pixel output buffer read time: " << pixel_output_time / 1e6 << " ms");
428
429 // get the time of reading strip output buffer
430 cl_ulong strip_output_time = evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
431 m_stripOutputTime += strip_output_time;
432 ATH_MSG_DEBUG("Strip output buffer read time: " << strip_output_time / 1e6 << " ms");
433
434 return StatusCode::SUCCESS;
435 }
436
// Body of the end-of-job summary (presumably F150IntegrationAlg::finalize();
// the signature line is not visible here — TODO confirm).
// Logs the number of processed events and, when at least one event was
// processed, the per-event average of each accumulated timing counter.
// Always returns StatusCode::SUCCESS.
438 {
439
440 ATH_MSG_INFO("Finalizing F150IntegrationAlg");
441 ATH_MSG_INFO("Number of events: " << m_numEvents);
442
// Guard against dividing by zero when no event was processed.
443 if(m_numEvents > 0){
// Each average is accumulator / events / 1e6 and is labelled "ms", i.e. the
// accumulators are presumably in nanoseconds (OpenCL profiling units — TODO confirm).
// NOTE(review): if the counters are integer atomics, the first division is an
// integer division and truncates before the conversion to ms.
444 ATH_MSG_INFO("Pixel input ave time: " << m_pixelInputTime / m_numEvents / 1e6 << " ms");
445 ATH_MSG_INFO("Strip input ave time: " << m_stripInputTime / m_numEvents / 1e6 << " ms");
446 ATH_MSG_INFO("Pixel clustering ave time: " << m_pixelClusteringTime / m_numEvents / 1e6 << " ms");
447 ATH_MSG_INFO("Strip clustering ave time: " << m_stripClusteringTime / m_numEvents / 1e6 << " ms");
448 ATH_MSG_INFO("Strip L2G ave time: " << m_stripL2GTime / m_numEvents / 1e6 << " ms");
449 ATH_MSG_INFO("PixelEDMPrep ave time: " << m_pixelEdmPrepTime / m_numEvents / 1e6 << " ms");
450 ATH_MSG_INFO("StripEDMPrep ave time: " << m_stripEdmPrepTime / m_numEvents / 1e6 << " ms");
451 ATH_MSG_INFO("Kernel execution ave time: " << m_kernelTime / m_numEvents / 1e6 << " ms");
452 ATH_MSG_INFO("Pixel output ave time: " << m_pixelOutputTime / m_numEvents / 1e6 << " ms");
453 ATH_MSG_INFO("Strip output ave time: " << m_stripOutputTime / m_numEvents / 1e6 << " ms");
454 }
455
456 return StatusCode::SUCCESS;
457 }
458
459 void F150IntegrationAlg::getListofCUs(std::vector<std::string>& cuNames)
460 {
461 xrt::xclbin xrt_xclbin(m_xclbin.value());
462
463 ATH_MSG_INFO("xsa name: "<<xrt_xclbin.get_xsa_name());
464 ATH_MSG_INFO("fpga name: "<<xrt_xclbin.get_fpga_device_name());
465 ATH_MSG_INFO("uuid: "<<xrt_xclbin.get_uuid().to_string());
466
467 for (const xrt::xclbin::kernel &kernel : xrt_xclbin.get_kernels()) {
468 const std::string& kernelName = kernel.get_name();
469
470 ATH_MSG_INFO("kernelName: "<<kernelName);
471
472
473 for (const xrt::xclbin::ip &computeUnit : kernel.get_cus()) {
474 const std::string& computeUnitName = computeUnit.get_name();
475 const std::string computeUnitIsolatedName = computeUnitName.substr(kernelName.size() + 1);
476
477 const std::string computeUnitUsableName = kernelName + ":{" + computeUnitIsolatedName + "}";
478
479 ATH_MSG_INFO("CU name: "<<computeUnitUsableName);
480 cuNames.push_back(computeUnitUsableName);
481 }
482 }
483 }
484
485} // namespace EFTrackingFPGAIntegration
#define ATH_CHECK
Evaluate an expression and check for errors.
#define ATH_MSG_ERROR(x)
#define ATH_MSG_FATAL(x)
#define ATH_MSG_INFO(x)
#define ATH_MSG_WARNING(x)
#define ATH_MSG_DEBUG(x)
Maintain a set of objects, one per slot.
Exception-safe IChronoSvc caller.
char data[hepevt_bytes_allocation_ATLAS]
Definition HepEvt.cxx:11
Exception-safe IChronoSvc caller.
Definition Chrono.h:50
Gaudi::Property< std::string > m_xclbin
Path and name of the xclbin file.
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAPixelRDO
ToolHandle< GenericMonitoringTool > m_monTool
Gaudi::Property< std::string > m_slicingEngineInputName
ServiceHandle< IChronoSvc > m_chronoSvc
Service for timing the algorithm.
Gaudi::Property< std::string > m_insideOutInputName
std::vector< cl::Buffer > m_slicingEngineOutputBufferList
void getListofCUs(std::vector< std::string > &cuNames)
std::atomic< ulonglong > m_numEvents
Number of events processed.
Gaudi::Property< std::string > m_pixelClusterKernelName
Name of the pixel clustering kernel.
std::atomic< cl_ulong > m_pixelClusteringTime
Time for pixel clustering.
Gaudi::Property< std::string > m_stripEdmKernelName
Name of the FPGA kernel.
std::vector< cl::Buffer > m_stripClusterEDMOutputBufferList
std::atomic< cl_ulong > m_pixelOutputTime
Time for pixel output buffer read.
std::atomic< cl_ulong > m_stripL2GTime
Time for strip L2G.
virtual StatusCode initialize() override final
Detect the OpenCL devices and prepare OpenCL context.
virtual StatusCode finalize() override final
virtual StatusCode execute(const EventContext &ctx) const override final
Should be overridden by derived classes to perform meaningful work.
std::atomic< cl_ulong > m_stripInputTime
Time for strip input buffer write.
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAStripOutput
std::atomic< cl_ulong > m_kernelTime
Time for kernel execution.
Gaudi::Property< std::string > m_stripL2GKernelName
Name of the strip L2G kernels.
void dumpHexData(std::span< const uint64_t > data, const std::string &dataDescriptor, const EventContext &ctx) const
Gaudi::Property< std::string > m_slicingEngineOutputName
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAPixelOutput
std::atomic< cl_ulong > m_stripOutputTime
Time for strip output buffer read.
Gaudi::Property< bool > m_outputTextFile
Whether to run SE or not.
std::vector< cl::Buffer > m_pixelClusterEDMOutputBufferList
SG::WriteHandleKey< std::vector< uint64_t > > m_FPGATrackOutput
std::atomic< cl_ulong > m_stripClusteringTime
Time for strip clustering.
std::atomic< cl_ulong > m_stripEdmPrepTime
Time for strip EDM preparation.
Gaudi::Property< std::string > m_pixelEdmKernelName
Name of the FPGA kernel.
Gaudi::Property< std::string > m_insideOutOutputName
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAStripRDO
std::atomic< cl_ulong > m_pixelEdmPrepTime
Time for pixel EDM preparation.
std::atomic< cl_ulong > m_pixelInputTime
Time for pixel input buffer write.
Gaudi::Property< std::string > m_stripClusterKernelName
Name of the strip clustering kernel.
StatusCode loadProgram(const std::string &xclbin)
Find the xclbin file and load it into the OpenCL program object.
cl::Program m_program
Program object containing the kernel.
virtual StatusCode initialize() override
Detect the OpenCL devices and prepare OpenCL context.
cl::Context m_context
Context object for the application.
StatusCode precheck(const std::vector< Gaudi::Property< std::string > > &inputs) const
Check if the desired Gaudi properties are set.
cl::Device m_accelerator
Device object for the accelerator card.
Group of local monitoring quantities and retain correlation when filling histograms
A monitored timer.
StatusCode record(std::unique_ptr< T > data)
Record a const object to the store.
The class for encoding RDO to FPGA format.
constexpr unsigned long PIXEL_CONTAINER_INPUT_BUF_SIZE
constexpr uint32_t STRIP_CONTAINER_BUF_SIZE
constexpr unsigned long STRIP_CONTAINER_INPUT_BUF_SIZE
constexpr uint32_t STRIP_BLOCK_BUF_SIZE
constexpr uint32_t PIXEL_BLOCK_BUF_SIZE
constexpr unsigned long TRACK_CONTAINER_BUF_SIZE
constexpr uint32_t PIXEL_CONTAINER_BUF_SIZE
size_t getNSlots()
Return the number of event slots.
const T * get(const ReadCondHandleKey< T > &key, const EventContext &ctx)
Convenience function to retrieve an object given a ReadCondHandleKey.
Definition dot.py:1