Should be overridden by derived classes to perform meaningful work.
167 {
169 auto mnt_timer_Total = Monitored::Timer<std::chrono::milliseconds>("TIME_Total");
170 auto monTime = Monitored::Group(
m_monTool, mnt_timer_Total);
171
172 mnt_timer_Total.start();
173
175
177 const std::vector<uint64_t>* pixelInput{nullptr}, *stripInput{nullptr};
180
181
182
184
187 }
188
189 size_t bufferIndex = ctx.slot() % nthreads;
190
191
192 size_t pixelClusterIndex = ctx.slot() % m_pixelClusteringKernels.size();
193 size_t stripClusterIndex = ctx.slot() % m_stripClusteringKernels.size();
194 size_t stripL2GIndex = ctx.slot() % m_stripL2GKernels.size();
195 size_t pixelEDMIndex = ctx.slot() % m_pixelEdmPrepKernels.size();
196 size_t stripEDMIndex = ctx.slot() % m_stripEdmPrepKernels.size();
197 size_t slicingInIndex = ctx.slot() % m_slicingEngineInputKernels.size();
198 size_t slicingOutIndex = ctx.slot() % m_slicingEngineOutputKernels.size();
199 size_t insideOutInputIndex = ctx.slot() % m_insideOutInputKernels.size();
200 size_t insideOutOutputIndex = ctx.slot() % m_insideOutOutputKernels.size();
201
202 const cl::CommandQueue &acc_queue =
m_acc_queues[bufferIndex];
203
204 cl::Kernel &pixelClusteringKernel = m_pixelClusteringKernels[pixelClusterIndex];
205 cl::Kernel &stripClusteringKernel = m_stripClusteringKernels[stripClusterIndex];
206 cl::Kernel &stripL2GKernel = m_stripL2GKernels[stripL2GIndex];
207 cl::Kernel &pixelEdmPrepKernel = m_pixelEdmPrepKernels[pixelEDMIndex];
208 cl::Kernel &stripEdmPrepKernel = m_stripEdmPrepKernels[stripEDMIndex];
209 cl::Kernel &slicingEngineInputKernel = m_slicingEngineInputKernels[slicingInIndex];
210 cl::Kernel &slicingEngineOutputKernel = m_slicingEngineOutputKernels[slicingOutIndex];
211 cl::Kernel &insideOutInputKernel = m_insideOutInputKernels[insideOutInputIndex];
212 cl::Kernel &insideOutOutputKernel = m_insideOutOutputKernels[insideOutOutputIndex];
213
214
215
216
220
221
225 stripClusteringKernel.setArg(3, static_cast<unsigned int>((*stripInput).size()));
226
227
232
233
236
239
240
241
244
245
246
249
250
251
252 cl::Event evt_write_pixel_input;
253 cl::Event evt_write_strip_input;
254
255 acc_queue.enqueueWriteBuffer(
m_pixelClusterInputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint64_t) * (*pixelInput).size(), (*pixelInput).data(),
nullptr, &evt_write_pixel_input);
256 acc_queue.enqueueWriteBuffer(
m_stripClusterInputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint64_t) * (*stripInput).size(), (*stripInput).data(),
nullptr, &evt_write_strip_input);
257 std::vector<cl::Event> evt_vec_pixel_input{evt_write_pixel_input};
258 std::vector<cl::Event> evt_vec_strip_input{evt_write_strip_input};
259
260
261 cl::Event evt_pixel_clustering, evt_strip_clustering;
262 cl::Event evt_strip_l2g;
263 cl::Event evt_pixel_edm_prep, evt_strip_edm_prep;
264 cl::Event evt_copy_strip_clusters_to_l2g_in, evt_copy_strip_edm_to_l2g_in;
265 cl::Event evt_copy_pix_edm_in, evt_copy_str_edm_in;
266
267 {
268 Athena::Chrono chrono(
"Kernel execution",
m_chronoSvc.get());
269 acc_queue.enqueueTask(pixelClusteringKernel, &evt_vec_pixel_input, &evt_pixel_clustering);
270 acc_queue.enqueueTask(stripClusteringKernel, &evt_vec_strip_input, &evt_strip_clustering);
271
272 std::vector<cl::Event> after_strip_cluster { evt_strip_clustering };
275
276 std::vector<cl::Event> l2g_inputs {evt_copy_strip_clusters_to_l2g_in, evt_copy_strip_edm_to_l2g_in};
277 acc_queue.enqueueTask(stripL2GKernel, &l2g_inputs, &evt_strip_l2g);
278
279 std::vector<cl::Event> after_pix_cluster { evt_pixel_clustering };
281
282 std::vector<cl::Event> after_l2g { evt_strip_l2g };
284
285 std::vector<cl::Event> after_pix_edm_in { evt_copy_pix_edm_in };
286 std::vector<cl::Event> after_str_edm_in { evt_copy_str_edm_in };
287 acc_queue.enqueueTask(pixelEdmPrepKernel, &after_pix_edm_in, &evt_pixel_edm_prep);
288 acc_queue.enqueueTask(stripEdmPrepKernel, &after_str_edm_in, &evt_strip_edm_prep);
289
290 }
291 cl::Event evt_pixel_cluster_output;
292 cl::Event evt_strip_cluster_output;
293
294 std::vector<cl::Event> evt_vec_pixel_edm_prep {evt_pixel_edm_prep};
295 std::vector<cl::Event> evt_vec_strip_edm_prep {evt_strip_edm_prep};
296
297
300
303
304 acc_queue.enqueueReadBuffer(
m_edmPixelOutputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &evt_vec_pixel_edm_prep, &evt_pixel_cluster_output);
305 acc_queue.enqueueReadBuffer(
m_edmStripOutputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &evt_vec_strip_edm_prep, &evt_strip_cluster_output);
306
307
308 cl::Event evt_read_pixel_cluster_raw;
310 std::vector<cl::Event> after_pix_cluster { evt_pixel_clustering };
311 acc_queue.enqueueReadBuffer(
m_pixelClusterOutputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint64_t) * pixelClusterOut.size(), pixelClusterOut.data(), &after_pix_cluster, &evt_read_pixel_cluster_raw);
312
313 std::vector<cl::Event> wait_for_reads = { evt_pixel_cluster_output, evt_read_pixel_cluster_raw };
314 cl::Event::waitForEvents(wait_for_reads);
315
316 mnt_timer_Total.stop();
317
318 if(pixelInput->size() == 6) (*FPGAPixelOutput)[0] = 0;
319 if(stripInput->size() == 6) (*FPGAStripOutput)[0] = 0;
320
321
322
323
324 int nWords = static_cast<int>(pixelClusterOut.size()) - 1;
325 for (; nWords >= 0; --nWords)
326 {
327 if (pixelClusterOut[nWords] == 0xcd00000000000000) break;
328 }
329 if (nWords < 0)
330 {
331 ATH_MSG_ERROR(
"Footer 0xcd00000000000000 not found in pixelClusterOut");
332 return StatusCode::FAILURE;
333 }
334 if (nWords > 0) nWords += 3;
335
336
337 for (
int i = 0; i < 8 && (nWords + i) < static_cast<int>(pixelClusterOut.size()); ++
i)
338 {
339 pixelClusterOut[nWords +
i] = 0;
340 }
341
342 cl::Event evt_write_se_in;
343 cl::Event evt_se_input_done, evt_se_output_done;
344 cl::Event evt_insideoutInput_done, evt_insideoutOutput_done;
345 cl::Event evt_track_output;
346
347 {
348
349 slicingEngineInputKernel.setArg(2, static_cast<unsigned long long>(nWords));
350
351
352 acc_queue.enqueueWriteBuffer(
m_slicingEngineInputBufferList[bufferIndex], CL_FALSE, 0, pixelClusterOut.size() *
sizeof(uint64_t), pixelClusterOut.data(),
nullptr, &evt_write_se_in);
353
354
355
356 std::vector<cl::Event> after_se_write { evt_write_se_in };
357 acc_queue.enqueueTask(slicingEngineInputKernel, &after_se_write, &evt_se_input_done);
358 acc_queue.enqueueTask(slicingEngineOutputKernel, nullptr, &evt_se_output_done);
359
360
361 cl::Event evt_copy_se_to_io_in;
362
363 std::vector<cl::Event> after_se_out { evt_se_output_done };
365
366 std::vector<cl::Event> after_io_in { evt_copy_se_to_io_in };
367 acc_queue.enqueueTask(insideOutInputKernel, &after_io_in, &evt_insideoutInput_done);
368 acc_queue.enqueueTask(insideOutOutputKernel, nullptr, &evt_insideoutOutput_done);
369 }
370
371
374
375
376 std::vector<cl::Event> evt_vec_insideout_output { evt_insideoutOutput_done };
377 acc_queue.enqueueReadBuffer(
m_insideOutOutputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint64_t) * (*FPGATrackOutput).size(), (*FPGATrackOutput).data(), &evt_vec_insideout_output, &evt_track_output);
378
379 std::vector<cl::Event> wait_for_Trackreads = { evt_track_output };
380 cl::Event::waitForEvents(wait_for_Trackreads);
381
382
383
384 cl_ulong pixel_input_time = evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
386 ATH_MSG_DEBUG(
"Pixel input buffer write time: " << pixel_input_time / 1e6 <<
" ms");
387
388
389 cl_ulong strip_input_time = evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
391 ATH_MSG_DEBUG(
"Strip input buffer write time: " << strip_input_time / 1e6 <<
" ms");
392
393
394 cl_ulong pixel_clustering_time = evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
396 ATH_MSG_DEBUG(
"Pixel clustering time: " << pixel_clustering_time / 1e6 <<
" ms");
397
398
399 cl_ulong strip_clustering_time = evt_strip_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
401 ATH_MSG_DEBUG(
"Strip clustering time: " << strip_clustering_time / 1e6 <<
" ms");
402
403
404 cl_ulong strip_l2g_time = evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_START>();
406 ATH_MSG_DEBUG(
"Strip L2G time: " << strip_l2g_time / 1e6 <<
" ms");
407
408 cl_ulong pixel_edm_prep_time = evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_START>();
409 cl_ulong strip_edm_prep_time = evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_START>();
410
412 ATH_MSG_DEBUG(
"PixelEDMPrep time: " << pixel_edm_prep_time / 1e6 <<
" ms");
413
415 ATH_MSG_DEBUG(
"StripEDMPrep time: " << strip_edm_prep_time / 1e6 <<
" ms");
416
417
418
419 cl_ulong kernel_start = evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();
420 cl_ulong kernel_end = std::max(evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>(), evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>());
422 ATH_MSG_DEBUG(
"Kernel execution time: " << (kernel_end - kernel_start) / 1e6 <<
" ms");
423
424
425 cl_ulong pixel_output_time = evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
427 ATH_MSG_DEBUG(
"Pixel output buffer read time: " << pixel_output_time / 1e6 <<
" ms");
428
429
430 cl_ulong strip_output_time = evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
432 ATH_MSG_DEBUG(
"Strip output buffer read time: " << strip_output_time / 1e6 <<
" ms");
433
434 return StatusCode::SUCCESS;
435 }
#define ATH_CHECK
Evaluate an expression and check for errors.
std::vector< cl::Buffer > m_stripClusterInputBufferList
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAPixelRDO
ToolHandle< GenericMonitoringTool > m_monTool
ServiceHandle< IChronoSvc > m_chronoSvc
Service for timing the algorithm.
std::vector< cl::Buffer > m_slicingEngineOutputBufferList
std::atomic< ulonglong > m_numEvents
Number of events processed.
std::vector< cl::Buffer > m_edmStripInputBufferList
std::atomic< cl_ulong > m_pixelClusteringTime
Time for pixel clustering.
std::vector< cl::Buffer > m_slicingEngineInputBufferList
std::vector< cl::Buffer > m_stripL2GEDMOutputBufferList
std::vector< cl::Buffer > m_stripL2GInputBufferList
std::vector< cl::Buffer > m_stripClusterEDMOutputBufferList
std::atomic< cl_ulong > m_pixelOutputTime
Time for pixel output buffer read.
std::atomic< cl_ulong > m_stripL2GTime
Time for strip L2G.
std::vector< cl::Buffer > m_pixelClusterInputBufferList
std::vector< cl::Buffer > m_edmStripOutputBufferList
std::vector< cl::Buffer > m_insideOutOutputBufferList
std::atomic< cl_ulong > m_stripInputTime
Time for strip input buffer write.
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAStripOutput
std::atomic< cl_ulong > m_kernelTime
Time for kernel execution.
std::vector< cl::Buffer > m_edmPixelOutputBufferList
std::vector< cl::CommandQueue > m_acc_queues
std::vector< cl::Buffer > m_insideOutInputBufferList
std::vector< cl::Buffer > m_stripL2GEDMInputBufferList
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAPixelOutput
std::atomic< cl_ulong > m_stripOutputTime
Time for strip output buffer read.
std::vector< cl::Buffer > m_pixelClusterEDMOutputBufferList
std::vector< cl::Buffer > m_stripL2GOutputBufferList
std::vector< cl::Buffer > m_pixelClusterOutputBufferList
SG::WriteHandleKey< std::vector< uint64_t > > m_FPGATrackOutput
std::atomic< cl_ulong > m_stripClusteringTime
Time for strip clustering.
std::vector< cl::Buffer > m_stripClusterOutputBufferList
std::atomic< cl_ulong > m_stripEdmPrepTime
Time for strip EDM preparation.
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAStripRDO
std::atomic< cl_ulong > m_pixelEdmPrepTime
Time for pixel EDM preparation.
std::atomic< cl_ulong > m_pixelInputTime
Time for pixel input buffer write.
std::vector< cl::Buffer > m_edmPixelInputBufferList
Gaudi::Property< int > m_FPGAThreads
constexpr uint32_t STRIP_CONTAINER_BUF_SIZE
constexpr uint32_t STRIP_BLOCK_BUF_SIZE
constexpr uint32_t PIXEL_BLOCK_BUF_SIZE
constexpr unsigned long TRACK_CONTAINER_BUF_SIZE
constexpr uint32_t PIXEL_CONTAINER_BUF_SIZE
size_t getNSlots()
Return the number of event slots.
const T * get(const ReadCondHandleKey< T > &key, const EventContext &ctx)
Convenience function to retrieve an object given a ReadCondHandleKey.