Should be overridden by derived classes to perform meaningful work.
132 {
134 auto mnt_timer_Total = Monitored::Timer<std::chrono::milliseconds>("TIME_Total");
135 auto monTime = Monitored::Group(
m_monTool, mnt_timer_Total);
136
137 mnt_timer_Total.start();
138
140
142 const std::vector<uint64_t>* pixelInput{nullptr}, *stripInput{nullptr};
145
146 const int* pixelInputSize{nullptr}, *stripInputSize{nullptr};
149
150
152
155 }
156
157 size_t bufferIndex = ctx.slot() % nthreads;
158
159
160 size_t pixelClusterIndex = ctx.slot() % m_pixelClusteringKernels.size();
161 size_t stripClusterIndex = ctx.slot() % m_stripClusteringKernels.size();
162 size_t stripL2GIndex = ctx.slot() % m_stripL2GKernels.size();
163 size_t pixelL2GIndex = m_pixelL2GKernels.size() ? ctx.slot() % m_pixelL2GKernels.size() : 0;
164 size_t pixelEDMIndex = m_pixelEdmPrepKernels.size() ? ctx.slot() % m_pixelEdmPrepKernels.size() : 0;
165 size_t stripEDMIndex = m_stripEdmPrepKernels.size() ? ctx.slot() % m_stripEdmPrepKernels.size() : 0;
166
167 const cl::CommandQueue &acc_queue =
m_acc_queues[bufferIndex];
168
169 cl::Kernel &pixelClusteringKernel = m_pixelClusteringKernels[pixelClusterIndex];
170 cl::Kernel &stripClusteringKernel = m_stripClusteringKernels[stripClusterIndex];
171 cl::Kernel &stripL2GKernel = m_stripL2GKernels[stripL2GIndex];
172 cl::Kernel &pixelEdmPrepKernel = m_pixelEdmPrepKernels[pixelEDMIndex];
173 cl::Kernel &stripEdmPrepKernel = m_stripEdmPrepKernels[stripEDMIndex];
174 cl::Kernel *pixelL2GKernel = nullptr;
175
176 if (!
m_doF110) pixelL2GKernel = &m_pixelL2GKernels[pixelL2GIndex];
177
178
182 } else {
185
187 uint32_t cluster_vector_size_bytes =
sizeof(
uint64_t) * (*pixelInput).size();
188 uint32_t edm_vector_size_bytes =
sizeof(
uint64_t) * (*pixelInput).size() * 8;
189
190
191 pixelClusteringKernel.setArg(3, hit_vector_size_bytes);
192 pixelClusteringKernel.setArg(4, cluster_vector_size_bytes);
193 pixelClusteringKernel.setArg(5, edm_vector_size_bytes);
194 }
195
199 stripClusteringKernel.setArg(3, static_cast<unsigned int>(*stripInputSize));
200
206 }
207
212
213
218
219
220
221
222 cl::Event evt_write_pixel_input;
223 cl::Event evt_write_strip_input;
224
225 acc_queue.enqueueWriteBuffer(
m_pixelClusterInputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint64_t) * (*pixelInput).size(), (*pixelInput).data(), NULL, &evt_write_pixel_input);
226 acc_queue.enqueueWriteBuffer(
m_stripClusterInputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint64_t) * (*stripInput).size(), (*stripInput).data(), NULL, &evt_write_strip_input);
227 std::vector<cl::Event> evt_vec_pixel_input{evt_write_pixel_input};
228 std::vector<cl::Event> evt_vec_strip_input{evt_write_strip_input};
229
230
231 cl::Event evt_pixel_clustering;
232 cl::Event evt_strip_clustering;
233 cl::Event evt_strip_l2g;
234 cl::Event evt_pixel_l2g;
235 cl::Event evt_edm_prep;
236 cl::Event evt_pixel_edm_prep;
237 cl::Event evt_strip_edm_prep;
238 {
239 Athena::Chrono chrono(
"Kernel execution",
m_chronoSvc.get());
240 acc_queue.enqueueTask(pixelClusteringKernel, &evt_vec_pixel_input, &evt_pixel_clustering);
241 acc_queue.enqueueTask(stripClusteringKernel, &evt_vec_strip_input, &evt_strip_clustering);
242
243 std::vector<cl::Event> evt_vec_strip_clustering{evt_strip_clustering};
245 {
246 std::vector<cl::Event> evt_vec_pixel_clustering{evt_pixel_clustering};
247 acc_queue.enqueueTask(*pixelL2GKernel, &evt_vec_pixel_clustering, &evt_pixel_l2g);
248 }
249 acc_queue.enqueueTask(stripL2GKernel, &evt_vec_strip_clustering, &evt_strip_l2g);
250
251 std::vector<cl::Event> evt_vec_pixelEDM;
252 if(
m_doF110) evt_vec_pixelEDM.push_back(evt_pixel_clustering);
253 else evt_vec_pixelEDM.push_back(evt_pixel_l2g);
254 std::vector<cl::Event> evt_vec_strip_l2g{evt_strip_l2g};
255 acc_queue.enqueueTask(pixelEdmPrepKernel, &evt_vec_pixelEDM, &evt_pixel_edm_prep);
256 acc_queue.enqueueTask(stripEdmPrepKernel, &evt_vec_strip_l2g, &evt_strip_edm_prep);
257
258 }
259
260 cl::Event evt_pixel_cluster_output;
261 cl::Event evt_strip_cluster_output;
262
263 std::vector<cl::Event> evt_vec_pixel_edm_prep;
264 std::vector<cl::Event> evt_vec_strip_edm_prep;
265
266 evt_vec_pixel_edm_prep.push_back(evt_pixel_edm_prep);
267 evt_vec_strip_edm_prep.push_back(evt_strip_edm_prep);
268
269
270
271
274
277
278 acc_queue.enqueueReadBuffer(
m_edmPixelOutputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &evt_vec_pixel_edm_prep, &evt_pixel_cluster_output);
279 acc_queue.enqueueReadBuffer(
m_edmStripOutputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &evt_vec_strip_edm_prep, &evt_strip_cluster_output);
280
281 std::vector<cl::Event> wait_for_reads = { evt_pixel_cluster_output, evt_strip_cluster_output };
282 cl::Event::waitForEvents(wait_for_reads);
283
284 mnt_timer_Total.stop();
285
286 if(*pixelInputSize == 6) (*FPGAPixelOutput)[0] = 0;
287 if(*stripInputSize == 6) (*FPGAStripOutput)[0] = 0;
288
289
290
291 cl_ulong pixel_input_time = evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
293 ATH_MSG_DEBUG(
"Pixel input buffer write time: " << pixel_input_time / 1e6 <<
" ms");
294
295
296 cl_ulong strip_input_time = evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
298 ATH_MSG_DEBUG(
"Strip input buffer write time: " << strip_input_time / 1e6 <<
" ms");
299
300
301 cl_ulong pixel_clustering_time = evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
303 ATH_MSG_DEBUG(
"Pixel clustering time: " << pixel_clustering_time / 1e6 <<
" ms");
304
305
306 cl_ulong strip_clustering_time = evt_strip_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
308 ATH_MSG_DEBUG(
"Strip clustering time: " << strip_clustering_time / 1e6 <<
" ms");
309
311
312 cl_ulong pixel_l2g_time = evt_pixel_l2g.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_l2g.getProfilingInfo<CL_PROFILING_COMMAND_START>();
314 ATH_MSG_DEBUG(
"Pixel L2G time: " << pixel_l2g_time / 1e6 <<
" ms");
315 }
316
317
318 cl_ulong strip_l2g_time = evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_START>();
320 ATH_MSG_DEBUG(
"Strip L2G time: " << strip_l2g_time / 1e6 <<
" ms");
321
322
324 cl_ulong pixel_edm_prep_time = evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_START>();
325 cl_ulong strip_edm_prep_time = evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_START>();
326
328 ATH_MSG_DEBUG(
"PixelEDMPrep time: " << pixel_edm_prep_time / 1e6 <<
" ms");
329
331 ATH_MSG_DEBUG(
"StripEDMPrep time: " << strip_edm_prep_time / 1e6 <<
" ms");
332 }
333 else {
334 cl_ulong edm_prep_time = evt_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_START>();
335
337 ATH_MSG_DEBUG(
"EDMPrep time: " << edm_prep_time / 1e6 <<
" ms");
338 }
339
340
341 cl_ulong kernel_start = evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();
343 std::max(evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>(), evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>()) :
344 evt_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>();
346 ATH_MSG_DEBUG(
"Kernel execution time: " << (kernel_end - kernel_start) / 1e6 <<
" ms");
347
348
349 cl_ulong pixel_output_time = evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
351 ATH_MSG_DEBUG(
"Pixel output buffer read time: " << pixel_output_time / 1e6 <<
" ms");
352
353
354 cl_ulong strip_output_time = evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
356 ATH_MSG_DEBUG(
"Strip output buffer read time: " << strip_output_time / 1e6 <<
" ms");
357
358 return StatusCode::SUCCESS;
359 }
#define ATH_CHECK
Evaluate an expression and check for errors.
std::vector< cl::Buffer > m_edmPixelOutputBufferList
std::atomic< ulonglong > m_numEvents
Number of events processed.
ToolHandle< GenericMonitoringTool > m_monTool
std::atomic< cl_ulong > m_pixelOutputTime
Time for pixel output buffer read.
std::vector< cl::Buffer > m_edmStripOutputBufferList
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAPixelRDO
Gaudi::Property< int > m_FPGAThreads
std::atomic< cl_ulong > m_stripL2GTime
Time for strip L2G.
std::atomic< cl_ulong > m_stripEdmPrepTime
Time for strip EDM preparation.
std::vector< cl::Buffer > m_pixelClusterInputBufferList
SG::ReadHandleKey< int > m_FPGAPixelRDOSize
std::atomic< cl_ulong > m_stripClusteringTime
Time for strip clustering.
std::atomic< cl_ulong > m_pixelClusteringTime
Time for pixel clustering.
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAPixelOutput
std::atomic< cl_ulong > m_stripOutputTime
Time for strip output buffer read.
std::vector< cl::CommandQueue > m_acc_queues
std::vector< cl::Buffer > m_pixelL2GOutputBufferList
std::atomic< cl_ulong > m_pixelInputTime
Time for pixel input buffer write.
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAStripRDO
ServiceHandle< IChronoSvc > m_chronoSvc
Service for timing the algorithm.
std::atomic< cl_ulong > m_pixelEdmPrepTime
Time for pixel EDM preparation.
std::vector< cl::Buffer > m_stripClusterEDMOutputBufferList
std::atomic< cl_ulong > m_pixelL2GTime
Time for pixel L2G.
Gaudi::Property< bool > m_doF110
Boolean to run F110 instead of F100.
std::vector< cl::Buffer > m_pixelClusterOutputBufferList
std::vector< cl::Buffer > m_stripClusterOutputBufferList
SG::ReadHandleKey< int > m_FPGAStripRDOSize
std::vector< cl::Buffer > m_pixelL2GEDMOutputBufferList
std::vector< cl::Buffer > m_stripL2GEDMOutputBufferList
std::vector< cl::Buffer > m_stripL2GOutputBufferList
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAStripOutput
std::vector< cl::Buffer > m_stripClusterInputBufferList
std::vector< cl::Buffer > m_pixelClusterEDMOutputBufferList
std::atomic< cl_ulong > m_kernelTime
Time for kernel execution.
std::atomic< cl_ulong > m_stripInputTime
Time for strip input buffer write.
std::atomic< cl_ulong > m_edmPrepTime
Time for EDM preparation.
constexpr uint32_t STRIP_CONTAINER_BUF_SIZE
constexpr uint32_t PIXEL_CONTAINER_BUF_SIZE
size_t getNSlots()
Return the number of event slots.
const T * get(const ReadCondHandleKey< T > &key, const EventContext &ctx)
Convenience function to retrieve an object given a ReadCondHandleKey.