Should be overridden by derived classes to perform meaningful work.
124 {
127
129 const std::vector<uint64_t>* pixelInput{nullptr}, *stripInput{nullptr};
132
133 const int* pixelInputSize{nullptr}, *stripInputSize{nullptr};
136
137
138
140
143 }
144
145 size_t bufferIndex = ctx.slot() % nthreads;
146
147
148 size_t pixelStartClusterIndex = ctx.slot() % m_pixelStartClusteringKernels.size();
149 size_t pixelEndClusterIndex = ctx.slot() % m_pixelEndClusteringClusterKernels.size();
150 size_t pixelEndClusterEDMIndex = ctx.slot() % m_pixelEndClusteringEDMKernels.size();
151 size_t stripStartClusterIndex = ctx.slot() % m_stripStartClusteringKernels.size();
152 size_t stripEndClusterIndex = ctx.slot() % m_stripEndClusteringKernels.size();
153 size_t pixelL2GIndex = m_pixelL2GKernels.size() ? ctx.slot() % m_pixelL2GKernels.size() : 0;
154 size_t stripL2GIndex = ctx.slot() % m_stripL2GKernels.size();
155 size_t pixelEDMIndex = m_pixelEdmPrepKernels.size() ? ctx.slot() % m_pixelEdmPrepKernels.size() : 0;
156 size_t stripEDMIndex = m_stripEdmPrepKernels.size() ? ctx.slot() % m_stripEdmPrepKernels.size() : 0;
157
158 const cl::CommandQueue &acc_queue =
m_acc_queues[bufferIndex];
159
160 ATH_MSG_INFO(
"Thread number "<<ctx.slot()<<
" running on buffer "<<bufferIndex<<
" pixelStartClusterIndex: "<< pixelStartClusterIndex<<
" stripStartClusterIndex: "<< stripStartClusterIndex<<
" stripEndClusterIndex: "<< stripEndClusterIndex<<
" stripL2GIndex: "<< stripL2GIndex<<
" pixelEDMIndex: "<< pixelEDMIndex<<
" stripEDMIndex: "<< stripEDMIndex);
161
162 cl::Kernel &pixelStartClusteringKernel = m_pixelStartClusteringKernels[pixelStartClusterIndex];
163 cl::Kernel &pixelEndClusteringClusterKernel = m_pixelEndClusteringClusterKernels[pixelEndClusterIndex];
164 cl::Kernel &pixelEndClusteringEDMKernel = m_pixelEndClusteringEDMKernels[pixelEndClusterEDMIndex];
165 cl::Kernel &stripStartClusteringKernel = m_stripStartClusteringKernels[stripStartClusterIndex];
166 cl::Kernel &stripEndClusteringKernel = m_stripEndClusteringKernels[stripEndClusterIndex];
167 cl::Kernel &stripL2GKernel = m_stripL2GKernels[stripL2GIndex];
168 cl::Kernel &pixelL2GKernel = m_pixelL2GKernels[pixelL2GIndex];
169
170 cl::Kernel &pixelEdmPrepKernel = m_pixelEdmPrepKernels[pixelEDMIndex];
171 cl::Kernel &stripEdmPrepKernel = m_stripEdmPrepKernels[stripEDMIndex];
172
173
174
176 pixelStartClusteringKernel.setArg(2, static_cast<unsigned long long>(*pixelInputSize));
177
180
182 stripStartClusteringKernel.setArg(2, static_cast<unsigned long long>(*stripInputSize));
183
185
190
191
196
201
202
203
204
205 cl::Event evt_write_pixel_input;
206 cl::Event evt_write_strip_input;
207
208 acc_queue.enqueueWriteBuffer(
m_pixelClusterInputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint64_t) * (*pixelInput).size(), (*pixelInput).data(), NULL, &evt_write_pixel_input);
209 acc_queue.enqueueWriteBuffer(
m_stripClusterInputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint64_t) * (*stripInput).size(), (*stripInput).data(), NULL, &evt_write_strip_input);
210 std::vector<cl::Event> evt_vec_pixel_input{evt_write_pixel_input};
211 std::vector<cl::Event> evt_vec_strip_input{evt_write_strip_input};
212
213
214 cl::Event evt_pixel_start_clustering;
215 cl::Event evt_pixel_end_clustering_cluster;
216 cl::Event evt_pixel_end_clustering_edm;
217 cl::Event evt_strip_start_clustering;
218 cl::Event evt_strip_end_clustering;
219 cl::Event evt_strip_l2g;
220 cl::Event evt_pixel_l2g;
221 cl::Event evt_edm_prep;
222 cl::Event evt_pixel_edm_prep;
223 cl::Event evt_strip_edm_prep;
224 {
225 Athena::Chrono chrono(
"Kernel execution",
m_chronoSvc.get());
226 acc_queue.enqueueTask(pixelStartClusteringKernel, &evt_vec_pixel_input, &evt_pixel_start_clustering);
227 acc_queue.enqueueTask(pixelEndClusteringClusterKernel, NULL , &evt_pixel_end_clustering_cluster);
228 acc_queue.enqueueTask(pixelEndClusteringEDMKernel, NULL , &evt_pixel_end_clustering_edm);
229 acc_queue.enqueueTask(stripStartClusteringKernel, &evt_vec_strip_input, &evt_strip_start_clustering);
230 acc_queue.enqueueTask(stripEndClusteringKernel, NULL, &evt_strip_end_clustering);
231
232 std::vector<cl::Event> evt_vec_pixel_clustering{
233 evt_pixel_end_clustering_cluster,
234 evt_pixel_end_clustering_edm,
235 };
236
237 acc_queue.enqueueTask(pixelL2GKernel, &evt_vec_pixel_clustering, &evt_pixel_l2g);
238
239 std::vector<cl::Event> evt_vec_strip_clustering{evt_strip_end_clustering};
240 acc_queue.enqueueTask(stripL2GKernel, &evt_vec_strip_clustering, &evt_strip_l2g);
241
242 std::vector<cl::Event> evt_vec_pixel_l2g{evt_pixel_l2g};
243 acc_queue.enqueueTask(pixelEdmPrepKernel, &evt_vec_pixel_l2g, &evt_pixel_edm_prep);
244
245 std::vector<cl::Event> evt_vec_strip_l2g{evt_strip_l2g};
246 acc_queue.enqueueTask(stripEdmPrepKernel, &evt_vec_strip_l2g, &evt_strip_edm_prep);
247
248 }
249
250 cl::Event evt_pixel_cluster_output;
251 cl::Event evt_strip_cluster_output;
252
253 std::vector<cl::Event> evt_vec_pixel_edm_prep;
254 std::vector<cl::Event> evt_vec_strip_edm_prep;
255
256 evt_vec_pixel_edm_prep.push_back(evt_pixel_edm_prep);
257 evt_vec_strip_edm_prep.push_back(evt_strip_edm_prep);
258
259
260
261
264
267
268 acc_queue.enqueueReadBuffer(
m_edmPixelOutputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAPixelOutput).size(), (*FPGAPixelOutput).data(), &evt_vec_pixel_edm_prep, &evt_pixel_cluster_output);
269 acc_queue.enqueueReadBuffer(
m_edmStripOutputBufferList[bufferIndex], CL_FALSE, 0,
sizeof(uint32_t) * (*FPGAStripOutput).size(), (*FPGAStripOutput).data(), &evt_vec_strip_edm_prep, &evt_strip_cluster_output);
270
271 std::vector<cl::Event> wait_for_reads = { evt_pixel_cluster_output, evt_strip_cluster_output };
272 cl::Event::waitForEvents(wait_for_reads);
273
274
275 if(*pixelInputSize == 6) (*FPGAPixelOutput)[0] = 0;
276 if(*stripInputSize == 6) (*FPGAStripOutput)[0] = 0;
277
278
279
280
281 cl_ulong pixel_input_time = evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
283 ATH_MSG_DEBUG(
"Pixel input buffer write time: " << pixel_input_time / 1e6 <<
" ms");
284
285
286 cl_ulong strip_input_time = evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
288 ATH_MSG_DEBUG(
"Strip input buffer write time: " << strip_input_time / 1e6 <<
" ms");
289
290
291 cl_ulong pixel_clustering_cluster_time = evt_pixel_end_clustering_cluster.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_start_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
293 ATH_MSG_DEBUG(
"Pixel clustering_cluster time: " << pixel_clustering_cluster_time / 1e6 <<
" ms");
294
295 cl_ulong pixel_clustering_edm_time = evt_pixel_end_clustering_edm.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_start_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
297 ATH_MSG_DEBUG(
"Pixel clustering_edm time: " << pixel_clustering_edm_time / 1e6 <<
" ms");
298
299
300 cl_ulong strip_clustering_time = evt_strip_end_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_start_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
302 ATH_MSG_DEBUG(
"Strip clustering time: " << strip_clustering_time / 1e6 <<
" ms");
303
304
305 cl_ulong pixel_l2g_time = evt_pixel_l2g.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_l2g.getProfilingInfo<CL_PROFILING_COMMAND_START>();
307 ATH_MSG_DEBUG(
"Pixel L2G time: " << pixel_l2g_time / 1e6 <<
" ms");
308
309
310 cl_ulong strip_l2g_time = evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_START>();
312 ATH_MSG_DEBUG(
"Strip L2G time: " << strip_l2g_time / 1e6 <<
" ms");
313
314
315 cl_ulong pixel_edm_prep_time = evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_START>();
316 cl_ulong strip_edm_prep_time = evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_START>();
317
319 ATH_MSG_DEBUG(
"PixelEDMPrep time: " << pixel_edm_prep_time / 1e6 <<
" ms");
320
322 ATH_MSG_DEBUG(
"StripEDMPrep time: " << strip_edm_prep_time / 1e6 <<
" ms");
323
324
325
326 cl_ulong kernel_start = evt_pixel_start_clustering.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();
327 cl_ulong kernel_end = std::max(evt_pixel_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>(), evt_strip_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>());
329 ATH_MSG_DEBUG(
"Kernel execution time: " << (kernel_end - kernel_start) / 1e6 <<
" ms");
330
331
332 cl_ulong pixel_output_time = evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
334 ATH_MSG_DEBUG(
"Pixel output buffer read time: " << pixel_output_time / 1e6 <<
" ms");
335
336
337 cl_ulong strip_output_time = evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>() - evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
339 ATH_MSG_DEBUG(
"Strip output buffer read time: " << strip_output_time / 1e6 <<
" ms");
340
341 return StatusCode::SUCCESS;
342 }
#define ATH_CHECK
Evaluate an expression and check for errors.
std::vector< cl::Buffer > m_stripClusterOutputBufferList
std::vector< cl::Buffer > m_stripClusterInputBufferList
std::atomic< cl_ulong > m_pixelL2GTime
Time for pixel L2G.
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAPixelOutput
std::vector< cl::Buffer > m_stripClusterEDMOutputBufferList
std::vector< cl::Buffer > m_pixelClusterEDMOutputBufferList
std::atomic< cl_ulong > m_stripEdmPrepTime
Time for strip EDM preparation.
std::atomic< ulonglong > m_numEvents
Number of events processed.
std::atomic< cl_ulong > m_stripInputTime
Time for strip input buffer write.
std::atomic< cl_ulong > m_pixelEdmPrepTime
Time for pixel EDM preparation.
SG::WriteHandleKey< std::vector< uint32_t > > m_FPGAStripOutput
SG::ReadHandleKey< int > m_FPGAStripRDOSize
std::vector< cl::Buffer > m_pixelClusterOutputBufferList
std::atomic< cl_ulong > m_pixelInputTime
Time for pixel input buffer write.
std::atomic< cl_ulong > m_pixelOutputTime
Time for pixel output buffer read.
std::vector< cl::Buffer > m_stripL2GOutputBufferList
std::vector< cl::Buffer > m_pixelL2GOutputBufferList
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAPixelRDO
SG::ReadHandleKey< std::vector< uint64_t > > m_FPGAStripRDO
std::vector< cl::Buffer > m_pixelClusterInputBufferList
std::vector< cl::Buffer > m_edmPixelOutputBufferList
SG::ReadHandleKey< int > m_FPGAPixelRDOSize
ServiceHandle< IChronoSvc > m_chronoSvc
Service for timing the algorithm.
Gaudi::Property< int > m_FPGAThreads
std::atomic< cl_ulong > m_kernelTime
Time for kernel execution.
std::vector< cl::Buffer > m_stripL2GEDMOutputBufferList
std::atomic< cl_ulong > m_stripL2GTime
Time for strip L2G.
std::atomic< cl_ulong > m_pixelClusteringTime
Time for pixel clustering.
std::vector< cl::Buffer > m_pixelL2GEDMOutputBufferList
std::atomic< cl_ulong > m_stripClusteringTime
Time for strip clustering.
std::atomic< cl_ulong > m_stripOutputTime
Time for strip output buffer read.
std::vector< cl::CommandQueue > m_acc_queues
std::vector< cl::Buffer > m_edmStripOutputBufferList
constexpr uint32_t STRIP_CONTAINER_BUF_SIZE
constexpr uint32_t PIXEL_CONTAINER_BUF_SIZE
size_t getNSlots()
Return the number of event slots.
const T * get(const ReadCondHandleKey< T > &key, const EventContext &ctx)
Convenience function to retrieve an object given a ReadCondHandleKey.