Loading [MathJax]/extensions/tex2jax.js
ATLAS Offline Software
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
BenchmarkAlg.cxx
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3 */
4 
#include "AthenaKernel/Chrono.h"

#include <cstddef>
#include <cstring>
14 
16 {
18  {
19  ATH_MSG_INFO("Running on the FPGA accelerator");
20 
22 
23  ATH_CHECK(m_chronoSvc.retrieve());
24 
25  {
26  Athena::Chrono chrono("Platform and device initlize", m_chronoSvc.get());
28  }
29 
30  {
31  Athena::Chrono chrono("CL::loadProgram", m_chronoSvc.get());
33  }
34 
39 
40  ATH_CHECK(m_xaodClusterMaker.retrieve());
41  ATH_CHECK(m_testVectorTool.retrieve());
42  ATH_CHECK(m_FPGADataFormatTool.retrieve());
43  return StatusCode::SUCCESS;
44  }
45 
46  StatusCode BenchmarkAlg::execute(const EventContext &ctx) const
47  {
48  ATH_MSG_DEBUG("Executing BenchmarkAlg");
49  m_numEvents++;
50 
51  // Create host side output vectors
52  std::vector<uint64_t> pixelOutput(EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE, 0);
53  std::vector<uint64_t> stripOutput(EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE, 0);
54 
55  if (m_runPassThrough)
56  {
57  ATH_CHECK(runPassThrough(pixelOutput, stripOutput, ctx));
58  }
59  else // using the actual FPGA kernel chain
60  {
61  ATH_CHECK(runDataPrep(pixelOutput, stripOutput, ctx));
62  }
63 
64  // use 64-bit pointer to access output
65  uint64_t *stripClusters = stripOutput.data();
66  uint64_t *pixelClusters = pixelOutput.data();
67 
68  unsigned int numStripClusters = stripClusters[0];
69  ATH_MSG_DEBUG("numStripClusters: " << numStripClusters);
70 
71  unsigned int numPixelClusters = pixelClusters[0];
72  ATH_MSG_DEBUG("numPixelClusters: " << numPixelClusters);
73 
74  std::unique_ptr<EFTrackingTransient::Metadata> metadata =
75  std::make_unique<EFTrackingTransient::Metadata>();
76 
77  metadata->numOfStripClusters = numStripClusters;
78  metadata->scRdoIndexSize = numStripClusters;
79  metadata->numOfPixelClusters = numPixelClusters;
80  metadata->pcRdoIndexSize = numPixelClusters;
81 
84 
85  // Declare a few vairiables to be used in the loop
86  int row = 0;
87  uint64_t rdo;
88  int rdoCounter = 0;
89 
90  // Make strip cluster aux input
91  {
92  Athena::Chrono chrono("Make strip cluster container", m_chronoSvc.get());
93  for (unsigned int i = 0; i < numStripClusters; i++)
94  {
95  rdoCounter = 0;
96  row = 0; // idhash
97  scAux.idHash.push_back(stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
98  row = 1; // id
99  scAux.id.push_back(stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
100  row = 2; // rdo w1
101  rdo = stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8];
102  if (rdo)
103  {
104  scAux.rdoList.push_back(rdo);
105  rdoCounter++;
106  }
107  row = 3; // rdo w2
108  rdo = stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8];
109  if (rdo)
110  {
111  scAux.rdoList.push_back(rdo);
112  rdoCounter++;
113  }
114  row = 4; // rdo w3
115  rdo = stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8];
116  if (rdo)
117  {
118  scAux.rdoList.push_back(rdo);
119  rdoCounter++;
120  }
121  row = 5; // rdo w4
122  rdo = stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8];
123  if (rdo)
124  {
125  scAux.rdoList.push_back(rdo);
126  rdoCounter++;
127  }
128  row = 6; // local x
129  scAux.localPosition.push_back(*(double *)&stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
130  row = 8; // local covariance xx
131  scAux.localCovariance.push_back(*(double *)&stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
132  row = 9; // global x
133  scAux.globalPosition.push_back(*(double *)&stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
134  row = 10; // global y
135  scAux.globalPosition.push_back(*(double *)&stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
136  row = 11; // global z
137  scAux.globalPosition.push_back(*(double *)&stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
138  row = 12; // channels in phi
139  scAux.channelsInPhi.push_back(stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
140 
141  metadata->scRdoIndex[i] = rdoCounter;
142  }
143  ATH_CHECK(m_xaodClusterMaker->makeStripClusterContainer(scAux, metadata.get(), ctx));
144  // print out the strip cluster aux input
145  if (msgLvl(MSG::DEBUG))
146  {
147  for (unsigned int i = 0; i < numStripClusters; i++)
148  {
149  ATH_MSG_DEBUG("Strip cluster " << i << " idHash: " << scAux.idHash[i]);
150  ATH_MSG_DEBUG("Strip cluster " << i << " id: " << scAux.id[i]);
151  ATH_MSG_DEBUG("Strip cluster " << i << " localPosition x: " << scAux.localPosition[i]);
152  ATH_MSG_DEBUG("Strip cluster " << i << " localCovariance: " << scAux.localCovariance[i]);
153  ATH_MSG_DEBUG("Strip cluster " << i << " globalPosition x: " << scAux.globalPosition[i * 3]);
154  ATH_MSG_DEBUG("Strip cluster " << i << " globalPosition y: " << scAux.globalPosition[i * 3 + 1]);
155  ATH_MSG_DEBUG("Strip cluster " << i << " globalPosition z: " << scAux.globalPosition[i * 3 + 2]);
156  ATH_MSG_DEBUG("Strip cluster " << i << " channelsInPhi: " << scAux.channelsInPhi[i]);
157  ATH_MSG_DEBUG("Strip cluster " << i << " rdoList size: " << metadata->scRdoIndex[i]);
158  }
159  }
160  }
161 
162  // Make pixel cluster aux input
163  {
164  Athena::Chrono chrono("Make pixel cluster container", m_chronoSvc.get());
165  for (unsigned int i = 0; i < numPixelClusters; i++)
166  {
167  rdoCounter = 0;
168  row = 0; // id hash
169  pcAux.idHash.push_back(pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
170 
171  row = 1; // id
172  pcAux.id.push_back(pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
173 
174  row = 2; // rdo w1
175  rdo = pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8];
176  if (rdo)
177  {
178  pcAux.rdoList.push_back(rdo);
179  rdoCounter++;
180  }
181 
182  row = 3; // rdo w2
183  rdo = pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8];
184  if (rdo)
185  {
186  pcAux.rdoList.push_back(rdo);
187  rdoCounter++;
188  }
189 
190  row = 4; // rdo w3
191  rdo = pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8];
192  if (rdo)
193  {
194  pcAux.rdoList.push_back(rdo);
195  rdoCounter++;
196  }
197 
198  row = 5; // rdo w4
199  rdo = pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8];
200  if (rdo)
201  {
202  pcAux.rdoList.push_back(rdo);
203  rdoCounter++;
204  }
205 
206  row = 6; // local x
207  pcAux.localPosition.push_back(*(double *)&pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
208 
209  row = 7; // local y
210  pcAux.localPosition.push_back(*(double *)&pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
211 
212  row = 8; // local covariance xx
213  pcAux.localCovariance.push_back(*(double *)&pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
214 
215  row = 9; // local covariance yy
216  pcAux.localCovariance.push_back(*(double *)&pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
217 
218  row = 10; // global x
219  pcAux.globalPosition.push_back(*(double *)&pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
220 
221  row = 11; // global y
222  pcAux.globalPosition.push_back(*(double *)&pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
223 
224  row = 12; // global z
225  pcAux.globalPosition.push_back(*(double *)&pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
226 
227  row = 13; // channels in phi
228  pcAux.channelsInPhi.push_back(pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
229 
230  row = 14; // channels in eta
231  pcAux.channelsInEta.push_back(pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
232 
233  row = 15; // width in eta
234  pcAux.widthInEta.push_back(*(double *)&pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
235 
236  row = 16; // omega x
237  pcAux.omegaX.push_back(*(double *)&pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
238 
239  row = 17; // omega y
240  pcAux.omegaY.push_back(*(double *)&pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
241 
242  row = 18; // total ToT
243  pcAux.totalToT.push_back(pixelClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
244 
245  metadata->pcRdoIndex[i] = rdoCounter;
246  }
247 
248  ATH_CHECK(m_xaodClusterMaker->makePixelClusterContainer(pcAux, metadata.get(), ctx));
249 
250  // print out pixel cluster aux input
251  if (msgLvl(MSG::DEBUG))
252  {
253  for (unsigned int i = 0; i < numPixelClusters; i++)
254  {
255  ATH_MSG_DEBUG("Pixel cluster " << i << " idHash: " << pcAux.idHash[i]);
256  ATH_MSG_DEBUG("Pixel cluster " << i << " id: " << pcAux.id[i]);
257  ATH_MSG_DEBUG("Pixel cluster " << i << " localPosition x: " << pcAux.localPosition[i * 2]);
258  ATH_MSG_DEBUG("Pixel cluster " << i << " localPosition y: " << pcAux.localPosition[i * 2 + 1]);
259  ATH_MSG_DEBUG("Pixel cluster " << i << " localCovariance xx: " << pcAux.localCovariance[i * 2]);
260  ATH_MSG_DEBUG("Pixel cluster " << i << " localCovariance yy: " << pcAux.localCovariance[i * 2 + 1]);
261  ATH_MSG_DEBUG("Pixel cluster " << i << " globalPosition x: " << pcAux.globalPosition[i * 3]);
262  ATH_MSG_DEBUG("Pixel cluster " << i << " globalPosition y: " << pcAux.globalPosition[i * 3 + 1]);
263  ATH_MSG_DEBUG("Pixel cluster " << i << " globalPosition z: " << pcAux.globalPosition[i * 3 + 2]);
264  ATH_MSG_DEBUG("Pixel cluster " << i << " channelsInPhi: " << pcAux.channelsInPhi[i]);
265  ATH_MSG_DEBUG("Pixel cluster " << i << " channelsInEta: " << pcAux.channelsInEta[i]);
266  ATH_MSG_DEBUG("Pixel cluster " << i << " widthInEta: " << pcAux.widthInEta[i]);
267  ATH_MSG_DEBUG("Pixel cluster " << i << " omegaX: " << pcAux.omegaX[i]);
268  ATH_MSG_DEBUG("Pixel cluster " << i << " omegaY: " << pcAux.omegaY[i]);
269  ATH_MSG_DEBUG("Pixel cluster " << i << " totalToT: " << pcAux.totalToT[i]);
270  ATH_MSG_DEBUG("Pixel cluster " << i << " rdoList size: " << metadata->pcRdoIndex[i]);
271  }
272  }
273  }
274 
275  return StatusCode::SUCCESS;
276  }
277 
278  StatusCode BenchmarkAlg::runPassThrough(std::vector<uint64_t> &pixelChainOutput, std::vector<uint64_t> &stripChainOutput, const EventContext &ctx) const
279  {
280  cl_int err = 0;
281 
282  // Load the ITk clusters from SG
284  if (!scContainerHandle.isValid())
285  {
286  ATH_MSG_ERROR("Failed to retrieve: " << m_inputStripClusterKey);
287  return StatusCode::FAILURE;
288  }
289 
291  if (!pcContainerHandle.isValid())
292  {
293  ATH_MSG_ERROR("Failed to retrieve: " << m_inputPixelClusterKey);
294  return StatusCode::FAILURE;
295  }
296 
297  // Encode ITK clusters into byte stream
298  std::vector<uint64_t> encodedStripClusters;
299  std::vector<uint64_t> encodedPixelClusters;
300  ATH_CHECK(m_testVectorTool->encodeStripL2G(scContainerHandle.get(), encodedStripClusters));
301  ATH_CHECK(m_testVectorTool->encodePixelL2G(pcContainerHandle.get(), encodedPixelClusters));
302 
303  // Create local CL buffers and kernel object for pixel and strip
304  cl::Buffer inputPixelBuffer(m_context, CL_MEM_READ_ONLY, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err);
305  cl::Buffer inputStripBuffer(m_context, CL_MEM_READ_ONLY, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err);
306  cl::Buffer outputPixelBuffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE * sizeof(uint64_t), NULL, &err);
307  cl::Buffer outputStripBuffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE * sizeof(uint64_t), NULL, &err);
308 
309  cl::Kernel kernel(m_program, m_edmKernelName.value().data());
310  kernel.setArg<cl::Buffer>(0, inputPixelBuffer);
311  kernel.setArg<cl::Buffer>(1, inputStripBuffer);
312  kernel.setArg<cl::Buffer>(2, outputPixelBuffer);
313  kernel.setArg<cl::Buffer>(3, outputStripBuffer);
314 
315  // Migrate the input test vectors to the accelerator
316  cl::CommandQueue acc_queue(m_context, m_accelerator);
317  acc_queue.enqueueWriteBuffer(inputPixelBuffer, CL_FALSE, 0, sizeof(uint64_t) * encodedPixelClusters.size(), encodedPixelClusters.data(), NULL, NULL);
318  acc_queue.enqueueWriteBuffer(inputStripBuffer, CL_FALSE, 0, sizeof(uint64_t) * encodedStripClusters.size(), encodedStripClusters.data(), NULL, NULL);
319  acc_queue.finish();
320 
321  // enqueue the kernel
322  {
323  Athena::Chrono chrono("EDMPrep kernel execution", m_chronoSvc.get());
324  acc_queue.enqueueTask(kernel);
325  acc_queue.finish();
326  }
327 
328  // Read back the results
329  {
330  Athena::Chrono chrono("Read buffers", m_chronoSvc.get());
331  acc_queue.enqueueReadBuffer(outputPixelBuffer, CL_FALSE, 0, sizeof(uint64_t) * pixelChainOutput.size(), pixelChainOutput.data(), NULL, NULL);
332  acc_queue.enqueueReadBuffer(outputStripBuffer, CL_FALSE, 0, sizeof(uint64_t) * stripChainOutput.size(), stripChainOutput.data(), NULL, NULL);
333  acc_queue.finish();
334  }
335 
336  return StatusCode::SUCCESS;
337  }
338 
339  StatusCode BenchmarkAlg::runDataPrep(std::vector<uint64_t> &pixelChainOutput, std::vector<uint64_t> &stripChainOutput, const EventContext &ctx) const
340  {
341  ATH_MSG_DEBUG("Running DataPrep on FPGA");
342  cl_int err = 0;
343 
344  // Get the RDOs from the SG
345  auto pixelRDOHandle = SG::makeHandle(m_pixelRDOKey, ctx);
346  auto stripRDOHandle = SG::makeHandle(m_stripRDOKey, ctx);
347 
348  // Encode RDO into byte stream
349  std::vector<uint64_t> encodedPixelRDO;
350  std::vector<uint64_t> encodedStripRDO;
351 
352  // Encode RDOs into byte stream
353  ATH_CHECK(m_FPGADataFormatTool->convertPixelHitsToFPGADataFormat(*pixelRDOHandle, encodedPixelRDO, ctx));
354  ATH_CHECK(m_FPGADataFormatTool->convertStripHitsToFPGADataFormat(*stripRDOHandle, encodedStripRDO, ctx));
355 
356  for (unsigned int i = 0; i < encodedPixelRDO.size(); i++)
357  {
358  ATH_MSG_DEBUG("Pixel RDO[" << i << "]: " << std::hex << encodedPixelRDO[i] << std::dec);
359  }
360  for (unsigned int i = 0; i < encodedStripRDO.size(); i++)
361  {
362  ATH_MSG_DEBUG("Strip RDO[" << i << "]: " << std::hex << encodedStripRDO[i] << std::dec);
363  }
364 
365  // Create local CL buffers
366  // Clustering
367  cl::Buffer pixelClusterInputBuffer(m_context, CL_MEM_READ_ONLY, sizeof(uint64_t) * encodedPixelRDO.size(), NULL, &err);
368  cl::Buffer stripClusterInputBuffer(m_context, CL_MEM_READ_ONLY, sizeof(uint64_t) * encodedStripRDO.size(), NULL, &err);
369  cl::Buffer pixelClusterOutputBuffer(m_context, CL_MEM_READ_WRITE, sizeof(uint64_t) * encodedPixelRDO.size(), NULL, &err); // Don't care in DataPrep
370  cl::Buffer stripClusterOutputBuffer(m_context, CL_MEM_READ_WRITE, sizeof(uint64_t) * encodedStripRDO.size(), NULL, &err); // Don't care in DataPrep
371  cl::Buffer pixelClusterEDMOutputBuffer(m_context, CL_MEM_READ_WRITE, sizeof(uint64_t) * encodedPixelRDO.size() * EFTrackingTransient::NUM_PIXEL_WORD, NULL, &err);
372  cl::Buffer stripClusterEDMOutputBuffer(m_context, CL_MEM_READ_WRITE, sizeof(uint64_t) * encodedStripRDO.size() * EFTrackingTransient::NUM_STRIP_WORD, NULL, &err);
373  // L2G
374  cl::Buffer pixelL2GOutputBuffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err); // Don't care in DataPrep
375  cl::Buffer stripL2GOutputBuffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err); // Don't care in DataPrep
376  cl::Buffer pixelL2GEDMOutputBuffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err);
377  cl::Buffer stripL2GEDMOutputBuffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_BLOCK_BUF_SIZE * sizeof(uint64_t), NULL, &err);
378  // EDMPrep
379  cl::Buffer edmPixelOutputBuffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE * sizeof(uint64_t), NULL, &err);
380  cl::Buffer edmStripOutputBuffer(m_context, CL_MEM_READ_WRITE, EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE * sizeof(uint64_t), NULL, &err);
381 
382  // Create local CL kernel objects
383  // Clustering
384  // Kernel names are hard-coded for the current development
385  cl::Kernel pixelClusteringKernel(m_program, m_pixelClusterKernelName.value().data());
386  pixelClusteringKernel.setArg<cl::Buffer>(0, pixelClusterInputBuffer);
387  pixelClusteringKernel.setArg<cl::Buffer>(1, pixelClusterOutputBuffer);
388  pixelClusteringKernel.setArg<cl::Buffer>(2, pixelClusterEDMOutputBuffer);
389 
390  cl::Kernel stripClusteringKernel(m_program, m_stripClusterKernelName.value().data());
391  stripClusteringKernel.setArg<cl::Buffer>(0, stripClusterInputBuffer);
392  stripClusteringKernel.setArg<cl::Buffer>(1, stripClusterOutputBuffer);
393  stripClusteringKernel.setArg<cl::Buffer>(2, stripClusterEDMOutputBuffer);
394  stripClusteringKernel.setArg<unsigned int>(3, encodedStripRDO.size());
395 
396  // L2G
397  cl::Kernel pixelL2GKernel(m_program, m_pixelL2GKernelName.value().data());
398  pixelL2GKernel.setArg<cl::Buffer>(0, pixelClusterOutputBuffer);
399  pixelL2GKernel.setArg<cl::Buffer>(1, pixelClusterEDMOutputBuffer);
400  pixelL2GKernel.setArg<cl::Buffer>(2, pixelL2GOutputBuffer);
401  pixelL2GKernel.setArg<cl::Buffer>(3, pixelL2GEDMOutputBuffer);
402 
403  cl::Kernel stripL2GKernel(m_program, m_stripL2GKernelName.value().data());
404  stripL2GKernel.setArg<cl::Buffer>(0, stripClusterOutputBuffer);
405  stripL2GKernel.setArg<cl::Buffer>(1, stripClusterEDMOutputBuffer);
406  stripL2GKernel.setArg<cl::Buffer>(2, stripL2GOutputBuffer);
407  stripL2GKernel.setArg<cl::Buffer>(3, stripL2GEDMOutputBuffer);
408 
409  // Create EDMPrep kernel object and connect to buffers
410  cl::Kernel edmPrepKernel(m_program, m_edmKernelName.value().data());
411  edmPrepKernel.setArg<cl::Buffer>(0, pixelL2GEDMOutputBuffer);
412  edmPrepKernel.setArg<cl::Buffer>(1, stripL2GEDMOutputBuffer);
413  edmPrepKernel.setArg<cl::Buffer>(2, edmPixelOutputBuffer);
414  edmPrepKernel.setArg<cl::Buffer>(3, edmStripOutputBuffer);
415 
416  cl::CommandQueue acc_queue(m_context, m_accelerator, CL_QUEUE_PROFILING_ENABLE, &err);
417 
418  cl::Event cl_evt_write_pixel_input;
419  cl::Event cl_evt_write_strip_input;
420  acc_queue.enqueueWriteBuffer(pixelClusterInputBuffer, CL_FALSE, 0, sizeof(uint64_t) * encodedPixelRDO.size(), encodedPixelRDO.data(), NULL, &cl_evt_write_pixel_input);
421  acc_queue.enqueueWriteBuffer(stripClusterInputBuffer, CL_FALSE, 0, sizeof(uint64_t) * encodedStripRDO.size(), encodedStripRDO.data(), NULL, &cl_evt_write_strip_input);
422  std::vector<cl::Event> cl_evt_vec_pixel_input{cl_evt_write_pixel_input};
423  std::vector<cl::Event> cl_evt_vec_strip_input{cl_evt_write_strip_input};
424  // Ideally, `finish` shouldn't be here because the kernels are invoked by event dependencies,
425  // but we use this to temporarily enable kernel profiling.
426  acc_queue.finish();
427 
428  cl::Event cl_evt_pixel_clustering;
429  cl::Event cl_evt_strip_clustering;
430  cl::Event cl_evt_pixel_l2g;
431  cl::Event cl_evt_strip_l2g;
432  cl::Event cl_evt_edm_prep;
433  {
434  Athena::Chrono chrono("Kernel execution", m_chronoSvc.get());
435  acc_queue.enqueueTask(pixelClusteringKernel, &cl_evt_vec_pixel_input, &cl_evt_pixel_clustering);
436  acc_queue.enqueueTask(stripClusteringKernel, &cl_evt_vec_strip_input, &cl_evt_strip_clustering);
437 
438  std::vector<cl::Event> cl_evt_vec_pixel_clustering{cl_evt_pixel_clustering};
439  std::vector<cl::Event> cl_evt_vec_strip_clustering{cl_evt_strip_clustering};
440  acc_queue.enqueueTask(pixelL2GKernel, &cl_evt_vec_pixel_clustering, &cl_evt_pixel_l2g);
441  acc_queue.enqueueTask(stripL2GKernel, &cl_evt_vec_strip_clustering, &cl_evt_strip_l2g);
442  std::vector<cl::Event> cl_evt_vec_l2g{cl_evt_pixel_l2g, cl_evt_strip_l2g};
443 
444  acc_queue.enqueueTask(edmPrepKernel, &cl_evt_vec_l2g, &cl_evt_edm_prep);
445  // Ideally, `finish` shouldn't be here because the kernels are invoked by event dependencies,
446  // but we use this to temporarily enable kernel profiling. CPU wall time
447  acc_queue.finish();
448  }
449 
450  cl::Event cl_evt_pixel_cluster_output;
451  cl::Event cl_evt_strip_cluster_output;
452  acc_queue.enqueueReadBuffer(edmPixelOutputBuffer, CL_FALSE, 0, sizeof(uint64_t) * pixelChainOutput.size(), pixelChainOutput.data(), NULL, &cl_evt_pixel_cluster_output);
453  acc_queue.enqueueReadBuffer(edmStripOutputBuffer, CL_FALSE, 0, sizeof(uint64_t) * stripChainOutput.size(), stripChainOutput.data(), NULL, &cl_evt_strip_cluster_output);
454  acc_queue.finish();
455 
456  // calculate the time for the kernel execution
457  // get the time of writing pixel input buffer
458  cl_ulong pixel_input_start = cl_evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
459  cl_ulong pixel_input_end = cl_evt_write_pixel_input.getProfilingInfo<CL_PROFILING_COMMAND_END>();
460  cl_ulong pixel_input_time = pixel_input_end - pixel_input_start;
461  m_pixelInputTime += pixel_input_time;
462  ATH_MSG_DEBUG("Pixel input buffer write time: " << pixel_input_time / 1e6 << " ms");
463 
464  // get the time of writing strip input buffer
465  cl_ulong strip_input_start = cl_evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_START>();
466  cl_ulong strip_input_end = cl_evt_write_strip_input.getProfilingInfo<CL_PROFILING_COMMAND_END>();
467  cl_ulong strip_input_time = strip_input_end - strip_input_start;
468  m_stripInputTime += strip_input_time;
469  ATH_MSG_DEBUG("Strip input buffer write time: " << strip_input_time / 1e6 << " ms");
470 
471  // get the time of pixel clustering
472  cl_ulong pixel_clustering_start = cl_evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
473  cl_ulong pixel_clustering_end = cl_evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>();
474  cl_ulong pixel_clustering_time = pixel_clustering_end - pixel_clustering_start;
475  m_pixelClusteringTime += pixel_clustering_time;
476  ATH_MSG_DEBUG("Pixel clustering time: " << pixel_clustering_time / 1e6 << " ms");
477 
478  // get the time of strip clustering
479  cl_ulong strip_clustering_start = cl_evt_strip_clustering.getProfilingInfo<CL_PROFILING_COMMAND_START>();
480  cl_ulong strip_clustering_end = cl_evt_strip_clustering.getProfilingInfo<CL_PROFILING_COMMAND_END>();
481  cl_ulong strip_clustering_time = strip_clustering_end - strip_clustering_start;
482  m_stripClusteringTime += strip_clustering_time;
483  ATH_MSG_DEBUG("Strip clustering time: " << strip_clustering_time / 1e6 << " ms");
484 
485  // get the time of pixel L2G
486  cl_ulong pixel_l2g_start = cl_evt_pixel_l2g.getProfilingInfo<CL_PROFILING_COMMAND_START>();
487  cl_ulong pixel_l2g_end = cl_evt_pixel_l2g.getProfilingInfo<CL_PROFILING_COMMAND_END>();
488  cl_ulong pixel_l2g_time = pixel_l2g_end - pixel_l2g_start;
489  m_pixelL2GTime += pixel_l2g_time;
490  ATH_MSG_DEBUG("Pixel L2G time: " << pixel_l2g_time / 1e6 << " ms");
491 
492  // get the time of strip L2G
493  cl_ulong strip_l2g_start = cl_evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_START>();
494  cl_ulong strip_l2g_end = cl_evt_strip_l2g.getProfilingInfo<CL_PROFILING_COMMAND_END>();
495  cl_ulong strip_l2g_time = strip_l2g_end - strip_l2g_start;
496  m_stripL2GTime += strip_l2g_time;
497  ATH_MSG_DEBUG("Strip L2G time: " << strip_l2g_time / 1e6 << " ms");
498 
499  // get the time of EDMPrep
500  cl_ulong edm_prep_start = cl_evt_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_START>();
501  cl_ulong edm_prep_end = cl_evt_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>();
502  cl_ulong edm_prep_time = edm_prep_end - edm_prep_start;
503  m_edmPrepTime += edm_prep_time;
504  ATH_MSG_DEBUG("EDMPrep time: " << edm_prep_time / 1e6 << " ms");
505 
506  // get the time of the whole kernel execution
507  cl_ulong kernel_start = cl_evt_pixel_clustering.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();
508  cl_ulong kernel_end = cl_evt_edm_prep.getProfilingInfo<CL_PROFILING_COMMAND_END>();
509  cl_ulong kernel_time = kernel_end - kernel_start;
510  m_kernelTime += kernel_time;
511  ATH_MSG_DEBUG("Kernel execution time: " << kernel_time / 1e6 << " ms");
512 
513  // get the time of reading pixel output buffer
514  cl_ulong pixel_output_start = cl_evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
515  cl_ulong pixel_output_end = cl_evt_pixel_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>();
516  cl_ulong pixel_output_time = pixel_output_end - pixel_output_start;
517  m_pixelOutputTime += pixel_output_time;
518  ATH_MSG_DEBUG("Pixel output buffer read time: " << pixel_output_time / 1e6 << " ms");
519 
520  // get the time of reading strip output buffer
521  cl_ulong strip_output_start = cl_evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_START>();
522  cl_ulong strip_output_end = cl_evt_strip_cluster_output.getProfilingInfo<CL_PROFILING_COMMAND_END>();
523  cl_ulong strip_output_time = strip_output_end - strip_output_start;
524  m_stripOutputTime += strip_output_time;
525  ATH_MSG_DEBUG("Strip output buffer read time: " << strip_output_time / 1e6 << " ms");
526 
527  return StatusCode::SUCCESS;
528  }
529 
531  {
532  if (!m_runPassThrough)
533  {
534  ATH_MSG_INFO("Finalizing BenchmarkAlg");
535  ATH_MSG_INFO("Number of events: " << m_numEvents);
536  ATH_MSG_INFO("Pixel input time: " << m_pixelInputTime / m_numEvents / 1e6 << " ms");
537  ATH_MSG_INFO("Strip input time: " << m_stripInputTime / m_numEvents / 1e6 << " ms");
538  ATH_MSG_INFO("Pixel clustering time: " << m_pixelClusteringTime / m_numEvents / 1e6 << " ms");
539  ATH_MSG_INFO("Strip clustering time: " << m_stripClusteringTime / m_numEvents / 1e6 << " ms");
540  ATH_MSG_INFO("Pixel L2G time: " << m_pixelL2GTime / m_numEvents / 1e6 << " ms");
541  ATH_MSG_INFO("Strip L2G time: " << m_stripL2GTime / m_numEvents / 1e6 << " ms");
542  ATH_MSG_INFO("EDMPrep time: " << m_edmPrepTime / m_numEvents / 1e6 << " ms");
543  ATH_MSG_INFO("Kernel execution time: " << m_kernelTime / m_numEvents / 1e6 << " ms");
544  ATH_MSG_INFO("Pixel output time: " << m_pixelOutputTime / m_numEvents / 1e6 << " ms");
545  ATH_MSG_INFO("Strip output time: " << m_stripOutputTime / m_numEvents / 1e6 << " ms");
546  }
547 
548  return StatusCode::SUCCESS;
549  }
550 } // namespace EFTrackingFPGAIntegration
query_example.row
row
Definition: query_example.py:24
EFTrackingTransient::PixelClusterAuxInput::localPosition
std::vector< float > localPosition
Definition: EFTrackingTransient.h:225
EFTrackingFPGAIntegration::BenchmarkAlg::m_edmKernelName
Gaudi::Property< std::string > m_edmKernelName
Name of the FPGA kernel.
Definition: BenchmarkAlg.h:74
IntegrationBase::m_accelerator
cl::Device m_accelerator
Device object for the accelerator card.
Definition: IntegrationBase.h:66
IntegrationBase::initialize
virtual StatusCode initialize() override
Detect the OpenCL devices and prepare OpenCL context.
Definition: IntegrationBase.cxx:16
EFTrackingFPGAIntegration::BenchmarkAlg::m_stripClusteringTime
std::atomic< cl_ulong > m_stripClusteringTime
Time for strip clustering.
Definition: BenchmarkAlg.h:96
EFTrackingTransient::PixelClusterAuxInput::channelsInEta
std::vector< int > channelsInEta
Definition: EFTrackingTransient.h:230
EFTrackingFPGAIntegration::BenchmarkAlg::m_chronoSvc
ServiceHandle< IChronoSvc > m_chronoSvc
Service for timing the algorithm.
Definition: BenchmarkAlg.h:46
EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE
constexpr unsigned long PIXEL_CONTAINER_BUF_SIZE
Definition: EFTrackingTransient.h:34
EFTrackingTransient::PixelClusterAuxInput
The PixelClusterAuxInput struct is used to simplify the creation of the xAOD::PixelClusterContainer.
Definition: EFTrackingTransient.h:222
EFTrackingFPGAIntegration::BenchmarkAlg::m_pixelL2GKernelName
Gaudi::Property< std::string > m_pixelL2GKernelName
Name of the pixel L2G kernel.
Definition: BenchmarkAlg.h:83
EFTrackingTransient::StripClusterAuxInput::globalPosition
std::vector< float > globalPosition
Definition: EFTrackingTransient.h:212
ATH_MSG_INFO
#define ATH_MSG_INFO(x)
Definition: AthMsgStreamMacros.h:31
EFTrackingTransient::MAX_NUM_CLUSTERS
constexpr unsigned int MAX_NUM_CLUSTERS
Definition: EFTrackingTransient.h:27
SG::ReadHandle
Definition: StoreGate/StoreGate/ReadHandle.h:67
BenchmarkAlg.h
JiveXML::Event
struct Event_t Event
Definition: ONCRPCServer.h:65
IntegrationBase::m_context
cl::Context m_context
Context object for the application.
Definition: IntegrationBase.h:67
EFTrackingFPGAIntegration::BenchmarkAlg::m_pixelL2GTime
std::atomic< cl_ulong > m_pixelL2GTime
Time for pixel L2G.
Definition: BenchmarkAlg.h:97
EFTrackingTransient::StripClusterAuxInput::localCovariance
std::vector< float > localCovariance
Definition: EFTrackingTransient.h:209
AthCommonMsg< Gaudi::Algorithm >::msgLvl
bool msgLvl(const MSG::Level lvl) const
Definition: AthCommonMsg.h:30
EFTrackingFPGAIntegration::BenchmarkAlg::initialize
virtual StatusCode initialize() override final
Detect the OpenCL devices and prepare OpenCL context.
Definition: BenchmarkAlg.cxx:17
EFTrackingFPGAIntegration::BenchmarkAlg::m_inputPixelClusterKey
SG::ReadHandleKey< xAOD::PixelClusterContainer > m_inputPixelClusterKey
Key to access input pixel clusters.
Definition: BenchmarkAlg.h:61
Chrono.h
Exception-safe IChronoSvc caller.
EFTrackingFPGAIntegration::BenchmarkAlg::finalize
virtual StatusCode finalize() override final
Definition: BenchmarkAlg.cxx:530
EFTrackingTransient::StripClusterAuxInput::rdoList
std::vector< unsigned long long > rdoList
Definition: EFTrackingTransient.h:213
EFTrackingFPGAIntegration::BenchmarkAlg::m_pixelRDOKey
SG::ReadHandleKey< PixelRDO_Container > m_pixelRDOKey
Definition: BenchmarkAlg.h:67
EFTrackingFPGAIntegration::BenchmarkAlg::execute
virtual StatusCode execute(const EventContext &ctx) const override final
Should be overridden by derived classes to perform meaningful work.
Definition: BenchmarkAlg.cxx:46
EFTrackingFPGAIntegration::BenchmarkAlg::m_stripOutputTime
std::atomic< cl_ulong > m_stripOutputTime
Time for strip output buffer read.
Definition: BenchmarkAlg.h:101
python.checkMetadata.metadata
metadata
Definition: checkMetadata.py:175
SG::makeHandle
SG::ReadCondHandle< T > makeHandle(const SG::ReadCondHandleKey< T > &key, const EventContext &ctx=Gaudi::Hive::currentContext())
Definition: ReadCondHandle.h:274
EFTrackingFPGAIntegration::BenchmarkAlg::m_stripRDOKey
SG::ReadHandleKey< SCT_RDO_Container > m_stripRDOKey
Definition: BenchmarkAlg.h:69
EFTrackingTransient::PIXEL_BLOCK_BUF_SIZE
constexpr unsigned long PIXEL_BLOCK_BUF_SIZE
Definition: EFTrackingTransient.h:32
EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE
constexpr unsigned long STRIP_CONTAINER_BUF_SIZE
Definition: EFTrackingTransient.h:35
ATH_MSG_ERROR
#define ATH_MSG_ERROR(x)
Definition: AthMsgStreamMacros.h:33
Athena::Chrono
Exception-safe IChronoSvc caller.
Definition: Chrono.h:50
EFTrackingTransient::PixelClusterAuxInput::rdoList
std::vector< unsigned long long > rdoList
Definition: EFTrackingTransient.h:228
dqt_zlumi_pandas.err
err
Definition: dqt_zlumi_pandas.py:182
lumiFormat.i
int i
Definition: lumiFormat.py:85
EFTrackingTransient::PixelClusterAuxInput::localCovariance
std::vector< float > localCovariance
Definition: EFTrackingTransient.h:226
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
SG::ReadHandle::get
const_pointer_type get() const
Dereference the pointer, but don't cache anything.
ATH_MSG_DEBUG
#define ATH_MSG_DEBUG(x)
Definition: AthMsgStreamMacros.h:29
EFTrackingTransient::StripClusterAuxInput::idHash
std::vector< unsigned int > idHash
Definition: EFTrackingTransient.h:210
EFTrackingTransient::PixelClusterAuxInput::totalToT
std::vector< int > totalToT
Definition: EFTrackingTransient.h:235
EFTrackingFPGAIntegration::BenchmarkAlg::m_pixelInputTime
std::atomic< cl_ulong > m_pixelInputTime
Time for pixel input buffer write.
Definition: BenchmarkAlg.h:93
xAOD::uint64_t
uint64_t
Definition: EventInfo_v1.cxx:123
ATH_CHECK
#define ATH_CHECK
Definition: AthCheckMacros.h:40
IntegrationBase::precheck
StatusCode precheck(const std::vector< Gaudi::Property< std::string >> &inputs) const
Check if the desired Gaudi properties are set.
Definition: IntegrationBase.cxx:141
SG::VarHandleKey::initialize
StatusCode initialize(bool used=true)
If this object is used as a property, then this should be called during the initialize phase.
Definition: AthToolSupport/AsgDataHandles/Root/VarHandleKey.cxx:103
EFTrackingFPGAIntegration
Definition: BenchmarkAlg.h:28
EFTrackingFPGAIntegration::BenchmarkAlg::m_stripL2GKernelName
Gaudi::Property< std::string > m_stripL2GKernelName
Name of the strip L2G kernel.
Definition: BenchmarkAlg.h:86
EFTrackingFPGAIntegration::BenchmarkAlg::m_FPGADataFormatTool
ToolHandle< FPGADataFormatTool > m_FPGADataFormatTool
Tool for formatting FPGA data.
Definition: BenchmarkAlg.h:58
EFTrackingTransient::NUM_PIXEL_WORD
constexpr unsigned int NUM_PIXEL_WORD
Definition: EFTrackingTransient.h:28
EFTrackingFPGAIntegration::BenchmarkAlg::m_xclbin
Gaudi::Property< std::string > m_xclbin
Path and name of the xclbin file.
Definition: BenchmarkAlg.h:71
SG::ReadHandle::isValid
virtual bool isValid() override final
Can the handle be successfully dereferenced?
EFTrackingTransient::PixelClusterAuxInput::idHash
std::vector< unsigned int > idHash
Definition: EFTrackingTransient.h:224
EFTrackingFPGAIntegration::BenchmarkAlg::m_testVectorTool
ToolHandle< TestVectorTool > m_testVectorTool
Tool for preparing test vectors.
Definition: BenchmarkAlg.h:55
IntegrationBase::loadProgram
StatusCode loadProgram(const std::string &xclbin)
Find the xclbin file and load it into the OpenCL program object.
Definition: IntegrationBase.cxx:102
EFTrackingFPGAIntegration::BenchmarkAlg::m_stripClusterKernelName
Gaudi::Property< std::string > m_stripClusterKernelName
Name of the strip clustering kernel.
Definition: BenchmarkAlg.h:80
EFTrackingTransient::StripClusterAuxInput
The StripClusterAuxInput struct is used to simplify the creation of the xAOD::StripClusterContainer.
Definition: EFTrackingTransient.h:207
EFTrackingFPGAIntegration::BenchmarkAlg::m_xaodClusterMaker
ToolHandle< xAODClusterMaker > m_xaodClusterMaker
Tool for creating xAOD containers.
Definition: BenchmarkAlg.h:49
EFTrackingTransient::StripClusterAuxInput::localPosition
std::vector< float > localPosition
Definition: EFTrackingTransient.h:208
EFTrackingTransient.h
EFTrackingFPGAIntegration::BenchmarkAlg::m_numEvents
std::atomic< ulonglong > m_numEvents
Number of events processed.
Definition: BenchmarkAlg.h:92
EFTrackingTransient::PixelClusterAuxInput::channelsInPhi
std::vector< int > channelsInPhi
Definition: EFTrackingTransient.h:229
EFTrackingTransient::STRIP_BLOCK_BUF_SIZE
constexpr unsigned long STRIP_BLOCK_BUF_SIZE
Definition: EFTrackingTransient.h:33
EFTrackingFPGAIntegration::BenchmarkAlg::m_kernelTime
std::atomic< cl_ulong > m_kernelTime
Time for kernel execution.
Definition: BenchmarkAlg.h:102
EFTrackingFPGAIntegration::BenchmarkAlg::m_inputStripClusterKey
SG::ReadHandleKey< xAOD::StripClusterContainer > m_inputStripClusterKey
Key to access input strip clusters.
Definition: BenchmarkAlg.h:64
EFTrackingTransient::PixelClusterAuxInput::id
std::vector< long unsigned int > id
Definition: EFTrackingTransient.h:223
EFTrackingFPGAIntegration::BenchmarkAlg::m_stripInputTime
std::atomic< cl_ulong > m_stripInputTime
Time for strip input buffer write.
Definition: BenchmarkAlg.h:94
EFTrackingTransient::PixelClusterAuxInput::widthInEta
std::vector< float > widthInEta
Definition: EFTrackingTransient.h:231
EFTrackingFPGAIntegration::BenchmarkAlg::m_pixelClusterKernelName
Gaudi::Property< std::string > m_pixelClusterKernelName
Name of the pixel clustering kernel.
Definition: BenchmarkAlg.h:77
EFTrackingTransient::PixelClusterAuxInput::globalPosition
std::vector< float > globalPosition
Definition: EFTrackingTransient.h:227
EFTrackingFPGAIntegration::BenchmarkAlg::runPassThrough
StatusCode runPassThrough(std::vector< uint64_t > &pixelChainOutput, std::vector< uint64_t > &stripChainOutput, const EventContext &ctx) const
Definition: BenchmarkAlg.cxx:278
DEBUG
#define DEBUG
Definition: page_access.h:11
EFTrackingTransient::StripClusterAuxInput::id
std::vector< long unsigned int > id
Definition: EFTrackingTransient.h:211
EFTrackingTransient::StripClusterAuxInput::channelsInPhi
std::vector< int > channelsInPhi
Definition: EFTrackingTransient.h:214
EFTrackingTransient::NUM_STRIP_WORD
constexpr unsigned int NUM_STRIP_WORD
Definition: EFTrackingTransient.h:29
EFTrackingFPGAIntegration::BenchmarkAlg::m_pixelClusteringTime
std::atomic< cl_ulong > m_pixelClusteringTime
Time for pixel clustering.
Definition: BenchmarkAlg.h:95
EFTrackingFPGAIntegration::BenchmarkAlg::m_pixelOutputTime
std::atomic< cl_ulong > m_pixelOutputTime
Time for pixel output buffer read.
Definition: BenchmarkAlg.h:100
EFTrackingFPGAIntegration::BenchmarkAlg::m_stripL2GTime
std::atomic< cl_ulong > m_stripL2GTime
Time for strip L2G.
Definition: BenchmarkAlg.h:98
IntegrationBase::m_program
cl::Program m_program
Program object containing the kernel.
Definition: IntegrationBase.h:68
EFTrackingFPGAIntegration::BenchmarkAlg::m_edmPrepTime
std::atomic< cl_ulong > m_edmPrepTime
Time for EDM preparation.
Definition: BenchmarkAlg.h:99
EFTrackingTransient::PixelClusterAuxInput::omegaX
std::vector< float > omegaX
Definition: EFTrackingTransient.h:232
EFTrackingTransient::PixelClusterAuxInput::omegaY
std::vector< float > omegaY
Definition: EFTrackingTransient.h:233
EFTrackingFPGAIntegration::BenchmarkAlg::m_runPassThrough
Gaudi::Property< bool > m_runPassThrough
Run the pass-through kernel.
Definition: BenchmarkAlg.h:89
EFTrackingFPGAIntegration::BenchmarkAlg::runDataPrep
StatusCode runDataPrep(std::vector< uint64_t > &pixelChainOutput, std::vector< uint64_t > &stripChainOutput, const EventContext &ctx) const
Definition: BenchmarkAlg.cxx:339