Should be overridden by derived classes to perform meaningful work.
33{
35
37
38 std::vector<uint64_t> inbufPixClustVec;
39 std::vector<IdentifierHash> listOfIds;
40 if(!
m_FPGADataFormatTool->convertPixelHitsToFPGADataFormat(*pixelRDOHandle, inbufPixClustVec, listOfIds, ctx)) {
41 return StatusCode::FAILURE;
42 }
43
44 unsigned int inbufPixClustSize = inbufPixClustVec.size();
45
47
48 cl::CommandQueue queuePixClust{};
50
51 cl::Kernel kernelPixClust{};
53
54 cl::Buffer clInbufPixClust{};
55 std::unique_ptr<
uint64_t,
decltype(std::free) *> inbufPixClust(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * inbufPixClustSize)), std::free);
56 for (
unsigned int i = 0;
i < inbufPixClustSize; ++
i) {
57 inbufPixClust.get()[
i] = inbufPixClustVec.at(i);
58 ATH_MSG_DEBUG(
"inbufPixClust[" << std::setw(6) << i <<
"] = 0x" << std::hex << std::setfill(
'0') << std::setw(16) << inbufPixClust.get()[i] << std::setfill(
' ') << std::dec);
59 }
60
61 inbufPixClustVec.resize(1);
62 inbufPixClustVec.clear();
63 OCL_CHECK(err, clInbufPixClust = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
sizeof(uint64_t) * inbufPixClustSize, inbufPixClust.get(), &err));
64 OCL_CHECK(err, err = kernelPixClust.setArg(0, clInbufPixClust));
65 cl::Buffer clOutbufPixClust{};
66 unsigned int outbufPixClustSize = inbufPixClustSize + 9164 + 6;
67 std::unique_ptr<
uint64_t,
decltype(std::free) *> outbufPixClust(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * outbufPixClustSize)), std::free);
68 OCL_CHECK(err, clOutbufPixClust = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY,
sizeof(uint64_t) * outbufPixClustSize, outbufPixClust.get(), &err));
69 OCL_CHECK(err, err = kernelPixClust.setArg(1, clOutbufPixClust));
70 cl::Buffer clOutbufEDMPixClust{};
71 unsigned int outbufEDMPixClustSize = inbufPixClustSize * 10 + 6;
72 std::unique_ptr<
uint64_t,
decltype(std::free) *> outbufEDMPixClust(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * outbufEDMPixClustSize)), std::free);
73 OCL_CHECK(err, clOutbufEDMPixClust = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY,
sizeof(uint64_t) * outbufEDMPixClustSize, outbufEDMPixClust.get(), &err));
74 OCL_CHECK(err, err = kernelPixClust.setArg(2, clOutbufEDMPixClust));
75
76 ATH_MSG_INFO(
"Making clusters out of " << inbufPixClustSize - 6 <<
" channels over threshold");
77
78 auto write_start = std::chrono::high_resolution_clock::now();
79 OCL_CHECK(err, err = queuePixClust.enqueueMigrateMemObjects({clInbufPixClust}, 0));
80 OCL_CHECK(err, err = queuePixClust.finish());
81 auto write_end = std::chrono::high_resolution_clock::now();
82
83
84 OCL_CHECK(err, err = queuePixClust.enqueueTask(kernelPixClust));
85 OCL_CHECK(err, err = queuePixClust.finish());
86 auto compute_end = std::chrono::high_resolution_clock::now();
87
88
89 OCL_CHECK(err, err = queuePixClust.enqueueMigrateMemObjects({clOutbufPixClust}, CL_MIGRATE_MEM_OBJECT_HOST));
90 OCL_CHECK(err, err = queuePixClust.enqueueMigrateMemObjects({clOutbufEDMPixClust}, CL_MIGRATE_MEM_OBJECT_HOST));
91 OCL_CHECK(err, err = queuePixClust.finish());
92 auto read_end = std::chrono::high_resolution_clock::now();
93
94 auto total_latency = std::chrono::duration<float, std::micro>(read_end - write_start).count();
95 auto write_latency = std::chrono::duration<float, std::micro>(write_end - write_start).count();
96 auto compute_latency = std::chrono::duration<float, std::micro>(compute_end - write_end).count();
97 auto read_latency = std::chrono::duration<float, std::micro>(read_end - compute_end).count();
98 ATH_MSG_INFO(
"Total latency:" << total_latency <<
" us, " <<
"Compute latency: " << compute_latency <<
" us");
99 ATH_MSG_INFO(
"Write latency:" << write_latency <<
" us, " <<
"Read latency: " << read_latency <<
" us");
101
102 cl::CommandQueue queuePixCoords{};
104 cl::Kernel kernelPixCoords{};
106
107 cl::Buffer clInbufPixCoords{};
108 std::unique_ptr<
uint64_t,
decltype(std::free) *> inbufPixCoords(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * outbufPixClustSize)), std::free);
109 for (
unsigned int i = 0;
i < outbufPixClustSize; ++
i) {
110 ATH_MSG_DEBUG(
"outbufPixClust[" << std::setw(6) << i <<
"] = 0x" << std::hex << std::setfill(
'0') << std::setw(16) << outbufPixClust.get()[i] << std::setfill(
' ') << std::dec);
111 inbufPixCoords.get()[
i] = outbufPixClust.get()[
i];
112 }
113 OCL_CHECK(err, clInbufPixCoords = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
sizeof(uint64_t) * outbufPixClustSize, inbufPixCoords.get(), &err));
114 OCL_CHECK(err, err = kernelPixCoords.setArg(0, clInbufPixCoords));
115
116 cl::Buffer clInbufEDMPixCoords{};
117 std::unique_ptr<
uint64_t,
decltype(std::free) *> inbufEDMPixCoords(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * outbufEDMPixClustSize)), std::free);
118 for (
unsigned int i = 0;
i < outbufEDMPixClustSize; ++
i) {
119 ATH_MSG_DEBUG(
"outbufEDMPixClust[" << std::setw(6) << i <<
"] = 0x" << std::hex << std::setfill(
'0') << std::setw(16) << outbufEDMPixClust.get()[i] << std::setfill(
' ') << std::dec);
120 inbufEDMPixCoords.get()[
i] = outbufEDMPixClust.get()[
i];
121
122 if ((i > 2) && ((i - 3) % 10 == 0) && (((outbufEDMPixClust.get()[i] & (((1ULL << 8) - 1ULL) << 56)) >> 56) == 0xcd) && (((outbufEDMPixClust.get()[i - 1] & (((1ULL << 1) - 1ULL) << 25)) >> 25))) {
123 outbufEDMPixClustSize =
i + 3;
124 }
125 }
126 OCL_CHECK(err, clInbufEDMPixCoords = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
sizeof(uint64_t) * outbufEDMPixClustSize, inbufEDMPixCoords.get(), &err));
127 OCL_CHECK(err, err = kernelPixCoords.setArg(1, clInbufEDMPixCoords));
128
129 cl::Buffer clOutbufPixCoords{};
130 unsigned int outbufPixCoordsSize = inbufPixClustSize * 2 + 9164 + 6;
131 std::unique_ptr<
uint64_t,
decltype(std::free) *> outbufPixCoords(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * outbufPixCoordsSize)), std::free);
132 OCL_CHECK(err, clOutbufPixCoords = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY,
sizeof(uint64_t) * outbufPixCoordsSize, outbufPixCoords.get(), &err));
133 OCL_CHECK(err, err = kernelPixCoords.setArg(2, clOutbufPixCoords));
134 cl::Buffer clOutbufEDMPixCoords{};
135 std::unique_ptr<
uint64_t,
decltype(std::free) *> outbufEDMPixCoords(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * outbufEDMPixClustSize)), std::free);
136 OCL_CHECK(err, clOutbufEDMPixCoords = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY,
sizeof(uint64_t) * outbufEDMPixClustSize, outbufEDMPixCoords.get(), &err));
137 OCL_CHECK(err, err = kernelPixCoords.setArg(3, clOutbufEDMPixCoords));
138
139
140 write_start = std::chrono::high_resolution_clock::now();
141 OCL_CHECK(err, err = queuePixCoords.enqueueMigrateMemObjects({clInbufPixCoords}, 0));
142 OCL_CHECK(err, err = queuePixCoords.enqueueMigrateMemObjects({clInbufEDMPixCoords}, 0));
143 OCL_CHECK(err, err = queuePixCoords.finish());
144 write_end = std::chrono::high_resolution_clock::now();
145
146
147 OCL_CHECK(err, err = queuePixCoords.enqueueTask(kernelPixCoords));
148 OCL_CHECK(err, err = queuePixCoords.finish());
149 compute_end = std::chrono::high_resolution_clock::now();
150
151
152 OCL_CHECK(err, err = queuePixCoords.enqueueMigrateMemObjects({clOutbufPixCoords}, CL_MIGRATE_MEM_OBJECT_HOST));
153 OCL_CHECK(err, err = queuePixCoords.enqueueMigrateMemObjects({clOutbufEDMPixCoords}, CL_MIGRATE_MEM_OBJECT_HOST));
154 OCL_CHECK(err, err = queuePixCoords.finish());
155 read_end = std::chrono::high_resolution_clock::now();
156
157 total_latency = std::chrono::duration<float, std::micro>(read_end - write_start).count();
158 write_latency = std::chrono::duration<float, std::micro>(write_end - write_start).count();
159 compute_latency = std::chrono::duration<float, std::micro>(compute_end - write_end).count();
160 read_latency = std::chrono::duration<float, std::micro>(read_end - compute_end).count();
161 ATH_MSG_INFO(
"Total latency:" << total_latency <<
" us, " <<
"Compute latency: " << compute_latency <<
" us");
162 ATH_MSG_INFO(
"Write latency:" << write_latency <<
" us, " <<
"Read latency: " << read_latency <<
" us");
163 ATH_MSG_INFO(
"Coordinates calculated, transform to xAOD::PixelCluster compatible formats");
164
165 for (
unsigned int i = 0;
i < outbufPixCoordsSize; ++
i) {
166 ATH_MSG_DEBUG(
"outbufPixCoords[" << std::setw(6) << i <<
"] = 0x" << std::hex << std::setfill(
'0') << std::setw(16) << outbufPixCoords.get()[i] << std::setfill(
' ') << std::dec);
167 }
168
169 cl::CommandQueue queuePixEDMPrep{};
171 cl::Kernel kernelPixEDMPrep{};
173
174 cl::Buffer clInbufEDMPixEDMPrep{};
175 std::unique_ptr<
uint64_t,
decltype(std::free) *> inbufEDMPixEDMPrep(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * outbufEDMPixClustSize)), std::free);
176 for (
unsigned int i = 0;
i < outbufEDMPixClustSize; ++
i) {
177 inbufEDMPixEDMPrep.get()[
i] = outbufEDMPixCoords.get()[
i];
178 ATH_MSG_DEBUG(
"inbufEDMPixEDMPrep[" << std::setw(6) << i <<
"] = 0x" << std::hex << std::setfill(
'0') << std::setw(16) << inbufEDMPixEDMPrep.get()[i] << std::setfill(
' ') << std::dec);
179 }
180 OCL_CHECK(err, clInbufEDMPixEDMPrep = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
sizeof(uint64_t) * outbufEDMPixClustSize, inbufEDMPixEDMPrep.get(), &err));
181 OCL_CHECK(err, err = kernelPixEDMPrep.setArg(0, clInbufEDMPixEDMPrep));
182
183 cl::Buffer clOutbufEDMPixEDMPrep{};
186 OCL_CHECK(err, err = kernelPixEDMPrep.setArg(1, clOutbufEDMPixEDMPrep));
187
188
189 write_start = std::chrono::high_resolution_clock::now();
190 OCL_CHECK(err, err = queuePixEDMPrep.enqueueMigrateMemObjects({clInbufEDMPixEDMPrep}, 0));
191 OCL_CHECK(err, err = queuePixEDMPrep.finish());
192 write_end = std::chrono::high_resolution_clock::now();
193
194
195 OCL_CHECK(err, err = queuePixEDMPrep.enqueueTask(kernelPixEDMPrep));
196 OCL_CHECK(err, err = queuePixEDMPrep.finish());
197 compute_end = std::chrono::high_resolution_clock::now();
198
199
200 OCL_CHECK(err, err = queuePixEDMPrep.enqueueMigrateMemObjects({clOutbufEDMPixEDMPrep}, CL_MIGRATE_MEM_OBJECT_HOST));
201 OCL_CHECK(err, err = queuePixEDMPrep.finish());
202 read_end = std::chrono::high_resolution_clock::now();
203
205 ATH_MSG_DEBUG(
"outbufEDMPixEDMPrep[" << std::setw(6) << i <<
"] = 0x" << std::hex << std::setfill(
'0') << std::setw(16) << outbufEDMPixEDMPrep.get()[i] << std::setfill(
' ') << std::dec);
206
207 total_latency = std::chrono::duration<float, std::micro>(read_end - write_start).count();
208 write_latency = std::chrono::duration<float, std::micro>(write_end - write_start).count();
209 compute_latency = std::chrono::duration<float, std::micro>(compute_end - write_end).count();
210 read_latency = std::chrono::duration<float, std::micro>(read_end - compute_end).count();
211 ATH_MSG_INFO(
"Total latency:" << total_latency <<
" us, " <<
"Compute latency: " << compute_latency <<
" us");
212 ATH_MSG_INFO(
"Write latency:" << write_latency <<
" us, " <<
"Read latency: " << read_latency <<
" us");
213 ATH_MSG_INFO(
"Received data for making xAOD::PixelCluster's");
214
216 unsigned int numClusters = pixelClusters[0];
217 ATH_MSG_INFO(
"Received " << numClusters <<
" clusters");
218
219 write_start = std::chrono::high_resolution_clock::now();
220 std::unique_ptr<EFTrackingTransient::Metadata>
metadata = std::make_unique<EFTrackingTransient::Metadata>();
221
222 metadata->numOfPixelClusters = numClusters;
223 metadata->pcRdoIndexSize = numClusters;
224
225 EFTrackingTransient::PixelClusterAuxInput pcAux;
226
229 for (
unsigned int i = 0;
i < numClusters; ++
i) {
231 rdoCounter = 0;
235
239
242 if (rdo) {
244 rdoCounter++;
246 }
247
250 if (rdo) {
252 rdoCounter++;
254 }
255
258 if (rdo) {
260 rdoCounter++;
262 }
263
266 if (rdo) {
268 rdoCounter++;
270 }
271
275
279
283
287
291
295
299
303
307
311
317
319 }
320
322 write_end = std::chrono::high_resolution_clock::now();
323 write_latency = std::chrono::duration<float, std::micro>(write_end - write_start).count();
324 ATH_MSG_INFO(
"xAOD::PixelClusterContainer made in " << write_latency <<
" us");
325
326 return StatusCode::SUCCESS;
327}
#define ATH_CHECK
Evaluate an expression and check for errors.
#define OCL_CHECK(err, call)
cl::Program m_program
Program object containing the kernel.
cl::Context m_context
Context object for the application.
cl::Device m_accelerator
Device object for the accelerator card.
ToolHandle< xAODClusterMaker > m_xAODClusterMaker
SG::ReadHandleKey< PixelRDO_Container > m_pixelRDOKey
ToolHandle< FPGADataFormatTool > m_FPGADataFormatTool
constexpr unsigned int MAX_NUM_CLUSTERS
constexpr uint32_t PIXEL_CONTAINER_BUF_SIZE
row
Appending html table to final .html summary file.
SG::ReadCondHandle< T > makeHandle(const SG::ReadCondHandleKey< T > &key, const EventContext &ctx=Gaudi::Hive::currentContext())