38 std::vector<uint64_t> inbufPixClustVec;
39 std::vector<IdentifierHash> listOfIds;
40 if(!
m_FPGADataFormatTool->convertPixelHitsToFPGADataFormat(*pixelRDOHandle, inbufPixClustVec, listOfIds, ctx)) {
41 return StatusCode::FAILURE;
44 unsigned int inbufPixClustSize = inbufPixClustVec.size();
48 cl::CommandQueue queuePixClust{};
51 cl::Kernel kernelPixClust{};
54 cl::Buffer clInbufPixClust{};
55 std::unique_ptr<uint64_t,
decltype(std::free) *> inbufPixClust(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * inbufPixClustSize)), std::free);
56 for (
unsigned int i = 0; i < inbufPixClustSize; ++i) {
57 inbufPixClust.get()[i] = inbufPixClustVec.at(i);
58 ATH_MSG_DEBUG(
"inbufPixClust[" << std::setw(6) << i <<
"] = 0x" << std::hex << std::setfill(
'0') << std::setw(16) << inbufPixClust.get()[i] << std::setfill(
' ') << std::dec);
61 inbufPixClustVec.resize(1);
62 inbufPixClustVec.clear();
63 OCL_CHECK(err, clInbufPixClust = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
sizeof(uint64_t) * inbufPixClustSize, inbufPixClust.get(), &err));
64 OCL_CHECK(err, err = kernelPixClust.setArg(0, clInbufPixClust));
65 cl::Buffer clOutbufPixClust{};
66 unsigned int outbufPixClustSize = inbufPixClustSize + 9164 + 6;
67 std::unique_ptr<uint64_t,
decltype(std::free) *> outbufPixClust(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * outbufPixClustSize)), std::free);
68 OCL_CHECK(err, clOutbufPixClust = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY,
sizeof(uint64_t) * outbufPixClustSize, outbufPixClust.get(), &err));
69 OCL_CHECK(err, err = kernelPixClust.setArg(1, clOutbufPixClust));
70 cl::Buffer clOutbufEDMPixClust{};
71 unsigned int outbufEDMPixClustSize = inbufPixClustSize * 10 + 6;
72 std::unique_ptr<uint64_t,
decltype(std::free) *> outbufEDMPixClust(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * outbufEDMPixClustSize)), std::free);
73 OCL_CHECK(err, clOutbufEDMPixClust = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY,
sizeof(uint64_t) * outbufEDMPixClustSize, outbufEDMPixClust.get(), &err));
74 OCL_CHECK(err, err = kernelPixClust.setArg(2, clOutbufEDMPixClust));
76 ATH_MSG_INFO(
"Making clusters out of " << inbufPixClustSize - 6 <<
" channels over threshold");
78 auto write_start = std::chrono::high_resolution_clock::now();
79 OCL_CHECK(err, err = queuePixClust.enqueueMigrateMemObjects({clInbufPixClust}, 0));
80 OCL_CHECK(err, err = queuePixClust.finish());
81 auto write_end = std::chrono::high_resolution_clock::now();
84 OCL_CHECK(err, err = queuePixClust.enqueueTask(kernelPixClust));
85 OCL_CHECK(err, err = queuePixClust.finish());
86 auto compute_end = std::chrono::high_resolution_clock::now();
89 OCL_CHECK(err, err = queuePixClust.enqueueMigrateMemObjects({clOutbufPixClust}, CL_MIGRATE_MEM_OBJECT_HOST));
90 OCL_CHECK(err, err = queuePixClust.enqueueMigrateMemObjects({clOutbufEDMPixClust}, CL_MIGRATE_MEM_OBJECT_HOST));
91 OCL_CHECK(err, err = queuePixClust.finish());
92 auto read_end = std::chrono::high_resolution_clock::now();
94 auto total_latency = std::chrono::duration<float, std::micro>(read_end - write_start).count();
95 auto write_latency = std::chrono::duration<float, std::micro>(write_end - write_start).count();
96 auto compute_latency = std::chrono::duration<float, std::micro>(compute_end - write_end).count();
97 auto read_latency = std::chrono::duration<float, std::micro>(read_end - compute_end).count();
98 ATH_MSG_INFO(
"Total latency:" << total_latency <<
" us, " <<
"Compute latency: " << compute_latency <<
" us");
99 ATH_MSG_INFO(
"Write latency:" << write_latency <<
" us, " <<
"Read latency: " << read_latency <<
" us");
102 cl::CommandQueue queuePixCoords{};
104 cl::Kernel kernelPixCoords{};
107 cl::Buffer clInbufPixCoords{};
108 std::unique_ptr<uint64_t,
decltype(std::free) *> inbufPixCoords(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * outbufPixClustSize)), std::free);
109 for (
unsigned int i = 0; i < outbufPixClustSize; ++i) {
110 ATH_MSG_DEBUG(
"outbufPixClust[" << std::setw(6) << i <<
"] = 0x" << std::hex << std::setfill(
'0') << std::setw(16) << outbufPixClust.get()[i] << std::setfill(
' ') << std::dec);
111 inbufPixCoords.get()[i] = outbufPixClust.get()[i];
113 OCL_CHECK(err, clInbufPixCoords = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
sizeof(uint64_t) * outbufPixClustSize, inbufPixCoords.get(), &err));
114 OCL_CHECK(err, err = kernelPixCoords.setArg(0, clInbufPixCoords));
116 cl::Buffer clInbufEDMPixCoords{};
117 std::unique_ptr<uint64_t,
decltype(std::free) *> inbufEDMPixCoords(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * outbufEDMPixClustSize)), std::free);
118 for (
unsigned int i = 0; i < outbufEDMPixClustSize; ++i) {
119 ATH_MSG_DEBUG(
"outbufEDMPixClust[" << std::setw(6) << i <<
"] = 0x" << std::hex << std::setfill(
'0') << std::setw(16) << outbufEDMPixClust.get()[i] << std::setfill(
' ') << std::dec);
120 inbufEDMPixCoords.get()[i] = outbufEDMPixClust.get()[i];
122 if ((i > 2) && ((i - 3) % 10 == 0) && (((outbufEDMPixClust.get()[i] & (((1ULL << 8) - 1ULL) << 56)) >> 56) == 0xcd) && (((outbufEDMPixClust.get()[i - 1] & (((1ULL << 1) - 1ULL) << 25)) >> 25))) {
123 outbufEDMPixClustSize = i + 3;
126 OCL_CHECK(err, clInbufEDMPixCoords = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
sizeof(uint64_t) * outbufEDMPixClustSize, inbufEDMPixCoords.get(), &err));
127 OCL_CHECK(err, err = kernelPixCoords.setArg(1, clInbufEDMPixCoords));
129 cl::Buffer clOutbufPixCoords{};
130 unsigned int outbufPixCoordsSize = inbufPixClustSize * 2 + 9164 + 6;
131 std::unique_ptr<uint64_t,
decltype(std::free) *> outbufPixCoords(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * outbufPixCoordsSize)), std::free);
132 OCL_CHECK(err, clOutbufPixCoords = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY,
sizeof(uint64_t) * outbufPixCoordsSize, outbufPixCoords.get(), &err));
133 OCL_CHECK(err, err = kernelPixCoords.setArg(2, clOutbufPixCoords));
134 cl::Buffer clOutbufEDMPixCoords{};
135 std::unique_ptr<uint64_t,
decltype(std::free) *> outbufEDMPixCoords(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * outbufEDMPixClustSize)), std::free);
136 OCL_CHECK(err, clOutbufEDMPixCoords = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY,
sizeof(uint64_t) * outbufEDMPixClustSize, outbufEDMPixCoords.get(), &err));
137 OCL_CHECK(err, err = kernelPixCoords.setArg(3, clOutbufEDMPixCoords));
140 write_start = std::chrono::high_resolution_clock::now();
141 OCL_CHECK(err, err = queuePixCoords.enqueueMigrateMemObjects({clInbufPixCoords}, 0));
142 OCL_CHECK(err, err = queuePixCoords.enqueueMigrateMemObjects({clInbufEDMPixCoords}, 0));
143 OCL_CHECK(err, err = queuePixCoords.finish());
144 write_end = std::chrono::high_resolution_clock::now();
147 OCL_CHECK(err, err = queuePixCoords.enqueueTask(kernelPixCoords));
148 OCL_CHECK(err, err = queuePixCoords.finish());
149 compute_end = std::chrono::high_resolution_clock::now();
152 OCL_CHECK(err, err = queuePixCoords.enqueueMigrateMemObjects({clOutbufPixCoords}, CL_MIGRATE_MEM_OBJECT_HOST));
153 OCL_CHECK(err, err = queuePixCoords.enqueueMigrateMemObjects({clOutbufEDMPixCoords}, CL_MIGRATE_MEM_OBJECT_HOST));
154 OCL_CHECK(err, err = queuePixCoords.finish());
155 read_end = std::chrono::high_resolution_clock::now();
157 total_latency = std::chrono::duration<float, std::micro>(read_end - write_start).count();
158 write_latency = std::chrono::duration<float, std::micro>(write_end - write_start).count();
159 compute_latency = std::chrono::duration<float, std::micro>(compute_end - write_end).count();
160 read_latency = std::chrono::duration<float, std::micro>(read_end - compute_end).count();
161 ATH_MSG_INFO(
"Total latency:" << total_latency <<
" us, " <<
"Compute latency: " << compute_latency <<
" us");
162 ATH_MSG_INFO(
"Write latency:" << write_latency <<
" us, " <<
"Read latency: " << read_latency <<
" us");
163 ATH_MSG_INFO(
"Coordinates calculated, transform to xAOD::PixelCluster compatible formats");
165 for (
unsigned int i = 0; i < outbufPixCoordsSize; ++i) {
166 ATH_MSG_DEBUG(
"outbufPixCoords[" << std::setw(6) << i <<
"] = 0x" << std::hex << std::setfill(
'0') << std::setw(16) << outbufPixCoords.get()[i] << std::setfill(
' ') << std::dec);
169 cl::CommandQueue queuePixEDMPrep{};
171 cl::Kernel kernelPixEDMPrep{};
174 cl::Buffer clInbufEDMPixEDMPrep{};
175 std::unique_ptr<uint64_t,
decltype(std::free) *> inbufEDMPixEDMPrep(
static_cast<uint64_t*
>(aligned_alloc(64,
sizeof(uint64_t) * outbufEDMPixClustSize)), std::free);
176 for (
unsigned int i = 0; i < outbufEDMPixClustSize; ++i) {
177 inbufEDMPixEDMPrep.get()[i] = outbufEDMPixCoords.get()[i];
178 ATH_MSG_DEBUG(
"inbufEDMPixEDMPrep[" << std::setw(6) << i <<
"] = 0x" << std::hex << std::setfill(
'0') << std::setw(16) << inbufEDMPixEDMPrep.get()[i] << std::setfill(
' ') << std::dec);
180 OCL_CHECK(err, clInbufEDMPixEDMPrep = cl::Buffer(
m_context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
sizeof(uint64_t) * outbufEDMPixClustSize, inbufEDMPixEDMPrep.get(), &err));
181 OCL_CHECK(err, err = kernelPixEDMPrep.setArg(0, clInbufEDMPixEDMPrep));
183 cl::Buffer clOutbufEDMPixEDMPrep{};
186 OCL_CHECK(err, err = kernelPixEDMPrep.setArg(1, clOutbufEDMPixEDMPrep));
189 write_start = std::chrono::high_resolution_clock::now();
190 OCL_CHECK(err, err = queuePixEDMPrep.enqueueMigrateMemObjects({clInbufEDMPixEDMPrep}, 0));
191 OCL_CHECK(err, err = queuePixEDMPrep.finish());
192 write_end = std::chrono::high_resolution_clock::now();
195 OCL_CHECK(err, err = queuePixEDMPrep.enqueueTask(kernelPixEDMPrep));
196 OCL_CHECK(err, err = queuePixEDMPrep.finish());
197 compute_end = std::chrono::high_resolution_clock::now();
200 OCL_CHECK(err, err = queuePixEDMPrep.enqueueMigrateMemObjects({clOutbufEDMPixEDMPrep}, CL_MIGRATE_MEM_OBJECT_HOST));
201 OCL_CHECK(err, err = queuePixEDMPrep.finish());
202 read_end = std::chrono::high_resolution_clock::now();
205 ATH_MSG_DEBUG(
"outbufEDMPixEDMPrep[" << std::setw(6) << i <<
"] = 0x" << std::hex << std::setfill(
'0') << std::setw(16) << outbufEDMPixEDMPrep.get()[i] << std::setfill(
' ') << std::dec);
207 total_latency = std::chrono::duration<float, std::micro>(read_end - write_start).count();
208 write_latency = std::chrono::duration<float, std::micro>(write_end - write_start).count();
209 compute_latency = std::chrono::duration<float, std::micro>(compute_end - write_end).count();
210 read_latency = std::chrono::duration<float, std::micro>(read_end - compute_end).count();
211 ATH_MSG_INFO(
"Total latency:" << total_latency <<
" us, " <<
"Compute latency: " << compute_latency <<
" us");
212 ATH_MSG_INFO(
"Write latency:" << write_latency <<
" us, " <<
"Read latency: " << read_latency <<
" us");
213 ATH_MSG_INFO(
"Received data for making xAOD::PixelCluster's");
215 uint64_t* pixelClusters = (uint64_t*)outbufEDMPixEDMPrep.get();
216 unsigned int numClusters = pixelClusters[0];
217 ATH_MSG_INFO(
"Received " << numClusters <<
" clusters");
219 write_start = std::chrono::high_resolution_clock::now();
220 std::unique_ptr<EFTrackingTransient::Metadata> metadata = std::make_unique<EFTrackingTransient::Metadata>();
222 metadata->numOfPixelClusters = numClusters;
223 metadata->pcRdoIndexSize = numClusters;
229 for (
unsigned int i = 0; i < numClusters; ++i) {
318 metadata->pcRdoIndex[i] = rdoCounter;
322 write_end = std::chrono::high_resolution_clock::now();
323 write_latency = std::chrono::duration<float, std::micro>(write_end - write_start).count();
324 ATH_MSG_INFO(
"xAOD::PixelClusterContainer made in " << write_latency <<
" us");
326 return StatusCode::SUCCESS;