ATLAS Offline Software
Loading...
Searching...
No Matches
FPGAStripClustering.cxx
Go to the documentation of this file.
1
4
7#include <CL/cl_ext.h>
8#include <iomanip>
9
20
21StatusCode FPGAStripClustering::execute(const EventContext &ctx) const {
22 ATH_MSG_DEBUG("Executing StripClustering for event slot: " << ctx.slot());
23
24 // Handle input data
25 auto stripRDOHandle = SG::makeHandle(m_stripRDOKey, ctx);
26 if (!stripRDOHandle.isValid()) {
27 ATH_MSG_ERROR("Failed to retrieve input data.");
28 return StatusCode::FAILURE;
29 }
30 std::vector<IdentifierHash> listOfIds;
31 std::vector<uint64_t> outputData;
32 if (!m_FPGADataFormatTool->convertStripHitsToFPGADataFormat(*stripRDOHandle, outputData, listOfIds, ctx)) {
33 ATH_MSG_ERROR("Failed to convert input data to FPGA-compatible format.");
34 return StatusCode::FAILURE;
35 }
36
37 // Debug input data
38 ATH_MSG_DEBUG("Encoded strip data size: " << outputData.size());
39 int line = 0;
40 for (const auto& var : outputData) {
41 ATH_MSG_DEBUG("EncodedData[" << std::dec << std::setw(4) << line << "] = 0x"
42 << std::hex << std::setfill('0') << std::setw(16) << var << std::setfill(' '));
43 line++;
44 }
45
46 // Prepare output vector (same size as input data)
47 std::vector<uint64_t> kernelOutput(outputData.size(), 0);
48 std::vector<uint64_t> kernelEDMOutput(outputData.size()*10, 0);
49 std::vector<uint64_t> kernel_L2GOutput(outputData.size(), 0);
50 std::vector<uint64_t> kernel_L2GEDMOutput(outputData.size()*10, 0);
51 std::vector<uint32_t> kernel_EDMPrepOutputPixel(outputData.size(), 0);
52 // Create host side output vectors
53 std::vector<uint64_t> pixelOutput(EFTrackingTransient::PIXEL_CONTAINER_BUF_SIZE, 0);
54 std::vector<uint64_t> stripOutput(EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE, 0);
55
56 ATH_MSG_DEBUG("strip buffer size: "<<std::dec <<EFTrackingTransient::STRIP_CONTAINER_BUF_SIZE);
57
58 // Work with the FPGA accelerator
59 cl_int err = 0;
60
61 // Allocate buffers on the accelerator
62 cl::Buffer inputBuffer(m_context, CL_MEM_READ_ONLY, sizeof(uint64_t) * outputData.size(), nullptr, &err);
63 cl::Buffer outputBuffer(m_context, CL_MEM_READ_WRITE, sizeof(uint64_t) * kernelOutput.size(), nullptr, &err);
64 cl::Buffer outputEDMBuffer(m_context, CL_MEM_READ_WRITE, sizeof(uint64_t) * kernelEDMOutput.size(), nullptr, &err);
65
66 cl::Buffer output_L2GBuffer(m_context, CL_MEM_WRITE_ONLY, sizeof(uint64_t) * kernel_L2GOutput.size(), nullptr, &err);
67 cl::Buffer output_L2GEDMBuffer(m_context, CL_MEM_READ_WRITE, sizeof(uint64_t) * kernel_L2GEDMOutput.size(), nullptr, &err);
68
69
70 cl::Buffer inputBufferPixel(m_context, CL_MEM_READ_ONLY, sizeof(uint64_t) * outputData.size(), nullptr, &err);
71 cl::Buffer output_EDMPrepBufferPixel(m_context, CL_MEM_WRITE_ONLY, sizeof(uint64_t) * kernel_EDMPrepOutputPixel.size(), nullptr, &err);
72
73 cl::Buffer stripbuffer(m_context, CL_MEM_READ_WRITE, stripOutput.size() * sizeof(uint64_t), NULL, &err);
74
75 // Prepare and configure the kernel
76 cl::Kernel kernel(m_program, m_kernelName.value().c_str(), &err);
77 kernel.setArg(0, inputBuffer);
78 kernel.setArg(1, outputBuffer);
79 kernel.setArg(2, outputEDMBuffer);
80 kernel.setArg(3, static_cast<unsigned int>(outputData.size()));
81
82 cl::Kernel kernel_L2G(m_program, "l2g_strip_tool", &err);
83 kernel_L2G.setArg(0, outputBuffer);
84 kernel_L2G.setArg(1, outputEDMBuffer);
85 kernel_L2G.setArg(2, output_L2GBuffer);
86 kernel_L2G.setArg(3, output_L2GEDMBuffer);
87
88 cl::Kernel kernel_EMDPrep(m_program, "EDMPrep", &err);
89 kernel_EMDPrep.setArg(0, inputBufferPixel);
90 kernel_EMDPrep.setArg(1, output_L2GEDMBuffer);
91 kernel_EMDPrep.setArg(2, output_EDMPrepBufferPixel);
92 kernel_EMDPrep.setArg(3, stripbuffer);
93
94
95 // Command queue for the accelerator
96 cl::CommandQueue queue(m_context, m_accelerator, 0, &err);
97
98 // Enqueue write, execute kernel, and read back results
99 queue.enqueueWriteBuffer(inputBuffer, CL_TRUE, 0, sizeof(uint64_t) * outputData.size(), outputData.data());
100 queue.enqueueTask(kernel);
101 queue.finish();
102 queue.enqueueTask(kernel_L2G);
103 queue.finish();
104 queue.enqueueTask(kernel_EMDPrep);
105 queue.finish();
106 queue.enqueueReadBuffer(outputBuffer, CL_TRUE, 0, sizeof(uint64_t) * kernelOutput.size(), kernelOutput.data());
107 queue.enqueueReadBuffer(outputEDMBuffer, CL_TRUE, 0, sizeof(uint64_t) * kernelEDMOutput.size(), kernelEDMOutput.data());
108 queue.enqueueReadBuffer(output_L2GBuffer, CL_TRUE, 0, sizeof(uint64_t) * kernel_L2GOutput.size(), kernel_L2GOutput.data());
109 queue.enqueueReadBuffer(output_L2GEDMBuffer, CL_TRUE, 0, sizeof(uint64_t) * kernel_L2GEDMOutput.size(), kernel_L2GEDMOutput.data());
110 queue.enqueueReadBuffer(stripbuffer, CL_TRUE, 0, stripOutput.size() * sizeof(uint64_t), stripOutput.data());
111 queue.finish();
112
113 // Debug output data
114 ATH_MSG_DEBUG("Kernel execution completed.");
115 line = 0;
116 for (const auto& var : kernelOutput) {
117 if(var != 0) ATH_MSG_DEBUG("Clustering KernelOutput[" << std::dec << std::setw(4) << line << "] = 0x"
118 << std::hex << std::setfill('0') << std::setw(16) << var << std::setfill(' '));
119 line++;
120 }
121 line = 0;
122 for (const auto& var : kernelEDMOutput) {
123 if(var != 0) ATH_MSG_DEBUG("Clustering KernelEDMOutput[" << std::dec << std::setw(4) << line << "] = 0x"
124 << std::hex << std::setfill('0') << std::setw(16) << var << std::setfill(' '));
125 line++;
126 }
127
128 line = 0;
129 for (const auto& var : kernel_L2GOutput) {
130 if(var != 0) ATH_MSG_DEBUG("L2G KernelOutput[" << std::dec << std::setw(4) << line << "] = 0x"
131 << std::hex << std::setfill('0') << std::setw(16) << var << std::setfill(' '));
132 line++;
133 }
134 line = 0;
135 for (const auto& var : kernel_L2GEDMOutput) {
136 if(var != 0) ATH_MSG_DEBUG("L2g KernelEDMOutput[" << std::dec << std::setw(4) << line << "] = 0x"
137 << std::hex << std::setfill('0') << std::setw(16) << var << std::setfill(' '));
138 line++;
139 }
140 line = 0;
141 for (const auto& var : stripOutput) {
142 if(var != 0) ATH_MSG_DEBUG("EDM Prep kernel_EMDPrep[" << std::dec << std::setw(4) << line << "] = 0x"
143 << std::hex << std::setfill('0') << std::setw(16) << var << std::setfill(' '));
144 line++;
145 }
146 ATH_MSG_DEBUG("Total lines: "<<std::dec<<line<<std::endl);
147
148
149
150
151
152 // use 32-bit point to access output
153 uint64_t *stripClusters = (uint64_t *)stripOutput.data();
154
155 unsigned int numStripClusters = stripClusters[0];
156 ATH_MSG_DEBUG("numStripClusters: " << numStripClusters);
157
158
159 std::unique_ptr<EFTrackingTransient::Metadata> metadata =
160 std::make_unique<EFTrackingTransient::Metadata>();
161
162 metadata->numOfStripClusters = numStripClusters;
163 metadata->scRdoIndexSize = numStripClusters;
164
166
167 // Declare a few vairiables to be used in the loop
168 int row = 0;
169 uint64_t rdo;
170 int rdoCounter = 0;
171
172 // Make strip cluster aux input
173 {
174 for (unsigned int i = 0; i < numStripClusters; i++)
175 {
176 rdoCounter = 0;
177 row = 0; // idhash
178 scAux.idHash.push_back(stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
179 row = 1; // id
180 scAux.id.push_back(stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
181 row = 2; // rdo w1
182 rdo = stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8];
183 if (rdo)
184 {
185 scAux.rdoList.push_back(rdo);
186 rdoCounter++;
187 }
188 row = 3; // rdo w2
189 rdo = stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8];
190 if (rdo)
191 {
192 scAux.rdoList.push_back(rdo);
193 rdoCounter++;
194 }
195 row = 4; // rdo w3
196 rdo = stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8];
197 if (rdo)
198 {
199 scAux.rdoList.push_back(rdo);
200 rdoCounter++;
201 }
202 row = 5; // rdo w4
203 rdo = stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8];
204 if (rdo)
205 {
206 scAux.rdoList.push_back(rdo);
207 rdoCounter++;
208 }
209 row = 6; // local x
210 scAux.localPosition.push_back(std::bit_cast<double>(stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]));
211
212 ATH_MSG_DEBUG("Strip [" << std::dec << "] "<<row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8 << " " << std::hex << stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
213 row = 8; // local covariance xx
214 scAux.localCovariance.push_back(std::bit_cast<double>(stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]));
215 row = 9; // global x
216 scAux.globalPosition.push_back(std::bit_cast<double>(stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]));
217 row = 10; // global y
218 scAux.globalPosition.push_back(std::bit_cast<double>(stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]));
219 row = 11; // global z
220 scAux.globalPosition.push_back(std::bit_cast<double>(stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]));
221 row = 12; // channels in phi
222 scAux.channelsInPhi.push_back(stripClusters[row * EFTrackingTransient::MAX_NUM_CLUSTERS + i + 8]);
223
224 metadata->scRdoIndex[i] = rdoCounter;
225 }
226 // print out the strip cluster aux input
227 if (msgLvl(MSG::DEBUG))
228 {
229 for (unsigned int i = 0; i < numStripClusters; i++)
230 {
231 ATH_MSG_DEBUG("Strip cluster " << i << " idHash: " << scAux.idHash[i]);
232 ATH_MSG_DEBUG("Strip cluster " << i << std::hex << " id: " << scAux.id[i]);
233 ATH_MSG_DEBUG("Strip cluster " << i << std::dec << " localPosition x: " << scAux.localPosition[i]);
234 ATH_MSG_DEBUG("Strip cluster " << i << " localCovariance: " << scAux.localCovariance[i]);
235 ATH_MSG_DEBUG("Strip cluster " << i << " globalPosition x: " << scAux.globalPosition[i * 3]);
236 ATH_MSG_DEBUG("Strip cluster " << i << " globalPosition y: " << scAux.globalPosition[i * 3 + 1]);
237 ATH_MSG_DEBUG("Strip cluster " << i << " globalPosition z: " << scAux.globalPosition[i * 3 + 2]);
238 ATH_MSG_DEBUG("Strip cluster " << i << " channelsInPhi: " << scAux.channelsInPhi[i]);
239 ATH_MSG_DEBUG("Strip cluster " << i << " rdoList size: " << metadata->scRdoIndex[i]);
240 }
241 }
242 }
243
244
245
246
247 return StatusCode::SUCCESS;
248}
#define ATH_CHECK
Evaluate an expression and check for errors.
#define ATH_MSG_ERROR(x)
#define ATH_MSG_DEBUG(x)
bool msgLvl(const MSG::Level lvl) const
StatusCode execute(const EventContext &ctx) const override
Should be overridden by derived classes to perform meaningful work.
StatusCode initialize() override
Implementation of strip clustering using FPGA acceleration.
Gaudi::Property< std::string > m_xclbin
Path and name of the xclbin file.
Gaudi::Property< std::string > m_refTV
Reference TestVector.
Gaudi::Property< std::string > m_kernelName
Kernel name.
Gaudi::Property< std::string > m_inputTV
Input TestVector.
SG::ReadHandleKey< SCT_RDO_Container > m_stripRDOKey
Input data key.
ToolHandle< FPGADataFormatTool > m_FPGADataFormatTool
StatusCode loadProgram(const std::string &xclbin)
Find the xclbin file and load it into the OpenCL program object.
cl::Program m_program
Program object containing the kernel.
virtual StatusCode initialize() override
Detect the OpenCL devices and prepare OpenCL context.
cl::Context m_context
Context object for the application.
StatusCode precheck(const std::vector< Gaudi::Property< std::string > > &inputs) const
Check if the desired Gaudi properties are set.
cl::Device m_accelerator
Device object for the accelerator card.
constexpr unsigned int MAX_NUM_CLUSTERS
constexpr uint32_t STRIP_CONTAINER_BUF_SIZE
constexpr uint32_t PIXEL_CONTAINER_BUF_SIZE
SG::ReadCondHandle< T > makeHandle(const SG::ReadCondHandleKey< T > &key, const EventContext &ctx=Gaudi::Hive::currentContext())
The StripClusterAuxInput struct is used to simplify the creation of the xAOD::StripClusterContainer.
std::vector< unsigned long long > rdoList