ATLAS Offline Software
Loading...
Searching...
No Matches
F600IntegrationAlg.cxx
Go to the documentation of this file.
1/*
2Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3*/
4
9
11{
12 std::string F600IntegrationAlg::get_cu_name(const std::string& kernel_name, int cu) {
13 std::string full_cu_name = kernel_name + ":{" + kernel_name + "_" + std::to_string(cu) + "}";
14 ATH_MSG_DEBUG("LOADING " + full_cu_name);
15 return full_cu_name;
16 }
17
18 void F600IntegrationAlg::outputHexData(size_t dataLen, uint64_t *data, const std::string& dataDescriptor) const {
19 ATH_MSG_DEBUG("STARTING " << dataDescriptor << " words:");
20 for (size_t i = 0; i < dataLen; i++) {
21 ATH_MSG_DEBUG(std::hex << std::setw(16) << std::setfill('0') << data[i]);
22 }
23 ATH_MSG_DEBUG("ENDING " << dataDescriptor << " words");
24 }
25
27 {
// Set-up performed by this body, in order: retrieve the chrono service, obtain the
// XRT device from the OpenCL accelerator handle, create the kernel / IP objects and
// the command queue, then retrieve the tools and initialise the track key.
// NOTE(review): the function signature and some statements are not visible in this
// listing (the embedded numbering skips 34, 44, 73 and 75) — confirm against the
// original file before editing this body.
28 ATH_MSG_INFO("Running on the FPGA accelerator");
29
30 ATH_CHECK(m_chronoSvc.retrieve());
31
32 {
// Chrono object scoped to this block; the XRT device handle is derived from the
// OpenCL device held in m_accelerator and its bdf / name are logged.
33 Athena::Chrono chrono("Platform and device initlize", m_chronoSvc.get());
35
36 ATH_MSG_INFO("Initializing xrt::device");
37 m_xrt_accelerator = xrt::opencl::get_xrt_device(m_accelerator.get());
38 ATH_MSG_INFO("xrt::device bdf: " + m_xrt_accelerator.get_info<xrt::info::device::bdf>() + " with name: " + m_xrt_accelerator.get_info<xrt::info::device::name>());
39 }
40
41 {
// Chrono object scoped to this block; only the log message is visible here.
// NOTE(review): presumably the xclbin is loaded on the line missing from this listing — confirm.
42 Athena::Chrono chrono("CL::loadProgram", m_chronoSvc.get());
43 ATH_MSG_INFO("LOADING PROGRAM: " + m_xclbin);
45 }
46
// NOTE(review): the same 'err' is handed to every constructor below but is inspected
// only once, after the command queue is created. A failure reported by an earlier
// kernel constructor would be overwritten by later calls — confirm whether each
// call should be checked individually.
47 cl_int err = CL_SUCCESS;
48
49 xrt::uuid loaded_xclbin_uuid = m_xrt_accelerator.get_xclbin_uuid();
// Compute-unit index passed to get_cu_name() when building the kernel / IP names.
50 int cu = 1;
51
52 // Pixel clustering
53 m_pixelClusterL2GKernels.push_back(cl::Kernel(m_program, get_cu_name(m_pixelClusteringKernelName, cu).c_str(), &err));
54
55 // Slicing Engine
// The loop currently runs exactly once (i == 0), creating one IP handle plus one
// input and one output kernel for compute unit 'cu'.
56 for (int i = 0; i < 1; ++i) {
57 m_pixelFirstStageSlicingIPs.push_back(xrt::ip(m_xrt_accelerator, loaded_xclbin_uuid, get_cu_name(m_pixelFirstStageSlicingIPName, cu + i)));
58 m_pixelFirstStageSlicingInputKernels.push_back(cl::Kernel(m_program, get_cu_name(m_pixelFirstStageInputKernelName, cu + i).c_str(), &err));
59 m_pixelFirstStageSlicingOutputKernels.push_back(cl::Kernel(m_program, get_cu_name(m_pixelFirstStageOutputKernelName, cu + i).c_str(), &err));
60 }
61
// Inside-out stage: one kernel built from the memory-read name, one from the memory-write name.
62 m_insideOutInputKernels.push_back(cl::Kernel(m_program, get_cu_name(m_memReadKernelName, cu).c_str(), &err));
63 m_insideOutOutputKernels.push_back(cl::Kernel(m_program, get_cu_name(m_memWriteKernelName, cu).c_str(), &err));
64
// Command queue created with profiling enabled.
65 m_queue = cl::CommandQueue(m_context, m_accelerator, CL_QUEUE_PROFILING_ENABLE , &err);
66
// At this point 'err' holds the status of the command-queue construction only.
67 if (err != CL_SUCCESS) {
68 return StatusCode::FAILURE;
69 }
70
71 ATH_CHECK(m_xaodClusterMaker.retrieve());
72 ATH_CHECK(m_testVectorTool.retrieve());
74 ATH_CHECK(m_FPGATrackKey.initialize());
76 return StatusCode::SUCCESS;
77 }
78
79 int F600IntegrationAlg::readTVLength(std::string filepath) const {
80 int n_words;
81 int size = 0;
82 ATH_MSG_DEBUG("Reading len of: " << filepath);
83 size = std::filesystem::file_size(filepath);
84 n_words = size / (16 + 1);
85 ATH_MSG_DEBUG("Num TV words (64b): " << n_words);
86 return n_words;
87 }
88
89 void F600IntegrationAlg::readTV(const std::string& filepath, int n_words, uint64_t *data) const {
90 std::ifstream file_in;
91 std::string line_in = "";
92
93 std::fill(data, data + n_words, 0);
94
95 ATH_MSG_DEBUG("Reading data from file: " + filepath);
96 file_in.open(filepath);
97 if (file_in.is_open()) {
98 for(int i = 0; i < n_words; i++) {
99 std::getline (file_in, line_in);
100 data[i] = std::stoul(line_in.c_str(), NULL, 16);
101 }
102 } else {
103 ATH_MSG_INFO("ERROR! Unable to open data file: " + filepath);
104 }
105 ATH_MSG_DEBUG("Closing " << filepath);
106 file_in.close();
107 }
108
109 StatusCode F600IntegrationAlg::execute(const EventContext &ctx) const
110 {
111 ATH_MSG_DEBUG("Executing F600IntegrationAlg");
112
113 int n_pixel_words = readTVLength(m_pixelClusterInputPath);
114 int n_pixel_ghitz_words = readTVLength(m_pixelStageOneSlicingInputPath);
115 int n_pixel_first_sliced_words = readTVLength(m_insideOutInputPath);
116 int n_inside_out_words = 4 * 1024;
117
118 ATH_MSG_DEBUG("Pixel Clustering TV words = " << n_pixel_words << " (64b)");
119 ATH_MSG_DEBUG("Pixel GHITZ TV words = " << n_pixel_ghitz_words << " (64b)");
120 ATH_MSG_DEBUG("Pixel Sliced TV words = " << n_pixel_first_sliced_words << " (64b)");
121
122 uint64_t pixel_cls_input_data[n_pixel_words];
123 uint64_t pixel_ghitz_data[n_pixel_ghitz_words];
124 uint64_t pixel_first_sliced_data[n_pixel_first_sliced_words];
125
126 readTV(m_pixelClusterInputPath, n_pixel_words, pixel_cls_input_data);
127 readTV(m_pixelStageOneSlicingInputPath, n_pixel_ghitz_words, pixel_ghitz_data);
128 readTV(m_insideOutInputPath, n_pixel_first_sliced_words, pixel_first_sliced_data);
129
130 std::unique_lock lock(m_fpgaHandleMtx);
131
132 cl_int err = 0;
133 size_t pixel_size_bytes = n_pixel_words * sizeof(uint64_t);
134 size_t pixel_ghitz_size_bytes = n_pixel_ghitz_words * sizeof(uint64_t);
135 size_t pixel_first_sliced_size_bytes = n_pixel_first_sliced_words * sizeof(uint64_t);
136 size_t inside_out_size_bytes = n_inside_out_words * sizeof(uint64_t);
137
138 ATH_MSG_DEBUG("Allocating pxl cls input buffer");
139 m_pixelClusterL2GInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, pixel_size_bytes, NULL, &err));
140 ATH_MSG_DEBUG("Allocating pxl cls out buffer");
141 m_pixelClusterL2GOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, pixel_ghitz_size_bytes, NULL, &err));
142 ATH_MSG_DEBUG("Allocating pxl cls EDM out buffer");
143 m_pixelClusterL2GEDMOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, pixel_ghitz_size_bytes, NULL, &err));
144 ATH_MSG_DEBUG("Allocating Input Buffer in Global Memory");
145 m_pixelFirstStageSlicingInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, pixel_ghitz_size_bytes, NULL, &err));
146 ATH_MSG_DEBUG("Allocating Output Buffer in Global Memory");
147 m_pixelFirstStageSlicingOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, pixel_first_sliced_size_bytes, NULL, &err));
148 ATH_MSG_DEBUG("Allocating Inside PR Input Buffer in Global Memory");
149 m_insideOutInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, pixel_first_sliced_size_bytes, NULL, &err));
150 ATH_MSG_DEBUG("Allocating Inside PR Output Buffer in Global Memory");
151 m_insideOutOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, inside_out_size_bytes, NULL, &err));
152
153 // Create Input/Output Kernel runs and set arguments
154 ATH_MSG_DEBUG("Setting buffer for pxl cls input.");
155 m_pixelClusterL2GKernels[0].setArg(0, m_pixelClusterL2GInputBufferList[0]);
156 ATH_MSG_DEBUG("Setting buffer for pxl cls output.");
157 m_pixelClusterL2GKernels[0].setArg(1, m_pixelClusterL2GOutputBufferList[0]);
158 ATH_MSG_DEBUG("Setting buffer for pxl cls EDM output.");
159 m_pixelClusterL2GKernels[0].setArg(2, m_pixelClusterL2GEDMOutputBufferList[0]);
160
161 ATH_MSG_DEBUG("Setting buffer for Input kernel run.");
162 m_pixelFirstStageSlicingInputKernels[0].setArg(0, m_pixelFirstStageSlicingInputBufferList[0]);
163 ATH_MSG_DEBUG("Setting # words for Input kernel run.");
164 m_pixelFirstStageSlicingInputKernels[0].setArg(1, n_pixel_ghitz_words);
165
166 ATH_MSG_DEBUG("Setting buffer for Output kernel run.");
167 m_pixelFirstStageSlicingOutputKernels[0].setArg(0, m_pixelFirstStageSlicingOutputBufferList[0]);
168 ATH_MSG_DEBUG("Setting # words for Output kernel run.");
169 m_pixelFirstStageSlicingOutputKernels[0].setArg(1, n_pixel_first_sliced_words);
170
171 ATH_MSG_DEBUG("Setting buffer for Input kernel run INSIDE OUT.");
172 m_insideOutInputKernels[0].setArg(0, m_insideOutInputBufferList[0]);
173
174 ATH_MSG_DEBUG("Setting buffer for Output kernel run INSIDE OUT.");
175 m_insideOutOutputKernels[0].setArg(0, m_insideOutOutputBufferList[0]);
176
177 // Pixel Clustering
178 // Write
179 ATH_MSG_DEBUG("Loading input data to pixel clustering kernel...");
180 m_queue.enqueueWriteBuffer(m_pixelClusterL2GInputBufferList[0], CL_TRUE, 0, pixel_size_bytes, &pixel_cls_input_data, NULL, NULL);
181
182 // Verify write
183 outputHexData(n_pixel_words, pixel_cls_input_data, "Pixel Clustering Input");
184
185 // Execute
186 ATH_MSG_DEBUG("Executing pixel clustering");
187 m_queue.enqueueTask(m_pixelClusterL2GKernels[0]);
188 m_queue.finish();
189
190 // Read
191 ATH_MSG_DEBUG("Reading pixel clustering output");
192 uint64_t cls_out_data[n_pixel_ghitz_words];
193 m_queue.enqueueReadBuffer(m_pixelClusterL2GOutputBufferList[0], CL_TRUE, 0, pixel_ghitz_size_bytes, &cls_out_data);
194
195 outputHexData(n_pixel_ghitz_words, cls_out_data, "Real Pixel Clustering Output");
196
197 // Read
198 ATH_MSG_DEBUG("Reading pixel clustering EDM output");
199 uint64_t cls_out_edm_data[n_pixel_ghitz_words];
200 m_queue.enqueueReadBuffer(m_pixelClusterL2GEDMOutputBufferList[0], CL_TRUE, 0, pixel_ghitz_size_bytes, &cls_out_edm_data);
201
202 outputHexData(n_pixel_ghitz_words, cls_out_edm_data, "Real Pixel Clustering EDM Output");
203
204
205 ATH_MSG_DEBUG("Loading input data to Slicing Engine...");
206 m_queue.enqueueWriteBuffer(m_pixelFirstStageSlicingInputBufferList[0], CL_TRUE, 0, pixel_ghitz_size_bytes, &pixel_ghitz_data, NULL, NULL);
207
208 outputHexData(n_pixel_ghitz_words, pixel_ghitz_data, "Ideal Pixel Clustering Output");
209
210 // Reset slicing engine event counters
211 m_pixelFirstStageSlicingIPs[0].write_register(USER_CTRL_OFFSET, EVENT_COUNT_RST);
212
213 ATH_MSG_DEBUG("------------------------------------------------------------------------------------------------");
214
215 // Start Input/Output Kernel runs
216 ATH_MSG_DEBUG("Starting Input kernel run.");
217 m_queue.enqueueTask(m_pixelFirstStageSlicingInputKernels[0]);
218 ATH_MSG_DEBUG("Starting Output kernel run.");
219 m_queue.enqueueTask(m_pixelFirstStageSlicingOutputKernels[0]);
220 m_queue.finish();
221
222 // Get the output;
223 ATH_MSG_DEBUG("Synchronize output buffer data from device global memory");
224 uint64_t se_out_data[n_pixel_first_sliced_words];
225 m_queue.enqueueReadBuffer(m_pixelFirstStageSlicingOutputBufferList[0], CL_TRUE, 0, pixel_first_sliced_size_bytes, &se_out_data);
226
227 outputHexData(n_pixel_first_sliced_words, se_out_data, "Slicing Engine Output");
228
229 ATH_MSG_DEBUG("Write SE output to inside out input (cross DDR banks)");
230 m_queue.enqueueWriteBuffer(m_insideOutInputBufferList[0], CL_TRUE, 0, pixel_first_sliced_size_bytes, &pixel_first_sliced_data, NULL, NULL);
231
232 ATH_MSG_DEBUG("Starting Inside Out Input kernel run.");
233 m_queue.enqueueTask(m_insideOutInputKernels[0]);
234 m_queue.finish();
235 ATH_MSG_DEBUG("Starting Inside Out Output kernel run.");
236 m_queue.enqueueTask(m_insideOutOutputKernels[0]);
237 m_queue.finish();
238
239
240 ATH_MSG_DEBUG("Slicing Engine System Done");
241 ATH_MSG_DEBUG("------------------------------------------------------------------------------------------------");
242
243
244 return StatusCode::SUCCESS;
245 }
246
248 {
249 ATH_MSG_INFO("Average Kernel execution time: " << m_sum_kernelTime /m_num_Events /1e6 << " ms");
250 return StatusCode::SUCCESS;
251 }
252}
#define ATH_CHECK
Evaluate an expression and check for errors.
#define ATH_MSG_INFO(x)
#define ATH_MSG_DEBUG(x)
Exception-safe IChronoSvc caller.
#define USER_CTRL_OFFSET
#define EVENT_COUNT_RST
char data[hepevt_bytes_allocation_ATLAS]
Definition HepEvt.cxx:11
Exception-safe IChronoSvc caller.
Definition Chrono.h:50
Gaudi::Property< std::string > m_pixelFirstStageInputKernelName
std::atomic< cl_ulong > m_sum_kernelTime
Sum for the average time of the kernel execution.
void outputHexData(size_t dataLen, uint64_t *data, const std::string &dataDescriptor) const
Gaudi::Property< std::string > m_pixelStageOneSlicingInputPath
Gaudi::Property< std::string > m_pixelClusteringKernelName
virtual StatusCode initialize() override final
Detect the OpenCL devices and prepare OpenCL context.
Gaudi::Property< std::string > m_insideOutInputPath
std::string get_cu_name(const std::string &kernel_name, int cu)
ToolHandle< xAODClusterMaker > m_xaodClusterMaker
Tool for creating xAOD containers.
Gaudi::Property< std::string > m_pixelFirstStageOutputKernelName
Gaudi::Property< std::string > m_memReadKernelName
virtual StatusCode finalize() override final
std::atomic< ulonglong > m_num_Events
Number of events for the average time of the kernel execution.
virtual StatusCode execute(const EventContext &ctx) const override final
Should be overridden by derived classes to perform meaningful work.
ToolHandle< TestVectorTool > m_testVectorTool
Tool for preparing test vectors.
Gaudi::Property< std::string > m_xclbin
Path and name of the xclbin file.
Gaudi::Property< std::string > m_memWriteKernelName
ToolHandle< FPGADataFormatTool > m_FPGADataFormatTool
Tool for formatting FPGA data.
Gaudi::Property< std::string > m_pixelClusterInputPath
Gaudi::Property< std::string > m_pixelFirstStageSlicingIPName
ToolHandle< OutputConversionTool > m_outputConversionTool
SG::ReadHandleKey< FPGATrackSimTrackCollection > m_FPGATrackKey
void readTV(const std::string &filepath, int n_words, uint64_t *data) const
ServiceHandle< IChronoSvc > m_chronoSvc
Service for timing the algorithm.
StatusCode loadProgram(const std::string &xclbin)
Find the xclbin file and load it into the OpenCL program object.
cl::Program m_program
Program object containing the kernel.
virtual StatusCode initialize() override
Detect the OpenCL devices and prepare OpenCL context.
cl::Context m_context
Context object for the application.
cl::Device m_accelerator
Device object for the accelerator card.
The class for encoding RDO to FPGA format.