ATLAS Offline Software
Loading...
Searching...
No Matches
F600IntegrationAlg.cxx
Go to the documentation of this file.
1/*
2Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3*/
4
8
10{
11 std::string F600IntegrationAlg::get_cu_name(const std::string& kernel_name, int cu) {
12 std::string full_cu_name = kernel_name + ":{" + kernel_name + "_" + std::to_string(cu) + "}";
13 ATH_MSG_DEBUG("LOADING " + full_cu_name);
14 return full_cu_name;
15 }
16
17 void F600IntegrationAlg::outputHexData(size_t dataLen, uint64_t *data, const std::string& dataDescriptor) const {
18 ATH_MSG_DEBUG("STARTING " << dataDescriptor << " words:");
19 for (size_t i = 0; i < dataLen; i++) {
20 ATH_MSG_DEBUG(std::hex << std::setw(16) << std::setfill('0') << data[i]);
21 }
22 ATH_MSG_DEBUG("ENDING " << dataDescriptor << " words");
23 }
24
26 {
// Body of the initialisation step: retrieves the chrono service, obtains the
// xrt::device for m_accelerator, creates the kernel / IP handles for compute
// unit 1, creates the command queue, and retrieves the tools and the track key.
// Returns StatusCode::FAILURE if the final `err` check fails (ATH_CHECK is
// used on the retrieve/initialize calls).
27 ATH_MSG_INFO("Running on the FPGA accelerator");
28
29 ATH_CHECK(m_chronoSvc.retrieve());
30
31 {
// Scoped Athena::Chrono object labelled for the platform/device setup.
32 Athena::Chrono chrono("Platform and device initlize", m_chronoSvc.get());
34
35 ATH_MSG_INFO("Initializing xrt::device");
// Obtain the XRT device handle that corresponds to the OpenCL device.
36 m_xrt_accelerator = xrt::opencl::get_xrt_device(m_accelerator.get());
37 ATH_MSG_INFO("xrt::device bdf: " + m_xrt_accelerator.get_info<xrt::info::device::bdf>() + " with name: " + m_xrt_accelerator.get_info<xrt::info::device::name>());
38 }
39
40 {
// NOTE(review): in this listing the scope holds only the log statement; the
// call that actually loads the xclbin is not visible here - confirm against
// the real source.
41 Athena::Chrono chrono("CL::loadProgram", m_chronoSvc.get());
42 ATH_MSG_INFO("LOADING PROGRAM: " + m_xclbin);
44 }
45
// Shared status variable written by every cl::Kernel / cl::CommandQueue
// constructor below.
46 cl_int err = CL_SUCCESS;
47
48 xrt::uuid loaded_xclbin_uuid = m_xrt_accelerator.get_xclbin_uuid();
// Compute-unit index used when building the names passed to get_cu_name().
49 int cu = 1;
50
51 // Pixel clustering
52 m_pixelClusterL2GKernels.push_back(cl::Kernel(m_program, get_cu_name(m_pixelClusteringKernelName, cu).c_str(), &err));
53
54 // Slicing Engine
// The loop bound is 1, so exactly one IP handle and one input/output kernel
// pair are created (for compute unit `cu`).
55 for (int i = 0; i < 1; ++i) {
56 m_pixelFirstStageSlicingIPs.push_back(xrt::ip(m_xrt_accelerator, loaded_xclbin_uuid, get_cu_name(m_pixelFirstStageSlicingIPName, cu + i)));
57 m_pixelFirstStageSlicingInputKernels.push_back(cl::Kernel(m_program, get_cu_name(m_pixelFirstStageInputKernelName, cu + i).c_str(), &err));
58 m_pixelFirstStageSlicingOutputKernels.push_back(cl::Kernel(m_program, get_cu_name(m_pixelFirstStageOutputKernelName, cu + i).c_str(), &err));
59 }
60
// Memory read / write kernels used for the inside-out stage.
61 m_insideOutInputKernels.push_back(cl::Kernel(m_program, get_cu_name(m_memReadKernelName, cu).c_str(), &err));
62 m_insideOutOutputKernels.push_back(cl::Kernel(m_program, get_cu_name(m_memWriteKernelName, cu).c_str(), &err));
63
// Command queue created with profiling enabled.
64 m_queue = cl::CommandQueue(m_context, m_accelerator, CL_QUEUE_PROFILING_ENABLE , &err);
65
// NOTE(review): every constructor above writes to the same `err`, and this is
// the only place it is tested, so only the status of the last call (the
// command queue) is actually checked; earlier kernel-creation failures would
// be overwritten. Consider checking after each call.
66 if (err != CL_SUCCESS) {
67 return StatusCode::FAILURE;
68 }
69
70 ATH_CHECK(m_xaodClusterMaker.retrieve());
71 ATH_CHECK(m_testVectorTool.retrieve());
73 ATH_CHECK(m_FPGATrackKey.initialize());
75 return StatusCode::SUCCESS;
76 }
77
78 int F600IntegrationAlg::readTVLength(std::string filepath) const {
79 int n_words;
80 int size = 0;
81 ATH_MSG_DEBUG("Reading len of: " << filepath);
82 size = std::filesystem::file_size(filepath);
83 n_words = size / (16 + 1);
84 ATH_MSG_DEBUG("Num TV words (64b): " << n_words);
85 return n_words;
86 }
87
88 void F600IntegrationAlg::readTV(const std::string& filepath, int n_words, uint64_t *data) const {
89 std::ifstream file_in;
90 std::string line_in = "";
91
92 std::fill(data, data + n_words, 0);
93
94 ATH_MSG_DEBUG("Reading data from file: " + filepath);
95 file_in.open(filepath);
96 if (file_in.is_open()) {
97 for(int i = 0; i < n_words; i++) {
98 std::getline (file_in, line_in);
99 data[i] = std::stoul(line_in.c_str(), NULL, 16);
100 }
101 } else {
102 ATH_MSG_INFO("ERROR! Unable to open data file: " + filepath);
103 }
104 ATH_MSG_DEBUG("Closing " << filepath);
105 file_in.close();
106 }
107
108 StatusCode F600IntegrationAlg::execute(const EventContext &ctx) const
109 {
110 ATH_MSG_DEBUG("Executing F600IntegrationAlg");
111
112 int n_pixel_words = readTVLength(m_pixelClusterInputPath);
113 int n_pixel_ghitz_words = readTVLength(m_pixelStageOneSlicingInputPath);
114 int n_pixel_first_sliced_words = readTVLength(m_insideOutInputPath);
115 int n_inside_out_words = 4 * 1024;
116
117 ATH_MSG_DEBUG("Pixel Clustering TV words = " << n_pixel_words << " (64b)");
118 ATH_MSG_DEBUG("Pixel GHITZ TV words = " << n_pixel_ghitz_words << " (64b)");
119 ATH_MSG_DEBUG("Pixel Sliced TV words = " << n_pixel_first_sliced_words << " (64b)");
120
121 uint64_t pixel_cls_input_data[n_pixel_words];
122 uint64_t pixel_ghitz_data[n_pixel_ghitz_words];
123 uint64_t pixel_first_sliced_data[n_pixel_first_sliced_words];
124
125 readTV(m_pixelClusterInputPath, n_pixel_words, pixel_cls_input_data);
126 readTV(m_pixelStageOneSlicingInputPath, n_pixel_ghitz_words, pixel_ghitz_data);
127 readTV(m_insideOutInputPath, n_pixel_first_sliced_words, pixel_first_sliced_data);
128
129 std::unique_lock lock(m_fpgaHandleMtx);
130
131 cl_int err = 0;
132 size_t pixel_size_bytes = n_pixel_words * sizeof(uint64_t);
133 size_t pixel_ghitz_size_bytes = n_pixel_ghitz_words * sizeof(uint64_t);
134 size_t pixel_first_sliced_size_bytes = n_pixel_first_sliced_words * sizeof(uint64_t);
135 size_t inside_out_size_bytes = n_inside_out_words * sizeof(uint64_t);
136
137 ATH_MSG_DEBUG("Allocating pxl cls input buffer");
138 m_pixelClusterL2GInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, pixel_size_bytes, NULL, &err));
139 ATH_MSG_DEBUG("Allocating pxl cls out buffer");
140 m_pixelClusterL2GOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, pixel_ghitz_size_bytes, NULL, &err));
141 ATH_MSG_DEBUG("Allocating pxl cls EDM out buffer");
142 m_pixelClusterL2GEDMOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, pixel_ghitz_size_bytes, NULL, &err));
143 ATH_MSG_DEBUG("Allocating Input Buffer in Global Memory");
144 m_pixelFirstStageSlicingInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, pixel_ghitz_size_bytes, NULL, &err));
145 ATH_MSG_DEBUG("Allocating Output Buffer in Global Memory");
146 m_pixelFirstStageSlicingOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, pixel_first_sliced_size_bytes, NULL, &err));
147 ATH_MSG_DEBUG("Allocating Inside PR Input Buffer in Global Memory");
148 m_insideOutInputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, pixel_first_sliced_size_bytes, NULL, &err));
149 ATH_MSG_DEBUG("Allocating Inside PR Output Buffer in Global Memory");
150 m_insideOutOutputBufferList.push_back(cl::Buffer(m_context, CL_MEM_READ_WRITE, inside_out_size_bytes, NULL, &err));
151
152 // Create Input/Output Kernel runs and set arguments
153 ATH_MSG_DEBUG("Setting buffer for pxl cls input.");
154 m_pixelClusterL2GKernels[0].setArg(0, m_pixelClusterL2GInputBufferList[0]);
155 ATH_MSG_DEBUG("Setting buffer for pxl cls output.");
156 m_pixelClusterL2GKernels[0].setArg(1, m_pixelClusterL2GOutputBufferList[0]);
157 ATH_MSG_DEBUG("Setting buffer for pxl cls EDM output.");
158 m_pixelClusterL2GKernels[0].setArg(2, m_pixelClusterL2GEDMOutputBufferList[0]);
159
160 ATH_MSG_DEBUG("Setting buffer for Input kernel run.");
161 m_pixelFirstStageSlicingInputKernels[0].setArg(0, m_pixelFirstStageSlicingInputBufferList[0]);
162 ATH_MSG_DEBUG("Setting # words for Input kernel run.");
163 m_pixelFirstStageSlicingInputKernels[0].setArg(1, n_pixel_ghitz_words);
164
165 ATH_MSG_DEBUG("Setting buffer for Output kernel run.");
166 m_pixelFirstStageSlicingOutputKernels[0].setArg(0, m_pixelFirstStageSlicingOutputBufferList[0]);
167 ATH_MSG_DEBUG("Setting # words for Output kernel run.");
168 m_pixelFirstStageSlicingOutputKernels[0].setArg(1, n_pixel_first_sliced_words);
169
170 ATH_MSG_DEBUG("Setting buffer for Input kernel run INSIDE OUT.");
171 m_insideOutInputKernels[0].setArg(0, m_insideOutInputBufferList[0]);
172
173 ATH_MSG_DEBUG("Setting buffer for Output kernel run INSIDE OUT.");
174 m_insideOutOutputKernels[0].setArg(0, m_insideOutOutputBufferList[0]);
175
176 // Pixel Clustering
177 // Write
178 ATH_MSG_DEBUG("Loading input data to pixel clustering kernel...");
179 m_queue.enqueueWriteBuffer(m_pixelClusterL2GInputBufferList[0], CL_TRUE, 0, pixel_size_bytes, &pixel_cls_input_data, NULL, NULL);
180
181 // Verify write
182 outputHexData(n_pixel_words, pixel_cls_input_data, "Pixel Clustering Input");
183
184 // Execute
185 ATH_MSG_DEBUG("Executing pixel clustering");
186 m_queue.enqueueTask(m_pixelClusterL2GKernels[0]);
187 m_queue.finish();
188
189 // Read
190 ATH_MSG_DEBUG("Reading pixel clustering output");
191 uint64_t cls_out_data[n_pixel_ghitz_words];
192 m_queue.enqueueReadBuffer(m_pixelClusterL2GOutputBufferList[0], CL_TRUE, 0, pixel_ghitz_size_bytes, &cls_out_data);
193
194 outputHexData(n_pixel_ghitz_words, cls_out_data, "Real Pixel Clustering Output");
195
196 // Read
197 ATH_MSG_DEBUG("Reading pixel clustering EDM output");
198 uint64_t cls_out_edm_data[n_pixel_ghitz_words];
199 m_queue.enqueueReadBuffer(m_pixelClusterL2GEDMOutputBufferList[0], CL_TRUE, 0, pixel_ghitz_size_bytes, &cls_out_edm_data);
200
201 outputHexData(n_pixel_ghitz_words, cls_out_edm_data, "Real Pixel Clustering EDM Output");
202
203
204 ATH_MSG_DEBUG("Loading input data to Slicing Engine...");
205 m_queue.enqueueWriteBuffer(m_pixelFirstStageSlicingInputBufferList[0], CL_TRUE, 0, pixel_ghitz_size_bytes, &pixel_ghitz_data, NULL, NULL);
206
207 outputHexData(n_pixel_ghitz_words, pixel_ghitz_data, "Ideal Pixel Clustering Output");
208
209 // Reset slicing engine event counters
210 m_pixelFirstStageSlicingIPs[0].write_register(USER_CTRL_OFFSET, EVENT_COUNT_RST);
211
212 ATH_MSG_DEBUG("------------------------------------------------------------------------------------------------");
213
214 // Start Input/Output Kernel runs
215 ATH_MSG_DEBUG("Starting Input kernel run.");
216 m_queue.enqueueTask(m_pixelFirstStageSlicingInputKernels[0]);
217 ATH_MSG_DEBUG("Starting Output kernel run.");
218 m_queue.enqueueTask(m_pixelFirstStageSlicingOutputKernels[0]);
219 m_queue.finish();
220
221 // Get the output;
222 ATH_MSG_DEBUG("Synchronize output buffer data from device global memory");
223 uint64_t se_out_data[n_pixel_first_sliced_words];
224 m_queue.enqueueReadBuffer(m_pixelFirstStageSlicingOutputBufferList[0], CL_TRUE, 0, pixel_first_sliced_size_bytes, &se_out_data);
225
226 outputHexData(n_pixel_first_sliced_words, se_out_data, "Slicing Engine Output");
227
228 ATH_MSG_DEBUG("Write SE output to inside out input (cross DDR banks)");
229 m_queue.enqueueWriteBuffer(m_insideOutInputBufferList[0], CL_TRUE, 0, pixel_first_sliced_size_bytes, &pixel_first_sliced_data, NULL, NULL);
230
231 ATH_MSG_DEBUG("Starting Inside Out Input kernel run.");
232 m_queue.enqueueTask(m_insideOutInputKernels[0]);
233 m_queue.finish();
234 ATH_MSG_DEBUG("Starting Inside Out Output kernel run.");
235 m_queue.enqueueTask(m_insideOutOutputKernels[0]);
236 m_queue.finish();
237
238
239 ATH_MSG_DEBUG("Slicing Engine System Done");
240 ATH_MSG_DEBUG("------------------------------------------------------------------------------------------------");
241
242
243 return StatusCode::SUCCESS;
244 }
245
247 {
248 ATH_MSG_INFO("Average Kernel execution time: " << m_sum_kernelTime /m_num_Events /1e6 << " ms");
249 return StatusCode::SUCCESS;
250 }
251}
#define ATH_CHECK
Evaluate an expression and check for errors.
#define ATH_MSG_INFO(x)
#define ATH_MSG_DEBUG(x)
Exception-safe IChronoSvc caller.
#define USER_CTRL_OFFSET
#define EVENT_COUNT_RST
char data[hepevt_bytes_allocation_ATLAS]
Definition HepEvt.cxx:11
Exception-safe IChronoSvc caller.
Definition Chrono.h:50
Gaudi::Property< std::string > m_pixelFirstStageInputKernelName
std::atomic< cl_ulong > m_sum_kernelTime
Sum for the average time of the kernel execution.
void outputHexData(size_t dataLen, uint64_t *data, const std::string &dataDescriptor) const
Gaudi::Property< std::string > m_pixelStageOneSlicingInputPath
Gaudi::Property< std::string > m_pixelClusteringKernelName
virtual StatusCode initialize() override final
Detect the OpenCL devices and prepare OpenCL context.
Gaudi::Property< std::string > m_insideOutInputPath
std::string get_cu_name(const std::string &kernel_name, int cu)
ToolHandle< xAODClusterMaker > m_xaodClusterMaker
Tool for creating xAOD containers.
Gaudi::Property< std::string > m_pixelFirstStageOutputKernelName
Gaudi::Property< std::string > m_memReadKernelName
virtual StatusCode finalize() override final
std::atomic< ulonglong > m_num_Events
Number of events for the average time of the kernel execution.
virtual StatusCode execute(const EventContext &ctx) const override final
Should be overridden by derived classes to perform meaningful work.
ToolHandle< TestVectorTool > m_testVectorTool
Tool for preparing test vectors.
Gaudi::Property< std::string > m_xclbin
Path and name of the xclbin file.
Gaudi::Property< std::string > m_memWriteKernelName
ToolHandle< FPGADataFormatTool > m_FPGADataFormatTool
Tool for formatting FPGA data.
Gaudi::Property< std::string > m_pixelClusterInputPath
Gaudi::Property< std::string > m_pixelFirstStageSlicingIPName
ToolHandle< OutputConversionTool > m_outputConversionTool
SG::ReadHandleKey< FPGATrackSimTrackCollection > m_FPGATrackKey
void readTV(const std::string &filepath, int n_words, uint64_t *data) const
ServiceHandle< IChronoSvc > m_chronoSvc
Service for timing the algorithm.
StatusCode loadProgram(const std::string &xclbin)
Find the xclbin file and load it into the OpenCL program object.
cl::Program m_program
Program object containing the kernel.
virtual StatusCode initialize() override
Detect the OpenCL devices and prepare OpenCL context.
cl::Context m_context
Context object for the application.
cl::Device m_accelerator
Device object for the accelerator card.
The class for encoding RDO to FPGA format.