ATLAS Offline Software
Loading...
Searching...
No Matches
VectorMultOCLExampleAlg.cxx
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3//
4
5// Local include(s).
7
8namespace AthExXRT {
9
11
12 ATH_MSG_INFO("initialize_global()");
13
14 ATH_CHECK(m_DeviceMgmtSvc.retrieve());
15
16 // Retrieve the list of OpenCL handle(s) providing the kernel.
17 m_handles = m_DeviceMgmtSvc->get_opencl_handles_by_kernel_name(s_krnl_name);
18
19 if (m_handles.empty()) {
20 ATH_MSG_ERROR("No OpenCL context provides kernel '" << s_krnl_name << "'");
21 return StatusCode::FAILURE;
22 }
23
24 return StatusCode::SUCCESS;
25}
26
28
29 ATH_MSG_INFO("initialize_worker()");
30
31 cl_int err = CL_SUCCESS;
32
33 // Allocate slot specific resources.
34 std::size_t slotIdx = 0;
35 for (SlotData& slot : m_slots) {
36 ATH_MSG_DEBUG("Allocating resources for slot " << slotIdx);
37
38 if (m_handles.size() > 1) {
39 ATH_MSG_WARNING("More than one OpenCL context provides a '"
40 << s_krnl_name << "' kernel (" << m_handles.size()
41 << "), using the first one");
42 }
43 slot.m_context = m_handles[0].context;
44 slot.m_program = m_handles[0].program;
45
46 // Create kernel objects.
47 slot.m_kernel =
48 std::make_unique<cl::Kernel>(*slot.m_program, s_krnl_name, &err);
49 if (err != 0) {
51 "Could not create OpenCL kernel '"
53 << "', check that correct XCLBIN is programmed by AthXRT service");
54 return StatusCode::FAILURE;
55 }
56
57 // Create command queue.
58 slot.m_queue = std::make_unique<cl::CommandQueue>(
59 *slot.m_context, slot.m_context->getInfo<CL_CONTEXT_DEVICES>()[0], 0,
60 &err);
61 ATH_CHECK(err == CL_SUCCESS);
62
63 const std::size_t size_in_bytes = s_element_count * sizeof(uint32_t);
64
65 // Create buffer objects.
66 // This creates an aligned buffer object on both host and device.
67 slot.m_dev_buf_in1 = std::make_unique<cl::Buffer>(
68 *slot.m_context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
69 size_in_bytes, nullptr, &err);
70 ATH_CHECK(err == CL_SUCCESS);
71
72 slot.m_dev_buf_in2 = std::make_unique<cl::Buffer>(
73 *slot.m_context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
74 size_in_bytes, nullptr, &err);
75 ATH_CHECK(err == CL_SUCCESS);
76
77 slot.m_dev_buf_out = std::make_unique<cl::Buffer>(
78 *slot.m_context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
79 size_in_bytes, nullptr, &err);
80 ATH_CHECK(err == CL_SUCCESS);
81
82 slot.m_host_buf_in1 = (uint32_t*)slot.m_queue->enqueueMapBuffer(
83 *slot.m_dev_buf_in1, CL_TRUE, CL_MAP_WRITE, 0, size_in_bytes, nullptr,
84 nullptr, &err);
85 ATH_CHECK(err == CL_SUCCESS);
86
87 slot.m_host_buf_in2 = (uint32_t*)slot.m_queue->enqueueMapBuffer(
88 *slot.m_dev_buf_in2, CL_TRUE, CL_MAP_WRITE, 0, size_in_bytes, nullptr,
89 nullptr, &err);
90 ATH_CHECK(err == CL_SUCCESS);
91
92 slot.m_host_buf_out = (uint32_t*)slot.m_queue->enqueueMapBuffer(
93 *slot.m_dev_buf_out, CL_TRUE, CL_MAP_READ, 0, size_in_bytes, nullptr,
94 nullptr, &err);
95 ATH_CHECK(err == CL_SUCCESS);
96
97 // Set kernel arguments.
98 ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_in1, *slot.m_dev_buf_in1) ==
99 CL_SUCCESS);
100 ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_in2, *slot.m_dev_buf_in2) ==
101 CL_SUCCESS);
102 ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_out, *slot.m_dev_buf_out) ==
103 CL_SUCCESS);
104 ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_size, s_element_count) ==
105 CL_SUCCESS);
106
107 ++slotIdx;
108 }
109
110 return StatusCode::SUCCESS;
111}
112
114
115 ATH_MSG_INFO("stop_worker(): Cleaning OCL environment");
116 // Unmap buffer objects.
117 for (SlotData& slot : m_slots) {
118 ATH_CHECK(slot.m_queue->enqueueUnmapMemObject(
119 *slot.m_dev_buf_in1, slot.m_host_buf_in1) == CL_SUCCESS);
120 ATH_CHECK(slot.m_queue->enqueueUnmapMemObject(
121 *slot.m_dev_buf_in2, slot.m_host_buf_in2) == CL_SUCCESS);
122 ATH_CHECK(slot.m_queue->enqueueUnmapMemObject(
123 *slot.m_dev_buf_out, slot.m_host_buf_out) == CL_SUCCESS);
124 ATH_CHECK(slot.m_queue->finish() == CL_SUCCESS);
125 }
126
127 return StatusCode::SUCCESS;
128}
129
130StatusCode VectorMultOCLExampleAlg::execute(const EventContext& ctx) const {
131
132 // Get the slot (thread) specific data.
133 const SlotData& slot = *m_slots.get(ctx);
134
135 // Initialize the buffers with random data.
136 for (std::size_t i = 0; i < s_element_count; ++i) {
137 slot.m_host_buf_in1[i] = rand() % s_element_count;
138 slot.m_host_buf_in2[i] = rand() % s_element_count;
139 }
140
141 ATH_MSG_DEBUG("Transfer data buffer to device");
142 std::vector<cl::Memory> mems_vector = {*slot.m_dev_buf_in1,
143 *slot.m_dev_buf_in2};
144 ATH_CHECK(slot.m_queue->enqueueMigrateMemObjects(mems_vector, 0, nullptr,
145 nullptr) == CL_SUCCESS);
146
147 // Schedule the kernel.
148 ATH_MSG_DEBUG("Running kernel");
149 ATH_CHECK(slot.m_queue->enqueueTask(*slot.m_kernel, nullptr, nullptr) ==
150 CL_SUCCESS);
151
152 // Migrate data back to host.
153 ATH_MSG_DEBUG("Transfer data back to host");
154 std::vector<cl::Memory> mems_out_vector = {*slot.m_dev_buf_out};
155 ATH_CHECK(slot.m_queue->enqueueMigrateMemObjects(
156 mems_out_vector, CL_MIGRATE_MEM_OBJECT_HOST, nullptr,
157 nullptr) == CL_SUCCESS);
158 ATH_CHECK(slot.m_queue->finish() == CL_SUCCESS);
159
160 // Check that kernel results are correct.
161 bool correct = true;
162 for (std::size_t i = 0; i < s_element_count; ++i) {
163 uint32_t cpu_result = slot.m_host_buf_in1[i] * slot.m_host_buf_in2[i];
164 if (slot.m_host_buf_out[i] != cpu_result) {
165 ATH_MSG_ERROR("Error: Result mismatch: i = "
166 << i << ": CPU result = " << cpu_result
167 << " Device result = " << slot.m_host_buf_out[i]);
168 correct = false;
169 break;
170 }
171 }
172 if (correct) {
173 ATH_MSG_INFO("OpenCL vector multiplication test PASSED!");
174 } else {
175 ATH_MSG_ERROR("OpenCL vector multiplication test FAILED!");
176 return StatusCode::FAILURE;
177 }
178
179 return StatusCode::SUCCESS;
180}
181
182} // namespace AthExXRT
#define ATH_CHECK
Evaluate an expression and check for errors.
#define ATH_MSG_ERROR(x)
#define ATH_MSG_INFO(x)
#define ATH_MSG_WARNING(x)
#define ATH_MSG_DEBUG(x)
virtual StatusCode stop_worker() override
clean up
virtual StatusCode initialize_global() override
Global XRT initialization.
ServiceHandle< AthXRT::IDeviceMgmtSvc > m_DeviceMgmtSvc
The XRT device manager to use.
SG::SlotSpecificObj< SlotData > m_slots
List of slot-specific data.
std::vector< AthXRT::IDeviceMgmtSvc::OpenCLHandle > m_handles
virtual StatusCode initialize_worker() override
Initialization per process.
virtual StatusCode execute(const EventContext &ctx) const override
Function executing the algorithm.
std::unique_ptr< cl::Buffer > m_dev_buf_in1
Buffer objects.
std::unique_ptr< cl::CommandQueue > m_queue
Command queue object.
std::unique_ptr< cl::Kernel > m_kernel
Kernel object.