ATLAS Offline Software
VectorAddOCLExampleAlg.cxx
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3 //
4 
// Gaudi includes
#include "GaudiKernel/ConcurrencyFlags.h"

// Local include(s).
#include "VectorAddOCLExampleAlg.h"

// System include(s).
#include <random>

11 namespace AthExXRT {
12 
14 
15  ATH_MSG_INFO("initialize_global()");
16 
17  ATH_CHECK(m_DeviceMgmtSvc.retrieve());
18 
19  // Retrieve the list of OpencCL Handle(s) providing the kernel.
20  m_handles = m_DeviceMgmtSvc->get_opencl_handles_by_kernel_name(s_krnl_name);
21 
22  if (m_handles.empty()) {
23  ATH_MSG_ERROR("No OpenCL context provides kernel '" << s_krnl_name << "'");
24  return StatusCode::FAILURE;
25  }
26 
27  return StatusCode::SUCCESS;
28 }
29 
31 
32  ATH_MSG_INFO("initialize_worker()");
33 
34  cl_int err = CL_SUCCESS;
35 
36  // Allocate slot specific resources.
37  std::size_t slotIdx = 0;
38  for (SlotData& slot : m_slots) {
39  ATH_MSG_DEBUG("Allocating resources for slot " << slotIdx);
40 
41  if (m_handles.size() > 1) {
42  ATH_MSG_WARNING("More than one OpenCL context provides a '"
43  << s_krnl_name << "' kernel (" << m_handles.size()
44  << "), using the first one");
45  }
46  slot.m_context = m_handles[0].context;
47  slot.m_program = m_handles[0].program;
48 
49  // Create kernel objects.
50  slot.m_kernel =
51  std::make_unique<cl::Kernel>(*slot.m_program, s_krnl_name, &err);
52  if (err != 0) {
54  "Could not create OpenCL kernel '"
55  << s_krnl_name
56  << "', check that correct XCLBIN is programmed by AthXRT service");
57  return StatusCode::FAILURE;
58  }
59 
60  // Create command queue.
61  slot.m_queue = std::make_unique<cl::CommandQueue>(
62  *slot.m_context, slot.m_context->getInfo<CL_CONTEXT_DEVICES>()[0], 0,
63  &err);
64  ATH_CHECK(err == CL_SUCCESS);
65 
66  const std::size_t size_in_bytes = s_element_count * sizeof(uint32_t);
67 
68  // Create buffer objects.
69  // This create aligned buffer object both on host and device.
70  slot.m_dev_buf_in1 = std::make_unique<cl::Buffer>(
71  *slot.m_context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
72  size_in_bytes, nullptr, &err);
73  ATH_CHECK(err == CL_SUCCESS);
74 
75  slot.m_dev_buf_in2 = std::make_unique<cl::Buffer>(
76  *slot.m_context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
77  size_in_bytes, nullptr, &err);
78  ATH_CHECK(err == CL_SUCCESS);
79 
80  slot.m_dev_buf_out = std::make_unique<cl::Buffer>(
81  *slot.m_context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
82  size_in_bytes, nullptr, &err);
83  ATH_CHECK(err == CL_SUCCESS);
84 
85  slot.m_host_buf_in1 = (uint32_t*)slot.m_queue->enqueueMapBuffer(
86  *slot.m_dev_buf_in1, CL_TRUE, CL_MAP_WRITE, 0, size_in_bytes, nullptr,
87  nullptr, &err);
88  ATH_CHECK(err == CL_SUCCESS);
89 
90  slot.m_host_buf_in2 = (uint32_t*)slot.m_queue->enqueueMapBuffer(
91  *slot.m_dev_buf_in2, CL_TRUE, CL_MAP_WRITE, 0, size_in_bytes, nullptr,
92  nullptr, &err);
93  ATH_CHECK(err == CL_SUCCESS);
94 
95  slot.m_host_buf_out = (uint32_t*)slot.m_queue->enqueueMapBuffer(
96  *slot.m_dev_buf_out, CL_TRUE, CL_MAP_READ, 0, size_in_bytes, nullptr,
97  nullptr, &err);
98  ATH_CHECK(err == CL_SUCCESS);
99 
100  // Set kernel arguments.
101  ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_in1, *slot.m_dev_buf_in1) ==
102  CL_SUCCESS);
103  ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_in2, *slot.m_dev_buf_in2) ==
104  CL_SUCCESS);
105  ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_out, *slot.m_dev_buf_out) ==
106  CL_SUCCESS);
107  ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_size, s_element_count) ==
108  CL_SUCCESS);
109 
110  ++slotIdx;
111  }
112 
113  return StatusCode::SUCCESS;
114 }
115 
117 
118  ATH_MSG_INFO("stop_worker(): Cleaning OCL environment");
119  // Unmap buffer objects.
120  for (SlotData& slot : m_slots) {
121  ATH_CHECK(slot.m_queue->enqueueUnmapMemObject(
122  *slot.m_dev_buf_in1, slot.m_host_buf_in1) == CL_SUCCESS);
123  ATH_CHECK(slot.m_queue->enqueueUnmapMemObject(
124  *slot.m_dev_buf_in2, slot.m_host_buf_in2) == CL_SUCCESS);
125  ATH_CHECK(slot.m_queue->enqueueUnmapMemObject(
126  *slot.m_dev_buf_out, slot.m_host_buf_out) == CL_SUCCESS);
127  ATH_CHECK(slot.m_queue->finish() == CL_SUCCESS);
128  }
129 
130  return StatusCode::SUCCESS;
131 }
132 
133 StatusCode VectorAddOCLExampleAlg::execute(const EventContext& ctx) const {
134 
135  // Get the slot (thread) specific data.
136  const SlotData& slot = *m_slots.get(ctx);
137 
138  // Initialize the buffers with random data.
139  for (std::size_t i = 0; i < s_element_count; ++i) {
140  slot.m_host_buf_in1[i] = rand() % s_element_count;
141  slot.m_host_buf_in2[i] = rand() % s_element_count;
142  }
143 
144  ATH_MSG_DEBUG("Transfer data buffer to device");
145  std::vector<cl::Memory> mems_vector = {*slot.m_dev_buf_in1,
146  *slot.m_dev_buf_in2};
147  ATH_CHECK(slot.m_queue->enqueueMigrateMemObjects(mems_vector, 0, nullptr,
148  nullptr) == CL_SUCCESS);
149 
150  // Schedule the kernel.
151  ATH_MSG_DEBUG("Running kernel");
152  ATH_CHECK(slot.m_queue->enqueueTask(*slot.m_kernel, nullptr, nullptr) ==
153  CL_SUCCESS);
154 
155  // Migrate data back to host.
156  ATH_MSG_DEBUG("Transfer data back to host");
157  std::vector<cl::Memory> mems_out_vector = {*slot.m_dev_buf_out};
158  ATH_CHECK(slot.m_queue->enqueueMigrateMemObjects(
159  mems_out_vector, CL_MIGRATE_MEM_OBJECT_HOST, nullptr,
160  nullptr) == CL_SUCCESS);
161  ATH_CHECK(slot.m_queue->finish() == CL_SUCCESS);
162 
163  // Check that kernel results are correct.
164  bool correct = true;
165  for (std::size_t i = 0; i < s_element_count; ++i) {
166  uint32_t cpu_result = slot.m_host_buf_in1[i] + slot.m_host_buf_in2[i];
167  if (slot.m_host_buf_out[i] != cpu_result) {
168  ATH_MSG_ERROR("Error: Result mismatch: i = "
169  << i << ": CPU result = " << cpu_result
170  << " Device result = " << slot.m_host_buf_out[i]);
171  correct = false;
172  break;
173  }
174  }
175  if (correct) {
176  ATH_MSG_INFO("OpenCL vector addition test PASSED!");
177  } else {
178  ATH_MSG_ERROR("OpenCL vector addition test FAILED!");
179  return StatusCode::FAILURE;
180  }
181 
182  return StatusCode::SUCCESS;
183 }
184 
185 } // namespace AthExXRT
AthExXRT::VectorAddOCLExampleAlg::s_krnl_param_out
static constexpr int s_krnl_param_out
Definition: VectorAddOCLExampleAlg.h:63
AthExXRT::VectorAddOCLExampleAlg::SlotData::m_host_buf_out
uint32_t * m_host_buf_out
Definition: VectorAddOCLExampleAlg.h:92
AthExXRT::VectorAddOCLExampleAlg::SlotData::m_dev_buf_out
std::unique_ptr< cl::Buffer > m_dev_buf_out
Definition: VectorAddOCLExampleAlg.h:88
ATH_MSG_INFO
#define ATH_MSG_INFO(x)
Definition: AthMsgStreamMacros.h:31
xAOD::uint32_t
setEventNumber uint32_t
Definition: EventInfo_v1.cxx:127
AthExXRT::VectorAddOCLExampleAlg::m_slots
SG::SlotSpecificObj< SlotData > m_slots
List of slot-specific data.
Definition: VectorAddOCLExampleAlg.h:96
AthExXRT
Definition: VectorAddOCLExampleAlg.cxx:11
AthExXRT::VectorAddOCLExampleAlg::s_krnl_name
static constexpr char s_krnl_name[]
Definition: VectorAddOCLExampleAlg.h:57
AthExXRT::VectorAddOCLExampleAlg::SlotData::m_queue
std::unique_ptr< cl::CommandQueue > m_queue
Kernel run object.
Definition: VectorAddOCLExampleAlg.h:83
AthExXRT::VectorAddOCLExampleAlg::s_krnl_param_in1
static constexpr int s_krnl_param_in1
Definition: VectorAddOCLExampleAlg.h:61
AthExXRT::VectorAddOCLExampleAlg::m_DeviceMgmtSvc
ServiceHandle< AthXRT::IDeviceMgmtSvc > m_DeviceMgmtSvc
The XRT device manager to use.
Definition: VectorAddOCLExampleAlg.h:52
AthExXRT::VectorAddOCLExampleAlg::stop_worker
virtual StatusCode stop_worker() override
clean up
Definition: VectorAddOCLExampleAlg.cxx:116
AthExXRT::VectorAddOCLExampleAlg::SlotData
Slot-specific state.
Definition: VectorAddOCLExampleAlg.h:72
AthExXRT::VectorAddOCLExampleAlg::SlotData::m_dev_buf_in1
std::unique_ptr< cl::Buffer > m_dev_buf_in1
Buffer objects.
Definition: VectorAddOCLExampleAlg.h:86
AthExXRT::VectorAddOCLExampleAlg::SlotData::m_host_buf_in1
uint32_t * m_host_buf_in1
Definition: VectorAddOCLExampleAlg.h:90
ATH_MSG_ERROR
#define ATH_MSG_ERROR(x)
Definition: AthMsgStreamMacros.h:33
LArG4FSStartPointFilter.rand
rand
Definition: LArG4FSStartPointFilter.py:80
dqt_zlumi_pandas.err
err
Definition: dqt_zlumi_pandas.py:183
lumiFormat.i
int i
Definition: lumiFormat.py:85
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
ATH_MSG_DEBUG
#define ATH_MSG_DEBUG(x)
Definition: AthMsgStreamMacros.h:29
AthExXRT::VectorAddOCLExampleAlg::s_krnl_param_size
static constexpr int s_krnl_param_size
Definition: VectorAddOCLExampleAlg.h:64
ATH_CHECK
#define ATH_CHECK
Definition: AthCheckMacros.h:40
AthExXRT::VectorAddOCLExampleAlg::SlotData::m_kernel
std::unique_ptr< cl::Kernel > m_kernel
Kernel object.
Definition: VectorAddOCLExampleAlg.h:80
AthExXRT::VectorAddOCLExampleAlg::m_handles
std::vector< AthXRT::IDeviceMgmtSvc::OpenCLHandle > m_handles
Definition: VectorAddOCLExampleAlg.h:69
AthExXRT::VectorAddOCLExampleAlg::SlotData::m_dev_buf_in2
std::unique_ptr< cl::Buffer > m_dev_buf_in2
Definition: VectorAddOCLExampleAlg.h:87
AthExXRT::VectorAddOCLExampleAlg::SlotData::m_host_buf_in2
uint32_t * m_host_buf_in2
Definition: VectorAddOCLExampleAlg.h:91
ATH_MSG_WARNING
#define ATH_MSG_WARNING(x)
Definition: AthMsgStreamMacros.h:32
AthExXRT::VectorAddOCLExampleAlg::s_krnl_param_in2
static constexpr int s_krnl_param_in2
Definition: VectorAddOCLExampleAlg.h:62
AthExXRT::VectorAddOCLExampleAlg::initialize_worker
virtual StatusCode initialize_worker() override
Initialization per process.
Definition: VectorAddOCLExampleAlg.cxx:30
AthExXRT::VectorAddOCLExampleAlg::s_element_count
static constexpr int s_element_count
Definition: VectorAddOCLExampleAlg.h:67
VectorAddOCLExampleAlg.h
AthExXRT::VectorAddOCLExampleAlg::initialize_global
virtual StatusCode initialize_global() override
Global XRT initialization.
Definition: VectorAddOCLExampleAlg.cxx:13
AthExXRT::VectorAddOCLExampleAlg::execute
virtual StatusCode execute(const EventContext &ctx) const override
Definition: VectorAddOCLExampleAlg.cxx:133