ATLAS Offline Software
VectorMultOCLExampleAlg.cxx
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3 //
4 
5 // Local include(s).
7 
8 namespace AthExXRT {
9 
11 
12  ATH_MSG_INFO("initialize_global()");
13 
14  ATH_CHECK(m_DeviceMgmtSvc.retrieve());
15 
16  // Retrieve the list of OpencCL Handle(s) providing the kernel.
17  m_handles = m_DeviceMgmtSvc->get_opencl_handles_by_kernel_name(s_krnl_name);
18 
19  if (m_handles.empty()) {
20  ATH_MSG_ERROR("No OpenCL context provides kernel '" << s_krnl_name << "'");
21  return StatusCode::FAILURE;
22  }
23 
24  return StatusCode::SUCCESS;
25 }
26 
28 
29  ATH_MSG_INFO("initialize_worker()");
30 
31  cl_int err = CL_SUCCESS;
32 
33  // Allocate slot specific resources.
34  std::size_t slotIdx = 0;
35  for (SlotData& slot : m_slots) {
36  ATH_MSG_DEBUG("Allocating resources for slot " << slotIdx);
37 
38  if (m_handles.size() > 1) {
39  ATH_MSG_WARNING("More than one OpenCL context provides a '"
40  << s_krnl_name << "' kernel (" << m_handles.size()
41  << "), using the first one");
42  }
43  slot.m_context = m_handles[0].context;
44  slot.m_program = m_handles[0].program;
45 
46  // Create kernel objects.
47  slot.m_kernel =
48  std::make_unique<cl::Kernel>(*slot.m_program, s_krnl_name, &err);
49  if (err != 0) {
51  "Could not create OpenCL kernel '"
52  << s_krnl_name
53  << "', check that correct XCLBIN is programmed by AthXRT service");
54  return StatusCode::FAILURE;
55  }
56 
57  // Create command queue.
58  slot.m_queue = std::make_unique<cl::CommandQueue>(
59  *slot.m_context, slot.m_context->getInfo<CL_CONTEXT_DEVICES>()[0], 0,
60  &err);
61  ATH_CHECK(err == CL_SUCCESS);
62 
63  const std::size_t size_in_bytes = s_element_count * sizeof(uint32_t);
64 
65  // Create buffer objects.
66  // This create aligned buffer object both on host and device.
67  slot.m_dev_buf_in1 = std::make_unique<cl::Buffer>(
68  *slot.m_context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
69  size_in_bytes, nullptr, &err);
70  ATH_CHECK(err == CL_SUCCESS);
71 
72  slot.m_dev_buf_in2 = std::make_unique<cl::Buffer>(
73  *slot.m_context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
74  size_in_bytes, nullptr, &err);
75  ATH_CHECK(err == CL_SUCCESS);
76 
77  slot.m_dev_buf_out = std::make_unique<cl::Buffer>(
78  *slot.m_context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
79  size_in_bytes, nullptr, &err);
80  ATH_CHECK(err == CL_SUCCESS);
81 
82  slot.m_host_buf_in1 = (uint32_t*)slot.m_queue->enqueueMapBuffer(
83  *slot.m_dev_buf_in1, CL_TRUE, CL_MAP_WRITE, 0, size_in_bytes, nullptr,
84  nullptr, &err);
85  ATH_CHECK(err == CL_SUCCESS);
86 
87  slot.m_host_buf_in2 = (uint32_t*)slot.m_queue->enqueueMapBuffer(
88  *slot.m_dev_buf_in2, CL_TRUE, CL_MAP_WRITE, 0, size_in_bytes, nullptr,
89  nullptr, &err);
90  ATH_CHECK(err == CL_SUCCESS);
91 
92  slot.m_host_buf_out = (uint32_t*)slot.m_queue->enqueueMapBuffer(
93  *slot.m_dev_buf_out, CL_TRUE, CL_MAP_READ, 0, size_in_bytes, nullptr,
94  nullptr, &err);
95  ATH_CHECK(err == CL_SUCCESS);
96 
97  // Set kernel arguments.
98  ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_in1, *slot.m_dev_buf_in1) ==
99  CL_SUCCESS);
100  ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_in2, *slot.m_dev_buf_in2) ==
101  CL_SUCCESS);
102  ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_out, *slot.m_dev_buf_out) ==
103  CL_SUCCESS);
104  ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_size, s_element_count) ==
105  CL_SUCCESS);
106 
107  ++slotIdx;
108  }
109 
110  return StatusCode::SUCCESS;
111 }
112 
114 
115  ATH_MSG_INFO("stop_worker(): Cleaning OCL environment");
116  // Unmap buffer objects.
117  for (SlotData& slot : m_slots) {
118  ATH_CHECK(slot.m_queue->enqueueUnmapMemObject(
119  *slot.m_dev_buf_in1, slot.m_host_buf_in1) == CL_SUCCESS);
120  ATH_CHECK(slot.m_queue->enqueueUnmapMemObject(
121  *slot.m_dev_buf_in2, slot.m_host_buf_in2) == CL_SUCCESS);
122  ATH_CHECK(slot.m_queue->enqueueUnmapMemObject(
123  *slot.m_dev_buf_out, slot.m_host_buf_out) == CL_SUCCESS);
124  ATH_CHECK(slot.m_queue->finish() == CL_SUCCESS);
125  }
126 
127  return StatusCode::SUCCESS;
128 }
129 
130 StatusCode VectorMultOCLExampleAlg::execute(const EventContext& ctx) const {
131 
132  // Get the slot (thread) specific data.
133  const SlotData& slot = *m_slots.get(ctx);
134 
135  // Initialize the buffers with random data.
136  for (std::size_t i = 0; i < s_element_count; ++i) {
137  slot.m_host_buf_in1[i] = rand() % s_element_count;
138  slot.m_host_buf_in2[i] = rand() % s_element_count;
139  }
140 
141  ATH_MSG_DEBUG("Transfer data buffer to device");
142  std::vector<cl::Memory> mems_vector = {*slot.m_dev_buf_in1,
143  *slot.m_dev_buf_in2};
144  ATH_CHECK(slot.m_queue->enqueueMigrateMemObjects(mems_vector, 0, nullptr,
145  nullptr) == CL_SUCCESS);
146 
147  // Schedule the kernel.
148  ATH_MSG_DEBUG("Running kernel");
149  ATH_CHECK(slot.m_queue->enqueueTask(*slot.m_kernel, nullptr, nullptr) ==
150  CL_SUCCESS);
151 
152  // Migrate data back to host.
153  ATH_MSG_DEBUG("Transfer data back to host");
154  std::vector<cl::Memory> mems_out_vector = {*slot.m_dev_buf_out};
155  ATH_CHECK(slot.m_queue->enqueueMigrateMemObjects(
156  mems_out_vector, CL_MIGRATE_MEM_OBJECT_HOST, nullptr,
157  nullptr) == CL_SUCCESS);
158  ATH_CHECK(slot.m_queue->finish() == CL_SUCCESS);
159 
160  // Check that kernel results are correct.
161  bool correct = true;
162  for (std::size_t i = 0; i < s_element_count; ++i) {
163  uint32_t cpu_result = slot.m_host_buf_in1[i] * slot.m_host_buf_in2[i];
164  if (slot.m_host_buf_out[i] != cpu_result) {
165  ATH_MSG_ERROR("Error: Result mismatch: i = "
166  << i << ": CPU result = " << cpu_result
167  << " Device result = " << slot.m_host_buf_out[i]);
168  correct = false;
169  break;
170  }
171  }
172  if (correct) {
173  ATH_MSG_INFO("OpenCL vector multiplication test PASSED!");
174  } else {
175  ATH_MSG_ERROR("OpenCL vector multiplication test FAILED!");
176  return StatusCode::FAILURE;
177  }
178 
179  return StatusCode::SUCCESS;
180 }
181 
182 } // namespace AthExXRT
ATH_MSG_INFO
#define ATH_MSG_INFO(x)
Definition: AthMsgStreamMacros.h:31
VectorMultOCLExampleAlg.h
AthExXRT::VectorMultOCLExampleAlg::s_krnl_param_out
static constexpr int s_krnl_param_out
Definition: VectorMultOCLExampleAlg.h:62
xAOD::uint32_t
setEventNumber uint32_t
Definition: EventInfo_v1.cxx:127
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_dev_buf_out
std::unique_ptr< cl::Buffer > m_dev_buf_out
Definition: VectorMultOCLExampleAlg.h:87
AthExXRT
Definition: VectorAddOCLExampleAlg.cxx:11
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_dev_buf_in1
std::unique_ptr< cl::Buffer > m_dev_buf_in1
Buffer objects.
Definition: VectorMultOCLExampleAlg.h:85
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_host_buf_in1
uint32_t * m_host_buf_in1
Definition: VectorMultOCLExampleAlg.h:89
AthExXRT::VectorMultOCLExampleAlg::stop_worker
virtual StatusCode stop_worker() override
clean up
Definition: VectorMultOCLExampleAlg.cxx:113
ATH_MSG_ERROR
#define ATH_MSG_ERROR(x)
Definition: AthMsgStreamMacros.h:33
LArG4FSStartPointFilter.rand
rand
Definition: LArG4FSStartPointFilter.py:80
dqt_zlumi_pandas.err
err
Definition: dqt_zlumi_pandas.py:183
lumiFormat.i
int i
Definition: lumiFormat.py:85
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
ATH_MSG_DEBUG
#define ATH_MSG_DEBUG(x)
Definition: AthMsgStreamMacros.h:29
AthExXRT::VectorMultOCLExampleAlg::SlotData
Slot-specific state.
Definition: VectorMultOCLExampleAlg.h:71
ATH_CHECK
#define ATH_CHECK
Definition: AthCheckMacros.h:40
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_host_buf_out
uint32_t * m_host_buf_out
Definition: VectorMultOCLExampleAlg.h:91
AthExXRT::VectorMultOCLExampleAlg::s_element_count
static constexpr int s_element_count
Definition: VectorMultOCLExampleAlg.h:66
AthExXRT::VectorMultOCLExampleAlg::m_DeviceMgmtSvc
ServiceHandle< AthXRT::IDeviceMgmtSvc > m_DeviceMgmtSvc
The XRT device manager to use.
Definition: VectorMultOCLExampleAlg.h:51
AthExXRT::VectorMultOCLExampleAlg::m_slots
SG::SlotSpecificObj< SlotData > m_slots
List of slot-specific data.
Definition: VectorMultOCLExampleAlg.h:95
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_host_buf_in2
uint32_t * m_host_buf_in2
Definition: VectorMultOCLExampleAlg.h:90
AthExXRT::VectorMultOCLExampleAlg::s_krnl_param_in1
static constexpr int s_krnl_param_in1
Definition: VectorMultOCLExampleAlg.h:60
AthExXRT::VectorMultOCLExampleAlg::m_handles
std::vector< AthXRT::IDeviceMgmtSvc::OpenCLHandle > m_handles
Definition: VectorMultOCLExampleAlg.h:68
AthExXRT::VectorMultOCLExampleAlg::s_krnl_param_size
static constexpr int s_krnl_param_size
Definition: VectorMultOCLExampleAlg.h:63
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_kernel
std::unique_ptr< cl::Kernel > m_kernel
Kernel object.
Definition: VectorMultOCLExampleAlg.h:79
ATH_MSG_WARNING
#define ATH_MSG_WARNING(x)
Definition: AthMsgStreamMacros.h:32
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_dev_buf_in2
std::unique_ptr< cl::Buffer > m_dev_buf_in2
Definition: VectorMultOCLExampleAlg.h:86
AthExXRT::VectorMultOCLExampleAlg::execute
virtual StatusCode execute(const EventContext &ctx) const override
Function executing the algorithm.
Definition: VectorMultOCLExampleAlg.cxx:130
AthExXRT::VectorMultOCLExampleAlg::initialize_worker
virtual StatusCode initialize_worker() override
Initialization per process.
Definition: VectorMultOCLExampleAlg.cxx:27
AthExXRT::VectorMultOCLExampleAlg::s_krnl_param_in2
static constexpr int s_krnl_param_in2
Definition: VectorMultOCLExampleAlg.h:61
AthExXRT::VectorMultOCLExampleAlg::initialize_global
virtual StatusCode initialize_global() override
Global XRT initialization.
Definition: VectorMultOCLExampleAlg.cxx:10
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_queue
std::unique_ptr< cl::CommandQueue > m_queue
Kernel run object.
Definition: VectorMultOCLExampleAlg.h:82
AthExXRT::VectorMultOCLExampleAlg::s_krnl_name
static constexpr char s_krnl_name[]
Definition: VectorMultOCLExampleAlg.h:56