ATLAS Offline Software
VectorMultOCLExampleAlg.cxx
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3 //
4 
5 // Gaudi includes
6 #include "GaudiKernel/ConcurrencyFlags.h"
7 
8 // Local include(s).
10 
11 namespace AthExXRT {
12 
14 
15  // Retrieve the necessary component(s).
16  ATH_CHECK(m_DeviceMgmtSvc.retrieve());
17 
18  cl_int err = CL_SUCCESS;
19 
20  // Retrieve the list of OpencCL Handle(s) providing the kernel.
21  std::vector<AthXRT::IDeviceMgmtSvc::OpenCLHandle> handles =
22  m_DeviceMgmtSvc->get_opencl_handles_by_kernel_name(s_krnl_name);
23 
24  if (handles.empty()) {
25  ATH_MSG_ERROR("No OpenCL context provides kernel '" << s_krnl_name << "'");
26  return StatusCode::FAILURE;
27  }
28 
29  // Allocate slot specific resources.
30  std::size_t slotIdx = 0;
31  for (SlotData& slot : m_slots) {
32  ATH_MSG_DEBUG("Allocating resources for slot " << slotIdx);
33 
34  if (handles.size() > 1) {
35  ATH_MSG_WARNING("More than one OpenCL context provides a '"
36  << s_krnl_name << "' kernel (" << handles.size()
37  << "), using the first one");
38  }
39  slot.m_context = handles[0].context;
40  slot.m_program = handles[0].program;
41 
42  // Create kernel objects.
43  slot.m_kernel =
44  std::make_unique<cl::Kernel>(*slot.m_program, s_krnl_name, &err);
45  if (err != 0) {
47  "Could not create OpenCL kernel '"
48  << s_krnl_name
49  << "', check that correct XCLBIN is programmed by AthXRT service");
50  return StatusCode::FAILURE;
51  }
52 
53  // Create command queue.
54  slot.m_queue = std::make_unique<cl::CommandQueue>(
55  *slot.m_context, slot.m_context->getInfo<CL_CONTEXT_DEVICES>()[0], 0,
56  &err);
57  ATH_CHECK(err == CL_SUCCESS);
58 
59  const std::size_t size_in_bytes = s_element_count * sizeof(uint32_t);
60 
61  // Create buffer objects.
62  // This create aligned buffer object both on host and device.
63  slot.m_dev_buf_in1 = std::make_unique<cl::Buffer>(
64  *slot.m_context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
65  size_in_bytes, nullptr, &err);
66  ATH_CHECK(err == CL_SUCCESS);
67 
68  slot.m_dev_buf_in2 = std::make_unique<cl::Buffer>(
69  *slot.m_context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
70  size_in_bytes, nullptr, &err);
71  ATH_CHECK(err == CL_SUCCESS);
72 
73  slot.m_dev_buf_out = std::make_unique<cl::Buffer>(
74  *slot.m_context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
75  size_in_bytes, nullptr, &err);
76  ATH_CHECK(err == CL_SUCCESS);
77 
78  slot.m_host_buf_in1 = (uint32_t*)slot.m_queue->enqueueMapBuffer(
79  *slot.m_dev_buf_in1, CL_TRUE, CL_MAP_WRITE, 0, size_in_bytes, nullptr,
80  nullptr, &err);
81  ATH_CHECK(err == CL_SUCCESS);
82 
83  slot.m_host_buf_in2 = (uint32_t*)slot.m_queue->enqueueMapBuffer(
84  *slot.m_dev_buf_in2, CL_TRUE, CL_MAP_WRITE, 0, size_in_bytes, nullptr,
85  nullptr, &err);
86  ATH_CHECK(err == CL_SUCCESS);
87 
88  slot.m_host_buf_out = (uint32_t*)slot.m_queue->enqueueMapBuffer(
89  *slot.m_dev_buf_out, CL_TRUE, CL_MAP_READ, 0, size_in_bytes, nullptr,
90  nullptr, &err);
91  ATH_CHECK(err == CL_SUCCESS);
92 
93  // Set kernel arguments.
94  ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_in1, *slot.m_dev_buf_in1) ==
95  CL_SUCCESS);
96  ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_in2, *slot.m_dev_buf_in2) ==
97  CL_SUCCESS);
98  ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_out, *slot.m_dev_buf_out) ==
99  CL_SUCCESS);
100  ATH_CHECK(slot.m_kernel->setArg(s_krnl_param_size, s_element_count) ==
101  CL_SUCCESS);
102 
103  ++slotIdx;
104  }
105 
106  // Return gracefully.
107  return StatusCode::SUCCESS;
108 }
109 
110 StatusCode VectorMultOCLExampleAlg::execute(const EventContext& ctx) const {
111 
112  // Get the slot (thread) specific data.
113  const SlotData& slot = *m_slots.get(ctx);
114 
115  // Initialize the buffers with random data.
116  for (std::size_t i = 0; i < s_element_count; ++i) {
117  slot.m_host_buf_in1[i] = rand() % s_element_count;
118  slot.m_host_buf_in2[i] = rand() % s_element_count;
119  }
120 
121  ATH_MSG_DEBUG("Transfer data buffer to device");
122  std::vector<cl::Memory> mems_vector = {*slot.m_dev_buf_in1,
123  *slot.m_dev_buf_in2};
124  ATH_CHECK(slot.m_queue->enqueueMigrateMemObjects(mems_vector, 0, nullptr,
125  nullptr) == CL_SUCCESS);
126 
127  // Schedule the kernel.
128  ATH_MSG_DEBUG("Running kernel");
129  ATH_CHECK(slot.m_queue->enqueueTask(*slot.m_kernel, nullptr, nullptr) ==
130  CL_SUCCESS);
131 
132  // Migrate data back to host.
133  ATH_MSG_DEBUG("Transfer data back to host");
134  std::vector<cl::Memory> mems_out_vector = {*slot.m_dev_buf_out};
135  ATH_CHECK(slot.m_queue->enqueueMigrateMemObjects(
136  mems_out_vector, CL_MIGRATE_MEM_OBJECT_HOST, nullptr,
137  nullptr) == CL_SUCCESS);
138  ATH_CHECK(slot.m_queue->finish() == CL_SUCCESS);
139 
140  // Check that kernel results are correct.
141  bool correct = true;
142  for (std::size_t i = 0; i < s_element_count; ++i) {
143  uint32_t cpu_result = slot.m_host_buf_in1[i] * slot.m_host_buf_in2[i];
144  if (slot.m_host_buf_out[i] != cpu_result) {
145  ATH_MSG_ERROR("Error: Result mismatch: i = "
146  << i << ": CPU result = " << cpu_result
147  << " Device result = " << slot.m_host_buf_out[i]);
148  correct = false;
149  break;
150  }
151  }
152  if (correct) {
153  ATH_MSG_INFO("OpenCL vector multiplication test PASSED!");
154  } else {
155  ATH_MSG_ERROR("OpenCL vector multiplication test FAILED!");
156  return StatusCode::FAILURE;
157  }
158 
159  // Return gracefully.
160  return StatusCode::SUCCESS;
161 }
162 
164 
165  // Unmap buffer objects.
166  for (SlotData& slot : m_slots) {
167  ATH_CHECK(slot.m_queue->enqueueUnmapMemObject(
168  *slot.m_dev_buf_in1, slot.m_host_buf_in1) == CL_SUCCESS);
169  ATH_CHECK(slot.m_queue->enqueueUnmapMemObject(
170  *slot.m_dev_buf_in2, slot.m_host_buf_in2) == CL_SUCCESS);
171  ATH_CHECK(slot.m_queue->enqueueUnmapMemObject(
172  *slot.m_dev_buf_out, slot.m_host_buf_out) == CL_SUCCESS);
173  ATH_CHECK(slot.m_queue->finish() == CL_SUCCESS);
174  }
175 
176  // Return gracefully.
177  return StatusCode::SUCCESS;
178 }
179 
180 } // namespace AthExXRT
ATH_MSG_INFO
#define ATH_MSG_INFO(x)
Definition: AthMsgStreamMacros.h:31
VectorMultOCLExampleAlg.h
AthExXRT::VectorMultOCLExampleAlg::s_krnl_param_out
static constexpr int s_krnl_param_out
Definition: VectorMultOCLExampleAlg.h:56
xAOD::uint32_t
setEventNumber uint32_t
Definition: EventInfo_v1.cxx:127
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_dev_buf_out
std::unique_ptr< cl::Buffer > m_dev_buf_out
Definition: VectorMultOCLExampleAlg.h:79
AthExXRT
Definition: VectorAddOCLExampleAlg.cxx:11
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_dev_buf_in1
std::unique_ptr< cl::Buffer > m_dev_buf_in1
Buffer objects.
Definition: VectorMultOCLExampleAlg.h:77
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_host_buf_in1
uint32_t * m_host_buf_in1
Definition: VectorMultOCLExampleAlg.h:81
AthExXRT::VectorMultOCLExampleAlg::finalize
virtual StatusCode finalize() override
Function finalising the algorithm.
Definition: VectorMultOCLExampleAlg.cxx:163
ATH_MSG_ERROR
#define ATH_MSG_ERROR(x)
Definition: AthMsgStreamMacros.h:33
LArG4FSStartPointFilter.rand
rand
Definition: LArG4FSStartPointFilter.py:80
dqt_zlumi_pandas.err
err
Definition: dqt_zlumi_pandas.py:182
lumiFormat.i
int i
Definition: lumiFormat.py:85
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
ATH_MSG_DEBUG
#define ATH_MSG_DEBUG(x)
Definition: AthMsgStreamMacros.h:29
AthExXRT::VectorMultOCLExampleAlg::SlotData
Slot-specific state.
Definition: VectorMultOCLExampleAlg.h:63
ATH_CHECK
#define ATH_CHECK
Definition: AthCheckMacros.h:40
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_host_buf_out
uint32_t * m_host_buf_out
Definition: VectorMultOCLExampleAlg.h:83
AthExXRT::VectorMultOCLExampleAlg::s_element_count
static constexpr int s_element_count
Definition: VectorMultOCLExampleAlg.h:60
AthExXRT::VectorMultOCLExampleAlg::m_DeviceMgmtSvc
ServiceHandle< AthXRT::IDeviceMgmtSvc > m_DeviceMgmtSvc
The XRT device manager to use.
Definition: VectorMultOCLExampleAlg.h:45
AthExXRT::VectorMultOCLExampleAlg::m_slots
SG::SlotSpecificObj< SlotData > m_slots
List of slot-specific data.
Definition: VectorMultOCLExampleAlg.h:87
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_host_buf_in2
uint32_t * m_host_buf_in2
Definition: VectorMultOCLExampleAlg.h:82
AthExXRT::VectorMultOCLExampleAlg::s_krnl_param_in1
static constexpr int s_krnl_param_in1
Definition: VectorMultOCLExampleAlg.h:54
AthExXRT::VectorMultOCLExampleAlg::s_krnl_param_size
static constexpr int s_krnl_param_size
Definition: VectorMultOCLExampleAlg.h:57
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_kernel
std::unique_ptr< cl::Kernel > m_kernel
Kernel object.
Definition: VectorMultOCLExampleAlg.h:71
ATH_MSG_WARNING
#define ATH_MSG_WARNING(x)
Definition: AthMsgStreamMacros.h:32
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_dev_buf_in2
std::unique_ptr< cl::Buffer > m_dev_buf_in2
Definition: VectorMultOCLExampleAlg.h:78
AthExXRT::VectorMultOCLExampleAlg::execute
virtual StatusCode execute(const EventContext &ctx) const override
Function executing the algorithm.
Definition: VectorMultOCLExampleAlg.cxx:110
AthExXRT::VectorMultOCLExampleAlg::s_krnl_param_in2
static constexpr int s_krnl_param_in2
Definition: VectorMultOCLExampleAlg.h:55
AthExXRT::VectorMultOCLExampleAlg::initialize
virtual StatusCode initialize() override
Function initialising the algorithm.
Definition: VectorMultOCLExampleAlg.cxx:13
AthExXRT::VectorMultOCLExampleAlg::SlotData::m_queue
std::unique_ptr< cl::CommandQueue > m_queue
Kernel run object.
Definition: VectorMultOCLExampleAlg.h:74
AthExXRT::VectorMultOCLExampleAlg::s_krnl_name
static constexpr char s_krnl_name[]
Definition: VectorMultOCLExampleAlg.h:50