ATLAS Offline Software
VectorMultXRTExampleAlg.cxx
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3 //
4 
5 // Gaudi includes
6 #include "GaudiKernel/ConcurrencyFlags.h"
7 
8 // Local include(s).
10 
11 namespace AthExXRT {
12 
14 
15  // Retrieve the necessary component(s).
16  ATH_CHECK(m_DeviceMgmtSvc.retrieve());
17 
18  // Retrieve the list of device(s) providing the kernel.
19  std::vector<std::shared_ptr<xrt::device>> devices =
20  m_DeviceMgmtSvc->get_xrt_devices_by_kernel_name(s_krnl_name);
21  if (devices.empty()) {
22  ATH_MSG_ERROR("No XRT device provides kernel '" << s_krnl_name << "'");
23  return StatusCode::FAILURE;
24  }
25 
26  // Allocate slot specific resources.
27  std::size_t slotIdx = 0;
28  for (SlotData& slot : m_slots) {
29  ATH_MSG_DEBUG("Allocating resources for slot " << slotIdx);
30 
31  // If multiple device are available, we can select one based on the slot
32  // number in a round-robin fashion. This is just an example, and more
33  // complex logic could be implemented here to take advantage of multiple
34  // devices.
35  const std::size_t device_idx = slotIdx % devices.size();
36  ATH_MSG_DEBUG("Using device " << device_idx << " for slot " << slotIdx);
37  slot.m_device = devices[device_idx];
38 
39  // Create kernel objects.
40  try {
41  slot.m_kernel = std::make_unique<xrt::kernel>(
42  *slot.m_device, slot.m_device->get_xclbin_uuid(), s_krnl_name);
43  } catch (...) {
44  std::exception_ptr p = std::current_exception();
46  "Could not create XRT kernel '"
47  << s_krnl_name
48  << "', check that correct XCLBIN is programmed by AthXRT service");
49  return StatusCode::FAILURE;
50  }
51 
52  // Get memory bank groups for device buffers.
53  xrtMemoryGroup bank_grp_in1 = slot.m_kernel->group_id(s_krnl_param_in1);
54  xrtMemoryGroup bank_grp_in2 = slot.m_kernel->group_id(s_krnl_param_in2);
55  xrtMemoryGroup bank_grp_out = slot.m_kernel->group_id(s_krnl_param_out);
56 
57  std::size_t size_in_bytes = s_element_count * sizeof(uint32_t);
58 
59  // Create buffer objects.
60  // This create aligned buffer object both on host and device.
61  slot.m_bo_in1 = std::make_unique<xrt::bo>(
62  *slot.m_device, size_in_bytes, xrt::bo::flags::normal, bank_grp_in1);
63  slot.m_bo_in2 = std::make_unique<xrt::bo>(
64  *slot.m_device, size_in_bytes, xrt::bo::flags::normal, bank_grp_in2);
65  slot.m_bo_out = std::make_unique<xrt::bo>(
66  *slot.m_device, size_in_bytes, xrt::bo::flags::normal, bank_grp_out);
67 
68  // Create run object and set arguments for subsequent executions.
69  slot.m_run = std::make_unique<xrt::run>(*slot.m_kernel);
70  slot.m_run->set_arg(s_krnl_param_in1, *slot.m_bo_in1);
71  slot.m_run->set_arg(s_krnl_param_in2, *slot.m_bo_in2);
72  slot.m_run->set_arg(s_krnl_param_out, *slot.m_bo_out);
73  slot.m_run->set_arg(s_krnl_param_size, s_element_count);
74 
75  ++slotIdx;
76  }
77 
78  // Return gracefully.
79  return StatusCode::SUCCESS;
80 }
81 
82 StatusCode VectorMultXRTExampleAlg::execute(const EventContext& ctx) const {
83 
84  // Get the slot (thread) specific data.
85  const SlotData& slot = *m_slots.get(ctx);
86 
87  // Map buffer objects to host pointers.
88  uint32_t* buffer_in1 = slot.m_bo_in1->map<uint32_t*>();
89  uint32_t* buffer_in2 = slot.m_bo_in2->map<uint32_t*>();
90  uint32_t* buffer_out = slot.m_bo_out->map<uint32_t*>();
91 
92  // Initialize the buffers with random data.
93  for (int i = 0; i < s_element_count; ++i) {
94  buffer_in1[i] = rand() % s_element_count;
95  buffer_in2[i] = rand() % s_element_count;
96  }
97 
98  ATH_MSG_DEBUG("Transfer data buffer to device");
99  slot.m_bo_in1->sync(XCL_BO_SYNC_BO_TO_DEVICE);
100  slot.m_bo_in2->sync(XCL_BO_SYNC_BO_TO_DEVICE);
101 
102  ATH_MSG_DEBUG("Running kernel");
103  slot.m_run->start();
104  slot.m_run->wait();
105 
106  ATH_MSG_DEBUG("Transfer data back to host");
107  slot.m_bo_out->sync(XCL_BO_SYNC_BO_FROM_DEVICE);
108 
109  // Check that kernel results are correct.
110  bool correct = true;
111  for (int i = 0; i < s_element_count; ++i) {
112  uint32_t cpu_result = buffer_in1[i] * buffer_in2[i];
113  if (buffer_out[i] != cpu_result) {
114  ATH_MSG_ERROR("Error: Result mismatch: i = "
115  << i << ": CPU result = " << cpu_result
116  << " Device result = " << buffer_out[i]);
117  correct = false;
118  break;
119  }
120  }
121  if (correct) {
122  ATH_MSG_INFO("XRT vector multiplication test PASSED!");
123  } else {
124  ATH_MSG_ERROR("XRT vector multiplication test FAILED!");
125  return StatusCode::FAILURE;
126  }
127 
128  // Return gracefully.
129  return StatusCode::SUCCESS;
130 }
131 
132 } // namespace AthExXRT
AthExXRT::VectorMultXRTExampleAlg::s_element_count
static constexpr int s_element_count
Definition: VectorMultXRTExampleAlg.h:57
python.PerfMonSerializer.p
def p
Definition: PerfMonSerializer.py:743
ATH_MSG_INFO
#define ATH_MSG_INFO(x)
Definition: AthMsgStreamMacros.h:31
xAOD::uint32_t
setEventNumber uint32_t
Definition: EventInfo_v1.cxx:127
AthExXRT::VectorMultXRTExampleAlg::SlotData::m_bo_in2
std::unique_ptr< xrt::bo > m_bo_in2
Definition: VectorMultXRTExampleAlg.h:72
AthExXRT
Definition: VectorAddOCLExampleAlg.cxx:11
AthExXRT::VectorMultXRTExampleAlg::execute
virtual StatusCode execute(const EventContext &ctx) const override
Function executing the algorithm.
Definition: VectorMultXRTExampleAlg.cxx:82
AthExXRT::VectorMultXRTExampleAlg::SlotData::m_run
std::unique_ptr< xrt::run > m_run
Kernel run object.
Definition: VectorMultXRTExampleAlg.h:68
AthExXRT::VectorMultXRTExampleAlg::SlotData::m_bo_out
std::unique_ptr< xrt::bo > m_bo_out
Definition: VectorMultXRTExampleAlg.h:73
AthExXRT::VectorMultXRTExampleAlg::SlotData::m_bo_in1
std::unique_ptr< xrt::bo > m_bo_in1
Buffer objects.
Definition: VectorMultXRTExampleAlg.h:71
AthExXRT::VectorMultXRTExampleAlg::s_krnl_param_in2
static constexpr int s_krnl_param_in2
Definition: VectorMultXRTExampleAlg.h:52
ATH_MSG_ERROR
#define ATH_MSG_ERROR(x)
Definition: AthMsgStreamMacros.h:33
LArG4FSStartPointFilter.rand
rand
Definition: LArG4FSStartPointFilter.py:80
VectorMultXRTExampleAlg.h
lumiFormat.i
int i
Definition: lumiFormat.py:85
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
ATH_MSG_DEBUG
#define ATH_MSG_DEBUG(x)
Definition: AthMsgStreamMacros.h:29
ATH_CHECK
#define ATH_CHECK
Definition: AthCheckMacros.h:40
AthExXRT::VectorMultXRTExampleAlg::s_krnl_param_in1
static constexpr int s_krnl_param_in1
Definition: VectorMultXRTExampleAlg.h:51
AthExXRT::VectorMultXRTExampleAlg::m_DeviceMgmtSvc
ServiceHandle< AthXRT::IDeviceMgmtSvc > m_DeviceMgmtSvc
The XRT device manager to use.
Definition: VectorMultXRTExampleAlg.h:42
AthExXRT::VectorMultXRTExampleAlg::s_krnl_param_out
static constexpr int s_krnl_param_out
Definition: VectorMultXRTExampleAlg.h:53
AthExXRT::VectorMultXRTExampleAlg::s_krnl_param_size
static constexpr int s_krnl_param_size
Definition: VectorMultXRTExampleAlg.h:54
AthExXRT::VectorMultXRTExampleAlg::s_krnl_name
static constexpr char s_krnl_name[]
Definition: VectorMultXRTExampleAlg.h:47
AthExXRT::VectorMultXRTExampleAlg::m_slots
SG::SlotSpecificObj< SlotData > m_slots
List of slot-specific data.
Definition: VectorMultXRTExampleAlg.h:77
AthExXRT::VectorMultXRTExampleAlg::initialize
virtual StatusCode initialize() override
Function initialising the algorithm.
Definition: VectorMultXRTExampleAlg.cxx:13
AthExXRT::VectorMultXRTExampleAlg::SlotData
Slot-specific state.
Definition: VectorMultXRTExampleAlg.h:60