ATLAS Offline Software
VectorAddXRTExampleAlg.cxx
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3 //
4 
5 // Local include(s).
7 
8 namespace AthExXRT {
9 
11 
12  ATH_MSG_INFO("initialize_global()");
13 
14  ATH_CHECK(m_DeviceMgmtSvc.retrieve());
15 
16  // Retrieve the list of device(s) providing the kernel.
17  m_devices = m_DeviceMgmtSvc->get_xrt_devices_by_kernel_name(s_krnl_name);
18  if (m_devices.empty()) {
19  ATH_MSG_ERROR("No XRT device provides kernel '" << s_krnl_name << "'");
20  return StatusCode::FAILURE;
21  }
22  ATH_MSG_INFO("Retrieved " << m_devices.size()<<" devices running "<< s_krnl_name);
23 
24  return StatusCode::SUCCESS;
25 
26 }
27 
29 
30  ATH_MSG_INFO("initialize_worker()");
31 
32  // Allocate slot specific resources.
33  std::size_t slotIdx = 0;
34  for (SlotData& slot : m_slots) {
35  ATH_MSG_DEBUG("Allocating resources for slot " << slotIdx);
36 
37  // If multiple device are available, we can select one based on the slot
38  // number in a round-robin fashion. This is just an example, and more
39  // complex logic could be implemented here to take advantage of multiple
40  // devices.
41  const std::size_t device_idx = slotIdx % m_devices.size();
42  ATH_MSG_DEBUG("Using device " << device_idx << " for slot " << slotIdx);
43  slot.m_device = m_devices[device_idx];
44 
45  // Create kernel objects.
46  try {
47  slot.m_kernel = std::make_unique<xrt::kernel>(
48  *slot.m_device, slot.m_device->get_xclbin_uuid(), s_krnl_name);
49  } catch (...) {
50  std::exception_ptr p = std::current_exception();
52  "Could not create XRT kernel '"
53  << s_krnl_name
54  << "', check that correct XCLBIN is programmed by AthXRT service");
55  return StatusCode::FAILURE;
56  }
57 
58  // Get memory bank groups for device buffers.
59  xrtMemoryGroup bank_grp_in1 = slot.m_kernel->group_id(s_krnl_param_in1);
60  xrtMemoryGroup bank_grp_in2 = slot.m_kernel->group_id(s_krnl_param_in2);
61  xrtMemoryGroup bank_grp_out = slot.m_kernel->group_id(s_krnl_param_out);
62 
63  std::size_t size_in_bytes = s_element_count * sizeof(uint32_t);
64 
65  // Create buffer objects.
66  // This create aligned buffer object both on host and device.
67  slot.m_bo_in1 = std::make_unique<xrt::bo>(
68  *slot.m_device, size_in_bytes, xrt::bo::flags::normal, bank_grp_in1);
69  slot.m_bo_in2 = std::make_unique<xrt::bo>(
70  *slot.m_device, size_in_bytes, xrt::bo::flags::normal, bank_grp_in2);
71  slot.m_bo_out = std::make_unique<xrt::bo>(
72  *slot.m_device, size_in_bytes, xrt::bo::flags::normal, bank_grp_out);
73 
74  // Create run object and set arguments for subsequent executions.
75  slot.m_run = std::make_unique<xrt::run>(*slot.m_kernel);
76  slot.m_run->set_arg(s_krnl_param_in1, *slot.m_bo_in1);
77  slot.m_run->set_arg(s_krnl_param_in2, *slot.m_bo_in2);
78  slot.m_run->set_arg(s_krnl_param_out, *slot.m_bo_out);
79  slot.m_run->set_arg(s_krnl_param_size, s_element_count);
80 
81  ++slotIdx;
82  }
83 
84  return StatusCode::SUCCESS;
85 }
86 
87 StatusCode VectorAddXRTExampleAlg::execute(const EventContext& ctx) const {
88 
89  // Get the slot (thread) specific data.
90  const SlotData& slot = *m_slots.get(ctx);
91 
92  // Map buffer objects to host pointers.
93  uint32_t* buffer_in1 = slot.m_bo_in1->map<uint32_t*>();
94  uint32_t* buffer_in2 = slot.m_bo_in2->map<uint32_t*>();
95  uint32_t* buffer_out = slot.m_bo_out->map<uint32_t*>();
96 
97  // Initialize the buffers with random data.
98  for (int i = 0; i < s_element_count; ++i) {
99  buffer_in1[i] = rand() % s_element_count;
100  buffer_in2[i] = rand() % s_element_count;
101  }
102 
103  ATH_MSG_DEBUG("Transfer data buffer to device");
104  slot.m_bo_in1->sync(XCL_BO_SYNC_BO_TO_DEVICE);
105  slot.m_bo_in2->sync(XCL_BO_SYNC_BO_TO_DEVICE);
106 
107  ATH_MSG_DEBUG("Running kernel");
108  slot.m_run->start();
109  slot.m_run->wait();
110 
111  ATH_MSG_DEBUG("Transfer data back to host");
112  slot.m_bo_out->sync(XCL_BO_SYNC_BO_FROM_DEVICE);
113 
114  // Check that kernel results are correct.
115  bool correct = true;
116  for (int i = 0; i < s_element_count; ++i) {
117  uint32_t cpu_result = buffer_in1[i] + buffer_in2[i];
118  if (buffer_out[i] != cpu_result) {
119  ATH_MSG_ERROR("Error: Result mismatch: i = "
120  << i << ": CPU result = " << cpu_result
121  << " Device result = " << buffer_out[i]);
122  correct = false;
123  break;
124  }
125  }
126  if (correct) {
127  ATH_MSG_INFO("XRT vector addition test PASSED!");
128  } else {
129  ATH_MSG_ERROR("XRT vector addition test FAILED!");
130  return StatusCode::FAILURE;
131  }
132 
133  return StatusCode::SUCCESS;
134 }
135 
136 } // namespace AthExXRT
AthExXRT::VectorAddXRTExampleAlg::m_devices
std::vector< std::shared_ptr< xrt::device > > m_devices
Definition: VectorAddXRTExampleAlg.h:56
AthExXRT::VectorAddXRTExampleAlg::SlotData
Slot-specific state.
Definition: VectorAddXRTExampleAlg.h:69
ATH_MSG_INFO
#define ATH_MSG_INFO(x)
Definition: AthMsgStreamMacros.h:31
xAOD::uint32_t
setEventNumber uint32_t
Definition: EventInfo_v1.cxx:127
AthExXRT::VectorAddXRTExampleAlg::s_krnl_param_in2
static constexpr int s_krnl_param_in2
Definition: VectorAddXRTExampleAlg.h:61
AthExXRT
Definition: VectorAddOCLExampleAlg.cxx:11
AthExXRT::VectorAddXRTExampleAlg::SlotData::m_run
std::unique_ptr< xrt::run > m_run
Kernel run object.
Definition: VectorAddXRTExampleAlg.h:77
VectorAddXRTExampleAlg.h
python.utils.AtlRunQueryDQUtils.p
p
Definition: AtlRunQueryDQUtils.py:209
ATH_MSG_ERROR
#define ATH_MSG_ERROR(x)
Definition: AthMsgStreamMacros.h:33
LArG4FSStartPointFilter.rand
rand
Definition: LArG4FSStartPointFilter.py:80
lumiFormat.i
int i
Definition: lumiFormat.py:85
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
ATH_MSG_DEBUG
#define ATH_MSG_DEBUG(x)
Definition: AthMsgStreamMacros.h:29
AthExXRT::VectorAddXRTExampleAlg::initialize_worker
virtual StatusCode initialize_worker() override
Initialization per process.
Definition: VectorAddXRTExampleAlg.cxx:28
AthExXRT::VectorAddXRTExampleAlg::s_krnl_param_in1
static constexpr int s_krnl_param_in1
Definition: VectorAddXRTExampleAlg.h:60
AthExXRT::VectorAddXRTExampleAlg::s_krnl_param_size
static constexpr int s_krnl_param_size
Definition: VectorAddXRTExampleAlg.h:63
ATH_CHECK
#define ATH_CHECK
Definition: AthCheckMacros.h:40
AthExXRT::VectorAddXRTExampleAlg::execute
virtual StatusCode execute(const EventContext &ctx) const override
Definition: VectorAddXRTExampleAlg.cxx:87
AthExXRT::VectorAddXRTExampleAlg::m_DeviceMgmtSvc
ServiceHandle< AthXRT::IDeviceMgmtSvc > m_DeviceMgmtSvc
The XRT device manager to use.
Definition: VectorAddXRTExampleAlg.h:48
AthExXRT::VectorAddXRTExampleAlg::s_krnl_param_out
static constexpr int s_krnl_param_out
Definition: VectorAddXRTExampleAlg.h:62
AthExXRT::VectorAddXRTExampleAlg::SlotData::m_bo_in1
std::unique_ptr< xrt::bo > m_bo_in1
Buffer objects.
Definition: VectorAddXRTExampleAlg.h:80
AthExXRT::VectorAddXRTExampleAlg::SlotData::m_bo_out
std::unique_ptr< xrt::bo > m_bo_out
Definition: VectorAddXRTExampleAlg.h:82
AthExXRT::VectorAddXRTExampleAlg::initialize_global
virtual StatusCode initialize_global() override
Global XRT initialization.
Definition: VectorAddXRTExampleAlg.cxx:10
AthExXRT::VectorAddXRTExampleAlg::s_krnl_name
static constexpr char s_krnl_name[]
Definition: VectorAddXRTExampleAlg.h:53
AthExXRT::VectorAddXRTExampleAlg::s_element_count
static constexpr int s_element_count
Definition: VectorAddXRTExampleAlg.h:66
AthExXRT::VectorAddXRTExampleAlg::m_slots
SG::SlotSpecificObj< SlotData > m_slots
List of slot-specific data.
Definition: VectorAddXRTExampleAlg.h:86
AthExXRT::VectorAddXRTExampleAlg::SlotData::m_bo_in2
std::unique_ptr< xrt::bo > m_bo_in2
Definition: VectorAddXRTExampleAlg.h:81