ATLAS Offline Software
VectorMultXRTExampleAlg.cxx
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3 //
4 
5 // Local include(s).
7 
8 namespace AthExXRT {
9 
11 
12  ATH_MSG_INFO("initialize_global()");
13 
14  ATH_CHECK(m_DeviceMgmtSvc.retrieve());
15 
16  // Retrieve the list of device(s) providing the kernel.
17  m_devices = m_DeviceMgmtSvc->get_xrt_devices_by_kernel_name(s_krnl_name);
18  if (m_devices.empty()) {
19  ATH_MSG_ERROR("No XRT device provides kernel '" << s_krnl_name << "'");
20  return StatusCode::FAILURE;
21  }
22  ATH_MSG_INFO("Retrieved " << m_devices.size()<<" devices running "<< s_krnl_name);
23 
24  return StatusCode::SUCCESS;
25 
26 }
27 
29 
30  ATH_MSG_INFO("initialize_worker()");
31  // Allocate slot specific resources.
32  std::size_t slotIdx = 0;
33  for (SlotData& slot : m_slots) {
34  ATH_MSG_DEBUG("Allocating resources for slot " << slotIdx);
35 
36  // If multiple device are available, we can select one based on the slot
37  // number in a round-robin fashion. This is just an example, and more
38  // complex logic could be implemented here to take advantage of multiple
39  // devices.
40  const std::size_t device_idx = slotIdx % m_devices.size();
41  ATH_MSG_DEBUG("Using device " << device_idx << " for slot " << slotIdx);
42  slot.m_device = m_devices[device_idx];
43 
44  // Create kernel objects.
45  try {
46  slot.m_kernel = std::make_unique<xrt::kernel>(
47  *slot.m_device, slot.m_device->get_xclbin_uuid(), s_krnl_name);
48  } catch (...) {
49  std::exception_ptr p = std::current_exception();
51  "Could not create XRT kernel '"
52  << s_krnl_name
53  << "', check that correct XCLBIN is programmed by AthXRT service");
54  return StatusCode::FAILURE;
55  }
56 
57  // Get memory bank groups for device buffers.
58  xrtMemoryGroup bank_grp_in1 = slot.m_kernel->group_id(s_krnl_param_in1);
59  xrtMemoryGroup bank_grp_in2 = slot.m_kernel->group_id(s_krnl_param_in2);
60  xrtMemoryGroup bank_grp_out = slot.m_kernel->group_id(s_krnl_param_out);
61 
62  std::size_t size_in_bytes = s_element_count * sizeof(uint32_t);
63 
64  // Create buffer objects.
65  // This create aligned buffer object both on host and device.
66  slot.m_bo_in1 = std::make_unique<xrt::bo>(
67  *slot.m_device, size_in_bytes, xrt::bo::flags::normal, bank_grp_in1);
68  slot.m_bo_in2 = std::make_unique<xrt::bo>(
69  *slot.m_device, size_in_bytes, xrt::bo::flags::normal, bank_grp_in2);
70  slot.m_bo_out = std::make_unique<xrt::bo>(
71  *slot.m_device, size_in_bytes, xrt::bo::flags::normal, bank_grp_out);
72 
73  // Create run object and set arguments for subsequent executions.
74  slot.m_run = std::make_unique<xrt::run>(*slot.m_kernel);
75  slot.m_run->set_arg(s_krnl_param_in1, *slot.m_bo_in1);
76  slot.m_run->set_arg(s_krnl_param_in2, *slot.m_bo_in2);
77  slot.m_run->set_arg(s_krnl_param_out, *slot.m_bo_out);
78  slot.m_run->set_arg(s_krnl_param_size, s_element_count);
79 
80  ++slotIdx;
81  }
82 
83  return StatusCode::SUCCESS;
84 }
85 
86 StatusCode VectorMultXRTExampleAlg::execute(const EventContext& ctx) const {
87 
88  // Get the slot (thread) specific data.
89  const SlotData& slot = *m_slots.get(ctx);
90 
91  // Map buffer objects to host pointers.
92  uint32_t* buffer_in1 = slot.m_bo_in1->map<uint32_t*>();
93  uint32_t* buffer_in2 = slot.m_bo_in2->map<uint32_t*>();
94  uint32_t* buffer_out = slot.m_bo_out->map<uint32_t*>();
95 
96  // Initialize the buffers with random data.
97  for (int i = 0; i < s_element_count; ++i) {
98  buffer_in1[i] = rand() % s_element_count;
99  buffer_in2[i] = rand() % s_element_count;
100  }
101 
102  ATH_MSG_DEBUG("Transfer data buffer to device");
103  slot.m_bo_in1->sync(XCL_BO_SYNC_BO_TO_DEVICE);
104  slot.m_bo_in2->sync(XCL_BO_SYNC_BO_TO_DEVICE);
105 
106  ATH_MSG_DEBUG("Running kernel");
107  slot.m_run->start();
108  slot.m_run->wait();
109 
110  ATH_MSG_DEBUG("Transfer data back to host");
111  slot.m_bo_out->sync(XCL_BO_SYNC_BO_FROM_DEVICE);
112 
113  // Check that kernel results are correct.
114  bool correct = true;
115  for (int i = 0; i < s_element_count; ++i) {
116  uint32_t cpu_result = buffer_in1[i] * buffer_in2[i];
117  if (buffer_out[i] != cpu_result) {
118  ATH_MSG_ERROR("Error: Result mismatch: i = "
119  << i << ": CPU result = " << cpu_result
120  << " Device result = " << buffer_out[i]);
121  correct = false;
122  break;
123  }
124  }
125  if (correct) {
126  ATH_MSG_INFO("XRT vector multiplication test PASSED!");
127  } else {
128  ATH_MSG_ERROR("XRT vector multiplication test FAILED!");
129  return StatusCode::FAILURE;
130  }
131 
132  return StatusCode::SUCCESS;
133 }
134 
135 } // namespace AthExXRT
AthExXRT::VectorMultXRTExampleAlg::initialize_global
virtual StatusCode initialize_global() override
Global XRT initialization.
Definition: VectorMultXRTExampleAlg.cxx:10
AthExXRT::VectorMultXRTExampleAlg::s_element_count
static constexpr int s_element_count
Definition: VectorMultXRTExampleAlg.h:66
AthExXRT::VectorMultXRTExampleAlg::initialize_worker
virtual StatusCode initialize_worker() override
Initialization per process.
Definition: VectorMultXRTExampleAlg.cxx:28
ATH_MSG_INFO
#define ATH_MSG_INFO(x)
Definition: AthMsgStreamMacros.h:31
xAOD::uint32_t
setEventNumber uint32_t
Definition: EventInfo_v1.cxx:127
AthExXRT::VectorMultXRTExampleAlg::m_devices
std::vector< std::shared_ptr< xrt::device > > m_devices
Definition: VectorMultXRTExampleAlg.h:56
AthExXRT::VectorMultXRTExampleAlg::SlotData::m_bo_in2
std::unique_ptr< xrt::bo > m_bo_in2
Definition: VectorMultXRTExampleAlg.h:81
AthExXRT
Definition: VectorAddOCLExampleAlg.cxx:11
AthExXRT::VectorMultXRTExampleAlg::execute
virtual StatusCode execute(const EventContext &ctx) const override
Function executing the algorithm.
Definition: VectorMultXRTExampleAlg.cxx:86
AthExXRT::VectorMultXRTExampleAlg::SlotData::m_run
std::unique_ptr< xrt::run > m_run
Kernel run object.
Definition: VectorMultXRTExampleAlg.h:77
AthExXRT::VectorMultXRTExampleAlg::SlotData::m_bo_out
std::unique_ptr< xrt::bo > m_bo_out
Definition: VectorMultXRTExampleAlg.h:82
AthExXRT::VectorMultXRTExampleAlg::SlotData::m_bo_in1
std::unique_ptr< xrt::bo > m_bo_in1
Buffer objects.
Definition: VectorMultXRTExampleAlg.h:80
AthExXRT::VectorMultXRTExampleAlg::s_krnl_param_in2
static constexpr int s_krnl_param_in2
Definition: VectorMultXRTExampleAlg.h:61
python.utils.AtlRunQueryDQUtils.p
p
Definition: AtlRunQueryDQUtils.py:209
ATH_MSG_ERROR
#define ATH_MSG_ERROR(x)
Definition: AthMsgStreamMacros.h:33
LArG4FSStartPointFilter.rand
rand
Definition: LArG4FSStartPointFilter.py:80
VectorMultXRTExampleAlg.h
lumiFormat.i
int i
Definition: lumiFormat.py:85
EL::StatusCode
::StatusCode StatusCode
StatusCode definition for legacy code.
Definition: PhysicsAnalysis/D3PDTools/EventLoop/EventLoop/StatusCode.h:22
ATH_MSG_DEBUG
#define ATH_MSG_DEBUG(x)
Definition: AthMsgStreamMacros.h:29
ATH_CHECK
#define ATH_CHECK
Definition: AthCheckMacros.h:40
AthExXRT::VectorMultXRTExampleAlg::s_krnl_param_in1
static constexpr int s_krnl_param_in1
Definition: VectorMultXRTExampleAlg.h:60
AthExXRT::VectorMultXRTExampleAlg::m_DeviceMgmtSvc
ServiceHandle< AthXRT::IDeviceMgmtSvc > m_DeviceMgmtSvc
The XRT device manager to use.
Definition: VectorMultXRTExampleAlg.h:48
AthExXRT::VectorMultXRTExampleAlg::s_krnl_param_out
static constexpr int s_krnl_param_out
Definition: VectorMultXRTExampleAlg.h:62
AthExXRT::VectorMultXRTExampleAlg::s_krnl_param_size
static constexpr int s_krnl_param_size
Definition: VectorMultXRTExampleAlg.h:63
AthExXRT::VectorMultXRTExampleAlg::s_krnl_name
static constexpr char s_krnl_name[]
Definition: VectorMultXRTExampleAlg.h:53
AthExXRT::VectorMultXRTExampleAlg::m_slots
SG::SlotSpecificObj< SlotData > m_slots
List of slot-specific data.
Definition: VectorMultXRTExampleAlg.h:86
AthExXRT::VectorMultXRTExampleAlg::SlotData
Slot-specific state.
Definition: VectorMultXRTExampleAlg.h:69