ATLAS Offline Software
Loading...
Searching...
No Matches
VectorAddXRTExampleAlg.cxx
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3//
4
5// Local include(s).
7
8namespace AthExXRT {
9
11
// Body of initialize_global().
// NOTE(review): the function's signature line is not present in this listing;
// the name is taken from the log message below.
//
// Retrieves the XRT device management service, then caches in m_devices the
// device(s) that provide the kernel named s_krnl_name. Returns
// StatusCode::FAILURE when no such device exists, StatusCode::SUCCESS otherwise.
12 ATH_MSG_INFO("initialize_global()");
13
// ATH_CHECK evaluates the expression and checks it for errors.
14 ATH_CHECK(m_DeviceMgmtSvc.retrieve());
15
16 // Retrieve the list of device(s) providing the kernel.
17 m_devices = m_DeviceMgmtSvc->get_xrt_devices_by_kernel_name(s_krnl_name);
// An empty list is reported as an error here; initialize_worker() later
// computes `slotIdx % m_devices.size()` on this same list.
18 if (m_devices.empty()) {
19 ATH_MSG_ERROR("No XRT device provides kernel '" << s_krnl_name << "'");
20 return StatusCode::FAILURE;
21 }
22 ATH_MSG_INFO("Retrieved " << m_devices.size()<<" devices running "<< s_krnl_name);
23
24 return StatusCode::SUCCESS;
25
26}
27
29
// Body of initialize_worker().
// NOTE(review): the function's signature line is not present in this listing;
// the name is taken from the log message below.
//
// For every slot in m_slots this picks a device, creates the kernel object,
// allocates the two input buffers and the output buffer, and prepares a run
// object with all kernel arguments set. Returns StatusCode::FAILURE if the
// kernel object cannot be created, StatusCode::SUCCESS otherwise.
30 ATH_MSG_INFO("initialize_worker()");
31
32 // Allocate slot specific resources.
33 std::size_t slotIdx = 0;
34 for (SlotData& slot : m_slots) {
35 ATH_MSG_DEBUG("Allocating resources for slot " << slotIdx);
36
37 // If multiple devices are available, we can select one based on the slot
38 // number in a round-robin fashion. This is just an example, and more
39 // complex logic could be implemented here to take advantage of multiple
40 // devices.
41 const std::size_t device_idx = slotIdx % m_devices.size();
42 ATH_MSG_DEBUG("Using device " << device_idx << " for slot " << slotIdx);
43 slot.m_device = m_devices[device_idx];
44
45 // Create kernel objects.
// Any exception thrown while constructing the kernel is caught and turned
// into a FAILURE status.
46 try {
47 slot.m_kernel = std::make_unique<xrt::kernel>(
48 *slot.m_device, slot.m_device->get_xclbin_uuid(), s_krnl_name);
49 } catch (...) {
50 std::exception_ptr p = std::current_exception();
// NOTE(review): listing lines 51 and 53 are absent from this extract, so the
// error-message statement below is shown incomplete; how `p` is used cannot
// be determined from here.
52 "Could not create XRT kernel '"
54 << "', check that correct XCLBIN is programmed by AthXRT service");
55 return StatusCode::FAILURE;
56 }
57
58 // Get memory bank groups for device buffers.
// One group id is looked up per kernel buffer argument (in1, in2, out).
59 xrtMemoryGroup bank_grp_in1 = slot.m_kernel->group_id(s_krnl_param_in1);
60 xrtMemoryGroup bank_grp_in2 = slot.m_kernel->group_id(s_krnl_param_in2);
61 xrtMemoryGroup bank_grp_out = slot.m_kernel->group_id(s_krnl_param_out);
62
// Every buffer holds s_element_count values of type uint32_t.
63 std::size_t size_in_bytes = s_element_count * sizeof(uint32_t);
64
65 // Create buffer objects.
66 // This creates an aligned buffer object both on host and device.
67 slot.m_bo_in1 = std::make_unique<xrt::bo>(
68 *slot.m_device, size_in_bytes, xrt::bo::flags::normal, bank_grp_in1);
69 slot.m_bo_in2 = std::make_unique<xrt::bo>(
70 *slot.m_device, size_in_bytes, xrt::bo::flags::normal, bank_grp_in2);
71 slot.m_bo_out = std::make_unique<xrt::bo>(
72 *slot.m_device, size_in_bytes, xrt::bo::flags::normal, bank_grp_out);
73
74 // Create run object and set arguments for subsequent executions.
// execute() only calls start()/wait() on this run object, so all four
// kernel arguments are bound here once.
75 slot.m_run = std::make_unique<xrt::run>(*slot.m_kernel);
76 slot.m_run->set_arg(s_krnl_param_in1, *slot.m_bo_in1);
77 slot.m_run->set_arg(s_krnl_param_in2, *slot.m_bo_in2);
78 slot.m_run->set_arg(s_krnl_param_out, *slot.m_bo_out);
79 slot.m_run->set_arg(s_krnl_param_size, s_element_count);
80
81 ++slotIdx;
82 }
83
84 return StatusCode::SUCCESS;
85}
86
87StatusCode VectorAddXRTExampleAlg::execute(const EventContext& ctx) const {
88
89 // Get the slot (thread) specific data.
90 const SlotData& slot = *m_slots.get(ctx);
91
92 // Map buffer objects to host pointers.
93 uint32_t* buffer_in1 = slot.m_bo_in1->map<uint32_t*>();
94 uint32_t* buffer_in2 = slot.m_bo_in2->map<uint32_t*>();
95 uint32_t* buffer_out = slot.m_bo_out->map<uint32_t*>();
96
97 // Initialize the buffers with random data.
98 for (int i = 0; i < s_element_count; ++i) {
99 buffer_in1[i] = rand() % s_element_count;
100 buffer_in2[i] = rand() % s_element_count;
101 }
102
103 ATH_MSG_DEBUG("Transfer data buffer to device");
104 slot.m_bo_in1->sync(XCL_BO_SYNC_BO_TO_DEVICE);
105 slot.m_bo_in2->sync(XCL_BO_SYNC_BO_TO_DEVICE);
106
107 ATH_MSG_DEBUG("Running kernel");
108 slot.m_run->start();
109 slot.m_run->wait();
110
111 ATH_MSG_DEBUG("Transfer data back to host");
112 slot.m_bo_out->sync(XCL_BO_SYNC_BO_FROM_DEVICE);
113
114 // Check that kernel results are correct.
115 bool correct = true;
116 for (int i = 0; i < s_element_count; ++i) {
117 uint32_t cpu_result = buffer_in1[i] + buffer_in2[i];
118 if (buffer_out[i] != cpu_result) {
119 ATH_MSG_ERROR("Error: Result mismatch: i = "
120 << i << ": CPU result = " << cpu_result
121 << " Device result = " << buffer_out[i]);
122 correct = false;
123 break;
124 }
125 }
126 if (correct) {
127 ATH_MSG_INFO("XRT vector addition test PASSED!");
128 } else {
129 ATH_MSG_ERROR("XRT vector addition test FAILED!");
130 return StatusCode::FAILURE;
131 }
132
133 return StatusCode::SUCCESS;
134}
135
136} // namespace AthExXRT
#define ATH_CHECK
Evaluate an expression and check for errors.
#define ATH_MSG_ERROR(x)
#define ATH_MSG_INFO(x)
#define ATH_MSG_DEBUG(x)
std::vector< std::shared_ptr< xrt::device > > m_devices
virtual StatusCode initialize_worker() override
Initialization per process.
ServiceHandle< AthXRT::IDeviceMgmtSvc > m_DeviceMgmtSvc
The XRT device manager to use.
virtual StatusCode initialize_global() override
Global XRT initialization.
virtual StatusCode execute(const EventContext &ctx) const override
SG::SlotSpecificObj< SlotData > m_slots
List of slot-specific data.
std::unique_ptr< xrt::run > m_run
Kernel run object.
std::unique_ptr< xrt::bo > m_bo_in1
Buffer objects.