ATLAS Offline Software
Loading...
Searching...
No Matches
VectorMultXRTExampleAlg.cxx
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3//
4
5// Local include(s).
7
8namespace AthExXRT {
9
11
12 ATH_MSG_INFO("initialize_global()");
13
14 ATH_CHECK(m_DeviceMgmtSvc.retrieve());
15
16 // Retrieve the list of device(s) providing the kernel.
17 m_devices = m_DeviceMgmtSvc->get_xrt_devices_by_kernel_name(s_krnl_name);
18 if (m_devices.empty()) {
19 ATH_MSG_ERROR("No XRT device provides kernel '" << s_krnl_name << "'");
20 return StatusCode::FAILURE;
21 }
22 ATH_MSG_INFO("Retrieved " << m_devices.size()<<" devices running "<< s_krnl_name);
23
24 return StatusCode::SUCCESS;
25
26}
27
29
30 ATH_MSG_INFO("initialize_worker()");
31 // Allocate slot specific resources.
32 std::size_t slotIdx = 0;
33 for (SlotData& slot : m_slots) {
34 ATH_MSG_DEBUG("Allocating resources for slot " << slotIdx);
35
36 // If multiple devices are available, we can select one based on the slot
37 // number in a round-robin fashion. This is just an example, and more
38 // complex logic could be implemented here to take advantage of multiple
39 // devices.
40 const std::size_t device_idx = slotIdx % m_devices.size();
41 ATH_MSG_DEBUG("Using device " << device_idx << " for slot " << slotIdx);
42 slot.m_device = m_devices[device_idx];
43
44 // Create kernel objects.
45 try {
46 slot.m_kernel = std::make_unique<xrt::kernel>(
47 *slot.m_device, slot.m_device->get_xclbin_uuid(), s_krnl_name);
48 } catch (...) {
49 std::exception_ptr p = std::current_exception();
51 "Could not create XRT kernel '"
53 << "', check that correct XCLBIN is programmed by AthXRT service");
54 return StatusCode::FAILURE;
55 }
56
57 // Get memory bank groups for device buffers.
58 xrtMemoryGroup bank_grp_in1 = slot.m_kernel->group_id(s_krnl_param_in1);
59 xrtMemoryGroup bank_grp_in2 = slot.m_kernel->group_id(s_krnl_param_in2);
60 xrtMemoryGroup bank_grp_out = slot.m_kernel->group_id(s_krnl_param_out);
61
62 std::size_t size_in_bytes = s_element_count * sizeof(uint32_t);
63
64 // Create buffer objects.
65 // This creates aligned buffer objects both on host and device.
66 slot.m_bo_in1 = std::make_unique<xrt::bo>(
67 *slot.m_device, size_in_bytes, xrt::bo::flags::normal, bank_grp_in1);
68 slot.m_bo_in2 = std::make_unique<xrt::bo>(
69 *slot.m_device, size_in_bytes, xrt::bo::flags::normal, bank_grp_in2);
70 slot.m_bo_out = std::make_unique<xrt::bo>(
71 *slot.m_device, size_in_bytes, xrt::bo::flags::normal, bank_grp_out);
72
73 // Create run object and set arguments for subsequent executions.
74 slot.m_run = std::make_unique<xrt::run>(*slot.m_kernel);
75 slot.m_run->set_arg(s_krnl_param_in1, *slot.m_bo_in1);
76 slot.m_run->set_arg(s_krnl_param_in2, *slot.m_bo_in2);
77 slot.m_run->set_arg(s_krnl_param_out, *slot.m_bo_out);
78 slot.m_run->set_arg(s_krnl_param_size, s_element_count);
79
80 ++slotIdx;
81 }
82
83 return StatusCode::SUCCESS;
84}
85
86StatusCode VectorMultXRTExampleAlg::execute(const EventContext& ctx) const {
87
88 // Get the slot (thread) specific data.
89 const SlotData& slot = *m_slots.get(ctx);
90
91 // Map buffer objects to host pointers.
92 uint32_t* buffer_in1 = slot.m_bo_in1->map<uint32_t*>();
93 uint32_t* buffer_in2 = slot.m_bo_in2->map<uint32_t*>();
94 uint32_t* buffer_out = slot.m_bo_out->map<uint32_t*>();
95
96 // Initialize the buffers with random data.
97 for (int i = 0; i < s_element_count; ++i) {
98 buffer_in1[i] = rand() % s_element_count;
99 buffer_in2[i] = rand() % s_element_count;
100 }
101
102 ATH_MSG_DEBUG("Transfer data buffer to device");
103 slot.m_bo_in1->sync(XCL_BO_SYNC_BO_TO_DEVICE);
104 slot.m_bo_in2->sync(XCL_BO_SYNC_BO_TO_DEVICE);
105
106 ATH_MSG_DEBUG("Running kernel");
107 slot.m_run->start();
108 slot.m_run->wait();
109
110 ATH_MSG_DEBUG("Transfer data back to host");
111 slot.m_bo_out->sync(XCL_BO_SYNC_BO_FROM_DEVICE);
112
113 // Check that kernel results are correct.
114 bool correct = true;
115 for (int i = 0; i < s_element_count; ++i) {
116 uint32_t cpu_result = buffer_in1[i] * buffer_in2[i];
117 if (buffer_out[i] != cpu_result) {
118 ATH_MSG_ERROR("Error: Result mismatch: i = "
119 << i << ": CPU result = " << cpu_result
120 << " Device result = " << buffer_out[i]);
121 correct = false;
122 break;
123 }
124 }
125 if (correct) {
126 ATH_MSG_INFO("XRT vector multiplication test PASSED!");
127 } else {
128 ATH_MSG_ERROR("XRT vector multiplication test FAILED!");
129 return StatusCode::FAILURE;
130 }
131
132 return StatusCode::SUCCESS;
133}
134
135} // namespace AthExXRT
#define ATH_CHECK
Evaluate an expression and check for errors.
#define ATH_MSG_ERROR(x)
#define ATH_MSG_INFO(x)
#define ATH_MSG_DEBUG(x)
virtual StatusCode execute(const EventContext &ctx) const override
Function executing the algorithm.
virtual StatusCode initialize_worker() override
Initialization per process.
virtual StatusCode initialize_global() override
Global XRT initialization.
SG::SlotSpecificObj< SlotData > m_slots
List of slot-specific data.
ServiceHandle< AthXRT::IDeviceMgmtSvc > m_DeviceMgmtSvc
The XRT device manager to use.
std::vector< std::shared_ptr< xrt::device > > m_devices
std::unique_ptr< xrt::run > m_run
Kernel run object.
std::unique_ptr< xrt::bo > m_bo_in1
Buffer objects.