ATLAS Offline Software
CaloGPUHybridClusterProcessor.cxx
//
// Copyright (C) 2002-2023 CERN for the benefit of the ATLAS collaboration
//
// Dear emacs, this is -*- c++ -*-
//

#include "CaloGPUHybridClusterProcessor.h"

#include "AthenaKernel/errorcheck.h"

#include "AthenaMonitoringKernel/Monitored.h"

#include <algorithm>

//We'll use a quick and dirty helper with placement new...
#include <new>

#include "xAODCaloEvent/CaloClusterContainer.h"
#include "xAODTrigCalo/CaloClusterTrigAuxContainer.h"

#include "CaloUtils/CaloClusterStoreHelper.h"
#include "StoreGate/WriteDecorHandle.h"

#include "boost/chrono/chrono.hpp"
#include "boost/chrono/thread_clock.hpp"

using namespace CaloRecGPU;

CaloGPUHybridClusterProcessor::CaloGPUHybridClusterProcessor(const std::string & name, ISvcLocator * pSvcLocator):
  AthReentrantAlgorithm(name, pSvcLocator),
  CaloGPUTimed(this),
  m_constantDataSent(false),
  m_temporariesSize(0)
{
}
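
//Initialization is split in two: initialize_non_CUDA() covers the setup that
//does not invoke CUDA functions, while initialize_CUDA() covers the part that
//does (constant data conversion and GPU memory pre-allocation).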

StatusCode CaloGPUHybridClusterProcessor::initialize_non_CUDA()
{
  ATH_CHECK( m_clusterOutput.initialize() );

  if (m_clusterCellLinkOutput.key().empty())
    {
      m_clusterCellLinkOutput = m_clusterOutput.key() + "_links";
    }
  ATH_CHECK( m_clusterCellLinkOutput.initialize() );

  bool any_failed = false;

  auto retrieve_and_report = [&](auto & var, const auto & type, bool & falsify_if_empty)
  {
    if (var.empty())
      {
        falsify_if_empty = false;
        ATH_MSG_DEBUG("There is nothing to retrieve for " << type << ".");
      }
    else if (var.retrieve().isFailure())
      {
        ATH_MSG_ERROR("Failed to retrieve " << type << ": " << var);
        any_failed = true;
      }
    else
      {
        ATH_MSG_DEBUG("Successfully retrieved " << type << ": " << var);
      }
  };
  //A generic lambda to prevent code repetition: it flags `any_failed` on
  //retrieval errors, and sets the passed-in flag to false when the handle
  //is empty, so "not configured" can be told apart from "failed".

  bool checker = true;

  retrieve_and_report(m_preGPUoperations, "pre-GPU operations", checker);
  retrieve_and_report(m_GPUoperations, "GPU operations", checker);
  retrieve_and_report(m_postGPUoperations, "post-GPU operations", checker);

  if (!m_skipConversions)
    {
      retrieve_and_report(m_transformConstantData, "constant data to GPU transformer", checker);
      retrieve_and_report(m_transformForGPU, "event data to GPU transformer", checker);
      retrieve_and_report(m_transformBackToCPU, "GPU to Athena transformer", checker);
    }
  else
    {
      m_transformConstantData.disable();
      m_transformForGPU.disable();
      m_transformBackToCPU.disable();
    }

  if (m_doPlots)
    {
      checker = true;
      retrieve_and_report(m_plotterTool, "plotter tool", checker);
      m_doPlots = checker;
    }
  else
    {
      m_plotterTool.disable();
    }

  if (any_failed)
    {
      return StatusCode::FAILURE;
    }

  m_constantDataSent = false;

  for (const auto & tool : m_GPUoperations)
    {
      m_temporariesSize = std::max(m_temporariesSize, tool->size_of_temporaries());
    }
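  //All GPU tools share a single per-event scratch buffer, so it is sized to
  //the maximum of the individual requests rather than their sum.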

  if (m_writeTriggerSpecificInfo)
    {
      m_mDecor_ncells = m_clusterOutput.key() + "." + m_mDecor_ncells.key();
    }

  ATH_CHECK( m_mDecor_ncells.initialize(m_writeTriggerSpecificInfo) );
  //(The decoration key is only actually used when m_writeTriggerSpecificInfo is set.)

  return StatusCode::SUCCESS;
}


StatusCode CaloGPUHybridClusterProcessor::initialize_CUDA()
{
  if (!m_deferConstantDataToFirstEvent)
    {
      ATH_CHECK( m_transformConstantData->initialize() );
      //Not sure if this is needed or the tool will get initialized by this point.

      ATH_CHECK( m_transformConstantData->convert(m_constantData, m_doPlots) );
      m_constantDataSent = true;
    }

  if (size_t(m_numPreAllocatedGPUData) > 0)
    {
      ATH_MSG_INFO("Pre-allocating event data and temporary buffers for " << size_t(m_numPreAllocatedGPUData) << " parallel events.");

      m_eventDataThreadedHolder.resize(m_numPreAllocatedGPUData);
      m_temporariesThreadedHolder.resize(m_numPreAllocatedGPUData);
      //This will allocate the object holders.

      m_eventDataThreadedHolder.operate_on_all( [&](EventDataHolder & edh)
      {
        edh.allocate(true);
      }
      );
      m_temporariesThreadedHolder.operate_on_all( [&](simple_GPU_pointer_holder & ph)
      {
        ph.allocate(m_temporariesSize);
      }
      );
      //This will allocate all the memory at this point.
      //Also useful to prevent/debug potential allocation issues?
      //But the main point is really reducing the execute times...
    }

  return StatusCode::SUCCESS;
}


StatusCode CaloGPUHybridClusterProcessor::execute(const EventContext & ctx) const
{
  SG::WriteHandle<xAOD::CaloClusterContainer> cluster_collection(m_clusterOutput, ctx);

  if (m_writeTriggerSpecificInfo)
    {
      ATH_CHECK( cluster_collection.record(std::make_unique<xAOD::CaloClusterContainer>(), std::make_unique<xAOD::CaloClusterTrigAuxContainer>()) );
    }
  else
    {
      ATH_CHECK( CaloClusterStoreHelper::AddContainerWriteHandle(cluster_collection) );
    }

  //ATH_CHECK(CaloClusterStoreHelper::AddContainerWriteHandle(&(*evtStore()), cluster_collection, msg()));

  xAOD::CaloClusterContainer * cluster_collection_ptr = cluster_collection.ptr();
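
  //The overall flow: (optional) deferred constant-data upload, then
  //pre-GPU (CPU) tools -> CPU-to-GPU conversion -> GPU tools ->
  //GPU-to-CPU conversion -> post-GPU (CPU) tools, with optional plotting
  //and per-tool timing wrapped around each step.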

  if (m_deferConstantDataToFirstEvent && !m_constantDataSent.load())
    {
      std::lock_guard<std::mutex> lock_guard(m_mutex);
      if (!m_constantDataSent.load())
        {
          ConstantDataHolder * cdh_ptr ATLAS_THREAD_SAFE = &m_constantData;
          ATH_CHECK( m_transformConstantData->convert(ctx, *cdh_ptr, m_doPlots) );
          m_constantDataSent.store(true);
        }
    }
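  //(Double-checked locking: the atomic flag is tested outside the mutex too,
  // so after the first event the common path stays lock-free.)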

  EventDataHolder * event_data_ptr = nullptr;

  Helpers::separate_thread_accessor<EventDataHolder> sep_th_acc_1(m_eventDataThreadedHolder, event_data_ptr);
  //This is a RAII wrapper to access an object held by Helpers::separate_thread_holder,
  //to ensure the event data is appropriately released when we are done processing.

  if (event_data_ptr == nullptr && !m_skipConversions)
    {
      ATH_MSG_ERROR("Could not get valid Event Data Holder! Event: " << ctx.evt() );
      return StatusCode::FAILURE;
    }

  if (!m_skipConversions)
    {
      event_data_ptr->allocate(true);
      //No-op if already allocated.
    }

  simple_GPU_pointer_holder * temporaries_data_ptr_holder = nullptr;

  Helpers::separate_thread_accessor<simple_GPU_pointer_holder> sep_th_acc_2(m_temporariesThreadedHolder, temporaries_data_ptr_holder);
  if (!temporaries_data_ptr_holder)
    {
      ATH_MSG_ERROR("temporaries_data_ptr_holder is null in CaloGPUHybridClusterProcessor::execute");
      return StatusCode::FAILURE;
    }
  temporaries_data_ptr_holder->allocate(m_temporariesSize);
  //This will not perform any allocations if they've already been done.

  if ( (temporaries_data_ptr_holder->get_pointer() == nullptr) && !m_skipConversions && m_temporariesSize > 0 )
    {
      ATH_MSG_ERROR("Could not get valid temporary buffer holder! Event: " << ctx.evt() );
      return StatusCode::FAILURE;
    }

  const ConstantDataHolder & constant_data_holder ATLAS_THREAD_SAFE = m_constantData;
  //Just to shut up the checker. We know what we are doing...

  using clock_type = boost::chrono::thread_clock;
  auto time_cast = [](const auto & before, const auto & after)
  {
    return boost::chrono::duration_cast<boost::chrono::microseconds>(after - before).count();
  };
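  //(boost::chrono::thread_clock counts CPU time consumed by this thread only,
  // so the measurements exclude time spent scheduled out or blocked.)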

  std::vector<size_t> times;

  size_t plot_time = 0;

  if (m_measureTimes)
    {
      const size_t time_size = m_preGPUoperations.size() + m_GPUoperations.size() + m_postGPUoperations.size() + m_doPlots + 2 * !m_skipConversions;
      //+2 for the conversions
      //+1 for the plotter (only added at the end)
      times.reserve(time_size);
    }

  if (m_doPlots)
    {
      auto t1 = clock_type::now();
      ATH_CHECK( m_plotterTool->update_plots_start(ctx, constant_data_holder, cluster_collection_ptr) );
      auto t2 = clock_type::now();
      if (m_measureTimes)
        {
          plot_time += time_cast(t1, t2);
        }
    }

  for (const auto & pre_GPU_tool : m_preGPUoperations)
    {
      auto t1 = clock_type::now();
      ATH_CHECK( pre_GPU_tool->execute(ctx, cluster_collection_ptr) );
      auto t2 = clock_type::now();
      if (m_measureTimes)
        {
          times.push_back(time_cast(t1, t2));
        }
      if (m_doPlots)
        {
          auto t3 = clock_type::now();
          ATH_CHECK( m_plotterTool->update_plots(ctx, constant_data_holder, cluster_collection_ptr, pre_GPU_tool.get()) );
          auto t4 = clock_type::now();
          if (m_measureTimes)
            {
              plot_time += time_cast(t3, t4);
            }
        }
    }
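
  //With the CPU-side preprocessing done, ship the cell and cluster data to the GPU.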
  if (!m_skipConversions)
    {
      auto t3 = clock_type::now();
      ATH_CHECK( m_transformForGPU->convert(ctx, constant_data_holder, cluster_collection_ptr, *event_data_ptr) );
      auto t4 = clock_type::now();
      if (m_measureTimes)
        {
          times.push_back(time_cast(t3, t4));
        }
    }

  if (m_doPlots)
    {
      auto t1 = clock_type::now();
      ATH_CHECK( m_plotterTool->update_plots(ctx, constant_data_holder, cluster_collection_ptr, *event_data_ptr, m_transformForGPU.get()) );
      auto t2 = clock_type::now();
      if (m_measureTimes)
        {
          plot_time += time_cast(t1, t2);
        }
    }

  for (const auto & GPU_tool : m_GPUoperations)
    {
      auto t5 = clock_type::now();
      ATH_CHECK( GPU_tool->execute(ctx, constant_data_holder, *event_data_ptr, temporaries_data_ptr_holder->get_pointer()) );
      auto t6 = clock_type::now();
      if (m_measureTimes)
        {
          times.push_back(time_cast(t5, t6));
        }
      if (m_doPlots)
        {
          auto t3 = clock_type::now();
          ATH_CHECK( m_plotterTool->update_plots(ctx, constant_data_holder, cluster_collection_ptr, *event_data_ptr, GPU_tool.get()) );
          auto t4 = clock_type::now();
          if (m_measureTimes)
            {
              plot_time += time_cast(t3, t4);
            }
        }
    }
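
  //Bring the GPU processing results back into the xAOD cluster container.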
  if (!m_skipConversions)
    {
      auto t7 = clock_type::now();
      ATH_CHECK( m_transformBackToCPU->convert(ctx, constant_data_holder, *event_data_ptr, cluster_collection_ptr) );
      auto t8 = clock_type::now();
      if (m_measureTimes)
        {
          times.push_back(time_cast(t7, t8));
        }
    }

  if (m_doPlots)
    {
      auto t1 = clock_type::now();
      ATH_CHECK( m_plotterTool->update_plots(ctx, constant_data_holder, cluster_collection_ptr, *event_data_ptr, m_transformBackToCPU.get()) );
      auto t2 = clock_type::now();
      if (m_measureTimes)
        {
          plot_time += time_cast(t1, t2);
        }
    }

  for (const auto & post_GPU_tool : m_postGPUoperations)
    {
      auto t9 = clock_type::now();
      ATH_CHECK( post_GPU_tool->execute(ctx, cluster_collection_ptr) );
      auto t10 = clock_type::now();
      if (m_measureTimes)
        {
          times.push_back(time_cast(t9, t10));
        }
      if (m_doPlots)
        {
          auto t3 = clock_type::now();
          ATH_CHECK( m_plotterTool->update_plots(ctx, constant_data_holder, cluster_collection_ptr, post_GPU_tool.get()) );
          auto t4 = clock_type::now();
          if (m_measureTimes)
            {
              plot_time += time_cast(t3, t4);
            }
        }
    }

  if (m_doPlots)
    {
      auto t1 = clock_type::now();
      ATH_CHECK( m_plotterTool->update_plots_end(ctx, constant_data_holder, cluster_collection_ptr) );
      auto t2 = clock_type::now();
      if (m_measureTimes)
        {
          plot_time += time_cast(t1, t2);
        }
    }
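
  //For the trigger, decorate each cluster with its number of constituent cells.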
  if (m_writeTriggerSpecificInfo)
    {
      SG::WriteDecorHandle<xAOD::CaloClusterContainer, int> decor_handle(m_mDecor_ncells, ctx);

      for (const xAOD::CaloCluster * cl : *cluster_collection_ptr)
        {
          const CaloClusterCellLink * cell_links = cl->getCellLinks();
          if (!cell_links)
            {
              decor_handle(*cl) = 0;
            }
          else
            {
              decor_handle(*cl) = cell_links->size();
            }
        }
    }

  ATH_MSG_DEBUG("Created cluster container with " << cluster_collection->size() << " clusters");

  SG::WriteHandle<CaloClusterCellLinkContainer> cell_links(m_clusterCellLinkOutput, ctx);

  ATH_CHECK( CaloClusterStoreHelper::finalizeClusters(cell_links, cluster_collection.ptr()) );

  if (m_measureTimes)
    {
      if (m_doPlots)
        {
          times.push_back(plot_time);
        }
      record_times(ctx.evt(), times);
    }

  return StatusCode::SUCCESS;
}


StatusCode CaloGPUHybridClusterProcessor::finalize()
{
  if (m_measureTimes)
    {
      std::string header_string;

      auto add_name_to_string = [&](const auto & obj)
      {
        std::string rep = obj->name();
        std::replace(rep.begin(), rep.end(), ' ', '_');
        header_string += rep + " ";
      };

      for (const auto & pre_GPU_tool : m_preGPUoperations)
        {
          add_name_to_string(pre_GPU_tool);
        }

      if (!m_skipConversions)
        {
          add_name_to_string(m_transformForGPU);
        }

      for (const auto & GPU_tool : m_GPUoperations)
        {
          add_name_to_string(GPU_tool);
        }

      if (!m_skipConversions)
        {
          add_name_to_string(m_transformBackToCPU);
        }

      for (const auto & post_GPU_tool : m_postGPUoperations)
        {
          add_name_to_string(post_GPU_tool);
        }

      if (m_doPlots)
        {
          add_name_to_string(m_plotterTool);
        }

      print_times(header_string, m_preGPUoperations.size() + m_GPUoperations.size() + m_postGPUoperations.size() + 2 * !m_skipConversions + m_doPlots);
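      //(The header columns follow the order in which execute() fills the times
      // vector: pre-GPU tools, CPU-to-GPU conversion, GPU tools, GPU-to-CPU
      // conversion, post-GPU tools, and finally the plotter total.)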
    }

  if (m_doPlots)
    {
      ATH_CHECK( m_plotterTool->finalize_plots() );
    }

  return StatusCode::SUCCESS;
}
Referenced declarations (from the generated cross-references):

CaloGPUHybridClusterProcessor members:

CaloGPUHybridClusterProcessor(const std::string &name, ISvcLocator *pSvcLocator)
virtual StatusCode initialize_non_CUDA() override
  Initialization that does not invoke CUDA functions.
virtual StatusCode initialize_CUDA() override
  Initialization that invokes CUDA functions.
virtual StatusCode execute(const EventContext &ctx) const override
SG::WriteHandleKey< xAOD::CaloClusterContainer > m_clusterOutput
  The name of the key in StoreGate for the output CaloClusterContainer.
SG::WriteHandleKey< CaloClusterCellLinkContainer > m_clusterCellLinkOutput
  The name of the key in StoreGate for the output CaloClusterCellLinkContainer.
SG::WriteDecorHandleKey< xAOD::CaloClusterContainer > m_mDecor_ncells
  Key to the handle for writing the number of cells as a decoration.
ToolHandleArray< CaloClusterCollectionProcessor > m_preGPUoperations
  Tools to be applied to the clusters before being sent to the GPU for processing.
ToolHandleArray< CaloClusterGPUProcessor > m_GPUoperations
  Tools to be applied to the clusters on the GPU.
ToolHandleArray< CaloClusterCollectionProcessor > m_postGPUoperations
  Tools to be applied to the clusters after returning from the GPU.
ToolHandle< ICaloClusterGPUConstantTransformer > m_transformConstantData
  The tool that will convert the constant data from the CPU to the GPU.
ToolHandle< ICaloClusterGPUInputTransformer > m_transformForGPU
  The tool that will actually convert the data from the CPU to the GPU.
ToolHandle< ICaloClusterGPUOutputTransformer > m_transformBackToCPU
  The tool that will convert the data from the GPU back to the CPU.
ToolHandle< ICaloClusterGPUPlotter > m_plotterTool
  An optional plotter, for testing and/or debugging purposes.
Gaudi::Property< bool > m_doPlots
  If true, calls the plotter specified by m_plotterTool at every tool execution.
Gaudi::Property< bool > m_measureTimes
  If true, times are recorded to the file given by m_timeFileName.
Gaudi::Property< bool > m_writeTriggerSpecificInfo
  If true, writes some trigger-specific decorations.
Gaudi::Property< bool > m_skipConversions
  If true, both constant and event data conversion is skipped.
Gaudi::Property< bool > m_deferConstantDataToFirstEvent
  If true, the constant data is only converted and sent to the GPU on the first event,...
Gaudi::Property< size_t > m_numPreAllocatedGPUData
  Number of events for which to pre-allocate space on GPU memory (should ideally be set to the expected...
std::atomic< bool > m_constantDataSent
  A flag to signal that the constant data has been adequately sent to the GPU.
std::mutex m_mutex
  This mutex is locked when sending the constant data on the first event to ensure thread safety.
size_t m_temporariesSize
  The size of the temporary buffer to allocate for the GPU tools that will be called.
CaloRecGPU::Helpers::separate_thread_holder< CaloRecGPU::EventDataHolder > m_eventDataThreadedHolder ATLAS_THREAD_SAFE
  A way to reduce allocations over multiple threads by keeping a cache of previously allocated objects ...

From CaloGPUTimed:

CaloGPUTimed(T *ptr)
void print_times(const std::string &header, const size_t time_size) const
void record_times(const size_t event_num, const std::vector< size_t > &times) const

Other referenced types and helpers:

static StatusCode CaloClusterStoreHelper::AddContainerWriteHandle(SG::WriteHandle< xAOD::CaloClusterContainer > &clusColl)
  Creates a new xAOD::CaloClusterContainer in the given WriteHandle + CaloClusterAuxContainer and recor...
static StatusCode CaloClusterStoreHelper::finalizeClusters(SG::WriteHandle< CaloClusterCellLinkContainer > &h, xAOD::CaloClusterContainer *pClusterColl)
  Finalize clusters (move CaloClusterCellLink to a separate container).
CaloRecGPU::ConstantDataHolder (DataHolders.h:27)
  Holds CPU and GPU versions of the geometry and cell noise information, which are assumed to be consta...
CaloRecGPU::EventDataHolder (DataHolders.h:73)
  Holds the mutable per-event information (clusters and cells) and provides utilities to convert betwee...
  void allocate(const bool also_GPU=true)
CaloRecGPU::simple_GPU_pointer_holder
  A simple RAII wrapper to ensure proper allocation and deallocation of GPU memory in a void * for the ...
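
For reference, the borrow/release pattern behind Helpers::separate_thread_accessor, as used in execute() above. This is an illustrative sketch based only on what this file itself shows; the actual interface is defined in CaloRecGPU's helper headers and may differ in detail:

{
  EventDataHolder * event_data_ptr = nullptr;
  //The accessor borrows a cached (or newly created) object from the holder
  //and sets the pointer to it; the object stays reserved for this thread.
  Helpers::separate_thread_accessor<EventDataHolder> accessor(m_eventDataThreadedHolder, event_data_ptr);
  if (event_data_ptr != nullptr)
    {
      event_data_ptr->allocate(true);
      //No-op if this cached object was already allocated by a previous event,
      //which is how repeated GPU allocations are avoided.
      //... process the event ...
    }
} //<- the accessor's destructor returns the object to the holder's cache,
  //   so a later event (possibly on another thread) can reuse it.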