ATLAS Offline Software
KernelRunnerSvc.cxx
//
// Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
//

// Local include(s).
#include "KernelRunnerSvc.h"

// Project include(s).
#include "AthCUDACore/Info.h"

// System include(s).
#include <cassert>
#include <sstream>
#include <thread>

namespace AthCUDA {

   StatusCode KernelRunnerSvc::initialize() {

      // Reset the internal counter(s).
      m_totalTasks = 0;
      m_gpuTasks = 0;

      // If no devices are available or no kernels are allowed to run on the
      // GPU, then don't even set up the implementation object.
      if( ( Info::instance().nDevices() == 0 ) ||
          ( m_nKernels.value() == 0 ) ) {
         ATH_MSG_INFO( "Will run everything on the CPU." );
         return StatusCode::SUCCESS;
      }

      // Access the stream pool service.
      ATH_CHECK( m_streamPoolSvc.retrieve() );

      // Create the implementation object.
      m_impl = std::make_unique< KernelRunnerSvcImpl >( *m_streamPoolSvc,
                                                        *this );

      // Tell the user what happened.
      std::ostringstream str;
      str << Info::instance();
      ATH_MSG_INFO( "Started service for running " << m_nKernels.value()
                    << " GPU kernel(s) in parallel on device(s):\n"
                    << str.str() );

      // Return gracefully.
      return StatusCode::SUCCESS;
   }

   StatusCode KernelRunnerSvc::finalize() {

      // Destroy the implementation object.
      m_impl.reset();

      // Tell the user what happened.
      ATH_MSG_INFO( " o All task(s) executed: " << m_totalTasks.load() );
      const double percentage =
         ( m_totalTasks != 0 ?
           ( static_cast< double >( m_gpuTasks.load() ) /
             static_cast< double >( m_totalTasks.load() ) * 100.0 ) : 0.0 );
      ATH_MSG_INFO( " o GPU task(s) executed: " << m_gpuTasks.load() << " ("
                    << percentage << "%)" );

      // Finalise the base class.
      ATH_CHECK( Service::finalize() );

      // Return gracefully.
      return StatusCode::SUCCESS;
   }

   StatusCode KernelRunnerSvc::execute( std::unique_ptr< IKernelTask > task ) {

      // Make sure that we received a valid task.
      if( task.get() == nullptr ) {
         ATH_MSG_ERROR( "Invalid task object received" );
         return StatusCode::FAILURE;
      }

      // One way or another, we will execute this task.
      ++m_totalTasks;

      // Check if a GPU is available, and no other thread is launching a GPU
      // calculation right now.
      if( ( ! m_impl ) || m_streamPoolSvc->isEmpty() ||
          ( ( m_nKernels.value() > 0 ) &&
            ( m_kernelsInFlight.load() >= m_nKernels.value() ) ) ) {

         // If so, let's just execute the task in the current thread.
         ATH_MSG_VERBOSE( "Executing a task on the CPU" );
         StreamHolder dummy;
         if( task->finished( task->execute( dummy ),
                             IKernelTask::Synchronous ) != 0 ) {
            ATH_MSG_ERROR( "Failed to execute task in the caller thread!" );
            return StatusCode::FAILURE;
         }

         // Return gracefully.
         return StatusCode::SUCCESS;
      }

      // If we got here, we need to schedule the task for execution on a GPU.

      // Give the task to the implementation object to launch it.
      ATH_MSG_VERBOSE( "Executing an offloaded task" );
      ++m_kernelsInFlight;
      ++m_gpuTasks;
      m_impl->execute( std::move( task ) );

      // Return gracefully.
      return StatusCode::SUCCESS;
   }

   void KernelRunnerSvc::setTaskFinished() {

      // Update the internal counter.
      --m_kernelsInFlight;
      return;
   }

} // namespace AthCUDA
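For orientation, the sketch below shows how client code could hand a task to this service; it is not part of KernelRunnerSvc.cxx. The IKernelRunnerSvc interface providing execute( std::unique_ptr< IKernelTask > ) is implied by the override above, while the header paths and the "AthCUDA::KernelRunnerSvc" instance name are assumptions made for the example. Whether the task ends up on a CUDA stream or runs synchronously in the caller's thread is decided inside execute(): the CPU fallback is used when no device is available, when the stream pool is empty, or when m_nKernels kernels are already in flight.

// Sketch only: submitting a kernel task from client code. Header paths and
// the service instance name are assumptions; see IKernelRunnerSvc.h and
// IKernelTask.h for the exact API.

// Project include(s). (Assumed header locations.)
#include "AthCUDAInterfaces/IKernelRunnerSvc.h"
#include "AthCUDAInterfaces/IKernelTask.h"

// Gaudi include(s).
#include "GaudiKernel/ServiceHandle.h"
#include "GaudiKernel/StatusCode.h"

// System include(s).
#include <memory>
#include <utility>

StatusCode submitExampleTask( std::unique_ptr< AthCUDA::IKernelTask > task ) {

   // Handle to the kernel runner service. The instance name used here is an
   // assumption for this example.
   ServiceHandle< AthCUDA::IKernelRunnerSvc >
      runnerSvc( "AthCUDA::KernelRunnerSvc", "submitExampleTask" );
   if( runnerSvc.retrieve().isFailure() ) {
      return StatusCode::FAILURE;
   }

   // Hand over ownership of the task. The service either schedules it on a
   // CUDA stream, or executes it synchronously in this thread if no GPU
   // capacity is available.
   return runnerSvc->execute( std::move( task ) );
}

Once an offloaded task completes, m_kernelsInFlight is brought back down through setTaskFinished(), presumably invoked by the KernelRunnerSvcImpl object that received a reference to the service in initialize().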