ATLAS Offline Software
Loading...
Searching...
No Matches
AthCUDAServicesConfig.py
Go to the documentation of this file.
1# Copyright (C) 2002-2026 CERN for the benefit of the ATLAS collaboration
2
3# Framework import(s).
4from AthenaConfiguration.ComponentFactory import CompFactory
5from AthenaConfiguration.ComponentAccumulator import ComponentAccumulator
6
7# Local import(s).
8from AthCUDAServices.CUDAConfigFlags import CUDAStream
9
10
12 acc = ComponentAccumulator()
13 svc = CompFactory.getComp("AthCUDA::GPUSystemInfoSvc")("GPUSystemInfoSvc")
14 acc.addService(svc)
15 return acc
16
17
def HostMemoryResourceToolCfg(flags, **kwargs):
    '''Configure the default CUDA host memory resource tool

    The "bare" AthCUDA::HostMemoryResourceTool is created from the received
    keyword arguments. Depending on the job's flags it is then decorated:

      - flags.Device.Memory.Debug wraps the current tool into a
        DebugMemoryResourceTool (both before and after the caching layer);
      - flags.Device.Memory.Cache puts a BinaryPageMemoryResourceSvc in
        front of the tool, exposed through a MemoryResourceSvcAdaptorTool.
        Caching is used because allocating pinned host memory is
        relatively slow.

    The returned ComponentAccumulator holds the outermost tool as its
    private tool, and the caching service (if any) as a service.
    '''

    def _withDebug(inner, name):
        # Only touch the debug tool type when debugging was requested.
        if not flags.Device.Memory.Debug:
            return inner
        return CompFactory.AthDevice.DebugMemoryResourceTool(
            name, MRTool=inner)

    # The accumulator receiving the configuration.
    acc = ComponentAccumulator()

    # Start from the bare memory resource, optionally with debugging.
    mrTool = _withDebug(
        CompFactory.AthCUDA.HostMemoryResourceTool(**kwargs),
        'CUDAHostMemoryResourceDebugTool')

    # Put the caching layer on top of it if requested.
    if flags.Device.Memory.Cache:
        pageSvc = CompFactory.AthDevice.BinaryPageMemoryResourceSvc(
            'CUDAHostCachedMemoryResourceSvc',
            MRTool=mrTool)
        acc.addService(pageSvc)
        mrTool = _withDebug(
            CompFactory.AthDevice.MemoryResourceSvcAdaptorTool(
                'CUDAHostCachedMemoryResourceTool',
                MRSvc=pageSvc),
            'CUDAHostCachedMemoryResourceDebugTool')

    # The outermost tool is what clients should use.
    acc.setPrivateTools(mrTool)
    return acc
57
58
def DeviceMemoryResourceToolCfg(flags, **kwargs):
    '''Configure the default CUDA device memory resource tool

    The "bare" AthCUDA::DeviceMemoryResourceTool is created from the received
    keyword arguments. Depending on the job's flags it is then decorated:

      - flags.Device.Memory.Debug wraps the current tool into a
        DebugMemoryResourceTool (both before and after the caching layer);
      - flags.Device.Memory.Cache puts a BinaryPageMemoryResourceSvc in
        front of the tool, exposed through a MemoryResourceSvcAdaptorTool.
        Caching is used because allocating device memory is relatively
        slow.

    The returned ComponentAccumulator holds the outermost tool as its
    private tool, and the caching service (if any) as a service.
    '''

    def _withDebug(inner, name):
        # Only touch the debug tool type when debugging was requested.
        if not flags.Device.Memory.Debug:
            return inner
        return CompFactory.AthDevice.DebugMemoryResourceTool(
            name, MRTool=inner)

    # The accumulator receiving the configuration.
    acc = ComponentAccumulator()

    # Start from the bare memory resource, optionally with debugging.
    mrTool = _withDebug(
        CompFactory.AthCUDA.DeviceMemoryResourceTool(**kwargs),
        'CUDADeviceMemoryResourceDebugTool')

    # Put the caching layer on top of it if requested.
    if flags.Device.Memory.Cache:
        pageSvc = CompFactory.AthDevice.BinaryPageMemoryResourceSvc(
            'CUDADeviceCachedMemoryResourceSvc',
            MRTool=mrTool)
        acc.addService(pageSvc)
        mrTool = _withDebug(
            CompFactory.AthDevice.MemoryResourceSvcAdaptorTool(
                'CUDADeviceCachedMemoryResourceTool',
                MRSvc=pageSvc),
            'CUDADeviceCachedMemoryResourceDebugTool')

    # The outermost tool is what clients should use.
    acc.setPrivateTools(mrTool)
    return acc
98
99
def ManagedMemoryResourceToolCfg(flags, **kwargs):
    '''Configure the default CUDA managed memory resource tool

    The "bare" AthCUDA::ManagedMemoryResourceTool is created from the
    received keyword arguments. Depending on the job's flags it is then
    decorated:

      - flags.Device.Memory.Debug wraps the current tool into a
        DebugMemoryResourceTool (both before and after the caching layer);
      - flags.Device.Memory.Cache puts a BinaryPageMemoryResourceSvc in
        front of the tool, exposed through a MemoryResourceSvcAdaptorTool.
        Caching is used because allocating managed memory is relatively
        slow.

    The returned ComponentAccumulator holds the outermost tool as its
    private tool, and the caching service (if any) as a service.
    '''

    def _withDebug(inner, name):
        # Only touch the debug tool type when debugging was requested.
        if not flags.Device.Memory.Debug:
            return inner
        return CompFactory.AthDevice.DebugMemoryResourceTool(
            name, MRTool=inner)

    # The accumulator receiving the configuration.
    acc = ComponentAccumulator()

    # Start from the bare memory resource, optionally with debugging.
    mrTool = _withDebug(
        CompFactory.AthCUDA.ManagedMemoryResourceTool(**kwargs),
        'CUDAManagedMemoryResourceDebugTool')

    # Put the caching layer on top of it if requested.
    if flags.Device.Memory.Cache:
        pageSvc = CompFactory.AthDevice.BinaryPageMemoryResourceSvc(
            'CUDAManagedCachedMemoryResourceSvc',
            MRTool=mrTool)
        acc.addService(pageSvc)
        mrTool = _withDebug(
            CompFactory.AthDevice.MemoryResourceSvcAdaptorTool(
                'CUDAManagedCachedMemoryResourceTool',
                MRSvc=pageSvc),
            'CUDAManagedCachedMemoryResourceDebugTool')

    # The outermost tool is what clients should use.
    acc.setPrivateTools(mrTool)
    return acc
139
140
def SingleStreamToolCfg(flags, **kwargs):
    '''Configure a tool handing out one CUDA stream to the entire job

    The keyword arguments are forwarded to the AthCUDA::SingleStreamSvc
    service. The returned ComponentAccumulator carries that service, and has
    an AthCUDA::StreamSvcAdaptorTool (named "SingleStreamTool") pointing at
    the service as its private tool.
    '''

    acc = ComponentAccumulator()

    # The service doing the actual work.
    svc = CompFactory.AthCUDA.SingleStreamSvc(**kwargs)
    acc.addService(svc)

    # Clients talk to the service through this adaptor tool.
    acc.setPrivateTools(
        CompFactory.AthCUDA.StreamSvcAdaptorTool(
            'SingleStreamTool', StreamSvc=svc))

    return acc
160
161
def PerEventStreamToolCfg(flags, **kwargs):
    '''Configure a tool handing out one CUDA stream per event/slot

    The keyword arguments are forwarded to the AthCUDA::PerEventStreamSvc
    service. The returned ComponentAccumulator carries that service, and has
    an AthCUDA::StreamSvcAdaptorTool (named "PerEventStreamTool") pointing at
    the service as its private tool.
    '''

    acc = ComponentAccumulator()

    # The service doing the actual work.
    svc = CompFactory.AthCUDA.PerEventStreamSvc(**kwargs)
    acc.addService(svc)

    # Clients talk to the service through this adaptor tool.
    acc.setPrivateTools(
        CompFactory.AthCUDA.StreamSvcAdaptorTool(
            'PerEventStreamTool', StreamSvc=svc))

    return acc
181
182
def PerComponentStreamToolCfg(flags, **kwargs):
    '''Configure a tool handing out one CUDA stream per component

    "Component" stands for algorithm/tool/service here. The keyword
    arguments are forwarded to AthCUDA::PerComponentStreamTool, which
    becomes the private tool of the returned ComponentAccumulator.
    '''

    acc = ComponentAccumulator()
    # No service is involved, the tool implements the behaviour itself.
    acc.setPrivateTools(CompFactory.AthCUDA.PerComponentStreamTool(**kwargs))
    return acc
196
197
199 '''Tool providing one CUDA stream per component and event/slot
200 '''
201
202 # Create an accumulator to hold the configuration.
203 result = ComponentAccumulator()
204
205 # Create a tool that implements this behaviour.
206 streamTool = CompFactory.AthCUDA.PerEventAndComponentStreamTool(**kwargs)
207 result.setPrivateTools(streamTool)
208
209 # Return the CA.
210 return result
211
212
def StreamToolCfg(flags, **kwargs):
    '''Configure the default CUDA stream provider tool of the job

    The value of flags.CUDA.Stream is compared against the known CUDAStream
    strategies, and the configuration function belonging to the first match
    is called with the received keyword arguments. Its primary component
    becomes the private tool of the returned ComponentAccumulator, and the
    rest of its configuration is merged in.

    Raises ValueError if flags.CUDA.Stream matches none of the strategies.
    '''

    acc = ComponentAccumulator()

    # Strategy -> configuration function, in the order they are checked.
    strategies = (
        (CUDAStream.Single, SingleStreamToolCfg),
        (CUDAStream.PerEvent, PerEventStreamToolCfg),
        (CUDAStream.PerComponent, PerComponentStreamToolCfg),
        (CUDAStream.PerEventAndComponent, PerEventAndComponentStreamToolCfg),
    )

    for strategy, cfgFunc in strategies:
        if flags.CUDA.Stream == strategy:
            cfg = cfgFunc(flags, **kwargs)
            acc.setPrivateTools(cfg.getPrimary())
            acc.merge(cfg)
            return acc

    # Nothing matched the configured value.
    raise ValueError(f"Invalid CUDA stream strategy: {flags.CUDA.Stream}")
243
244
def CopyToolCfg(flags, **kwargs):
    '''Configure the synchronous copy object provider tool

    The keyword arguments are forwarded to AthCUDA::CopyTool, which becomes
    the private tool of the returned ComponentAccumulator.
    '''

    acc = ComponentAccumulator()
    copyTool = CompFactory.AthCUDA.CopyTool(**kwargs)
    acc.setPrivateTools(copyTool)
    return acc
257
258
def AsyncCopyToolCfg(flags, **kwargs):
    '''Configure the asynchronous copy object provider tool

    The keyword arguments are properties of AthCUDA::AsyncCopyTool, and are
    passed to that tool only. They are deliberately not forwarded to
    StreamToolCfg: the stream tool/service is a different component type,
    which should not receive the copy tool's name or properties.

    Unless the caller provides a "StreamTool" property explicitly, the job's
    default stream tool (as set up by StreamToolCfg) is attached to the copy
    tool.

    The returned ComponentAccumulator has the copy tool as its private tool,
    and carries whatever the stream tool's configuration needs.
    '''

    # Create an accumulator to hold the configuration.
    result = ComponentAccumulator()

    # Set up the default stream tool, unless the caller chose one.
    if 'StreamTool' not in kwargs:
        streamCfg = StreamToolCfg(flags)
        kwargs['StreamTool'] = streamCfg.getPrimary()
        result.merge(streamCfg)

    # Create the copy tool with all of its properties.
    result.setPrivateTools(CompFactory.AthCUDA.AsyncCopyTool(**kwargs))

    # Return the CA.
    return result
SingleStreamToolCfg(flags, **kwargs)
DeviceMemoryResourceToolCfg(flags, **kwargs)
PerComponentStreamToolCfg(flags, **kwargs)
PerEventStreamToolCfg(flags, **kwargs)
PerEventAndComponentStreamToolCfg(flags, **kwargs)
AsyncCopyToolCfg(flags, **kwargs)
ManagedMemoryResourceToolCfg(flags, **kwargs)
HostMemoryResourceToolCfg(flags, **kwargs)