4 from AthenaConfiguration.ComponentFactory import CompFactory
5 from AthenaConfiguration.ComponentAccumulator import ComponentAccumulator
8 from AthCUDAServices.CUDAConfigFlags import CUDAStream
12 acc = ComponentAccumulator()
13 svc = CompFactory.getComp("AthCUDA::GPUSystemInfoSvc")("GPUSystemInfoSvc")
19 '''Default CUDA host memory resource tool to use
21 It makes sure that appropriate caching would be used, as allocating pinned
22 host memory is relatively slow.
26 result = ComponentAccumulator()
30 tool = CompFactory.AthCUDA.HostMemoryResourceTool(**kwargs)
31 if flags.Device.Memory.Debug:
32 debugTool = CompFactory.AthDevice.DebugMemoryResourceTool(
33 'CUDAHostMemoryResourceDebugTool',
37 if flags.Device.Memory.Cache:
38 cacheSvc = CompFactory.AthDevice.BinaryPageMemoryResourceSvc(
39 'CUDAHostCachedMemoryResourceSvc',
41 result.addService(cacheSvc)
42 cacheTool = CompFactory.AthDevice.MemoryResourceSvcAdaptorTool(
43 'CUDAHostCachedMemoryResourceTool',
46 if flags.Device.Memory.Debug:
47 debugTool = CompFactory.AthDevice.DebugMemoryResourceTool(
48 'CUDAHostCachedMemoryResourceDebugTool',
53 result.setPrivateTools(tool)
60 '''Default CUDA device memory resource tool to use
62 It makes sure that appropriate caching would be used, as allocating device
63 memory is relatively slow.
67 result = ComponentAccumulator()
71 tool = CompFactory.AthCUDA.DeviceMemoryResourceTool(**kwargs)
72 if flags.Device.Memory.Debug:
73 debugTool = CompFactory.AthDevice.DebugMemoryResourceTool(
74 'CUDADeviceMemoryResourceDebugTool',
78 if flags.Device.Memory.Cache:
79 cacheSvc = CompFactory.AthDevice.BinaryPageMemoryResourceSvc(
80 'CUDADeviceCachedMemoryResourceSvc',
82 result.addService(cacheSvc)
83 cacheTool = CompFactory.AthDevice.MemoryResourceSvcAdaptorTool(
84 'CUDADeviceCachedMemoryResourceTool',
87 if flags.Device.Memory.Debug:
88 debugTool = CompFactory.AthDevice.DebugMemoryResourceTool(
89 'CUDADeviceCachedMemoryResourceDebugTool',
94 result.setPrivateTools(tool)
101 '''Default CUDA managed memory resource tool to use
103 It makes sure that appropriate caching would be used, as allocating managed
104 memory is relatively slow.
108 result = ComponentAccumulator()
112 tool = CompFactory.AthCUDA.ManagedMemoryResourceTool(**kwargs)
113 if flags.Device.Memory.Debug:
114 debugTool = CompFactory.AthDevice.DebugMemoryResourceTool(
115 'CUDAManagedMemoryResourceDebugTool',
119 if flags.Device.Memory.Cache:
120 cacheSvc = CompFactory.AthDevice.BinaryPageMemoryResourceSvc(
121 'CUDAManagedCachedMemoryResourceSvc',
123 result.addService(cacheSvc)
124 cacheTool = CompFactory.AthDevice.MemoryResourceSvcAdaptorTool(
125 'CUDAManagedCachedMemoryResourceTool',
128 if flags.Device.Memory.Debug:
129 debugTool = CompFactory.AthDevice.DebugMemoryResourceTool(
130 'CUDAManagedCachedMemoryResourceDebugTool',
135 result.setPrivateTools(tool)
142 '''Tool providing a single CUDA stream for all components in the entire job
146 result = ComponentAccumulator()
149 streamSvc = CompFactory.AthCUDA.SingleStreamSvc(**kwargs)
150 result.addService(streamSvc)
154 streamTool = CompFactory.AthCUDA.StreamSvcAdaptorTool(
155 'SingleStreamTool', StreamSvc=streamSvc)
156 result.setPrivateTools(streamTool)
163 '''Tool providing one CUDA stream per event/slot
167 result = ComponentAccumulator()
170 streamSvc = CompFactory.AthCUDA.PerEventStreamSvc(**kwargs)
171 result.addService(streamSvc)
175 streamTool = CompFactory.AthCUDA.StreamSvcAdaptorTool(
176 'PerEventStreamTool', StreamSvc=streamSvc)
177 result.setPrivateTools(streamTool)
184 '''Tool providing one CUDA stream per component (algorithm/tool/service)
188 result = ComponentAccumulator()
191 streamTool = CompFactory.AthCUDA.PerComponentStreamTool(**kwargs)
192 result.setPrivateTools(streamTool)
199 '''Tool providing one CUDA stream per component and event/slot
203 result = ComponentAccumulator()
206 streamTool = CompFactory.AthCUDA.PerEventAndComponentStreamTool(**kwargs)
207 result.setPrivateTools(streamTool)
214 '''Default CUDA stream provider tool to use
218 result = ComponentAccumulator()
221 if flags.CUDA.Stream == CUDAStream.Single:
223 result.setPrivateTools(cfg.getPrimary())
225 elif flags.CUDA.Stream == CUDAStream.PerEvent:
227 result.setPrivateTools(cfg.getPrimary())
229 elif flags.CUDA.Stream == CUDAStream.PerComponent:
231 result.setPrivateTools(cfg.getPrimary())
233 elif flags.CUDA.Stream == CUDAStream.PerEventAndComponent:
235 result.setPrivateTools(cfg.getPrimary())
238 raise ValueError(f"Invalid CUDA stream strategy: {flags.CUDA.Stream}")
246 '''Synchronous copy object provider tool
250 result = ComponentAccumulator()
253 result.setPrivateTools(CompFactory.AthCUDA.CopyTool(**kwargs))
260 '''Asynchronous copy object provider tool
264 result = ComponentAccumulator()
267 copyTool = CompFactory.AthCUDA.AsyncCopyTool(**kwargs)
269 copyTool.StreamTool = streamTool.getPrimary()
270 result.merge(streamTool)
271 result.setPrivateTools(copyTool)
SingleStreamToolCfg(flags, **kwargs)
DeviceMemoryResourceToolCfg(flags, **kwargs)
PerComponentStreamToolCfg(flags, **kwargs)
PerEventStreamToolCfg(flags, **kwargs)
PerEventAndComponentStreamToolCfg(flags, **kwargs)
StreamToolCfg(flags, **kwargs)
GPUSystemInfoSvcCfg(flags)
AsyncCopyToolCfg(flags, **kwargs)
ManagedMemoryResourceToolCfg(flags, **kwargs)
CopyToolCfg(flags, **kwargs)
HostMemoryResourceToolCfg(flags, **kwargs)