ATLAS Offline Software — GPUKernelSizeOptimizerSvc class reference
#include <GPUKernelSizeOptimizerSvc.h>
|
bool | m_dynpar_support = false |
|
bool | m_coopgroup_support = false |
|
std::unordered_map< std::string, std::vector< KernelRecord > > | m_kernel_map |
|
Gaudi::Property< std::vector< std::string > > | m_kernelFiles {this, "KernelSizeInput", {}, "Kernel size input JSON files"} |
| List of JSON files from where to read (hopefully optimized) kernel sizes for different GPUs. More...
|
|
Gaudi::Property< bool > | m_outputSizes {this, "OutputSizes", true, "Write out last used kernel sizes"} |
| If true, writes the (last used) kernel sizes to an output JSON file. More...
|
|
Gaudi::Property< std::string > | m_outputFile {this, "OutputFile", "sizes.json", "Kernel size output file"} |
| If m_outputSizes is true, the file to which the kernel sizes should be output. More...
|
|
◆ GPUKernelSizeOptimizerSvc()
GPUKernelSizeOptimizerSvc::GPUKernelSizeOptimizerSvc |
( |
const std::string & |
name, |
|
|
ISvcLocator * |
svc |
|
) |
| |
◆ can_use_cooperative_groups()
virtual bool GPUKernelSizeOptimizerSvc::can_use_cooperative_groups |
( |
| ) |
const |
|
inlineoverridevirtual |
◆ can_use_dynamic_parallelism()
virtual bool GPUKernelSizeOptimizerSvc::can_use_dynamic_parallelism |
( |
| ) |
const |
|
inlineoverridevirtual |
◆ finalize()
StatusCode GPUKernelSizeOptimizerSvc::finalize |
( |
| ) |
|
|
overridevirtual |
Definition at line 119 of file GPUKernelSizeOptimizerSvc.cxx.
127 return (
a.grid_x !=
b.grid_x ) ||
128 (
a.grid_y !=
b.grid_y ) ||
129 (
a.grid_z !=
b.grid_z ) ||
130 (
a.block_x !=
b.block_x ) ||
131 (
a.block_y !=
b.block_y ) ||
132 (
a.block_z !=
b.block_z );
152 ke.device = device_name;
153 ke.name = pair.first;
154 ke.kernels.resize(pair.second.size());
156 for (
size_t i = 0;
i < ke.kernels.size(); ++
i)
158 const KernelRecord & kr = pair.second[
i];
160 KernelsEntry::KernelInfo ki;
161 for (
int u = 0;
u <= 100; ++
u)
164 if (delta_configs(
cfg, ki))
168 ki.usage_end =
u - 1;
169 ke.kernels[
i].push_back(ki);
172 ki.grid_x =
cfg.grid_x;
173 ki.grid_y =
cfg.grid_y;
174 ki.grid_z =
cfg.grid_z;
175 ki.block_x =
cfg.block_x;
176 ki.block_y =
cfg.block_y;
177 ki.block_z =
cfg.block_z;
183 ke.kernels[
i].push_back(ki);
193 output <<
"\n]" << std::endl;
200 return StatusCode::SUCCESS;
◆ get_GPU_usage()
int GPUKernelSizeOptimizerSvc::get_GPU_usage |
( |
| ) |
const |
|
inlineprivate |
Get the GPU usage, in percentage, rounded to the nearest integer.
- Warning
- Getting GPU usage not yet supported in the current version of the code, it will default to considering the GPU 100% available.
Definition at line 113 of file GPUKernelSizeOptimizerSvc.h.
◆ get_launch_configuration()
◆ handle()
void CaloGPUCUDAInitialization::handle |
( |
const Incident & |
incident | ) |
|
|
inlineoverrideinherited |
Definition at line 66 of file CaloGPUCUDAInitialization.h.
68 const bool is_multiprocess = (Gaudi::Concurrency::ConcurrencyFlags::numProcs() > 0);
73 throw GaudiException(
"Failed to perform the CUDA initialization!",
74 "CaloGPUCUDAInitialization::handle",
◆ initialize()
virtual StatusCode GPUKernelSizeOptimizerSvc::initialize |
( |
| ) |
|
|
inlineoverridevirtual |
◆ initialize_CUDA()
StatusCode GPUKernelSizeOptimizerSvc::initialize_CUDA |
( |
| ) |
|
|
overridevirtual |
Initialization that invokes CUDA functions.
Reimplemented from CaloGPUCUDAInitialization.
Definition at line 66 of file GPUKernelSizeOptimizerSvc.cxx.
75 std::ifstream in(
file);
86 for (
const auto &
entry : j)
88 if (
entry.at(
"device") != device_name)
92 const KernelsEntry ke =
entry.template get<KernelsEntry>();
94 std::vector<KernelRecord> & vect =
m_kernel_map[ke.name];
95 vect.resize(ke.kernels.size());
97 for (
size_t i = 0;
i < vect.size(); ++
i)
99 for (
const auto & ki : ke.kernels[
i])
102 config.grid_x = ki.grid_x;
103 config.grid_y = ki.grid_y;
104 config.grid_z = ki.grid_z;
105 config.block_x = ki.block_x;
106 config.block_y = ki.block_y;
107 config.block_z = ki.block_z;
109 vect[
i].add_configuration(
config, ki.usage_start, ki.usage_end,
true);
116 return StatusCode::SUCCESS;
◆ initialize_non_CUDA()
virtual StatusCode CaloGPUCUDAInitialization::initialize_non_CUDA |
( |
| ) |
|
|
inlineprotectedvirtualinherited |
◆ register_kernels()
void GPUKernelSizeOptimizerSvc::register_kernels |
( |
const std::string & |
tool_name, |
|
|
const int |
number, |
|
|
void ** |
kernels, |
|
|
const int * |
blocksize_hints, |
|
|
const int * |
gridsize_hints, |
|
|
const int * |
max_total_threads, |
|
|
const int |
offset = 0 |
|
) |
| |
|
overridevirtual |
Register a set of kernels that can be referred back to with a name and a number.
Uses C-style arrays for more immediate CUDA compatibility; assumes the size of kernels, blocksize_hints and gridsize_hints is number, and starts the numbering with an optional offset.
Definition at line 17 of file GPUKernelSizeOptimizerSvc.cxx.
27 std::vector<KernelRecord> & vect =
m_kernel_map[tool_name];
37 if (gridsize_hints[
i] == IGPUKernelSizeOptimizer::SpecialSizeHints::CooperativeLaunch)
◆ should_use_minimal_kernel_sizes()
virtual bool GPUKernelSizeOptimizerSvc::should_use_minimal_kernel_sizes |
( |
| ) |
const |
|
inlinevirtual |
Whether to avoid oversizing kernels and instead (if possible) launch kernels with the exact number of threads...
Definition at line 67 of file GPUKernelSizeOptimizerSvc.h.
◆ m_coopgroup_support
bool GPUKernelSizeOptimizerSvc::m_coopgroup_support = false |
|
private |
◆ m_dynpar_support
bool GPUKernelSizeOptimizerSvc::m_dynpar_support = false |
|
private |
◆ m_kernel_map
std::unordered_map<std::string, std::vector<KernelRecord> > GPUKernelSizeOptimizerSvc::m_kernel_map |
|
private |
◆ m_kernelFiles
Gaudi::Property<std::vector<std::string> > GPUKernelSizeOptimizerSvc::m_kernelFiles {this, "KernelSizeInput", {}, "Kernel size input JSON files"} |
|
private |
List of JSON files from where to read (hopefully optimized) kernel sizes for different GPUs.
Definition at line 120 of file GPUKernelSizeOptimizerSvc.h.
◆ m_outputFile
Gaudi::Property<std::string> GPUKernelSizeOptimizerSvc::m_outputFile {this, "OutputFile", "sizes.json", "Kernel size output file"} |
|
private |
◆ m_outputSizes
Gaudi::Property<bool> GPUKernelSizeOptimizerSvc::m_outputSizes {this, "OutputSizes", true, "Write out last used kernel sizes"} |
|
private |
The documentation for this class was generated from the following files:
void optimize_block_and_grid_size(void *func, int &block_size, int &grid_size, const int dynamic_memory=0, const int block_size_limit=0)
Optimizes block and grid size according to cudaOccupancyMaxPotentialBlockSize.
bool supports_cooperative_launches()
void optimize_block_and_grid_size_for_cooperative_launch(void *func, int &block_size, int &grid_size, const int dynamic_memory=0, const int block_size_limit=0)
Optimizes block and grid size for a cooperative launch.
Gaudi::Property< std::vector< std::string > > m_kernelFiles
List of JSON files from where to read (hopefully optimized) kernel sizes for different GPUs.
Gaudi::Property< bool > m_outputSizes
If true, writes the (last used) kernel sizes to an output JSON file.
constexpr int int_ceil_div(const int num, const int denom)
Returns the ceiling of num/denom, with proper rounding.
@ u
Enums for curvilinear frames.
virtual StatusCode initialize()
std::unordered_map< std::string, std::vector< KernelRecord > > m_kernel_map
bool supports_dynamic_parallelism()
AthROOTErrorHandlerSvc * svc
Gaudi::Property< std::string > m_outputFile
If m_outputSizes is true, the file to which the kernel sizes should be output.
int get_GPU_usage() const
Get the GPU usage, in percentage, rounded to the nearest integer.
static const std::string & type()
Incident type.
virtual StatusCode initialize_CUDA()
Initialization that invokes CUDA functions.
#define ATH_MSG_WARNING(x)