|
ATLAS Offline Software
|
Go to the documentation of this file.
21 const int * gridsize_hints,
22 const int * max_total_threads,
27 std::vector<KernelRecord> & vect =
m_kernel_map[tool_name];
37 if (gridsize_hints[
i] == IGPUKernelSizeOptimizer::SpecialSizeHints::CooperativeLaunch)
75 std::ifstream in(
file);
86 for (
const auto &
entry : j)
88 if (
entry.at(
"device") != device_name)
97 for (
size_t i = 0;
i < vect.size(); ++
i)
99 for (
const auto & ki : ke.
kernels[
i])
102 config.grid_x = ki.grid_x;
103 config.grid_y = ki.grid_y;
104 config.grid_z = ki.grid_z;
105 config.block_x = ki.block_x;
106 config.block_y = ki.block_y;
107 config.block_z = ki.block_z;
109 vect[
i].add_configuration(
config, ki.usage_start, ki.usage_end,
true);
116 return StatusCode::SUCCESS;
127 return (
a.grid_x !=
b.grid_x ) ||
128 (
a.grid_y !=
b.grid_y ) ||
129 (
a.grid_z !=
b.grid_z ) ||
130 (
a.block_x !=
b.block_x ) ||
131 (
a.block_y !=
b.block_y ) ||
132 (
a.block_z !=
b.block_z );
153 ke.
name = pair.first;
154 ke.
kernels.resize(pair.second.size());
156 for (
size_t i = 0;
i < ke.
kernels.size(); ++
i)
161 for (
int u = 0;
u <= 100; ++
u)
164 if (delta_configs(
cfg, ki))
193 output <<
"\n]" << std::endl;
200 return StatusCode::SUCCESS;
void optimize_block_and_grid_size(void *func, int &block_size, int &grid_size, const int dynamic_memory=0, const int block_size_limit=0)
Optimizes block and grid size according to cudaOccupancyMaxPotentialBlockSize.
bool supports_cooperative_launches()
virtual void register_kernels(const std::string &tool_name, const int number, void **kernels, const int *blocksize_hints, const int *gridsize_hints, const int *max_total_threads, const int offset=0) override
Register a set of kernels that can be referred back to with a name and a number.
void optimize_block_and_grid_size_for_cooperative_launch(void *func, int &block_size, int &grid_size, const int dynamic_memory=0, const int block_size_limit=0)
Optimizes block and grid size for a cooperative launch.
Gaudi::Property< std::vector< std::string > > m_kernelFiles
List of JSON files from which to read (hopefully optimized) kernel sizes for different GPUs.
Gaudi::Property< bool > m_outputSizes
If true, writes the (last used) kernel sizes to an output JSON file.
constexpr int int_ceil_div(const int num, const int denom)
Returns the integer ceiling of num/denom (the quotient rounded up rather than truncated).
@ u
Enums for curvilinear frames.
virtual StatusCode finalize() override
::StatusCode StatusCode
StatusCode definition for legacy code.
std::unordered_map< std::string, std::vector< KernelRecord > > m_kernel_map
bool supports_dynamic_parallelism()
AthROOTErrorHandlerSvc * svc
GPUKernelSizeOptimizerSvc(const std::string &name, ISvcLocator *svc)
CUDAKernelLaunchConfiguration configs[101]
Gaudi::Property< std::string > m_outputFile
If m_outputSizes is true, the file to which the kernel sizes should be output.
virtual CUDAKernelLaunchConfiguration get_launch_configuration(const std::string &name, const int number=0, const int dynamic_memory=0) const override
Retrieve the (hopefully optimal) kernel launch configuration.
int get_GPU_usage() const
Get the GPU usage, in percentage, rounded to the nearest integer.
virtual StatusCode initialize_CUDA() override
Initialization that invokes CUDA functions.
#define ATH_MSG_WARNING(x)
std::vector< std::vector< KernelInfo > > kernels