ATLAS Offline Software

GPUKernelSizeOptimizerSvc Class Reference

#include <GPUKernelSizeOptimizerSvc.h>
Private Attributes

    bool m_dynpar_support = false

    bool m_coopgroup_support = false

    std::unordered_map< std::string, std::vector< KernelRecord > > m_kernel_map

    Gaudi::Property< std::vector< std::string > > m_kernelFiles {this, "KernelSizeInput", {}, "Kernel size input JSON files"}
        List of JSON files from which to read (hopefully optimized) kernel sizes for different GPUs.

    Gaudi::Property< bool > m_outputSizes {this, "OutputSizes", true, "Write out last used kernel sizes"}
        If true, writes the (last used) kernel sizes to an output JSON file.

    Gaudi::Property< std::string > m_outputFile {this, "OutputFile", "sizes.json", "Kernel size output file"}
        If m_outputSizes is true, the file to which the kernel sizes should be output.
◆ GPUKernelSizeOptimizerSvc()

    GPUKernelSizeOptimizerSvc::GPUKernelSizeOptimizerSvc ( const std::string & name, ISvcLocator * svc )
 
◆ can_use_cooperative_groups()

    virtual bool GPUKernelSizeOptimizerSvc::can_use_cooperative_groups ( ) const    [inline, override, virtual]
  
 
 
◆ can_use_dynamic_parallelism()

    virtual bool GPUKernelSizeOptimizerSvc::can_use_dynamic_parallelism ( ) const    [inline, override, virtual]
  
 
 
◆ finalize()

    StatusCode GPUKernelSizeOptimizerSvc::finalize ( )    [override, virtual]

Definition at line 119 of file GPUKernelSizeOptimizerSvc.cxx.

Excerpt (comparing launch configurations and writing the last used kernel sizes to the output JSON):

    return ( a.grid_x  != b.grid_x  ) ||
           ( a.grid_y  != b.grid_y  ) ||
           ( a.grid_z  != b.grid_z  ) ||
           ( a.block_x != b.block_x ) ||
           ( a.block_y != b.block_y ) ||
           ( a.block_z != b.block_z );
    ...
    ke.device = device_name;
    ke.name = pair.first;
    ke.kernels.resize(pair.second.size());
    ...
    for (size_t i = 0; i < ke.kernels.size(); ++i)
    ...
        const KernelRecord & kr = pair.second[i];
    ...
        KernelsEntry::KernelInfo ki;
        for (int u = 0; u <= 100; ++u)
    ...
            if (delta_configs(cfg, ki))
    ...
                ki.usage_end = u - 1;
                ke.kernels[i].push_back(ki);
    ...
            ki.grid_x = cfg.grid_x;
            ki.grid_y = cfg.grid_y;
            ki.grid_z = cfg.grid_z;
            ki.block_x = cfg.block_x;
            ki.block_y = cfg.block_y;
            ki.block_z = cfg.block_z;
    ...
        ke.kernels[i].push_back(ki);
    ...
    output << "\n]" << std::endl;
    ...
    return StatusCode::SUCCESS;
 
 
 
 
◆ get_GPU_usage()

    int GPUKernelSizeOptimizerSvc::get_GPU_usage ( ) const    [inline, private]

Get the GPU usage, in percentage, rounded to the nearest integer.

Warning
    Getting the GPU usage is not yet supported in the current version of the code; it defaults to considering the GPU 100% available.

Definition at line 113 of file GPUKernelSizeOptimizerSvc.h.
 
 
◆ get_launch_configuration()
◆ handle()

    void CaloGPUCUDAInitialization::handle ( const Incident & incident )    [inline, override, inherited]

Definition at line 66 of file CaloGPUCUDAInitialization.h.

Excerpt:

    const bool is_multiprocess = (Gaudi::Concurrency::ConcurrencyFlags::numProcs() > 0);
    ...
    throw GaudiException("Failed to perform the CUDA initialization!",
                         "CaloGPUCUDAInitialization::handle",
    ...
 
 
 
◆ initialize()

    virtual StatusCode GPUKernelSizeOptimizerSvc::initialize ( )    [inline, override, virtual]
 
 
◆ initialize_CUDA()

    StatusCode GPUKernelSizeOptimizerSvc::initialize_CUDA ( )    [override, virtual]

Initialization that invokes CUDA functions.

Reimplemented from CaloGPUCUDAInitialization.

Definition at line 66 of file GPUKernelSizeOptimizerSvc.cxx.

Excerpt (reading per-device kernel sizes from the input JSON files into m_kernel_map):

    std::ifstream in(file);
    ...
    for (const auto & entry : j)
    ...
        if (entry.at("device") != device_name)
    ...
        const KernelsEntry ke = entry.template get<KernelsEntry>();
    ...
        std::vector<KernelRecord> & vect = m_kernel_map[ke.name];
        vect.resize(ke.kernels.size());
    ...
        for (size_t i = 0; i < vect.size(); ++i)
    ...
            for (const auto & ki : ke.kernels[i])
    ...
                config.grid_x = ki.grid_x;
                config.grid_y = ki.grid_y;
                config.grid_z = ki.grid_z;
                config.block_x = ki.block_x;
                config.block_y = ki.block_y;
                config.block_z = ki.block_z;
    ...
                vect[i].add_configuration(config, ki.usage_start, ki.usage_end, true);
    ...
    return StatusCode::SUCCESS;
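
For reference, a sketch of what one entry in a "KernelSizeInput" file might look like, inferred from the reading code above; the device name, tool name and the exact JSON keys used by the KernelsEntry (de)serialization are illustrative assumptions, not taken verbatim from the source:

    // Hypothetical example of the kernel-size input, embedded as a C++ raw string.
    // Each entry targets one device and one registered tool ("name"); "kernels" holds,
    // per kernel index, the launch configurations valid over a GPU-usage interval.
    const char * example_kernel_sizes = R"json(
    [
      {
        "device":  "ExampleGPU",
        "name":    "ExampleGpuTool",
        "kernels": [
          [
            { "grid_x": 64, "grid_y": 1, "grid_z": 1,
              "block_x": 256, "block_y": 1, "block_z": 1,
              "usage_start": 0, "usage_end": 100 }
          ]
        ]
      }
    ]
    )json";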
 
 
 
 
◆ initialize_non_CUDA()

    virtual StatusCode CaloGPUCUDAInitialization::initialize_non_CUDA ( )    [inline, protected, virtual, inherited]
  
 
 
◆ register_kernels()

    void GPUKernelSizeOptimizerSvc::register_kernels ( const std::string &  tool_name,
                                                       const int            number,
                                                       void **              kernels,
                                                       const int *          blocksize_hints,
                                                       const int *          gridsize_hints,
                                                       const int *          max_total_threads,
                                                       const int            offset = 0 )    [override, virtual]

Register a set of kernels that can be referred back to with a name and a number.

Uses C-style arrays for more immediate CUDA compatibility, assumes that kernels, blocksize_hints and gridsize_hints all have size number, and starts the numbering at an optional offset.

Definition at line 17 of file GPUKernelSizeOptimizerSvc.cxx.

Excerpt:

    std::vector<KernelRecord> & vect = m_kernel_map[tool_name];
    ...
    if (gridsize_hints[i] == IGPUKernelSizeOptimizer::SpecialSizeHints::CooperativeLaunch)
    ...
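
A hedged usage sketch of this interface follows; the kernel symbols, tool name and hint values are hypothetical, with only the register_kernels signature and the CooperativeLaunch special hint taken from this page:

    #include "GPUKernelSizeOptimizerSvc.h"

    // Hypothetical CUDA kernels belonging to some GPU tool.
    __global__ void plain_kernel();        // sized from the usual occupancy hints
    __global__ void cooperative_kernel();  // must be launched cooperatively

    void register_example_kernels(GPUKernelSizeOptimizerSvc & optimizer)
    {
      void * kernels[2] = { reinterpret_cast<void *>(&plain_kernel),
                            reinterpret_cast<void *>(&cooperative_kernel) };

      const int blocksize_hints[2]   = { 256, 128 };
      const int gridsize_hints[2]    = { 64,
                                         static_cast<int>(IGPUKernelSizeOptimizer::SpecialSizeHints::CooperativeLaunch) };
      const int max_total_threads[2] = { 1 << 20, 1 << 16 };

      // The two kernels become addressable as ("ExampleGpuTool", 0) and ("ExampleGpuTool", 1);
      // a non-zero offset would shift that numbering.
      optimizer.register_kernels("ExampleGpuTool", 2, kernels,
                                 blocksize_hints, gridsize_hints, max_total_threads, 0);
    }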
 
 
 
 
◆ should_use_minimal_kernel_sizes()

    virtual bool GPUKernelSizeOptimizerSvc::should_use_minimal_kernel_sizes ( ) const    [inline, virtual]

Whether to avoid oversizing kernels and instead (if possible) launch kernels with the exact number of threads...

Definition at line 67 of file GPUKernelSizeOptimizerSvc.h.
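
As an illustration only (the helper below is hypothetical and not part of the service), a client honouring this flag would size the grid from the actual number of work items instead of keeping a pre-optimized, possibly oversized grid:

    #include <algorithm>

    struct LaunchDims { int grid; int block; };

    // Pick launch dimensions for num_items work items, depending on whether the
    // optimizer asks for minimal kernel sizes.
    LaunchDims choose_dims(const bool minimal_sizes, const int num_items,
                           const int default_grid, const int block_size)
    {
      if (minimal_sizes)
        {
          // Ceiling division: just enough blocks so every item is covered exactly once.
          const int grid = (num_items + block_size - 1) / block_size;
          return { std::max(grid, 1), block_size };
        }
      return { default_grid, block_size };
    }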
 
 
◆ m_coopgroup_support

    bool GPUKernelSizeOptimizerSvc::m_coopgroup_support = false    [private]

◆ m_dynpar_support

    bool GPUKernelSizeOptimizerSvc::m_dynpar_support = false    [private]

◆ m_kernel_map

    std::unordered_map< std::string, std::vector< KernelRecord > > GPUKernelSizeOptimizerSvc::m_kernel_map    [private]
 
◆ m_kernelFiles

    Gaudi::Property< std::vector< std::string > > GPUKernelSizeOptimizerSvc::m_kernelFiles {this, "KernelSizeInput", {}, "Kernel size input JSON files"}    [private]

List of JSON files from which to read (hopefully optimized) kernel sizes for different GPUs.

Definition at line 120 of file GPUKernelSizeOptimizerSvc.h.
 
◆ m_outputFile

    Gaudi::Property< std::string > GPUKernelSizeOptimizerSvc::m_outputFile {this, "OutputFile", "sizes.json", "Kernel size output file"}    [private]

If m_outputSizes is true, the file to which the kernel sizes should be output.

◆ m_outputSizes

    Gaudi::Property< bool > GPUKernelSizeOptimizerSvc::m_outputSizes {this, "OutputSizes", true, "Write out last used kernel sizes"}    [private]

If true, writes the (last used) kernel sizes to an output JSON file.
 
The documentation for this class was generated from the following files:

    GPUKernelSizeOptimizerSvc.h
    GPUKernelSizeOptimizerSvc.cxx