ATLAS Offline Software
IGPUKernelSizeOptimizer.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2023 CERN for the benefit of the ATLAS collaboration
3 //
4 // Dear emacs, this is -*- c++ -*-
5 //
6 
7 #ifndef CALORECGPU_IGPUKERNELSIZEOPTIMIZER_H
8 #define CALORECGPU_IGPUKERNELSIZEOPTIMIZER_H
9 
10 #include <string>
11 
/**
 * @brief Plain aggregate holding the six integer sizes of a kernel launch:
 *        three @c grid_* values and three @c block_* values.
 *
 * Every member defaults to 0, so a default-constructed object has all
 * sizes equal to zero.
 *
 * NOTE(review): presumably these are the CUDA grid and block dimensions
 * along x, y and z -- confirm against the code that consumes them.
 */
struct CUDAKernelLaunchConfiguration
{
  // Member order is part of the interface (aggregate initialization).
  int grid_x = 0;
  int grid_y = 0;
  int grid_z = 0;
  int block_x = 0;
  int block_y = 0;
  int block_z = 0;
};
16 
29 {
30  public:
31 
33  {
35  };
36 
39  virtual void register_kernel(const std::string & kernel_name,
40  void * kernel,
41  const int blocksize_hint,
42  const int gridsize_hint,
43  const int max_total_threads)
44  {
45  this->register_kernels(kernel_name, 1, &kernel, &blocksize_hint, &gridsize_hint, &max_total_threads, 0);
46  }
47 
54  virtual void register_kernels(const std::string & tool_name,
55  const int number,
56  void ** kernels,
57  const int * blocksize_hints,
58  const int * gridsize_hints,
59  const int * max_total_threads,
60  const int offset = 0) = 0;
61 
64  const int number = 0,
65  const int dynamic_memory = 0) const = 0;
66 
68  virtual bool can_use_cooperative_groups() const = 0;
69 
71  virtual bool can_use_dynamic_parallelism() const = 0;
72 
74  virtual bool use_minimal_kernel_sizes() const
75  {
76  //Testing shows that, at least on the devices we use,
77  //we only lose performance by dyn-par'ing our way to do this.
78  return false;
79  }
80 
81  virtual ~IGPUKernelSizeOptimizer() = default;
82 };
83 #endif
CUDAKernelLaunchConfiguration::grid_x
int grid_x
Definition: IGPUKernelSizeOptimizer.h:14
IGPUKernelSizeOptimizer::can_use_cooperative_groups
virtual bool can_use_cooperative_groups() const =0
Whether the device + environment in use support cooperative groups.
IGPUKernelSizeOptimizer::~IGPUKernelSizeOptimizer
virtual ~IGPUKernelSizeOptimizer()=default
IGPUKernelSizeOptimizer::SpecialSizeHints
SpecialSizeHints
Definition: IGPUKernelSizeOptimizer.h:33
IGPUKernelSizeOptimizer::register_kernel
virtual void register_kernel(const std::string &kernel_name, void *kernel, const int blocksize_hint, const int gridsize_hint, const int max_total_threads)
Register a kernel with a specific name.
Definition: IGPUKernelSizeOptimizer.h:39
CUDAKernelLaunchConfiguration::block_z
int block_z
Definition: IGPUKernelSizeOptimizer.h:14
IGPUKernelSizeOptimizer::CooperativeLaunch
@ CooperativeLaunch
Definition: IGPUKernelSizeOptimizer.h:34
IGPUKernelSizeOptimizer::get_launch_configuration
virtual CUDAKernelLaunchConfiguration get_launch_configuration(const std::string &name, const int number=0, const int dynamic_memory=0) const =0
Retrieve the (hopefully optimal) kernel launch configuration.
python.selection.number
number
Definition: selection.py:20
name
std::string name
Definition: Control/AthContainers/Root/debug.cxx:228
IGPUKernelSizeOptimizer::use_minimal_kernel_sizes
virtual bool use_minimal_kernel_sizes() const
Whether to avoid oversizing kernels and instead (if possible) launch kernels with the exact number of...
Definition: IGPUKernelSizeOptimizer.h:74
IGPUKernelSizeOptimizer::register_kernels
virtual void register_kernels(const std::string &tool_name, const int number, void **kernels, const int *blocksize_hints, const int *gridsize_hints, const int *max_total_threads, const int offset=0)=0
Register a set of kernels that can be referred back to with a name and a number.
CUDAKernelLaunchConfiguration::block_y
int block_y
Definition: IGPUKernelSizeOptimizer.h:14
CUDAKernelLaunchConfiguration::grid_y
int grid_y
Definition: IGPUKernelSizeOptimizer.h:14
CUDAKernelLaunchConfiguration::block_x
int block_x
Definition: IGPUKernelSizeOptimizer.h:14
convertTimingResiduals.offset
offset
Definition: convertTimingResiduals.py:71
IGPUKernelSizeOptimizer::can_use_dynamic_parallelism
virtual bool can_use_dynamic_parallelism() const =0
Whether the device + environment in use support dynamic parallelism.
CUDAKernelLaunchConfiguration
Definition: IGPUKernelSizeOptimizer.h:13
CUDAKernelLaunchConfiguration::grid_z
int grid_z
Definition: IGPUKernelSizeOptimizer.h:14
IGPUKernelSizeOptimizer
Interface for GPU kernel size optimization (allowing adjustment of kernel sizes to the properties of ...
Definition: IGPUKernelSizeOptimizer.h:29