// db/d29/IGPUKernelSizeOptimizer_8h_source.html

//

// Copyright (C) 2002-2023 CERN for the benefit of the ATLAS collaboration

//

// Dear emacs, this is -*- c++ -*-

//


#ifndef CALORECGPU_IGPUKERNELSIZEOPTIMIZER_H

#define CALORECGPU_IGPUKERNELSIZEOPTIMIZER_H


#include <string>


//Plain aggregate of six integers: three grid_* and three block_* extents
//(x, y and z each). Every member defaults to 0.
//Member order is kept as grid_x, grid_y, grid_z, block_x, block_y, block_z
//so that positional (aggregate) initialization keeps its meaning.
struct CUDAKernelLaunchConfiguration
{
  int grid_x = 0;
  int grid_y = 0;
  int grid_z = 0;

  int block_x = 0;
  int block_y = 0;
  int block_z = 0;
};


//Abstract interface through which kernels are registered (together with
//block size, grid size and total thread hints) and through which a
//CUDAKernelLaunchConfiguration can later be requested by name.
//The registration and query logic lives entirely in the implementations.
class IGPUKernelSizeOptimizer
{
 public:

  //Named special values for size hints.
  //NOTE(review): CooperativeLaunch is presumably passed as a size hint
  //to request a cooperative launch; confirm against the implementations.
  enum SpecialSizeHints
  {
    CooperativeLaunch = -1
  };

  //Convenience overload for registering a single kernel.
  //Forwards to register_kernels() with a count of one and an offset of zero,
  //handing over each by-value argument as a one-element array.
  //Note that kernel_name is what gets passed as the tool_name argument.
  virtual void register_kernel(const std::string & kernel_name,
                               void * kernel,
                               const int blocksize_hint,
                               const int gridsize_hint,
                               const int max_total_threads)
  {
    constexpr int kernel_count = 1;
    constexpr int first_offset = 0;

    register_kernels(kernel_name,
                     kernel_count,
                     &kernel,
                     &blocksize_hint,
                     &gridsize_hint,
                     &max_total_threads,
                     first_offset);
  }

  //Registers number kernels under tool_name.
  //kernels, blocksize_hints, gridsize_hints and max_total_threads
  //are parallel arrays; register_kernel() passes one element in each
  //for a number of 1, so they are expected to hold number entries.
  //NOTE(review): the meaning of offset is not visible here; presumably
  //the index of the first kernel within the tool, to be confirmed.
  virtual void register_kernels(const std::string & tool_name,
                                const int number,
                                void ** kernels,
                                const int * blocksize_hints,
                                const int * gridsize_hints,
                                const int * max_total_threads,
                                const int offset = 0) = 0;

  //Returns the launch configuration associated with name.
  //NOTE(review): number presumably selects one of the kernels registered
  //under that name and dynamic_memory presumably is the amount of dynamic
  //(shared) memory the launch will request; confirm against implementations.
  virtual CUDAKernelLaunchConfiguration get_launch_configuration(const std::string & name,
                                                                 const int number = 0,
                                                                 const int dynamic_memory = 0) const = 0;

  //Implementations report whether cooperative groups can be used.
  virtual bool can_use_cooperative_groups() const = 0;

  //Implementations report whether dynamic parallelism can be used.
  virtual bool can_use_dynamic_parallelism() const = 0;

  //Whether minimal kernel sizes should be used. Off unless overridden.
  virtual bool use_minimal_kernel_sizes() const
  {
    //According to our tests, and at least for the devices we use,
    //going through dynamic parallelism to achieve this
    //only ever costs us performance.
    return false;
  }

  //Virtual so that implementations can be destroyed through this interface.
  virtual ~IGPUKernelSizeOptimizer() = default;
};

#endif