|
ATLAS Offline Software
|
Go to the documentation of this file.
7 #ifndef CALORECGPU_GPUKERNELSIZEOPTIMIZERSVC_H
8 #define CALORECGPU_GPUKERNELSIZEOPTIMIZERSVC_H
12 #include <unordered_map>
20 #include <nlohmann/json.hpp>
44 const int * blocksize_hints,
45 const int * gridsize_hints,
46 const int * max_total_threads,
47 const int offset = 0)
override;
52 const int dynamic_memory = 0)
const override;
92 const int usage_start = 0,
93 const int usage_end = 100,
94 const bool overwrite =
false)
96 for (
int u = usage_start;
u <= usage_end &&
u <= 100; ++
u)
99 if (overwrite ||
cfg.grid_x <= 0)
107 std::unordered_map<std::string, std::vector<KernelRecord>>
m_kernel_map;
120 Gaudi::Property<std::vector<std::string>>
m_kernelFiles {
this,
"KernelSizeInput", {},
"Kernel size input JSON files"};
125 Gaudi::Property<bool>
m_outputSizes {
this,
"OutputSizes",
true,
"Write out last used kernel sizes"};
129 Gaudi::Property<std::string>
m_outputFile {
this,
"OutputFile",
"sizes.json",
"Kernel size output file"};
146 std::vector< std::vector<KernelInfo> >
kernels;
virtual void register_kernels(const std::string &tool_name, const int number, void **kernels, const int *blocksize_hints, const int *gridsize_hints, const int *max_total_threads, const int offset=0) override
Register a set of kernels that can be referred back to with a name and a number.
virtual bool can_use_cooperative_groups() const override
Whether the device + environment in use support cooperative groups.
Gaudi::Property< std::vector< std::string > > m_kernelFiles
List of JSON files from which to read (hopefully optimized) kernel sizes for different GPUs.
Gaudi::Property< bool > m_outputSizes
If true, writes the (last used) kernel sizes to an output JSON file.
virtual bool should_use_minimal_kernel_sizes() const
Whether to avoid oversizing kernels and instead (if possible) launch kernels with the exact number of...
NLOHMANN_DEFINE_TYPE_INTRUSIVE(KernelInfo, usage_start, usage_end, grid_x, grid_y, grid_z, block_x, block_y, block_z)
void add_configuration(const CUDAKernelLaunchConfiguration &config, const int usage_start=0, const int usage_end=100, const bool overwrite=false)
@ u
Enums for curvilinear frames.
virtual StatusCode finalize() override
virtual StatusCode initialize()
::StatusCode StatusCode
StatusCode definition for legacy code.
std::unordered_map< std::string, std::vector< KernelRecord > > m_kernel_map
Base class to provide some basic common infrastructure for initializing CUDA only at the right place ...
AthROOTErrorHandlerSvc * svc
GPUKernelSizeOptimizerSvc(const std::string &name, ISvcLocator *svc)
CUDAKernelLaunchConfiguration configs[101]
Gaudi::Property< std::string > m_outputFile
If m_outputSizes is true, the file to which the kernel sizes should be output.
virtual CUDAKernelLaunchConfiguration get_launch_configuration(const std::string &name, const int number=0, const int dynamic_memory=0) const override
Retrieve the (hopefully optimal) kernel launch configuration.
int get_GPU_usage() const
Get the GPU usage as a percentage, rounded to the nearest integer.
virtual StatusCode initialize_CUDA() override
Initialization that invokes CUDA functions.
virtual bool can_use_dynamic_parallelism() const override
Whether the device + environment in use support dynamic parallelism.
std::vector< std::vector< KernelInfo > > kernels
virtual StatusCode initialize() override