ATLAS Offline Software
Classes | Functions
CaloRecGPU::CUDA_Helpers Namespace Reference

Classes

struct  CUDAStreamPtrHolder
 

Functions

void * allocate (const size_t num)
 Allocates and returns the address of num bytes from GPU memory. More...
 
void deallocate (void *address)
 Deallocates address in GPU memory. More...
 
void * allocate_pinned (const size_t num)
 Allocates and returns the address of num bytes from CPU pinned memory. More...
 
void deallocate_pinned (void *address)
 Deallocates address in CPU pinned memory. More...
 
void GPU_to_CPU (void *dest, const void *const source, const size_t num)
 Copies num bytes from source in GPU memory to dest in CPU memory. More...
 
void CPU_to_GPU (void *dest, const void *const source, const size_t num)
 Copies num bytes from source in CPU memory to dest in GPU memory. More...
 
void GPU_to_GPU (void *dest, const void *const source, const size_t num)
 Copies num bytes from source to dest, both in GPU memory. More...
 
void GPU_to_CPU_async (void *dest, const void *const source, const size_t num, CUDAStreamPtrHolder stream={})
 Copies num bytes from source in GPU memory to dest in CPU memory, asynchronously. More...
 
void CPU_to_GPU_async (void *dest, const void *const source, const size_t num, CUDAStreamPtrHolder stream={})
 Copies num bytes from source in CPU memory to dest in GPU memory, asynchronously. More...
 
void GPU_to_GPU_async (void *dest, const void *const source, const size_t num, CUDAStreamPtrHolder stream={})
 Copies num bytes from source to dest, both in GPU memory, asynchronously. More...
 
void GPU_synchronize (CUDAStreamPtrHolder stream={})
 Synchronizes the stream. More...
 
void optimize_block_and_grid_size (void *func, int &block_size, int &grid_size, const int dynamic_memory=0, const int block_size_limit=0)
 Optimizes block and grid size according to cudaOccupancyMaxPotentialBlockSize. More...
 
void optimize_block_and_grid_size_for_cooperative_launch (void *func, int &block_size, int &grid_size, const int dynamic_memory=0, const int block_size_limit=0)
 Optimizes block and grid size for a cooperative launch. More...
 
bool supports_cooperative_launches ()
 
bool supports_dynamic_parallelism ()
 
std::string GPU_name ()
 

Function Documentation

◆ allocate()

void* CaloRecGPU::CUDA_Helpers::allocate ( const size_t  num)

Allocates and returns the address of num bytes from GPU memory.

◆ allocate_pinned()

void* CaloRecGPU::CUDA_Helpers::allocate_pinned ( const size_t  num)

Allocates and returns the address of num bytes from CPU pinned memory.

◆ CPU_to_GPU()

void CaloRecGPU::CUDA_Helpers::CPU_to_GPU ( void *  dest,
const void *const  source,
const size_t  num 
)

Copies num bytes from source in CPU memory to dest in GPU memory.

◆ CPU_to_GPU_async()

void CaloRecGPU::CUDA_Helpers::CPU_to_GPU_async ( void *  dest,
const void *const  source,
const size_t  num,
CUDAStreamPtrHolder  stream = {} 
)

Copies num bytes from source in CPU memory to dest in GPU memory, asynchronously.

◆ deallocate()

void CaloRecGPU::CUDA_Helpers::deallocate ( void *  address)

Deallocates address in GPU memory.

◆ deallocate_pinned()

void CaloRecGPU::CUDA_Helpers::deallocate_pinned ( void *  address)

Deallocates address in CPU pinned memory.

◆ GPU_name()

std::string CaloRecGPU::CUDA_Helpers::GPU_name ( )

◆ GPU_synchronize()

void CaloRecGPU::CUDA_Helpers::GPU_synchronize ( CUDAStreamPtrHolder  stream = {})

Synchronizes the stream.

If called with no value, synchronizes with cudaStreamPerThread.

◆ GPU_to_CPU()

void CaloRecGPU::CUDA_Helpers::GPU_to_CPU ( void *  dest,
const void *const  source,
const size_t  num 
)

Copies num bytes from source in GPU memory to dest in CPU memory.

◆ GPU_to_CPU_async()

void CaloRecGPU::CUDA_Helpers::GPU_to_CPU_async ( void *  dest,
const void *const  source,
const size_t  num,
CUDAStreamPtrHolder  stream = {} 
)

Copies num bytes from source in GPU memory to dest in CPU memory, asynchronously.

◆ GPU_to_GPU()

void CaloRecGPU::CUDA_Helpers::GPU_to_GPU ( void *  dest,
const void *const  source,
const size_t  num 
)

Copies num bytes from source to dest, both in GPU memory.

◆ GPU_to_GPU_async()

void CaloRecGPU::CUDA_Helpers::GPU_to_GPU_async ( void *  dest,
const void *const  source,
const size_t  num,
CUDAStreamPtrHolder  stream = {} 
)

Copies num bytes from source to dest, both in GPU memory, asynchronously.

◆ optimize_block_and_grid_size()

void CaloRecGPU::CUDA_Helpers::optimize_block_and_grid_size ( void *  func,
int &  block_size,
int &  grid_size,
const int  dynamic_memory = 0,
const int  block_size_limit = 0 
)

Optimizes block and grid size according to cudaOccupancyMaxPotentialBlockSize.

◆ optimize_block_and_grid_size_for_cooperative_launch()

void CaloRecGPU::CUDA_Helpers::optimize_block_and_grid_size_for_cooperative_launch ( void *  func,
int &  block_size,
int &  grid_size,
const int  dynamic_memory = 0,
const int  block_size_limit = 0 
)

Optimizes block and grid size for a cooperative launch.

◆ supports_cooperative_launches()

bool CaloRecGPU::CUDA_Helpers::supports_cooperative_launches ( )

◆ supports_dynamic_parallelism()

bool CaloRecGPU::CUDA_Helpers::supports_dynamic_parallelism ( )