ATLAS Offline Software
|
Classes | |
struct | CUDAStreamPtrHolder |
Functions | |
void * | allocate (const size_t num) |
Allocates and returns the address of num bytes from GPU memory. More... | |
void | deallocate (void *address) |
Deallocates address in GPU memory. More... | |
void * | allocate_pinned (const size_t num) |
Allocates and returns the address of num bytes from CPU pinned memory. More... | |
void | deallocate_pinned (void *address) |
Deallocates address in CPU pinned memory. More... | |
void | GPU_to_CPU (void *dest, const void *const source, const size_t num) |
Copies num bytes from source in GPU memory to dest in CPU memory. More... | |
void | CPU_to_GPU (void *dest, const void *const source, const size_t num) |
Copies num bytes from source in CPU memory to dest in GPU memory. More... | |
void | GPU_to_GPU (void *dest, const void *const source, const size_t num) |
Copies num bytes from source to dest, both in GPU memory. More... | |
void | GPU_to_CPU_async (void *dest, const void *const source, const size_t num, CUDAStreamPtrHolder stream={}) |
Copies num bytes from source in GPU memory to dest in CPU memory, asynchronously. More... | |
void | CPU_to_GPU_async (void *dest, const void *const source, const size_t num, CUDAStreamPtrHolder stream={}) |
Copies num bytes from source in CPU memory to dest in GPU memory, asynchronously. More... | |
void | GPU_to_GPU_async (void *dest, const void *const source, const size_t num, CUDAStreamPtrHolder stream={}) |
Copies num bytes from source to dest, both in GPU memory, asynchronously. More... | |
void | GPU_synchronize (CUDAStreamPtrHolder stream={}) |
Synchronizes the stream. More... | |
void | optimize_block_and_grid_size (void *func, int &block_size, int &grid_size, const int dynamic_memory=0, const int block_size_limit=0) |
Optimizes block and grid size according to cudaOccupancyMaxPotentialBlockSize. More... | |
void | optimize_block_and_grid_size_for_cooperative_launch (void *func, int &block_size, int &grid_size, const int dynamic_memory=0, const int block_size_limit=0) |
Optimizes block and grid size for a cooperative launch. More... | |
bool | supports_cooperative_launches () |
bool | supports_dynamic_parallelism () |
std::string | GPU_name () |
void* CaloRecGPU::CUDA_Helpers::allocate | ( | const size_t | num | ) |
Allocates and returns the address of num
bytes from GPU memory.
void* CaloRecGPU::CUDA_Helpers::allocate_pinned | ( | const size_t | num | ) |
Allocates and returns the address of num
bytes from CPU pinned memory.
void CaloRecGPU::CUDA_Helpers::CPU_to_GPU | ( | void * | dest, |
const void *const | source, | ||
const size_t | num | ||
) |
Copies num
bytes from source
in CPU memory to dest
in GPU memory.
void CaloRecGPU::CUDA_Helpers::CPU_to_GPU_async | ( | void * | dest, |
const void *const | source, | ||
const size_t | num, | ||
CUDAStreamPtrHolder | stream = {} |
||
) |
Copies num
bytes from source
in CPU memory to dest
in GPU memory, asynchronously.
void CaloRecGPU::CUDA_Helpers::deallocate | ( | void * | address | ) |
Deallocates address
in GPU memory.
void CaloRecGPU::CUDA_Helpers::deallocate_pinned | ( | void * | address | ) |
Deallocates address
in CPU pinned memory.
std::string CaloRecGPU::CUDA_Helpers::GPU_name | ( | ) |
void CaloRecGPU::CUDA_Helpers::GPU_synchronize | ( | CUDAStreamPtrHolder | stream = {} | ) |
Synchronizes the stream.
If called with no value, synchronizes with cudaStreamPerThread.
void CaloRecGPU::CUDA_Helpers::GPU_to_CPU | ( | void * | dest, |
const void *const | source, | ||
const size_t | num | ||
) |
Copies num bytes from source in GPU memory to dest in CPU memory.
void CaloRecGPU::CUDA_Helpers::GPU_to_CPU_async | ( | void * | dest, |
const void *const | source, | ||
const size_t | num, | ||
CUDAStreamPtrHolder | stream = {} |
||
) |
Copies num
bytes from source
in GPU memory to dest
in CPU memory, asynchronously.
void CaloRecGPU::CUDA_Helpers::GPU_to_GPU | ( | void * | dest, |
const void *const | source, | ||
const size_t | num | ||
) |
Copies num bytes from source to dest, both in GPU memory.
void CaloRecGPU::CUDA_Helpers::GPU_to_GPU_async | ( | void * | dest, |
const void *const | source, | ||
const size_t | num, | ||
CUDAStreamPtrHolder | stream = {} |
||
) |
Copies num bytes from source to dest, both in GPU memory, asynchronously.
void CaloRecGPU::CUDA_Helpers::optimize_block_and_grid_size | ( | void * | func, |
int & | block_size, | ||
int & | grid_size, | ||
const int | dynamic_memory = 0 , |
||
const int | block_size_limit = 0 |
||
) |
Optimizes block and grid size according to cudaOccupancyMaxPotentialBlockSize.
void CaloRecGPU::CUDA_Helpers::optimize_block_and_grid_size_for_cooperative_launch | ( | void * | func, |
int & | block_size, | ||
int & | grid_size, | ||
const int | dynamic_memory = 0 , |
||
const int | block_size_limit = 0 |
||
) |
Optimizes block and grid size for a cooperative launch.
bool CaloRecGPU::CUDA_Helpers::supports_cooperative_launches | ( | ) |
bool CaloRecGPU::CUDA_Helpers::supports_dynamic_parallelism | ( | ) |