Classes
struct	CUDAStreamPtrHolder

Functions
void *	allocate (const size_t num)
	Allocates and returns the address of `num` bytes from GPU memory. More...

void	deallocate (void *address)
	Deallocates `address` in GPU memory. More...

void *	allocate_pinned (const size_t num)
	Allocates and returns the address of `num` bytes from CPU pinned memory. More...

void	deallocate_pinned (void *address)
	Deallocates `address` in CPU pinned memory. More...

void	GPU_to_CPU (void *dest, const void *const source, const size_t num)
	Copies `num` bytes from `source` in GPU memory to `dest` in CPU memory. More...

void	CPU_to_GPU (void *dest, const void *const source, const size_t num)
	Copies `num` bytes from `source` in CPU memory to `dest` in GPU memory. More...

void	GPU_to_GPU (void *dest, const void *const source, const size_t num)
	Copies `num` bytes from `source` to `dest`, both in GPU memory. More...

void	GPU_to_CPU_async (void *dest, const void *const source, const size_t num, CUDAStreamPtrHolder stream={})
	Copies `num` bytes from `source` in GPU memory to `dest` in CPU memory, asynchronously. More...

void	CPU_to_GPU_async (void *dest, const void *const source, const size_t num, CUDAStreamPtrHolder stream={})
	Copies `num` bytes from `source` in CPU memory to `dest` in GPU memory, asynchronously. More...

void	GPU_to_GPU_async (void *dest, const void *const source, const size_t num, CUDAStreamPtrHolder stream={})
	Copies `num` bytes from `source` to `dest`, both in GPU memory, asynchronously. More...

void	GPU_synchronize (CUDAStreamPtrHolder stream={})
	Synchronizes the `stream`. More...

void	optimize_block_and_grid_size (void *func, int &block_size, int &grid_size, const int dynamic_memory=0, const int block_size_limit=0)
	Optimizes block and grid size according to `cudaOccupancyMaxPotentialBlockSize`. More...

void	optimize_block_and_grid_size_for_cooperative_launch (void *func, int &block_size, int &grid_size, const int dynamic_memory=0, const int block_size_limit=0)
	Optimizes block and grid size for a cooperative launch. More...

bool	supports_cooperative_launches ()

bool	supports_dynamic_parallelism ()

std::string	GPU_name ()

Function Documentation

◆ allocate()

void* CaloRecGPU::CUDA_Helpers::allocate ( const size_t num )

Allocates and returns the address of num bytes from GPU memory.

◆ allocate_pinned()

void* CaloRecGPU::CUDA_Helpers::allocate_pinned ( const size_t num )

Allocates and returns the address of num bytes from CPU pinned memory.

◆ CPU_to_GPU()

void CaloRecGPU::CUDA_Helpers::CPU_to_GPU	(	void *	dest,
		const void *const	source,
		const size_t	num
	)

Copies num bytes from source in CPU memory to dest in GPU memory.

◆ CPU_to_GPU_async()

void CaloRecGPU::CUDA_Helpers::CPU_to_GPU_async	(	void *	dest,
		const void *const	source,
		const size_t	num,
		CUDAStreamPtrHolder	stream = `{}`
	)

Copies num bytes from source in CPU memory to dest in GPU memory, asynchronously.

◆ deallocate()

void CaloRecGPU::CUDA_Helpers::deallocate ( void * address )

Deallocates address in GPU memory.

◆ deallocate_pinned()

void CaloRecGPU::CUDA_Helpers::deallocate_pinned ( void * address )

Deallocates address in CPU pinned memory.

◆ GPU_name()

std::string CaloRecGPU::CUDA_Helpers::GPU_name ( )

◆ GPU_synchronize()

void CaloRecGPU::CUDA_Helpers::GPU_synchronize ( CUDAStreamPtrHolder stream = {} )

Synchronizes the stream.

If called with no value, synchronizes with cudaStreamPerThread.

◆ GPU_to_CPU()

void CaloRecGPU::CUDA_Helpers::GPU_to_CPU	(	void *	dest,
		const void *const	source,
		const size_t	num
	)

Copies num bytes from source in GPU memory to dest in CPU memory.

◆ GPU_to_CPU_async()

void CaloRecGPU::CUDA_Helpers::GPU_to_CPU_async	(	void *	dest,
		const void *const	source,
		const size_t	num,
		CUDAStreamPtrHolder	stream = `{}`
	)

Copies num bytes from source in GPU memory to dest in CPU memory, asynchronously.

◆ GPU_to_GPU()

void CaloRecGPU::CUDA_Helpers::GPU_to_GPU	(	void *	dest,
		const void *const	source,
		const size_t	num
	)

Copies num bytes from source to dest, both in GPU memory.

◆ GPU_to_GPU_async()

void CaloRecGPU::CUDA_Helpers::GPU_to_GPU_async	(	void *	dest,
		const void *const	source,
		const size_t	num,
		CUDAStreamPtrHolder	stream = `{}`
	)

Copies num bytes from source to dest, both in GPU memory, asynchronously.

◆ optimize_block_and_grid_size()

void CaloRecGPU::CUDA_Helpers::optimize_block_and_grid_size	(	void *	func,
		int &	block_size,
		int &	grid_size,
		const int	dynamic_memory = `0`,
		const int	block_size_limit = `0`
	)

Optimizes block and grid size according to cudaOccupancyMaxPotentialBlockSize.

◆ optimize_block_and_grid_size_for_cooperative_launch()

void CaloRecGPU::CUDA_Helpers::optimize_block_and_grid_size_for_cooperative_launch	(	void *	func,
		int &	block_size,
		int &	grid_size,
		const int	dynamic_memory = `0`,
		const int	block_size_limit = `0`
	)

Optimizes block and grid size for a cooperative launch.

◆ supports_cooperative_launches()

bool CaloRecGPU::CUDA_Helpers::supports_cooperative_launches ( )

◆ supports_dynamic_parallelism()

bool CaloRecGPU::CUDA_Helpers::supports_dynamic_parallelism ( )

Classes

Functions