OpenLB 1.7
olb::gpu::cuda::device Namespace Reference

Basic wrappers of common CUDA functions. More...

Classes

class  Stream
 Basic wrapper for device stream. More...
 
class  unique_ptr
 Managed pointer for device-side memory. More...
 

Functions

int getCount ()
 Return number of available devices.
 
void check ()
 Check errors.
 
void synchronize ()
 Synchronize device.
 
int get ()
 Get current device.
 
void copyToHost (void *src, void *dst, std::size_t count)
 Copy data from device to host.
 
void copyToDevice (void *src, void *dst, std::size_t count)
 Copy data from host to device.
 
template<typename T >
T * malloc (std::size_t size)
 Allocate data on device.
 
std::size_t getDevicePageSize ()
 Returns device memory page size.
 
template<typename T >
std::size_t getPageAlignedCount (std::size_t count)
 Returns count rounded up to be a multiple of getDevicePageSize().
 
void asyncCopyToHost (Stream &stream, void *src, void *dst, std::size_t count)
 Copy data from device to host (async)
 
void asyncCopyToDevice (Stream &stream, void *src, void *dst, std::size_t count)
 Copy data from host to device (async)
 
void check (CUresult result)
 Check CUDA driver errors.
 

Detailed Description

Basic wrappers of common CUDA functions.

Function Documentation

◆ asyncCopyToDevice()

void olb::gpu::cuda::device::asyncCopyToDevice (Stream &stream, void *src, void *dst, std::size_t count)

Copy data from host to device (async)

Definition at line 145 of file device.hh.

{
  cudaMemcpyAsync(dst, src, count, cudaMemcpyHostToDevice, stream.get());
}

References olb::gpu::cuda::device::Stream::get().

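A minimal usage sketch (not part of the OpenLB sources; the helper name uploadAsync, the namespace alias, and the immediate cudaStreamSynchronize are illustrative assumptions). Note that count is a byte count, exactly as forwarded to cudaMemcpyAsync:

#include <vector>
#include <cstddef>

namespace device = olb::gpu::cuda::device;

// Hypothetical helper: stage a host buffer into an existing device buffer on the given stream.
void uploadAsync(device::Stream& stream, std::vector<float>& host, float* deviceBuffer) {
  device::asyncCopyToDevice(stream, host.data(), deviceBuffer,
                            host.size() * sizeof(float));
  // Wait on the wrapped cudaStream_t before the host buffer is modified again;
  // in real code one would typically overlap other work and synchronize later.
  cudaStreamSynchronize(stream.get());
}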

◆ asyncCopyToHost()

void olb::gpu::cuda::device::asyncCopyToHost (Stream &stream, void *src, void *dst, std::size_t count)

Copy data from device to host (async)

Definition at line 141 of file device.hh.

{
  cudaMemcpyAsync(dst, src, count, cudaMemcpyDeviceToHost, stream.get());
}

References olb::gpu::cuda::device::Stream::get().

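A minimal usage sketch (not part of the OpenLB sources; the helper name downloadAsync and the namespace alias are illustrative assumptions):

#include <vector>
#include <cstddef>

namespace device = olb::gpu::cuda::device;

// Hypothetical helper: fetch results from the device on the given stream.
void downloadAsync(device::Stream& stream, float* deviceBuffer, std::vector<float>& host) {
  device::asyncCopyToHost(stream, deviceBuffer, host.data(),
                          host.size() * sizeof(float));
  cudaStreamSynchronize(stream.get());  // host data is only valid to read after this
}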

◆ check() [1/2]

void olb::gpu::cuda::device::check ( )

Check errors.

Definition at line 48 of file device.hh.

{
  cudaError_t error = cudaGetLastError();
  if (error != cudaSuccess) {
    throw std::runtime_error(cudaGetErrorString(error));
  }
}
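
A minimal usage sketch (not part of the OpenLB sources; the helper name clearDeviceBuffer is an illustrative assumption). Since check() inspects cudaGetLastError, it is typically called right after a raw CUDA runtime call or kernel launch:

#include <cstddef>

namespace device = olb::gpu::cuda::device;

// Hypothetical helper: zero a device buffer and surface any runtime error as an exception.
void clearDeviceBuffer(void* deviceBuffer, std::size_t nBytes) {
  cudaMemset(deviceBuffer, 0, nBytes);
  device::check();  // throws std::runtime_error carrying cudaGetErrorString on failure
}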

◆ check() [2/2]

void olb::gpu::cuda::device::check ( CUresult result)

Check CUDA driver errors.

Definition at line 56 of file device.hh.

{
  if (result != CUDA_SUCCESS) {
    const char* description{};
    cuGetErrorString(result, &description);
    throw std::runtime_error(std::string(description));
  }
}
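
A minimal usage sketch (not part of the OpenLB sources; the helper name freeDeviceMemory is an illustrative assumption, and cuda.h is assumed to be pulled in by the OpenLB device header). Any driver API call returning a CUresult can be wrapped this way:

#include <cstddef>

namespace device = olb::gpu::cuda::device;

// Hypothetical helper: query free device memory, turning a failing CUresult into an exception.
std::size_t freeDeviceMemory() {
  std::size_t freeBytes = 0;
  std::size_t totalBytes = 0;
  device::check(cuMemGetInfo(&freeBytes, &totalBytes));
  return freeBytes;
}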

◆ copyToDevice()

void olb::gpu::cuda::device::copyToDevice (void *src, void *dst, std::size_t count)

Copy data from host to device.

Definition at line 83 of file device.hh.

{
  cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice);
  check();
}

References check().

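A minimal usage sketch (not part of the OpenLB sources; the helper name makeDeviceCopy is an illustrative assumption). Note the (src, dst, count) argument order, which is the reverse of cudaMemcpy, and that count is given in bytes:

#include <vector>
#include <cstddef>

namespace device = olb::gpu::cuda::device;

// Hypothetical helper: allocate a device-side copy of a host vector.
double* makeDeviceCopy(std::vector<double>& host) {
  double* deviceData = device::malloc<double>(host.size());
  device::copyToDevice(host.data(), deviceData, host.size() * sizeof(double));
  return deviceData;
}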

◆ copyToHost()

void olb::gpu::cuda::device::copyToHost (void *src, void *dst, std::size_t count)

Copy data from device to host.

Definition at line 78 of file device.hh.

{
  cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost);
  check();
}

References check().

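A minimal usage sketch (not part of the OpenLB sources; the helper name fetchFromDevice is an illustrative assumption):

#include <vector>
#include <cstddef>

namespace device = olb::gpu::cuda::device;

// Hypothetical helper: blocking download of n doubles into a freshly allocated host vector.
std::vector<double> fetchFromDevice(double* deviceData, std::size_t n) {
  std::vector<double> host(n);
  device::copyToHost(deviceData, host.data(), n * sizeof(double));
  return host;
}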

◆ get()

int olb::gpu::cuda::device::get ( )

Get current device.

Definition at line 71 of file device.hh.

{
  int device{};
  cudaGetDevice(&device);
  check();
  return device;
}

References check().

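A minimal usage sketch (not part of the OpenLB sources; the helper name reportActiveDevice is an illustrative assumption):

#include <iostream>

namespace device = olb::gpu::cuda::device;

// Hypothetical helper: report which CUDA device the current process is using.
void reportActiveDevice() {
  std::cout << "Using CUDA device " << device::get()
            << " of " << device::getCount() << std::endl;
}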

◆ getCount()

int olb::gpu::cuda::device::getCount ( )

Return number of available devices.

Definition at line 42 of file device.hh.

{
  int devices{};
  cudaGetDeviceCount(&devices);
  return devices;
}
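
A minimal usage sketch (not part of the OpenLB sources; the helper name requireGpu is an illustrative assumption):

#include <stdexcept>

namespace device = olb::gpu::cuda::device;

// Hypothetical guard clause for GPU-only code paths.
void requireGpu() {
  if (device::getCount() == 0) {
    throw std::runtime_error("No CUDA device available");
  }
}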

◆ getDevicePageSize()

std::size_t olb::gpu::cuda::device::getDevicePageSize ( )

Returns device memory page size.

Definition at line 96 of file device.hh.

{
  std::size_t granularity = 0;
  CUmemAllocationProp prop = {};
  prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
  prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
  prop.location.id = device::get();
  check(cuMemGetAllocationGranularity(&granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM));
  return granularity;
}

References check(), and get().

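A minimal usage sketch (not part of the OpenLB sources; the helper name reportPageSize is an illustrative assumption):

#include <cstddef>
#include <iostream>

namespace device = olb::gpu::cuda::device;

// Hypothetical helper: print the allocation granularity that page-aligned buffers are rounded to.
void reportPageSize() {
  const std::size_t pageSize = device::getDevicePageSize();
  std::cout << "Device allocation granularity: " << pageSize << " bytes" << std::endl;
}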

◆ getPageAlignedCount()

template<typename T >
std::size_t olb::gpu::cuda::device::getPageAlignedCount ( std::size_t count)

Returns count rounded up to be a multiple of getDevicePageSize().

Definition at line 64 of file device.h.

{
  const std::size_t page_size = getDevicePageSize();
  const std::size_t size = ((count * sizeof(T) - 1) / page_size + 1) * page_size;
  const std::size_t volume = size / sizeof(T);

  if (size % page_size != 0) {
    throw std::invalid_argument("Buffer size must be multiple of PAGE_SIZE");
  }

  return volume;
}

References getDevicePageSize().

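A minimal usage sketch (not part of the OpenLB sources; the helper name allocatePageAligned is an illustrative assumption). The padded element count is at least the requested one, and its byte size is a whole multiple of the device page size:

#include <cstddef>

namespace device = olb::gpu::cuda::device;

// Hypothetical helper: allocate a double buffer whose byte size is page-aligned.
double* allocatePageAligned(std::size_t requestedCount) {
  const std::size_t paddedCount = device::getPageAlignedCount<double>(requestedCount);
  return device::malloc<double>(paddedCount);
}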

◆ malloc()

template<typename T >
T * olb::gpu::cuda::device::malloc ( std::size_t size)

Allocate data on device.

Definition at line 89 of file device.hh.

{
  T* ptr{};
  cudaMalloc(&ptr, size*sizeof(T));
  check();
  return ptr;
}
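
A minimal usage sketch (not part of the OpenLB sources; the helper name scratchBufferExample is an illustrative assumption). The size argument is an element count; the wrapper multiplies by sizeof(T) internally:

namespace device = olb::gpu::cuda::device;

// Hypothetical helper: allocate and release a raw device buffer of 4096 floats.
void scratchBufferExample() {
  float* scratch = device::malloc<float>(4096);
  // ... use scratch in kernels or copies ...
  cudaFree(scratch);  // backed by plain cudaMalloc, so cudaFree releases it
}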

◆ synchronize()

void olb::gpu::cuda::device::synchronize ( )

Synchronize device.

Definition at line 64 of file device.hh.

{
  if (getCount() > 0) {
    cudaDeviceSynchronize();
    check();
  }
}

References check(), and getCount().

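A minimal usage sketch (not part of the OpenLB sources; the helper name finishDeviceWork is an illustrative assumption):

namespace device = olb::gpu::cuda::device;

// Hypothetical helper: make preceding kernel launches and async copies observable on the host.
void finishDeviceWork() {
  device::synchronize();  // no-op when no CUDA device is present
}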