25#ifndef GPU_CUDA_DEVICE_HH
26#define GPU_CUDA_DEVICE_HH
44 cudaGetDeviceCount(&devices);
49 cudaError_t error = cudaGetLastError();
50 if (error != cudaSuccess) {
51 throw std::runtime_error(cudaGetErrorString(error));
57 if (result != CUDA_SUCCESS) {
58 const char* description{};
59 cuGetErrorString(result, &description);
60 throw std::runtime_error(std::string(description));
66 cudaDeviceSynchronize();
73 cudaGetDevice(&device);
78void copyToHost(
void* src,
void* dst, std::size_t count) {
79 cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost);
84 cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice);
91 cudaMalloc(&ptr, size*
sizeof(T));
97 std::size_t granularity = 0;
98 CUmemAllocationProp prop = {};
99 prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
100 prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
102 check(cuMemGetAllocationGranularity(&granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM));
109 if (_ptr !=
nullptr) {
117 if (_ptr !=
nullptr) {
121 _ptr = rhs.release();
127 cudaStreamCreateWithFlags(&_stream, flags);
132 cudaStreamDestroy(_stream);
137 cudaStreamSynchronize(_stream);
142 cudaMemcpyAsync(dst, src, count, cudaMemcpyDeviceToHost, stream.
get());
146 cudaMemcpyAsync(dst, src, count, cudaMemcpyHostToDevice, stream.
get());
Basic wrapper for device stream.
Stream(unsigned int flags)
Managed pointer for device-side memory.
unique_ptr & operator=(unique_ptr &&rhs)
std::size_t getDevicePageSize()
Return device memory page size.
void copyToHost(void *src, void *dst, std::size_t count)
Copy data from device to host.
void copyToDevice(void *src, void *dst, std::size_t count)
Copy data from host to device.
int getCount()
Return number of available devices.
void asyncCopyToDevice(Stream &stream, void *src, void *dst, std::size_t count)
Copy data from host to device (async).
void asyncCopyToHost(Stream &stream, void *src, void *dst, std::size_t count)
Copy data from device to host (async).
void check()
Check errors.
int get()
Get current device.
T * malloc(std::size_t size)
Allocate data on device.
void synchronize()
Synchronize device.
Top-level namespace for all of OpenLB.