25#ifndef GPU_CUDA_COLUMN_HH
26#define GPU_CUDA_COLUMN_HH
31#include <thrust/gather.h>
32#include <thrust/scatter.h>
34#include <thrust/host_vector.h>
35#include <thrust/device_vector.h>
36#include <thrust/execution_policy.h>
48 thrust::host_vector<T>
host;
58 host.resize(newCount);
59 device.resize(newCount);
73 _data(rhs._data.release())
79 _data(new
Data(*rhs._data))
89 _data->device.
clear();
91 _count = _data->host.size();
97 _data->resize(newCount);
105 _count = _data->host.size();
111 std::sort(_data->host.begin(), _data->host.end());
112 _data->host.erase(std::unique(_data->host.begin(), _data->host.end()), _data->host.end());
118 return _data->host[i];
124 return _data->host[i];
136 return _data->host.data();
142 return _data->host.data();
148 return _data->device.
data().get();
154 return _data->device.
data().get();
160 if (_count != _data->device.size()) {
161 _data->device.resize(_count);
176 if (_count != _data->device.size()) {
177 _data->device.resize(_count);
198 return _count *
sizeof(T) +
sizeof(std::size_t);
204 std::size_t currentBlock = 0;
205 bool* dataPtr =
nullptr;
207 registerVar(iBlock, sizeBlock, currentBlock, dataPtr, _count);
208 if (loadingMode && iBlock == 1) {
211 registerVar(iBlock, sizeBlock, currentBlock, dataPtr, *data(), _count);
224 CUmemAllocationProp prop{};
225 CUmemAccessDesc access{};
229 host(new T[count] { }),
230 size(count *
sizeof(T))
234 prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
235 prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
236 prop.location.id = device;
244 access.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
245 access.location.id = device;
246 access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
260 _count(device::getPageAlignedCount<T>(count)),
261 _size(_count * sizeof(T)),
262 _data(new
Data(_count)),
265 _deviceBase =
reinterpret_cast<T*
>(_data->ptr);
266 _devicePopulation = _deviceBase;
276 return _data->host[i];
282 return _data->host[i];
307 return _count *
sizeof(T) +
sizeof(std::ptrdiff_t) +
sizeof(std::size_t);
313 std::size_t currentBlock = 0;
314 bool* dataPtr =
nullptr;
316 registerVar(iBlock, sizeBlock, currentBlock, dataPtr, _shift);
317 registerVar(iBlock, sizeBlock, currentBlock, dataPtr, _count);
318 registerVar(iBlock, sizeBlock, currentBlock, dataPtr, *_data->host.get(), _count);
331 std::uint8_t* buffer)
const
333 thrust::gather(thrust::device,
334 thrust::device_pointer_cast(indices.
begin()),
335 thrust::device_pointer_cast(indices.
end()),
336 thrust::device_pointer_cast(_column.deviceData()),
337 thrust::device_pointer_cast(
reinterpret_cast<T*
>(buffer)));
338 return indices.
size() *
sizeof(T);
343 const std::uint8_t* buffer)
345 thrust::scatter(thrust::device,
346 thrust::device_pointer_cast(
reinterpret_cast<const T*
>(buffer)),
347 thrust::device_pointer_cast(
reinterpret_cast<const T*
>(buffer) + indices.
size()),
348 thrust::device_pointer_cast(indices.
begin()),
349 thrust::device_pointer_cast(_column.deviceData()));
350 return indices.
size() *
sizeof(T);
357 std::uint8_t* buffer)
const
359 thrust::gather(thrust::device,
360 thrust::device_pointer_cast(indices.
begin()),
361 thrust::device_pointer_cast(indices.
end()),
362 thrust::device_pointer_cast(_column.deviceData()),
363 thrust::device_pointer_cast(
reinterpret_cast<T*
>(buffer)));
364 return indices.
size() *
sizeof(T);
370 const std::uint8_t* buffer)
372 thrust::scatter(thrust::device,
373 thrust::device_pointer_cast(
reinterpret_cast<const T*
>(buffer)),
374 thrust::device_pointer_cast(
reinterpret_cast<const T*
>(buffer) + indices.
size()),
375 thrust::device_pointer_cast(indices.
begin()),
376 thrust::device_pointer_cast(_column.deviceData()));
377 return indices.
size() *
sizeof(T);
Storage of any FIELD_TYPE data on PLATFORM.
void setProcessingContext(ProcessingContext)
void resize(std::size_t count)
const T & operator[](std::size_t i) const override
bool * getBlock(std::size_t iBlock, std::size_t &sizeBlock, bool loadingMode) override
Return a pointer to the memory of the current block and its size for the serializable interface.
std::size_t getSerializableSize() const override
Binary size for the serializer.
std::size_t getNblock() const override
Number of data blocks for the serializable interface.
virtual ~Column()=default
std::size_t getNblock() const override
Number of data blocks for the serializable interface.
CyclicColumn(std::size_t count)
const T & operator[](std::size_t i) const override
bool * getBlock(std::size_t iBlock, std::size_t &sizeBlock, bool loadingMode) override
Return a pointer to the memory of the current block and its size for the serializable interface.
std::size_t getSerializableSize() const override
Binary size for the serializer.
void setProcessingContext(ProcessingContext)
Plain column for CUDA GPU targets.
void clear()
Reset size to zero.
Virtual memory based cyclic column for usage in ColumnVector.
Basic wrapper for device stream.
void copyToHost(void *src, void *dst, std::size_t count)
Copy data from device to host.
void copyToDevice(void *src, void *dst, std::size_t count)
Copy data from host to device.
void asyncCopyToDevice(Stream &stream, void *src, void *dst, std::size_t count)
Copy data from host to device (async)
void asyncCopyToHost(Stream &stream, void *src, void *dst, std::size_t count)
Copy data from device to host (async)
void check()
Check errors.
int get()
Get current device.
Top level namespace for all of OpenLB.
ProcessingContext
OpenLB processing contexts.
@ Simulation
Data available on host for e.g. functor evaluation.
thrust::device_vector< T > device
void resize(std::size_t newCount)
thrust::host_vector< T > host
CUmemGenericAllocationHandle handle
std::unique_ptr< T[]> host