24#ifndef GPU_CUDA_CONTEXT_HH
25#define GPU_CUDA_CONTEXT_HH
38template <
typename T,
typename DESCRIPTOR,
typename FIELD>
39static auto getDeviceFieldPointer(ConcreteBlockLattice<T,DESCRIPTOR,Platform::GPU_CUDA>& lattice) {
40 auto& fieldArray = lattice.template getField<FIELD>();
41 return Vector<typename FIELD::template value_type<T>*,DESCRIPTOR::template size<FIELD>()>([&](
unsigned iD) {
42 return fieldArray[iD].deviceData();
47template <
typename T,
typename DESCRIPTOR,
typename... FIELDS>
48static auto getDeviceFieldPointers(
typename meta::list<FIELDS...>,
49 ConcreteBlockLattice<T,DESCRIPTOR,Platform::GPU_CUDA>& lattice) {
50 return std::make_tuple(getDeviceFieldPointer<T,DESCRIPTOR,FIELDS>(lattice)...);
54template <
typename T,
typename DESCRIPTOR>
57 const std::size_t _nCells;
60 decltype(getDeviceFieldPointers(
typename DESCRIPTOR::fields_t(),
67 void*** _customFieldsD;
74 _nCells(lattice.getNcells()),
75 _staticFieldsD(getDeviceFieldPointers(
typename DESCRIPTOR::fields_t(), lattice)),
76 _customFieldsD(lattice.getDataRegistry().deviceData())
83 template <
typename FIELD>
84 typename FIELD::template value_type<T>**
getField() __device__ {
85 if constexpr (DESCRIPTOR::template provides<FIELD>()) {
86 return std::get<(DESCRIPTOR::fields_t::template index<FIELD>())>(_staticFieldsD).data();
88 return reinterpret_cast<typename FIELD::template value_type<T>**
>(
89 _customFieldsD[field_type_index<FieldTypeRegistry<T,DESCRIPTOR,Platform::GPU_CUDA>,
Array<FIELD>>]);
96template <
typename T,
typename DESCRIPTOR,
typename FIELD>
98 DESCRIPTOR::template size<FIELD>(),
99 FieldPtr<T,DESCRIPTOR,FIELD>> {
105 DESCRIPTOR::template size<FIELD>(),
111 return _data.template getField<FIELD>()[iDim] + _index;
115 return _data.template getField<FIELD>()[iDim] + _index;
125 _index(rhs._index) { }
127 template <
typename U,
typename IMPL>
130 for (
unsigned iDim=0; iDim < DESCRIPTOR::template size<FIELD>(); ++iDim) {
139template <
typename T,
typename DESCRIPTOR>
154 template <
typename FIELD>
156 return _data.template getField<FIELD>()[iD][
_iCell];
160 return _data.template getField<descriptors::POPULATION>()[iPop][
_iCell];
163 template <
typename FIELD>
165 auto fieldArray =
_data.template getField<FIELD>();
166 if constexpr (descriptor_t::template size<FIELD>() == 1) {
167 return fieldArray[0][
_iCell];
170 return fieldArray[iD][
_iCell];
175 template <
typename FIELD>
180 template <
typename FIELD>
182 auto fieldArray =
_data.template getField<FIELD>();
183 for (
unsigned iD=0; iD < descriptor_t::template size<FIELD>(); ++iD) {
184 fieldArray[iD][
_iCell] = value[iD];
188 template <
typename FIELD>
190 auto fieldArray =
_data.template getField<FIELD>();
191 for (
unsigned iD=0; iD < descriptor_t::template size<FIELD>(); ++iD) {
192 fieldArray[iD][
_iCell] = value[iD];
199template <
typename T,
typename DESCRIPTOR>
206template <
typename T,
typename DESCRIPTOR>
215 _padding{lattice.getPadding()}
217 auto size = lattice.getExtent() + 2*_padding;
218 if constexpr (DESCRIPTOR::d == 3) {
219 _projection = {size[1]*size[2], size[2], 1};
221 _projection = {size[1], 1};
226 return (loc+_padding) * _projection;
230 return dir * _projection;
247template <
typename T,
typename DESCRIPTOR>
264 return *this->
template getField<DYNAMICS<T,DESCRIPTOR>>();
280 T rho, u[DESCRIPTOR::d] { };
285 getDynamics().computeAllMomenta(*
this, rho, u, pi);
298 return getDynamics().defineAllMomenta(*
this, rho, u, pi);
301 for (
int iPop=0; iPop < descriptors::q<DESCRIPTOR>(); ++iPop) {
322template <
typename T,
typename DESCRIPTOR,
typename PARAMETERS>
324 _deviceParameters{gpu::cuda::device::malloc<ParametersD>(1)},
328 _deviceParameters.
get(),
329 sizeof(ParametersD));
332template <
typename T,
typename DESCRIPTOR,
typename PARAMETERS>
338 _deviceParameters.get(),
339 sizeof(ParametersD));
343template<
typename T,
typename DESCRIPTOR,
typename PARAMETERS>
346 return decltype(parameters)::fields_t::size;
349template<
typename T,
typename DESCRIPTOR,
typename PARAMETERS>
352 std::size_t size = 0;
353 decltype(parameters)::fields_t::for_each([&size](
auto field) {
354 using field_t =
typename decltype(field)::type;
360template<
typename T,
typename DESCRIPTOR,
typename PARAMETERS>
362 std::size_t iBlock, std::size_t& sizeBlock,
bool loadingMode)
364 std::size_t currentBlock = 0;
365 bool* dataPtr =
nullptr;
366 decltype(parameters)::fields_t::for_each([&](
auto field) {
367 using field_t =
typename decltype(field)::type;
368 if constexpr (DESCRIPTOR::template size<field_t>() == 1) {
369 registerVar(iBlock, sizeBlock, currentBlock, dataPtr,
370 parameters.template get<field_t>(), loadingMode);
372 registerSerializableOfConstSize(iBlock, sizeBlock, currentBlock, dataPtr,
373 parameters.template get<field_t>(), loadingMode);
Highest-level interface to Cell data.
Implementation of BlockLattice on a concrete PLATFORM.
constexpr std::size_t getSerializableSize() const
Device-side implementation of the Cell concept for post processors.
void iniEquilibrium(T rho, T *u) __device__
Cell< T, DESCRIPTOR > neighbor(LatticeR< DESCRIPTOR::d > offset) __device__
void computeJ(T *j) __device__
void computeAllMomenta(T &rho, T *u, T *pi) __device__
T computeRho() __device__
Cell(DeviceBlockLattice< T, DESCRIPTOR > &data, CellID iCell) __device__
DeviceBlockLattice< T, DESCRIPTOR > & getBlock() __device__
void inverseShiftRhoU(T &rho, T *u) __device__
void defineU(T *u) __device__
void computeU(T *u) __device__
void defineRhoU(T &rho, T *u) __device__
void defineAllMomenta(T &rho, T *u, T *pi) __device__
Dynamics< T, DESCRIPTOR > & getDynamics() __device__
void definePopulations(const T *f) __device__
void iniRegularized(T rho, T *u, T *pi) __device__
void defineRho(T &rho) __device__
void computeStress(T *pi) __device__
void computeRhoU(T &rho, T *u) __device__
Device-side implementation of the data-only Cell concept for collision steps.
value_t & operator[](int iPop) __device__
DataOnlyCell(DeviceContext< T, DESCRIPTOR > &data, CellID iCell) __device__
void setField(FieldD< value_t, descriptor_t, FIELD > &&value) __device__
void setField(const FieldD< value_t, descriptor_t, FIELD > &value) __device__
auto getField() const __device__
FieldPtr< T, DESCRIPTOR, FIELD > getFieldPointer() __device__
DeviceContext< T, DESCRIPTOR > & _data
FIELD::template value_type< T > getFieldComponent(unsigned iD) __device__
Device-side view of a block lattice.
CellID getCellId(LatticeR< DESCRIPTOR::d > loc) const __device__
CellDistance getNeighborDistance(LatticeR< DESCRIPTOR::d > dir) const __device__
Cell< T, DESCRIPTOR > get(LatticeR< DESCRIPTOR::d > loc) __device__
Cell< T, DESCRIPTOR > get(CellID iCell) __device__
DeviceBlockLattice(ConcreteBlockLattice< T, DESCRIPTOR, Platform::GPU_CUDA > &lattice) __host__
Structure for passing pointers to on-device data into CUDA kernels.
DeviceContext(ConcreteBlockLattice< T, DESCRIPTOR, Platform::GPU_CUDA > &lattice) __host__
FIELD::template value_type< T > ** getField() __device__
std::size_t getNcells() const any_platform
Pointer to row of a D-dimensional field.
FieldPtr(FieldPtr< T, DESCRIPTOR, FIELD > &&rhs) __device__
FieldPtr< T, DESCRIPTOR, FIELD > & operator=(const GenericVector< U, DESCRIPTOR::template size< FIELD >(), IMPL > &rhs) __device__
FIELD::template value_type< T > * getComponentPointer(unsigned iDim) __device__
FieldPtr(DeviceContext< T, DESCRIPTOR > &data, std::size_t index) __device__
const FIELD::template value_type< T > * getComponentPointer(unsigned iDim) const __device__
void copyToDevice(void *src, void *dst, std::size_t count)
Copy data from host to device.
Top level namespace for all of OpenLB.
std::uint32_t CellID
Type for sequential block-local cell indices.
ProcessingContext
OpenLB processing contexts.
@ Simulation
Data available on host for e.g. functor evaluation.
std::int64_t CellDistance
Type for in-memory distance of block-local cell indices.
Describe FieldArray of a FIELD in Data.
std::size_t getNblock() const override
Number of data blocks for the serializable interface.
bool * getBlock(std::size_t iBlock, std::size_t &sizeBlock, bool loadingMode) override
Return a pointer to the memory of the current block and its size for the serializable interface.
void setProcessingContext(ProcessingContext context) override
ConcreteParametersD(std::size_t)
ParametersD< T, DESCRIPTOR >::template include< PARAMETERS > parameters
std::size_t getSerializableSize() const override
Binary size for the serializer.
Generic vector of values supporting basic arithmetic.
constexpr const T & operator[](unsigned iDim) const any_platform
const auto & get() const any_platform
GenericVector< T, D, FieldPtr< T, DESCRIPTOR, FIELD > > type
Virtual interface for device-side dynamically-dispatched dynamics access.