OpenLB 1.7
olb::gpu::cuda::kernel Namespace Reference

CUDA kernels to execute collisions and post processors. More...

Functions

template<typename CONTEXT, typename FIELD>
__global__ void gather_field (CONTEXT lattice, const CellID *indices, std::size_t nIndices, typename FIELD::template value_type< typename CONTEXT::value_t > *buffer)
 CUDA kernel for gathering FIELD data of lattice at indices into buffer.
 
template<typename SOURCE, typename TARGET, typename FIELD>
__global__ void copy_field (SOURCE sourceLattice, TARGET targetLattice, const CellID *sourceIndices, const CellID *targetIndices, std::size_t nIndices)
 CUDA kernel for copying FIELD data of sourceLattice at sourceIndices into targetLattice at targetIndices.
 
__global__ void gather_any_fields (AnyDeviceFieldArrayD *fields, std::size_t nFields, const CellID *indices, std::size_t nIndices, std::uint8_t *buffer)
 CUDA kernel for gathering fields at indices into buffer.
 
__global__ void copy_any_fields (AnyDeviceFieldArrayD *sourceFields, AnyDeviceFieldArrayD *targetFields, std::size_t nFields, const CellID *sourceIndices, const CellID *targetIndices, std::size_t nIndices)
 CUDA kernel for copying sourceFields at sourceIndices to targetFields at targetIndices.
 
template<typename CONTEXT, typename FIELD>
__global__ void scatter_field (CONTEXT lattice, const CellID *indices, std::size_t nIndices, typename FIELD::template value_type< typename CONTEXT::value_t > *buffer)
 CUDA kernel for scattering FIELD data in buffer to indices in lattice.
 
__global__ void scatter_any_fields (AnyDeviceFieldArrayD *fields, std::size_t nFields, const CellID *indices, std::size_t nIndices, std::uint8_t *buffer)
 CUDA kernel for scattering fields in buffer to indices in lattice.
 
template<typename CONTEXT, typename... OPERATORS>
__global__ void call_operators (CONTEXT lattice, bool *subdomain, OPERATORS... ops)
 CUDA kernel for applying purely local collision steps.
 
template<typename CONTEXT, typename... OPERATORS>
__global__ void call_operators_with_statistics (CONTEXT lattice, bool *subdomain, OPERATORS... ops)
 CUDA kernel for applying purely local collision steps while tracking statistics.
 
template<typename CONTEXT, typename... OPERATORS>
__global__ void call_list_operators (CONTEXT lattice, const CellID *indices, std::size_t nIndices, OPERATORS... ops)
 CUDA kernel for applying generic OPERATORS with OperatorScope::PerCell or ListedCollision.
 
template<typename CONTEXT, typename... OPERATORS>
__global__ void call_list_operators_with_statistics (CONTEXT lattice, const CellID *indices, std::size_t nIndices, OPERATORS... ops)
 CUDA kernel for applying ListedCollision.
 
template<typename CONTEXTS, typename... OPERATORS>
__global__ void call_coupling_operators (CONTEXTS lattices, bool *subdomain, OPERATORS... ops)
 CUDA kernel for applying UnmaskedCoupling(WithParameters).
 
template<typename T, typename DESCRIPTOR, typename DYNAMICS, typename PARAMETERS = typename DYNAMICS::ParametersD>
__global__ void construct_dynamics (void *target, PARAMETERS *parameters)
 CUDA kernel for constructing on-device ConcreteDynamics.
 

Detailed Description

CUDA kernels to execute collisions and post processors.

Function Documentation

◆ call_coupling_operators()

template<typename CONTEXTS, typename... OPERATORS>
void olb::gpu::cuda::kernel::call_coupling_operators ( CONTEXTS lattices,
bool * subdomain,
OPERATORS... ops )

CUDA kernel for applying UnmaskedCoupling(WithParameters).

Definition at line 341 of file operator.hh.

{
  const CellID iCell = blockIdx.x * blockDim.x + threadIdx.x;
  const auto nCells = lattices.template get<0>().getNcells();
  if (!(iCell < nCells) || !subdomain[iCell]) {
    return;
  }
  (ops(lattices, iCell) || ...);
}
CellID is an alias for std::uint32_t, the type for sequential block-local cell indices.
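A hedged sketch of how such a kernel might be launched from the host; the helper name and block size are assumptions, not OpenLB API. The grid covers the cell count of the first lattice, matching the bound checked inside the kernel.

#include <cstddef>

// Illustrative launch helper: a one-thread-per-cell 1D grid sized from the
// first lattice's cell count. blockSize 32 is an arbitrary assumption.
template <typename CONTEXTS, typename... OPERATORS>
void launchCouplingKernel(CONTEXTS lattices, bool* subdomain,
                          std::size_t nCells, OPERATORS... ops) {
  const unsigned blockSize = 32;
  const unsigned gridSize  = (nCells + blockSize - 1) / blockSize;
  kernel::call_coupling_operators<<<gridSize, blockSize>>>(lattices, subdomain, ops...);
}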

◆ call_list_operators()

template<typename CONTEXT, typename... OPERATORS>
void olb::gpu::cuda::kernel::call_list_operators ( CONTEXT lattice,
const CellID * indices,
std::size_t nIndices,
OPERATORS... ops )

CUDA kernel for applying generic OPERATORS with OperatorScope::PerCell or ListedCollision.

Definition at line 301 of file operator.hh.

{
  const std::size_t iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  (ops(lattice, indices[iIndex]) || ...);
}
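The fold `(ops(lattice, indices[iIndex]) || ...)` requires each operator to be callable on the device with a lattice context and a CellID, returning something convertible to bool. A hypothetical operator matching that contract (a sketch, not an OpenLB class) might look like this:

#include <cstdint>

using CellID = std::uint32_t; // per the CellID note above

// Hypothetical per-cell operator; the contract is inferred from the fold
// expression above: __device__-callable, returns bool ("operator applied").
struct MyPerCellOp {
  template <typename CONTEXT>
  __device__ bool operator()(CONTEXT& lattice, CellID iCell) const {
    // ... read/write cell iCell of lattice here ...
    return true; // true short-circuits the remaining operators in the fold
  }
};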

◆ call_list_operators_with_statistics()

template<typename CONTEXT, typename... OPERATORS>
void olb::gpu::cuda::kernel::call_list_operators_with_statistics ( CONTEXT lattice,
const CellID * indices,
std::size_t nIndices,
OPERATORS... ops )

CUDA kernel for applying ListedCollision.

Statistics data is reduced by StatisticsPostProcessor.

Definition at line 316 of file operator.hh.

{
  const std::size_t iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  typename CONTEXT::value_t** statistic = lattice.template getField<descriptors::STATISTIC>();
  int* statisticGenerated = lattice.template getField<descriptors::STATISTIC_GENERATED>()[0];
  CellStatistic<typename CONTEXT::value_t> cellStatistic{-1, -1};
  if ((ops(lattice, indices[iIndex], cellStatistic) || ...)) {
    if (cellStatistic) {
      statisticGenerated[indices[iIndex]] = 1;
      statistic[0][indices[iIndex]] = cellStatistic.rho;
      statistic[1][indices[iIndex]] = cellStatistic.uSqr;
    } else {
      statisticGenerated[indices[iIndex]] = 0;
      statistic[0][indices[iIndex]] = 0;
      statistic[1][indices[iIndex]] = 0;
    }
  }
}
CellStatistic is the return value of any collision. Definition at line 43 of file interface.h.

References olb::CellStatistic< T >::rho.
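The `(ops(...) || ...)` fold evaluates the operators left to right and stops at the first one that returns true; statistics are only written if some operator actually applied. A minimal standalone C++ illustration of this short-circuiting (not OpenLB code):

#include <cstdio>

// A binary fold over || evaluates operators left to right and stops at the
// first one returning true, mirroring the kernels above.
template <typename... OPERATORS>
bool apply_first_matching(int cell, OPERATORS... ops) {
  return (ops(cell) || ...);
}

int main() {
  auto never  = [](int)   { std::puts("never applied"); return false; };
  auto always = [](int c) { std::printf("applied on %d\n", c); return true; };
  auto unreachable = [](int) { std::puts("never evaluated"); return true; };
  apply_first_matching(7, never, always, unreachable);
  // prints "never applied" then "applied on 7"; the third lambda never runs
}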

◆ call_operators()

template<typename CONTEXT, typename... OPERATORS>
void olb::gpu::cuda::kernel::call_operators ( CONTEXT lattice,
bool * subdomain,
OPERATORS... ops )

CUDA kernel for applying purely local collision steps.

Definition at line 265 of file operator.hh.

{
  const CellID iCell = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iCell < lattice.getNcells()) || !subdomain[iCell]) {
    return;
  }
  (ops(lattice, iCell) || ...);
}
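The `subdomain` mask selects which cells the operators run on. A hedged host-side sketch of preparing an all-enabled mask; the helper name is an assumption and error handling is omitted:

#include <cstddef>
#include <cuda_runtime.h>

// Illustrative only: allocate and enable a full subdomain mask for
// call_operators; cudaMemset writes byte 0x01, which reads back as true.
bool* makeFullSubdomainMask(std::size_t nCells) {
  bool* subdomain = nullptr;
  cudaMalloc(&subdomain, nCells * sizeof(bool));
  cudaMemset(subdomain, 1, nCells * sizeof(bool));
  return subdomain; // caller releases with cudaFree
}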

◆ call_operators_with_statistics()

template<typename CONTEXT, typename... OPERATORS>
void olb::gpu::cuda::kernel::call_operators_with_statistics ( CONTEXT lattice,
bool * subdomain,
OPERATORS... ops )

CUDA kernel for applying purely local collision steps while tracking statistics.

Statistics data is reduced by StatisticsPostProcessor.

Definition at line 278 of file operator.hh.

{
  const CellID iCell = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iCell < lattice.getNcells()) || !subdomain[iCell]) {
    return;
  }
  typename CONTEXT::value_t** statistic = lattice.template getField<descriptors::STATISTIC>();
  int* statisticGenerated = lattice.template getField<descriptors::STATISTIC_GENERATED>()[0];
  CellStatistic<typename CONTEXT::value_t> cellStatistic{-1, -1};
  if ((ops(lattice, iCell, cellStatistic) || ...)) {
    if (cellStatistic) {
      statisticGenerated[iCell] = 1;
      statistic[0][iCell] = cellStatistic.rho;
      statistic[1][iCell] = cellStatistic.uSqr;
    } else {
      statisticGenerated[iCell] = 0;
      statistic[0][iCell] = 0;
      statistic[1][iCell] = 0;
    }
  }
}

References olb::CellStatistic< T >::rho.

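The kernel only records per-cell values; the reduction itself is performed by StatisticsPostProcessor. As an illustration of what that reduction amounts to, a hedged Thrust sketch, assuming device pointers to component 0 of the STATISTIC field and to the STATISTIC_GENERATED flags; since the kernel zeroes non-generated cells, summing all cells and dividing by the generated count yields the average rho:

#include <cstddef>
#include <thrust/execution_policy.h>
#include <thrust/reduce.h>

// Illustrative only, not the StatisticsPostProcessor implementation.
template <typename T>
T averageRho(const T* rho, const int* generated, std::size_t nCells) {
  T   sum = thrust::reduce(thrust::device, rho, rho + nCells, T{0});
  int num = thrust::reduce(thrust::device, generated, generated + nCells, 0);
  return num > 0 ? sum / num : T{0};
}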

◆ construct_dynamics()

template<typename T, typename DESCRIPTOR, typename DYNAMICS, typename PARAMETERS = typename DYNAMICS::ParametersD>
void olb::gpu::cuda::kernel::construct_dynamics ( void * target,
PARAMETERS * parameters )

CUDA kernel for constructing on-device ConcreteDynamics.

Definition at line 352 of file operator.hh.

{
  new (target) ConcreteDynamics<T,DESCRIPTOR,DYNAMICS>(parameters);
}
ConcreteDynamics is the implementation of gpu::cuda::Dynamics for a concrete DYNAMICS. Definition at line 81 of file dynamics.hh.
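A hedged sketch of the host-side pattern this kernel serves: placement-new requires pre-allocated device storage, and a single-thread launch suffices because the object is constructed exactly once. The helper name is an assumption.

#include <cuda_runtime.h>

// Illustrative only; parametersD is a device pointer to the parameter struct.
template <typename T, typename DESCRIPTOR, typename DYNAMICS>
void* makeDeviceDynamics(typename DYNAMICS::ParametersD* parametersD) {
  void* target = nullptr;
  cudaMalloc(&target, sizeof(ConcreteDynamics<T,DESCRIPTOR,DYNAMICS>));
  kernel::construct_dynamics<T,DESCRIPTOR,DYNAMICS><<<1,1>>>(target, parametersD);
  cudaDeviceSynchronize(); // object is usable only after construction completes
  return target;
}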

◆ copy_any_fields()

__global__ void olb::gpu::cuda::kernel::copy_any_fields ( AnyDeviceFieldArrayD * sourceFields,
AnyDeviceFieldArrayD * targetFields,
std::size_t nFields,
const CellID * sourceIndices,
const CellID * targetIndices,
std::size_t nIndices )

CUDA kernel for copying sourceFields at sourceIndices to targetFields at targetIndices.

Source and target fields may belong to different block lattices but must represent the same field types in the same sequence.

Definition at line 145 of file communicator.hh.

{
  const CellID iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  for (unsigned iField=0; iField < nFields; ++iField) {
    auto& sourceField = sourceFields[iField];
    auto& targetField = targetFields[iField];
    for (unsigned iD=0; iD < sourceField.column_count; ++iD) {
      memcpy(targetField[iD] + targetIndices[iIndex]*sourceField.element_size,
             sourceField[iD] + sourceIndices[iIndex]*sourceField.element_size,
             sourceField.element_size);
    }
  }
}

◆ copy_field()

template<typename SOURCE, typename TARGET, typename FIELD>
void olb::gpu::cuda::kernel::copy_field ( SOURCE sourceLattice,
TARGET targetLattice,
const CellID * sourceIndices,
const CellID * targetIndices,
std::size_t nIndices )

CUDA kernel for copying FIELD data of sourceLattice at sourceIndices into targetLattice at targetIndices.

Definition at line 106 of file communicator.hh.

{
  const CellID iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  auto* source = sourceLattice.template getField<FIELD>();
  auto* target = targetLattice.template getField<FIELD>();
  for (unsigned iD=0; iD < SOURCE::descriptor_t::template size<FIELD>(); ++iD) {
    target[iD][targetIndices[iIndex]] = source[iD][sourceIndices[iIndex]];
  }
}

◆ gather_any_fields()

__global__ void olb::gpu::cuda::kernel::gather_any_fields ( AnyDeviceFieldArrayD * fields,
std::size_t nFields,
const CellID * indices,
std::size_t nIndices,
std::uint8_t * buffer )

CUDA kernel for gathering fields at indices into buffer.

Definition at line 122 of file communicator.hh.

{
  const CellID iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  for (unsigned iField=0; iField < nFields; ++iField) {
    auto& field = fields[iField];
    for (unsigned iD=0; iD < field.column_count; ++iD) {
      memcpy(buffer + (iD*nIndices + iIndex)*field.element_size,
             field[iD] + indices[iIndex]*field.element_size,
             field.element_size);
    }
    buffer += nIndices*field.column_count*field.element_size;
  }
}
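The kernel advances `buffer` by `nIndices*column_count*element_size` bytes per field, so the staging buffer must be sized as the sum of those extents. A hedged host-side sketch; the `FieldExtent` struct is an assumption mirroring the device-side field descriptors:

#include <cstddef>

struct FieldExtent { unsigned column_count; unsigned element_size; };

// Bytes required by gather_any_fields / scatter_any_fields for nIndices cells.
std::size_t requiredBufferSize(const FieldExtent* fieldInfo,
                               std::size_t nFields, std::size_t nIndices) {
  std::size_t bytes = 0;
  for (std::size_t iField = 0; iField < nFields; ++iField) {
    bytes += nIndices * fieldInfo[iField].column_count
                      * fieldInfo[iField].element_size;
  }
  return bytes;
}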

◆ gather_field()

template<typename CONTEXT, typename FIELD>
void olb::gpu::cuda::kernel::gather_field ( CONTEXT lattice,
const CellID * indices,
std::size_t nIndices,
typename FIELD::template value_type< typename CONTEXT::value_t > * buffer )

CUDA kernel for gathering FIELD data of lattice at indices into buffer.

Definition at line 91 of file communicator.hh.

{
  const CellID iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  auto* field = lattice.template getField<FIELD>();
  for (unsigned iD=0; iD < CONTEXT::descriptor_t::template size<FIELD>(); ++iD) {
    buffer[iD*nIndices+iIndex] = field[iD][indices[iIndex]];
  }
}
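The buffer is laid out component-major: each of FIELD's `size<FIELD>()` components occupies a contiguous run of `nIndices` values, matching the `iD*nIndices + iIndex` indexing above. A hedged round-trip sketch pairing this kernel with scatter_field below; the helper name and block size are assumptions:

#include <cstddef>
#include <cuda_runtime.h>

// Illustrative only: gather FIELD from one lattice and scatter it into
// another through a typed staging buffer.
template <typename CONTEXT, typename FIELD>
void copyFieldViaBuffer(CONTEXT source, CONTEXT target,
                        const CellID* indices, std::size_t nIndices) {
  using V = typename FIELD::template value_type<typename CONTEXT::value_t>;
  const std::size_t D = CONTEXT::descriptor_t::template size<FIELD>();
  V* buffer = nullptr;
  cudaMalloc(&buffer, D * nIndices * sizeof(V));
  const unsigned blockSize = 32;
  const unsigned gridSize  = (nIndices + blockSize - 1) / blockSize;
  kernel::gather_field<CONTEXT,FIELD><<<gridSize, blockSize>>>(source, indices, nIndices, buffer);
  kernel::scatter_field<CONTEXT,FIELD><<<gridSize, blockSize>>>(target, indices, nIndices, buffer);
  cudaFree(buffer);
}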

◆ scatter_any_fields()

__global__ void olb::gpu::cuda::kernel::scatter_any_fields ( AnyDeviceFieldArrayD * fields,
std::size_t nFields,
const CellID * indices,
std::size_t nIndices,
std::uint8_t * buffer )

CUDA kernel for scattering fields in buffer to indices in lattice.

Definition at line 182 of file communicator.hh.

{
  const CellID iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  for (unsigned iField=0; iField < nFields; ++iField) {
    auto& field = fields[iField];
    for (unsigned iD=0; iD < field.column_count; ++iD) {
      memcpy(field[iD] + indices[iIndex]*field.element_size,
             buffer + (iD*nIndices + iIndex)*field.element_size,
             field.element_size);
    }
    buffer += nIndices*field.column_count*field.element_size;
  }
}

◆ scatter_field()

template<typename CONTEXT, typename FIELD>
void olb::gpu::cuda::kernel::scatter_field ( CONTEXT lattice,
const CellID * indices,
std::size_t nIndices,
typename FIELD::template value_type< typename CONTEXT::value_t > * buffer )

CUDA kernel for scattering FIELD data in buffer to indices in lattice.

Definition at line 168 of file communicator.hh.

{
  const CellID iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  auto* field = lattice.template getField<FIELD>();
  for (unsigned iD=0; iD < CONTEXT::descriptor_t::template size<FIELD>(); ++iD) {
    field[iD][indices[iIndex]] = buffer[iD*nIndices+iIndex];
  }
}