24#ifndef GPU_CUDA_OPERATOR_HH
25#define GPU_CUDA_OPERATOR_HH
35struct CollisionSubdomainMask;
44template <
typename T,
typename DESCRIPTOR,
typename DYNAMICS>
54 return DYNAMICS().apply(cell, _parameters);
63 _parameters{parameters},
74 apply(lattice, iCell);
83 statistic = apply(lattice, iCell);
92template <
typename T,
typename DESCRIPTOR,
typename DYNAMICS>
99 _parameters{parameters}
104 DYNAMICS().apply(cell, _parameters);
110 statistic =
DYNAMICS().apply(cell, _parameters);
118template <
typename OPERATOR>
129 template <
typename T,
typename DESCRIPTOR>
133 OPERATOR().apply(cell);
145template <
typename OPERATOR>
147 template <
typename T,
typename DESCRIPTOR>
150 OPERATOR().apply(cell);
157template <
typename T,
typename DESCRIPTOR,
typename OPERATOR>
164 _parameters{parameters}
169 OPERATOR().apply(cell, _parameters);
176template <
typename COUPLER>
178 template <
typename CONTEXT>
180 CellID iCell) __device__ {
181 auto cells = lattices.exchange_values([&](
auto name) ->
auto {
182 return Cell{lattices.get(name), iCell};
184 COUPLER().apply(cells);
190template <
typename COUPLER,
typename COUPLEES>
193 typename COUPLER::parameters::template decompose_into<
198 template <
typename PARAMETERS>
200 _parameters{parameters}
203 template <
typename CONTEXT>
205 CellID iCell) __device__ {
206 auto cells = lattices.exchange_values([&](
auto name) ->
auto {
207 return Cell{lattices.get(name), iCell};
209 COUPLER().apply(cells, _parameters);
231template <
typename T,
typename DESCRIPTOR,
typename... DYNAMICS>
235 bool* subdomain = block.template getData<CollisionSubdomainMask>().deviceData();
237 if (block.statisticsEnabled()) {
242 block.template getData<OperatorParameters<DYNAMICS>>().parameters,
251 block.template getData<OperatorParameters<DYNAMICS>>().parameters,
264template <
typename CONTEXT,
typename... OPERATORS>
265void call_operators(CONTEXT lattice,
bool* subdomain, OPERATORS... ops) __global__ {
266 const CellID iCell = blockIdx.x * blockDim.x + threadIdx.x;
267 if (!(iCell < lattice.getNcells()) || !subdomain[iCell]) {
270 (ops(lattice, iCell) || ... );
277template <
typename CONTEXT,
typename... OPERATORS>
279 const CellID iCell = blockIdx.x * blockDim.x + threadIdx.x;
280 if (!(iCell < lattice.getNcells()) || !subdomain[iCell]) {
283 typename CONTEXT::value_t** statistic = lattice.template getField<descriptors::STATISTIC>();
284 int* statisticGenerated = lattice.template getField<descriptors::STATISTIC_GENERATED>()[0];
286 if ((ops(lattice, iCell, cellStatistic) || ... )) {
288 statisticGenerated[iCell] = 1;
289 statistic[0][iCell] = cellStatistic.
rho;
290 statistic[1][iCell] = cellStatistic.uSqr;
292 statisticGenerated[iCell] = 0;
293 statistic[0][iCell] = 0;
294 statistic[1][iCell] = 0;
300template <
typename CONTEXT,
typename... OPERATORS>
302 const CellID* indices, std::size_t nIndices,
303 OPERATORS... ops) __global__ {
304 const std::size_t iIndex = blockIdx.x * blockDim.x + threadIdx.x;
305 if (!(iIndex < nIndices)) {
308 (ops(lattice, indices[iIndex]) || ... );
315template <
typename CONTEXT,
typename... OPERATORS>
317 const CellID* indices, std::size_t nIndices,
318 OPERATORS... ops) __global__ {
319 const std::size_t iIndex = blockIdx.x * blockDim.x + threadIdx.x;
320 if (!(iIndex < nIndices)) {
323 typename CONTEXT::value_t** statistic = lattice.template getField<descriptors::STATISTIC>();
324 int* statisticGenerated = lattice.template getField<descriptors::STATISTIC_GENERATED>()[0];
326 if ((ops(lattice, indices[iIndex], cellStatistic) || ... )) {
328 statisticGenerated[indices[iIndex]] = 1;
329 statistic[0][indices[iIndex]] = cellStatistic.
rho;
330 statistic[1][indices[iIndex]] = cellStatistic.uSqr;
332 statisticGenerated[indices[iIndex]] = 0;
333 statistic[0][indices[iIndex]] = 0;
334 statistic[1][indices[iIndex]] = 0;
340template <
typename CONTEXTS,
typename... OPERATORS>
342 const CellID iCell = blockIdx.x * blockDim.x + threadIdx.x;
343 const auto nCells = lattices.template get<0>().getNcells();
344 if (!(iCell < nCells) || !subdomain[iCell]) {
347 (ops(lattices, iCell) || ... );
351template <
typename T,
typename DESCRIPTOR,
typename DYNAMICS,
typename PARAMETERS=
typename DYNAMICS::ParametersD>
362template <
typename CONTEXT,
typename... ARGS>
364 const auto block_size = 32;
365 const auto block_count = (lattice.getNcells() + block_size - 1) / block_size;
367 lattice, subdomain, std::forward<decltype(args)>(args)...);
372template <
typename CONTEXT,
typename... ARGS>
374 const auto block_size = 32;
375 const auto block_count = (lattice.getNcells() + block_size - 1) / block_size;
377 lattice, subdomain, std::forward<decltype(args)>(args)...);
385template <
typename CONTEXT,
typename... ARGS>
387 const auto block_size = 32;
388 const auto block_count = (lattice.getNcells() + block_size - 1) / block_size;
390 lattice, subdomain, std::forward<decltype(args)>(args)...);
395template <
typename CONTEXT,
typename... ARGS>
397 const auto block_size = 32;
398 const auto block_count = (lattice.getNcells() + block_size - 1) / block_size;
400 lattice, subdomain, std::forward<decltype(args)>(args)...);
408template <
typename CONTEXT,
typename... ARGS>
412 const auto block_size = 32;
413 const auto block_count = (cells.
size() + block_size - 1) / block_size;
417 std::forward<decltype(args)>(args)...);
422template <
typename CONTEXT,
typename... ARGS>
427 const auto block_size = 32;
428 const auto block_count = (cells.
size() + block_size - 1) / block_size;
429 kernel::call_list_operators<<<block_count,block_size,0,stream>>>(
432 std::forward<decltype(args)>(args)...);
437template <
typename CONTEXT,
typename... ARGS>
442 const auto block_size = 32;
443 const auto block_count = (cells.
size() + block_size - 1) / block_size;
444 kernel::call_list_operators_with_statistics<<<block_count,block_size,0,stream>>>(
447 std::forward<decltype(args)>(args)...);
452template <
typename CONTEXT,
typename... ARGS>
454 const auto nCells = lattices.template get<0>().getNcells();
455 const auto block_size = 32;
456 const auto block_count = (nCells + block_size - 1) / block_size;
458 lattices, subdomain, std::forward<decltype(args)>(args)...);
466template <
typename T,
typename DESCRIPTOR,
typename DYNAMICS>
468 _dynamics(new DYNAMICS()),
469 _parameters(nullptr),
473 _stream(cudaStreamDefault)
476template <
typename T,
typename DESCRIPTOR,
typename DYNAMICS>
481 using namespace gpu::cuda;
482 DeviceContext<T,DESCRIPTOR> lattice(block);
484 call_operators_with_statistics(
486 subdomain.deviceData(),
487 MaskedCollision<T,DESCRIPTOR,DYNAMICS>{_parameters->parameters, _mask->deviceData()},
488 DynamicDispatchCollision{});
492 subdomain.deviceData(),
493 MaskedCollision<T,DESCRIPTOR,DYNAMICS>{_parameters->parameters, _mask->deviceData()},
494 DynamicDispatchCollision{});
498template <
typename T,
typename DESCRIPTOR,
typename DYNAMICS>
499void ConcreteBlockCollisionO<T,DESCRIPTOR,Platform::GPU_CUDA,DYNAMICS>::applyIndividual(
500 ConcreteBlockLattice<T,DESCRIPTOR,Platform::GPU_CUDA>& block,
501 ConcreteBlockMask<T,Platform::GPU_CUDA>& subdomain)
503 using namespace gpu::cuda;
504 DeviceContext<T,DESCRIPTOR> lattice(block);
506 if (_mask->weight() > 0.5*subdomain.weight()) {
507 if (block.statisticsEnabled()) {
511 subdomain.deviceData(),
512 MaskedCollision<T,DESCRIPTOR,DYNAMICS>{_parameters->parameters, _mask->deviceData()});
517 subdomain.deviceData(),
518 MaskedCollision<T,DESCRIPTOR,DYNAMICS>{_parameters->parameters, _mask->deviceData()});
526 for (
CellID iCell=0; iCell < block.getNcells(); ++iCell) {
527 if (_mask->operator[](iCell)) {
528 _cells.push_back(iCell);
535 if (block.statisticsEnabled()) {
540 ListedCollision<T,DESCRIPTOR,DYNAMICS>{_parameters->parameters});
546 ListedCollision<T,DESCRIPTOR,DYNAMICS>{_parameters->parameters});
551template <
typename T,
typename DESCRIPTOR,
typename DYNAMICS>
556 _parameters = &block.template getData<OperatorParameters<DYNAMICS>>();
557 _mask = &block.template getData<DynamicsMask<DYNAMICS>>();
561 _deviceDynamics = gpu::cuda::device::malloc<gpu::cuda::ConcreteDynamics<T,DESCRIPTOR,DYNAMICS>>(1);
562 gpu::cuda::kernel::construct_dynamics<T,DESCRIPTOR,DYNAMICS><<<1,1>>>(
563 _deviceDynamics.get(),
564 _parameters->deviceData());
568 _dynamicsOfCells = block.template getField<gpu::cuda::DYNAMICS<T,DESCRIPTOR>>()[0].data();
572template <
typename T,
typename DESCRIPTOR,
typename OPERATOR>
576 _stream{cudaStreamDefault}
579template <
typename T,
typename DESCRIPTOR,
typename OPERATOR>
583 if (_cells.size() > 0) {
585 _cells.deduplicate();
597template <
typename T,
typename DESCRIPTOR,
typename OPERATOR>
601 _stream{cudaStreamDefault}
604template <
typename T,
typename DESCRIPTOR,
typename OPERATOR>
608 if (_cells.size() > 0) {
610 _cells.deduplicate();
614 using namespace gpu::cuda;
615 DeviceBlockLattice<T,DESCRIPTOR> lattice(block);
616 async_call_list_operators(_stream.get(),
619 ListedPostProcessorWithParameters<T,DESCRIPTOR,OPERATOR>{_parameters->parameters});
624template <
typename COUPLER,
typename COUPLEES>
629 _mask = std::make_unique<ConcreteBlockMask<typename COUPLEES::values_t::template get<0>::value_t,
631 _lattices.template get<0>()->template getData<CollisionSubdomainMask>()
634 _mask->set(iCell, state);
637template <
typename COUPLER,
typename COUPLEES>
640 auto deviceLattice = _lattices.exchange_values([&](
auto name) ->
auto {
646 deviceLattice, _mask->deviceData(),
649 auto& mask = _lattices.template get<0>()->template getData<CollisionSubdomainMask>();
651 deviceLattice, mask.deviceData(),
656template <
typename COUPLER,
typename COUPLEES>
661 _mask = std::make_unique<ConcreteBlockMask<typename COUPLEES::values_t::template get<0>::value_t,
663 _lattices.template get<0>()->template getData<CollisionSubdomainMask>()
666 _mask->set(iCell, state);
669template <
typename COUPLER,
typename COUPLEES>
672 auto deviceLattice = _lattices.exchange_values([&](
auto name) ->
auto {
678 deviceLattice, _mask->deviceData(),
681 auto& mask = _lattices.template get<0>()->template getData<CollisionSubdomainMask>();
683 deviceLattice, mask.deviceData(),
bool statisticsEnabled() const
Collision operation of concrete DYNAMICS on concrete block lattices of PLATFORM.
Coupling of COUPLEES using concrete OPERATOR with SCOPE on PLATFORM lattices.
Implementation of BlockLattice on a concrete PLATFORM.
Block application of concrete OPERATOR called using SCOPE on PLATFORM.
Device-side implementation of the Cell concept for post processors.
Plain column for CUDA GPU targets.
const T * deviceData() const
Implementation of gpu::cuda::Dynamics for concrete DYNAMICS.
Device-side implementation of the data-only Cell concept for collision steps.
Device-side view of a block lattice.
Cell< T, DESCRIPTOR > get(CellID iCell) __device__
Structure for passing pointers to on-device data into CUDA kernels.
List-based application of DYNAMICS::apply for use in kernel::call_list_operators.
bool operator()(DeviceContext< T, DESCRIPTOR > &lattice, CellID iCell) __device__
bool operator()(DeviceContext< T, DESCRIPTOR > &lattice, CellID iCell, CellStatistic< T > &statistic) __device__
ListedCollision(ParametersOfOperatorD< T, DESCRIPTOR, DYNAMICS > ¶meters) __host__
List-based application of OPERATOR::apply with parameters.
bool operator()(DeviceBlockLattice< T, DESCRIPTOR > &lattice, CellID iCell) __device__
ListedPostProcessorWithParameters(ParametersOfOperatorD< T, DESCRIPTOR, OPERATOR > ¶meters) __host__
Masked application of DYNAMICS::apply for use in kernel::call_operators.
bool operator()(DeviceContext< T, DESCRIPTOR > &lattice, CellID iCell) __device__
Chainable call operator for use in kernel::call_operators.
MaskedCollision(ParametersOfOperatorD< T, DESCRIPTOR, DYNAMICS > ¶meters, bool *mask) any_platform
Constructor (commonly called on the host side)
bool operator()(DeviceContext< T, DESCRIPTOR > &lattice, CellID iCell, CellStatistic< T > &statistic) __device__
Chainable call operator with statistics storage.
Masked application of OPERATOR::apply.
MaskedPostProcessor(bool *mask) any_platform
bool operator()(DeviceBlockLattice< T, DESCRIPTOR > &lattice, CellID iCell) __device__
Unrestricted application of COUPLING::apply with parameters.
bool operator()(CONTEXT &lattices, CellID iCell) __device__
UnmaskedCouplingWithParameters(PARAMETERS ¶meters) any_platform
void check()
Check errors.
void call_coupling_operators(CONTEXTS lattices, bool *subdomain, OPERATORS... ops) __global__
CUDA kernel for applying UnmaskedCoupling(WithParameters)
void call_list_operators(CONTEXT lattice, const CellID *indices, std::size_t nIndices, OPERATORS... ops) __global__
CUDA kernel for applying generic OPERATORS with OperatorScope::PerCell or ListedCollision.
void construct_dynamics(void *target, PARAMETERS *parameters) __global__
CUDA kernel for constructing on-device ConcreteDynamics.
void call_operators(CONTEXT lattice, bool *subdomain, OPERATORS... ops) __global__
CUDA kernel for applying purely local collision steps.
void call_list_operators_with_statistics(CONTEXT lattice, const CellID *indices, std::size_t nIndices, OPERATORS... ops) __global__
CUDA kernel for applying ListedCollision.
void call_operators_with_statistics(CONTEXT lattice, bool *subdomain, OPERATORS... ops) __global__
CUDA kernel for applying purely local collision steps while tracking statistics.
void async_call_list_operators(cudaStream_t stream, CONTEXT &lattice, const gpu::cuda::Column< CellID > &cells, ARGS &&... args)
Apply operators to listed cell indices (async version)
void async_call_operators(cudaStream_t stream, CONTEXT &lattice, bool *subdomain, ARGS &&... args)
Apply masked collision operators to lattice (async)
void call_operators(CONTEXT &lattice, bool *subdomain, ARGS &&... args)
Apply masked collision operators to lattice.
void async_call_list_operators_with_statistics(cudaStream_t stream, CONTEXT &lattice, const gpu::cuda::Column< CellID > &cells, ARGS &&... args)
Apply ListedCollision with statistics (async version)
void async_call_operators_with_statistics(cudaStream_t stream, CONTEXT &lattice, bool *subdomain, ARGS &&... args)
Apply masked collision operators to lattice while tracking statistics (async)
void call_list_operators(CONTEXT &lattice, const gpu::cuda::Column< CellID > &cells, ARGS &&... args)
Apply operators to listed cell indices.
std::function< void(ConcreteBlockLattice< T, DESCRIPTOR, Platform::GPU_CUDA > &) getFusedCollisionO)()
Helper for constructing fused collision operators.
void call_operators_with_statistics(CONTEXT &lattice, bool *subdomain, ARGS &&... args)
Apply masked collision operators to lattice while tracking statistics.
void call_coupling_operators(CONTEXT &lattices, bool *subdomain, ARGS &&... args)
Apply coupling on subdomain.
Top level namespace for all of OpenLB.
std::uint32_t CellID
Type for sequential block-local cell indices.
@ Simulation
Data available on host for e.g. functor evaluation.
@ GPU_CUDA
Vector CPU (AVX2 / AVX-512 collision)
typename ParametersD< T, DESCRIPTOR >::template include< typename OPERATOR::parameters > ParametersOfOperatorD
Deduce ParametersD of OPERATOR w.r.t. T and DESCRIPTOR.
Base of block-wide coupling operators executed by SuperLatticeCoupling.
Return value of any collision.
Describe mask of DYNAMICS in Data.
On-device field mirroring BlockDynamicsMap.
Last node in a MaskedDynamics chain in kernel::call_operators.
List-based application of OPERATOR::apply.
bool operator()(DeviceBlockLattice< T, DESCRIPTOR > &lattice, CellID iCell) __device__
Unrestricted application of COUPLING::apply.
bool operator()(CONTEXT &lattices, CellID iCell) __device__