OpenLB 1.7
|
Implementations of Nvidia CUDA specifics. More...
Namespaces | |
namespace | device |
Basic wrappers of common CUDA functions. | |
namespace | kernel |
CUDA kernels to execute collisions and post processors. | |
Classes | |
struct | AnyDeviceFieldArrayD |
Type-erased pointer to FieldArrayD device data. More... | |
class | Cell |
Device-side implementation of the Cell concept for post processors. More... | |
class | Column |
Plain column for CUDA GPU targets. More... | |
class | ConcreteDynamics |
Implementation of gpu::cuda::Dynamics for concrete DYNAMICS. More... | |
class | CyclicColumn |
Virtual memory based cyclic column for usage in ColumnVector. More... | |
class | DataOnlyCell |
Device-side implementation of the data-only Cell concept for collision steps. More... | |
class | DeviceBlockLattice |
Device-side view of a block lattice. More... | |
class | DeviceContext |
Structure for passing pointers to on-device data into CUDA kernels. More... | |
struct | DynamicDispatchCollision |
Last node in a MaskedDynamics chain in kernel::call_operators. More... | |
struct | Dynamics |
Virtual interface for device-side dynamically-dispatched dynamics access. More... | |
struct | DYNAMICS |
On-device field mirroring BlockDynamicsMap. More... | |
struct | FieldArrayPointer |
Host-side version of gpu::cuda::AnyDeviceFieldArrayD. More... | |
class | FieldPtr |
Pointer to row of a D-dimensional field. More... | |
class | ListedCollision |
List-based application of DYNAMICS::apply for use in kernel::call_list_operators. More... | |
struct | ListedPostProcessor |
List-based application of OPERATOR::apply. More... | |
class | ListedPostProcessorWithParameters |
List-based application of OPERATOR::apply with parameters. More... | |
class | MaskedCollision |
Masked application of DYNAMICS::apply for use in kernel::call_operators. More... | |
class | MaskedPostProcessor |
Masked application of OPERATOR::apply. More... | |
struct | maximum_and_plus |
Function object for simulateneously computing maximum and sum in a single thrust::reduce. More... | |
struct | pair |
Plain pair type with single-value constructor for use in gpu::cuda::maximum_and_plus. More... | |
struct | UnmaskedCoupling |
Unrestricted application of COUPLING::apply. More... | |
class | UnmaskedCouplingWithParameters |
Unrestricted application of COUPLING::apply with parameters. More... | |
Functions | |
template<typename FIELD , typename CONTEXT > | |
void | gather_field (CONTEXT &lattice, const thrust::device_vector< CellID > &indices, std::uint8_t *buffer) |
Blocking gather of FIELD at given indices into buffer. | |
template<typename FIELD , typename CONTEXT > | |
void | async_gather_field (cudaStream_t stream, CONTEXT &lattice, const thrust::device_vector< CellID > &indices, std::uint8_t *buffer) |
Non-blocking gather of FIELD at given indices into buffer. | |
template<typename FIELD , typename SOURCE , typename TARGET > | |
void | async_copy_field (cudaStream_t stream, SOURCE &sourceLattice, TARGET &targetLattice, const thrust::device_vector< CellID > &sourceIndices, const thrust::device_vector< CellID > &targetIndices) |
Non-blocking copy of FIELD at given indices from sourceLattice to targetLattice. | |
void | gather_any_fields (thrust::device_vector< AnyDeviceFieldArrayD > &fields, const thrust::device_vector< CellID > &indices, std::uint8_t *buffer) |
Blocking gather of fields at given indices into buffer. | |
void | async_gather_any_fields (cudaStream_t stream, thrust::device_vector< AnyDeviceFieldArrayD > &fields, const thrust::device_vector< CellID > &indices, std::uint8_t *buffer) |
Non-blocking gather of fields at given indices into buffer. | |
void | async_copy_any_fields (cudaStream_t stream, thrust::device_vector< AnyDeviceFieldArrayD > &sourceFields, thrust::device_vector< AnyDeviceFieldArrayD > &targetFields, const thrust::device_vector< CellID > &sourceIndices, const thrust::device_vector< CellID > &targetIndices) |
Non-blocking copy of fields at given indices from sourceIndices to targetIndices. | |
template<typename FIELD , typename CONTEXT > | |
void | scatter_field (CONTEXT &lattice, const thrust::device_vector< CellID > &indices, std::uint8_t *buffer) |
Blocking scatter of FIELD data in buffer to given indices. | |
template<typename FIELD , typename CONTEXT > | |
void | async_scatter_field (cudaStream_t stream, CONTEXT &lattice, const thrust::device_vector< CellID > &indices, std::uint8_t *buffer) |
Non-blocking scatter of FIELD data in buffer to given indices. | |
void | scatter_any_fields (thrust::device_vector< AnyDeviceFieldArrayD > &fields, const thrust::device_vector< CellID > &indices, std::uint8_t *buffer) |
Blocking scatter of fields data in buffer to given indices. | |
void | async_scatter_any_fields (cudaStream_t stream, thrust::device_vector< AnyDeviceFieldArrayD > &fields, const thrust::device_vector< CellID > &indices, std::uint8_t *buffer) |
Non-blocking scatter of fields data in buffer to given indices. | |
template<typename CONTEXT , typename... ARGS> | |
void | call_operators (CONTEXT &lattice, bool *subdomain, ARGS &&... args) |
Apply masked collision operators to lattice. | |
template<typename CONTEXT , typename... ARGS> | |
void | async_call_operators (cudaStream_t stream, CONTEXT &lattice, bool *subdomain, ARGS &&... args) |
Apply masked collision operators to lattice (async) | |
template<typename CONTEXT , typename... ARGS> | |
void | call_operators_with_statistics (CONTEXT &lattice, bool *subdomain, ARGS &&... args) |
Apply masked collision operators to lattice while tracking statistics. | |
template<typename CONTEXT , typename... ARGS> | |
void | async_call_operators_with_statistics (cudaStream_t stream, CONTEXT &lattice, bool *subdomain, ARGS &&... args) |
Apply masked collision operators to lattice while tracking statistics (async) | |
template<typename CONTEXT , typename... ARGS> | |
void | call_list_operators (CONTEXT &lattice, const gpu::cuda::Column< CellID > &cells, ARGS &&... args) |
Apply operators to listed cell indices. | |
template<typename CONTEXT , typename... ARGS> | |
void | async_call_list_operators (cudaStream_t stream, CONTEXT &lattice, const gpu::cuda::Column< CellID > &cells, ARGS &&... args) |
Apply operators to listed cell indices (async version) | |
template<typename CONTEXT , typename... ARGS> | |
void | async_call_list_operators_with_statistics (cudaStream_t stream, CONTEXT &lattice, const gpu::cuda::Column< CellID > &cells, ARGS &&... args) |
Apply ListedCollision with statistics (async version) | |
template<typename CONTEXT , typename... ARGS> | |
void | call_coupling_operators (CONTEXT &lattices, bool *subdomain, ARGS &&... args) |
Apply coupling on subdomain. | |
Variables | |
template<typename T , typename DESCRIPTOR , typename... DYNAMICS> | |
std::function< void(ConcreteBlockLattice< T, DESCRIPTOR, Platform::GPU_CUDA > &) | getFusedCollisionO )() |
Helper for constructing fused collision operators. | |
template<typename CONTEXT , typename TYPE > | |
__constant__ std::size_t | field_type_index |
Mapping of TYPE in CONTEXT to runtime-fixed index. | |
Implementations of Nvidia CUDA specifics.
void olb::gpu::cuda::async_call_list_operators | ( | cudaStream_t | stream, |
CONTEXT & | lattice, | ||
const gpu::cuda::Column< CellID > & | cells, | ||
ARGS &&... | args ) |
Apply operators to listed cell indices (async version)
Definition at line 423 of file operator.hh.
References olb::gpu::cuda::device::check(), olb::gpu::cuda::Column< T >::deviceData(), and olb::gpu::cuda::Column< T >::size().
void olb::gpu::cuda::async_call_list_operators_with_statistics | ( | cudaStream_t | stream, |
CONTEXT & | lattice, | ||
const gpu::cuda::Column< CellID > & | cells, | ||
ARGS &&... | args ) |
Apply ListedCollision with statistics (async version)
Definition at line 438 of file operator.hh.
References olb::gpu::cuda::device::check(), olb::gpu::cuda::Column< T >::deviceData(), and olb::gpu::cuda::Column< T >::size().
void olb::gpu::cuda::async_call_operators | ( | cudaStream_t | stream, |
CONTEXT & | lattice, | ||
bool * | subdomain, | ||
ARGS &&... | args ) |
Apply masked collision operators to lattice (async)
Definition at line 373 of file operator.hh.
References olb::gpu::cuda::kernel::call_operators(), and olb::gpu::cuda::device::check().
void olb::gpu::cuda::async_call_operators_with_statistics | ( | cudaStream_t | stream, |
CONTEXT & | lattice, | ||
bool * | subdomain, | ||
ARGS &&... | args ) |
Apply masked collision operators to lattice while tracking statistics (async)
Definition at line 396 of file operator.hh.
References olb::gpu::cuda::kernel::call_operators_with_statistics(), and olb::gpu::cuda::device::check().
void olb::gpu::cuda::async_copy_any_fields | ( | cudaStream_t | stream, |
thrust::device_vector< AnyDeviceFieldArrayD > & | sourceFields, | ||
thrust::device_vector< AnyDeviceFieldArrayD > & | targetFields, | ||
const thrust::device_vector< CellID > & | sourceIndices, | ||
const thrust::device_vector< CellID > & | targetIndices ) |
Non-blocking copy of fields at given indices from sourceIndices to targetIndices.
Definition at line 276 of file communicator.hh.
References olb::gpu::cuda::device::check().
void olb::gpu::cuda::async_copy_field | ( | cudaStream_t | stream, |
SOURCE & | sourceLattice, | ||
TARGET & | targetLattice, | ||
const thrust::device_vector< CellID > & | sourceIndices, | ||
const thrust::device_vector< CellID > & | targetIndices ) |
Non-blocking copy of FIELD at given indices from sourceLattice to targetLattice.
Definition at line 231 of file communicator.hh.
References olb::gpu::cuda::device::check().
void olb::gpu::cuda::async_gather_any_fields | ( | cudaStream_t | stream, |
thrust::device_vector< AnyDeviceFieldArrayD > & | fields, | ||
const thrust::device_vector< CellID > & | indices, | ||
std::uint8_t * | buffer ) |
Non-blocking gather of fields at given indices into buffer.
Definition at line 262 of file communicator.hh.
References olb::gpu::cuda::device::check().
void olb::gpu::cuda::async_gather_field | ( | cudaStream_t | stream, |
CONTEXT & | lattice, | ||
const thrust::device_vector< CellID > & | indices, | ||
std::uint8_t * | buffer ) |
Non-blocking gather of FIELD at given indices into buffer.
Definition at line 216 of file communicator.hh.
References olb::gpu::cuda::device::check().
void olb::gpu::cuda::async_scatter_any_fields | ( | cudaStream_t | stream, |
thrust::device_vector< AnyDeviceFieldArrayD > & | fields, | ||
const thrust::device_vector< CellID > & | indices, | ||
std::uint8_t * | buffer ) |
Non-blocking scatter of fields data in buffer to given indices.
Definition at line 330 of file communicator.hh.
References olb::gpu::cuda::device::check().
void olb::gpu::cuda::async_scatter_field | ( | cudaStream_t | stream, |
CONTEXT & | lattice, | ||
const thrust::device_vector< CellID > & | indices, | ||
std::uint8_t * | buffer ) |
Non-blocking scatter of FIELD data in buffer to given indices.
Definition at line 303 of file communicator.hh.
References olb::gpu::cuda::device::check().
void olb::gpu::cuda::call_coupling_operators | ( | CONTEXT & | lattices, |
bool * | subdomain, | ||
ARGS &&... | args ) |
Apply coupling on subdomain.
Definition at line 453 of file operator.hh.
References olb::gpu::cuda::kernel::call_coupling_operators(), and olb::gpu::cuda::device::check().
void olb::gpu::cuda::call_list_operators | ( | CONTEXT & | lattice, |
const gpu::cuda::Column< CellID > & | cells, | ||
ARGS &&... | args ) |
Apply operators to listed cell indices.
Used to call post processors in ConcreteBlockO with OperatorScope::PerCell
Definition at line 409 of file operator.hh.
References olb::gpu::cuda::kernel::call_list_operators(), olb::gpu::cuda::device::check(), olb::gpu::cuda::Column< T >::deviceData(), and olb::gpu::cuda::Column< T >::size().
void olb::gpu::cuda::call_operators | ( | CONTEXT & | lattice, |
bool * | subdomain, | ||
ARGS &&... | args ) |
Apply masked collision operators to lattice.
ARGS are instances of MaskedCollision or DynamicDispatchCollision
Definition at line 363 of file operator.hh.
References olb::gpu::cuda::kernel::call_operators(), and olb::gpu::cuda::device::check().
void olb::gpu::cuda::call_operators_with_statistics | ( | CONTEXT & | lattice, |
bool * | subdomain, | ||
ARGS &&... | args ) |
Apply masked collision operators to lattice while tracking statistics.
ARGS are instances of MaskedCollision or DynamicDispatchCollision
Definition at line 386 of file operator.hh.
References olb::gpu::cuda::kernel::call_operators_with_statistics(), and olb::gpu::cuda::device::check().
void olb::gpu::cuda::gather_any_fields | ( | thrust::device_vector< AnyDeviceFieldArrayD > & | fields, |
const thrust::device_vector< CellID > & | indices, | ||
std::uint8_t * | buffer ) |
Blocking gather of fields at given indices into buffer.
Definition at line 249 of file communicator.hh.
References olb::gpu::cuda::device::check().
void olb::gpu::cuda::gather_field | ( | CONTEXT & | lattice, |
const thrust::device_vector< CellID > & | indices, | ||
std::uint8_t * | buffer ) |
Blocking gather of FIELD at given indices into buffer.
Definition at line 204 of file communicator.hh.
References olb::gpu::cuda::device::check().
void olb::gpu::cuda::scatter_any_fields | ( | thrust::device_vector< AnyDeviceFieldArrayD > & | fields, |
const thrust::device_vector< CellID > & | indices, | ||
std::uint8_t * | buffer ) |
Blocking scatter of fields data in buffer to given indices.
Definition at line 317 of file communicator.hh.
References olb::gpu::cuda::device::check().
void olb::gpu::cuda::scatter_field | ( | CONTEXT & | lattice, |
const thrust::device_vector< CellID > & | indices, | ||
std::uint8_t * | buffer ) |
Blocking scatter of FIELD data in buffer to given indices.
Definition at line 291 of file communicator.hh.
References olb::gpu::cuda::device::check().
__constant__ std::size_t olb::gpu::cuda::field_type_index |
Mapping of TYPE in CONTEXT to runtime-fixed index.
Used for dynamic access to field arrays
Definition at line 40 of file registry.hh.
std::function< void(ConcreteBlockLattice< T, DESCRIPTOR, Platform::GPU_CUDA > &) olb::gpu::cuda::getFusedCollisionO) () | ( | ) |
Helper for constructing fused collision operators.
This is a convenient way for potentially improving performance by injecting application knowledge. E.g. if the lattice contains primarily BGK and BounceBack dynamics this can be declared using:
Definition at line 233 of file operator.hh.