OpenLB 1.7
olb::gpu::cuda::kernel Namespace Reference

CUDA kernels to execute collisions and post processors. More...

Functions

template<typename CONTEXT, typename FIELD>
__global__ void gather_field (CONTEXT lattice, const CellID *indices, std::size_t nIndices, typename FIELD::template value_type< typename CONTEXT::value_t > *buffer)
 CUDA kernel for gathering FIELD data of lattice at indices into buffer.
 
template<typename SOURCE, typename TARGET, typename FIELD>
__global__ void copy_field (SOURCE sourceLattice, TARGET targetLattice, const CellID *sourceIndices, const CellID *targetIndices, std::size_t nIndices)
 CUDA kernel for copying FIELD data of sourceLattice at sourceIndices into targetLattice at targetIndices.
 
__global__ void gather_any_fields (AnyDeviceFieldArrayD *fields, std::size_t nFields, const CellID *indices, std::size_t nIndices, std::uint8_t *buffer)
 CUDA kernel for gathering fields at indices into buffer.
 
__global__ void copy_any_fields (AnyDeviceFieldArrayD *sourceFields, AnyDeviceFieldArrayD *targetFields, std::size_t nFields, const CellID *sourceIndices, const CellID *targetIndices, std::size_t nIndices)
 CUDA kernel for copying sourceFields at sourceIndices to targetFields at targetIndices.
 
template<typename CONTEXT, typename FIELD>
__global__ void scatter_field (CONTEXT lattice, const CellID *indices, std::size_t nIndices, typename FIELD::template value_type< typename CONTEXT::value_t > *buffer)
 CUDA kernel for scattering FIELD data in buffer to indices in lattice.
 
__global__ void scatter_any_fields (AnyDeviceFieldArrayD *fields, std::size_t nFields, const CellID *indices, std::size_t nIndices, std::uint8_t *buffer)
 CUDA kernel for scattering fields in buffer to indices in lattice.
 
template<typename CONTEXT, typename... OPERATORS>
__global__ void call_operators (CONTEXT lattice, bool *subdomain, OPERATORS... ops)
 CUDA kernel for applying purely local collision steps.
 
template<typename CONTEXT, typename... OPERATORS>
__global__ void call_operators_with_statistics (CONTEXT lattice, bool *subdomain, OPERATORS... ops)
 CUDA kernel for applying purely local collision steps while tracking statistics.
 
template<typename CONTEXT, typename... OPERATORS>
__global__ void call_list_operators (CONTEXT lattice, const CellID *indices, std::size_t nIndices, OPERATORS... ops)
 CUDA kernel for applying generic OPERATORS with OperatorScope::PerCell or ListedCollision.
 
template<typename CONTEXT, typename... OPERATORS>
__global__ void call_list_operators_with_statistics (CONTEXT lattice, const CellID *indices, std::size_t nIndices, OPERATORS... ops)
 CUDA kernel for applying ListedCollision.
 
template<typename CONTEXTS, typename... OPERATORS>
__global__ void call_coupling_operators (CONTEXTS lattices, bool *subdomain, OPERATORS... ops)
 CUDA kernel for applying UnmaskedCoupling(WithParameters).
 
template<typename T, typename DESCRIPTOR, typename DYNAMICS, typename PARAMETERS = typename DYNAMICS::ParametersD>
__global__ void construct_dynamics (void *target, PARAMETERS *parameters)
 CUDA kernel for constructing on-device ConcreteDynamics.
 

Detailed Description

CUDA kernels to execute collisions and post processors.

Function Documentation

◆ call_coupling_operators()

template<typename CONTEXTS, typename... OPERATORS>
void olb::gpu::cuda::kernel::call_coupling_operators ( CONTEXTS lattices,
bool * subdomain,
OPERATORS... ops )

CUDA kernel for applying UnmaskedCoupling(WithParameters).

Definition at line 341 of file operator.hh.

{
  const CellID iCell = blockIdx.x * blockDim.x + threadIdx.x;
  const auto nCells = lattices.template get<0>().getNcells();
  if (!(iCell < nCells) || !subdomain[iCell]) {
    return;
  }
  (ops(lattices, iCell) || ...);
}
CellID is an alias for std::uint32_t, the type for sequential block-local cell indices.
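A hedged sketch of how such a kernel might be launched from the host; the helper name and block size are assumptions, not OpenLB API. The grid covers the cell count of the first lattice, matching the bound checked inside the kernel.

#include <cstddef>

// Illustrative launch helper: a one-thread-per-cell 1D grid sized from the
// first lattice's cell count. blockSize 32 is an arbitrary assumption.
template <typename CONTEXTS, typename... OPERATORS>
void launchCouplingKernel(CONTEXTS lattices, bool* subdomain,
                          std::size_t nCells, OPERATORS... ops) {
  const unsigned blockSize = 32;
  const unsigned gridSize  = (nCells + blockSize - 1) / blockSize;
  kernel::call_coupling_operators<<<gridSize, blockSize>>>(lattices, subdomain, ops...);
}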

◆ call_list_operators()

template<typename CONTEXT, typename... OPERATORS>
void olb::gpu::cuda::kernel::call_list_operators ( CONTEXT lattice,
const CellID * indices,
std::size_t nIndices,
OPERATORS... ops )

CUDA kernel for applying generic OPERATORS with OperatorScope::PerCell or ListedCollision.

Definition at line 301 of file operator.hh.

{
  const std::size_t iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  (ops(lattice, indices[iIndex]) || ...);
}
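The fold `(ops(lattice, indices[iIndex]) || ...)` requires each operator to be callable on the device with a lattice context and a CellID, returning something convertible to bool. A hypothetical operator matching that contract (a sketch, not an OpenLB class) might look like this:

#include <cstdint>

using CellID = std::uint32_t; // per the CellID note above

// Hypothetical per-cell operator; the contract is inferred from the fold
// expression above: __device__-callable, returns bool ("operator applied").
struct MyPerCellOp {
  template <typename CONTEXT>
  __device__ bool operator()(CONTEXT& lattice, CellID iCell) const {
    // ... read/write cell iCell of lattice here ...
    return true; // true short-circuits the remaining operators in the fold
  }
};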

◆ call_list_operators_with_statistics()

template<typename CONTEXT, typename... OPERATORS>
void olb::gpu::cuda::kernel::call_list_operators_with_statistics ( CONTEXT lattice,
const CellID * indices,
std::size_t nIndices,
OPERATORS... ops )

CUDA kernel for applying ListedCollision.

Statistics data is reduced by StatisticsPostProcessor.

Definition at line 316 of file operator.hh.

{
  const std::size_t iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  typename CONTEXT::value_t** statistic = lattice.template getField<descriptors::STATISTIC>();
  int* statisticGenerated = lattice.template getField<descriptors::STATISTIC_GENERATED>()[0];
  CellStatistic<typename CONTEXT::value_t> cellStatistic{-1, -1};
  if ((ops(lattice, indices[iIndex], cellStatistic) || ...)) {
    if (cellStatistic) {
      statisticGenerated[indices[iIndex]] = 1;
      statistic[0][indices[iIndex]] = cellStatistic.rho;
      statistic[1][indices[iIndex]] = cellStatistic.uSqr;
    } else {
      statisticGenerated[indices[iIndex]] = 0;
      statistic[0][indices[iIndex]] = 0;
      statistic[1][indices[iIndex]] = 0;
    }
  }
}
CellStatistic is the return value of any collision. Definition at line 43 of file interface.h.

References olb::CellStatistic< T >::rho.
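The `(ops(...) || ...)` fold evaluates the operators left to right and stops at the first one that returns true; statistics are only written if some operator actually applied. A minimal standalone C++ illustration of this short-circuiting (not OpenLB code):

#include <cstdio>

// A binary fold over || evaluates operators left to right and stops at the
// first one returning true, mirroring the kernels above.
template <typename... OPERATORS>
bool apply_first_matching(int cell, OPERATORS... ops) {
  return (ops(cell) || ...);
}

int main() {
  auto never  = [](int)   { std::puts("never applied"); return false; };
  auto always = [](int c) { std::printf("applied on %d\n", c); return true; };
  auto unreachable = [](int) { std::puts("never evaluated"); return true; };
  apply_first_matching(7, never, always, unreachable);
  // prints "never applied" then "applied on 7"; the third lambda never runs
}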

◆ call_operators()

template<typename CONTEXT, typename... OPERATORS>
void olb::gpu::cuda::kernel::call_operators ( CONTEXT lattice,
bool * subdomain,
OPERATORS... ops )

CUDA kernel for applying purely local collision steps.

Definition at line 265 of file operator.hh.

{
  const CellID iCell = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iCell < lattice.getNcells()) || !subdomain[iCell]) {
    return;
  }
  (ops(lattice, iCell) || ...);
}
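The `subdomain` mask selects which cells the operators run on. A hedged host-side sketch of preparing an all-enabled mask; the helper name is an assumption and error handling is omitted:

#include <cstddef>
#include <cuda_runtime.h>

// Illustrative only: allocate and enable a full subdomain mask for
// call_operators; cudaMemset writes byte 0x01, which reads back as true.
bool* makeFullSubdomainMask(std::size_t nCells) {
  bool* subdomain = nullptr;
  cudaMalloc(&subdomain, nCells * sizeof(bool));
  cudaMemset(subdomain, 1, nCells * sizeof(bool));
  return subdomain; // caller releases with cudaFree
}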

◆ call_operators_with_statistics()

template<typename CONTEXT, typename... OPERATORS>
void olb::gpu::cuda::kernel::call_operators_with_statistics ( CONTEXT lattice,
bool * subdomain,
OPERATORS... ops )

CUDA kernel for applying purely local collision steps while tracking statistics.

Statistics data is reduced by StatisticsPostProcessor.

Definition at line 278 of file operator.hh.

{
  const CellID iCell = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iCell < lattice.getNcells()) || !subdomain[iCell]) {
    return;
  }
  typename CONTEXT::value_t** statistic = lattice.template getField<descriptors::STATISTIC>();
  int* statisticGenerated = lattice.template getField<descriptors::STATISTIC_GENERATED>()[0];
  CellStatistic<typename CONTEXT::value_t> cellStatistic{-1, -1};
  if ((ops(lattice, iCell, cellStatistic) || ...)) {
    if (cellStatistic) {
      statisticGenerated[iCell] = 1;
      statistic[0][iCell] = cellStatistic.rho;
      statistic[1][iCell] = cellStatistic.uSqr;
    } else {
      statisticGenerated[iCell] = 0;
      statistic[0][iCell] = 0;
      statistic[1][iCell] = 0;
    }
  }
}

References olb::CellStatistic< T >::rho.

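The kernel only records per-cell values; the reduction itself is performed by StatisticsPostProcessor. As an illustration of what that reduction amounts to, a hedged Thrust sketch, assuming device pointers to component 0 of the STATISTIC field and to the STATISTIC_GENERATED flags; since the kernel zeroes non-generated cells, summing all cells and dividing by the generated count yields the average rho:

#include <cstddef>
#include <thrust/execution_policy.h>
#include <thrust/reduce.h>

// Illustrative only, not the StatisticsPostProcessor implementation.
template <typename T>
T averageRho(const T* rho, const int* generated, std::size_t nCells) {
  T   sum = thrust::reduce(thrust::device, rho, rho + nCells, T{0});
  int num = thrust::reduce(thrust::device, generated, generated + nCells, 0);
  return num > 0 ? sum / num : T{0};
}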

◆ construct_dynamics()

template<typename T, typename DESCRIPTOR, typename DYNAMICS, typename PARAMETERS = typename DYNAMICS::ParametersD>
void olb::gpu::cuda::kernel::construct_dynamics ( void * target,
PARAMETERS * parameters )

CUDA kernel for constructing on-device ConcreteDynamics.

Definition at line 352 of file operator.hh.

{
  new (target) ConcreteDynamics<T,DESCRIPTOR,DYNAMICS>(parameters);
}
ConcreteDynamics is the implementation of gpu::cuda::Dynamics for a concrete DYNAMICS. Definition at line 81 of file dynamics.hh.
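A hedged sketch of the host-side pattern this kernel serves: placement-new requires pre-allocated device storage, and a single-thread launch suffices because the object is constructed exactly once. The helper name is an assumption.

#include <cuda_runtime.h>

// Illustrative only; parametersD is a device pointer to the parameter struct.
template <typename T, typename DESCRIPTOR, typename DYNAMICS>
void* makeDeviceDynamics(typename DYNAMICS::ParametersD* parametersD) {
  void* target = nullptr;
  cudaMalloc(&target, sizeof(ConcreteDynamics<T,DESCRIPTOR,DYNAMICS>));
  kernel::construct_dynamics<T,DESCRIPTOR,DYNAMICS><<<1,1>>>(target, parametersD);
  cudaDeviceSynchronize(); // object is usable only after construction completes
  return target;
}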

◆ copy_any_fields()

__global__ void olb::gpu::cuda::kernel::copy_any_fields ( AnyDeviceFieldArrayD * sourceFields,
AnyDeviceFieldArrayD * targetFields,
std::size_t nFields,
const CellID * sourceIndices,
const CellID * targetIndices,
std::size_t nIndices )

CUDA kernel for copying sourceFields at sourceIndices to targetFields at targetIndices.

Source and target fields may belong to different block lattices but must represent the same field types in the same sequence.

Definition at line 145 of file communicator.hh.

{
  const CellID iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  for (unsigned iField=0; iField < nFields; ++iField) {
    auto& sourceField = sourceFields[iField];
    auto& targetField = targetFields[iField];
    for (unsigned iD=0; iD < sourceField.column_count; ++iD) {
      memcpy(targetField[iD] + targetIndices[iIndex]*sourceField.element_size,
             sourceField[iD] + sourceIndices[iIndex]*sourceField.element_size,
             sourceField.element_size);
    }
  }
}

◆ copy_field()

template<typename SOURCE, typename TARGET, typename FIELD>
void olb::gpu::cuda::kernel::copy_field ( SOURCE sourceLattice,
TARGET targetLattice,
const CellID * sourceIndices,
const CellID * targetIndices,
std::size_t nIndices )

CUDA kernel for copying FIELD data of sourceLattice at sourceIndices into targetLattice at targetIndices.

Definition at line 106 of file communicator.hh.

{
  const CellID iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  auto* source = sourceLattice.template getField<FIELD>();
  auto* target = targetLattice.template getField<FIELD>();
  for (unsigned iD=0; iD < SOURCE::descriptor_t::template size<FIELD>(); ++iD) {
    target[iD][targetIndices[iIndex]] = source[iD][sourceIndices[iIndex]];
  }
}

◆ gather_any_fields()

__global__ void olb::gpu::cuda::kernel::gather_any_fields ( AnyDeviceFieldArrayD * fields,
std::size_t nFields,
const CellID * indices,
std::size_t nIndices,
std::uint8_t * buffer )

CUDA kernel for gathering fields at indices into buffer.

Definition at line 122 of file communicator.hh.

{
  const CellID iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  for (unsigned iField=0; iField < nFields; ++iField) {
    auto& field = fields[iField];
    for (unsigned iD=0; iD < field.column_count; ++iD) {
      memcpy(buffer + (iD*nIndices + iIndex)*field.element_size,
             field[iD] + indices[iIndex]*field.element_size,
             field.element_size);
    }
    buffer += nIndices*field.column_count*field.element_size;
  }
}
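The kernel advances `buffer` by `nIndices*column_count*element_size` bytes per field, so the staging buffer must be sized as the sum of those extents. A hedged host-side sketch; the `FieldExtent` struct is an assumption mirroring the device-side field descriptors:

#include <cstddef>

struct FieldExtent { unsigned column_count; unsigned element_size; };

// Bytes required by gather_any_fields / scatter_any_fields for nIndices cells.
std::size_t requiredBufferSize(const FieldExtent* fieldInfo,
                               std::size_t nFields, std::size_t nIndices) {
  std::size_t bytes = 0;
  for (std::size_t iField = 0; iField < nFields; ++iField) {
    bytes += nIndices * fieldInfo[iField].column_count
                      * fieldInfo[iField].element_size;
  }
  return bytes;
}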

◆ gather_field()

template<typename CONTEXT, typename FIELD>
void olb::gpu::cuda::kernel::gather_field ( CONTEXT lattice,
const CellID * indices,
std::size_t nIndices,
typename FIELD::template value_type< typename CONTEXT::value_t > * buffer )

CUDA kernel for gathering FIELD data of lattice at indices into buffer.

Definition at line 91 of file communicator.hh.

{
  const CellID iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  auto* field = lattice.template getField<FIELD>();
  for (unsigned iD=0; iD < CONTEXT::descriptor_t::template size<FIELD>(); ++iD) {
    buffer[iD*nIndices+iIndex] = field[iD][indices[iIndex]];
  }
}
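The buffer is laid out component-major: each of FIELD's `size<FIELD>()` components occupies a contiguous run of `nIndices` values, matching the `iD*nIndices + iIndex` indexing above. A hedged round-trip sketch pairing this kernel with scatter_field below; the helper name and block size are assumptions:

#include <cstddef>
#include <cuda_runtime.h>

// Illustrative only: gather FIELD from one lattice and scatter it into
// another through a typed staging buffer.
template <typename CONTEXT, typename FIELD>
void copyFieldViaBuffer(CONTEXT source, CONTEXT target,
                        const CellID* indices, std::size_t nIndices) {
  using V = typename FIELD::template value_type<typename CONTEXT::value_t>;
  const std::size_t D = CONTEXT::descriptor_t::template size<FIELD>();
  V* buffer = nullptr;
  cudaMalloc(&buffer, D * nIndices * sizeof(V));
  const unsigned blockSize = 32;
  const unsigned gridSize  = (nIndices + blockSize - 1) / blockSize;
  kernel::gather_field<CONTEXT,FIELD><<<gridSize, blockSize>>>(source, indices, nIndices, buffer);
  kernel::scatter_field<CONTEXT,FIELD><<<gridSize, blockSize>>>(target, indices, nIndices, buffer);
  cudaFree(buffer);
}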

◆ scatter_any_fields()

__global__ void olb::gpu::cuda::kernel::scatter_any_fields ( AnyDeviceFieldArrayD * fields,
std::size_t nFields,
const CellID * indices,
std::size_t nIndices,
std::uint8_t * buffer )

CUDA kernel for scattering fields in buffer to indices in lattice.

Definition at line 182 of file communicator.hh.

{
  const CellID iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  for (unsigned iField=0; iField < nFields; ++iField) {
    auto& field = fields[iField];
    for (unsigned iD=0; iD < field.column_count; ++iD) {
      memcpy(field[iD] + indices[iIndex]*field.element_size,
             buffer + (iD*nIndices + iIndex)*field.element_size,
             field.element_size);
    }
    buffer += nIndices*field.column_count*field.element_size;
  }
}

◆ scatter_field()

template<typename CONTEXT, typename FIELD>
void olb::gpu::cuda::kernel::scatter_field ( CONTEXT lattice,
const CellID * indices,
std::size_t nIndices,
typename FIELD::template value_type< typename CONTEXT::value_t > * buffer )

CUDA kernel for scattering FIELD data in buffer to indices in lattice.

Definition at line 168 of file communicator.hh.

{
  const CellID iIndex = blockIdx.x * blockDim.x + threadIdx.x;
  if (!(iIndex < nIndices)) {
    return;
  }
  auto* field = lattice.template getField<FIELD>();
  for (unsigned iD=0; iD < CONTEXT::descriptor_t::template size<FIELD>(); ++iD) {
    field[iD][indices[iIndex]] = buffer[iD*nIndices+iIndex];
  }
}