OpenLB 1.7
Loading...
Searching...
No Matches
Public Member Functions | List of all members
olb::HeterogeneousCopyTaskDataForGpuSource< T, DESCRIPTOR, TARGET > Class Template Reference

Private implementation of heterogeneous copy task between GPU_CUDA source and CPU_* target. More...

#include <communicator.hh>

+ Inheritance diagram for olb::HeterogeneousCopyTaskDataForGpuSource< T, DESCRIPTOR, TARGET >:
+ Collaboration diagram for olb::HeterogeneousCopyTaskDataForGpuSource< T, DESCRIPTOR, TARGET >:

Public Member Functions

 HeterogeneousCopyTaskDataForGpuSource (const std::vector< std::type_index > &fields, const std::vector< CellID > &targetCells, ConcreteBlockLattice< T, DESCRIPTOR, TARGET > &target, const std::vector< CellID > &sourceCells, ConcreteBlockLattice< T, DESCRIPTOR, Platform::GPU_CUDA > &source)
 
void copy () override
 
void wait () override
 
- Public Member Functions inherited from olb::ConcreteHeterogeneousCopyTask
virtual ~ConcreteHeterogeneousCopyTask ()
 

Detailed Description

template<typename T, typename DESCRIPTOR, Platform TARGET>
class olb::HeterogeneousCopyTaskDataForGpuSource< T, DESCRIPTOR, TARGET >

Private implementation of heterogeneous copy task between GPU_CUDA source and CPU_* target.

Definition at line 642 of file communicator.hh.

Constructor & Destructor Documentation

◆ HeterogeneousCopyTaskDataForGpuSource()

template<typename T , typename DESCRIPTOR , Platform TARGET>
olb::HeterogeneousCopyTaskDataForGpuSource< T, DESCRIPTOR, TARGET >::HeterogeneousCopyTaskDataForGpuSource ( const std::vector< std::type_index > & fields,
const std::vector< CellID > & targetCells,
ConcreteBlockLattice< T, DESCRIPTOR, TARGET > & target,
const std::vector< CellID > & sourceCells,
ConcreteBlockLattice< T, DESCRIPTOR, Platform::GPU_CUDA > & source )
inline

Definition at line 659 of file communicator.hh.

662 :
663 _sourceFields(source.getDataRegistry().deviceFieldArrays(fields)),
664 _onlyPopulationField(fields.size() == 1 && fields[0] == typeid(descriptors::POPULATION)),
665 _targetCells(targetCells),
666 _sourceCells(sourceCells),
667 _target(target, fields),
668 _source(source),
669 _stream(std::make_unique<gpu::cuda::device::Stream>(cudaStreamNonBlocking)),
670 _buffer(_target.size(_targetCells))
671 { }
auto & getDataRegistry()
Return reference to Data's FieldTypeRegistry.

Member Function Documentation

◆ copy()

template<typename T , typename DESCRIPTOR , Platform TARGET>
void olb::HeterogeneousCopyTaskDataForGpuSource< T, DESCRIPTOR, TARGET >::copy ( )
inline override virtual

Implements olb::ConcreteHeterogeneousCopyTask.

Definition at line 673 of file communicator.hh.

673 {
674 if (_onlyPopulationField) {
675 gpu::cuda::DeviceContext<T,DESCRIPTOR> lattice(_source);
676 gpu::cuda::async_gather_field<descriptors::POPULATION>(_stream->get(), lattice, _sourceCells, _buffer.deviceData());
677 } else {
678 gpu::cuda::async_gather_any_fields(_stream->get(), _sourceFields, _sourceCells, _buffer.deviceData());
679 }
680
681 _buffer.setProcessingContext(ProcessingContext::Evaluation);
682 }
const T * deviceData() const
Definition column.hh:146
void setProcessingContext(ProcessingContext)
Definition column.hh:158
void async_gather_any_fields(cudaStream_t stream, thrust::device_vector< AnyDeviceFieldArrayD > &fields, const thrust::device_vector< CellID > &indices, std::uint8_t *buffer)
Non-blocking gather of fields at given indices into buffer.

References olb::gpu::cuda::async_gather_any_fields(), olb::gpu::cuda::Column< T >::deviceData(), olb::Evaluation, and olb::gpu::cuda::Column< T >::setProcessingContext().

+ Here is the call graph for this function:

◆ wait()

template<typename T , typename DESCRIPTOR , Platform TARGET>
void olb::HeterogeneousCopyTaskDataForGpuSource< T, DESCRIPTOR, TARGET >::wait ( )
inline override virtual

Implements olb::ConcreteHeterogeneousCopyTask.

Definition at line 684 of file communicator.hh.

684 {
685 _stream->synchronize();
686 _target.deserialize(_targetCells, _buffer.data());
687 }
const T * data() const
Definition column.hh:134

References olb::gpu::cuda::Column< T >::data(), and olb::MultiConcreteCommunicatable< COMMUNICATEE >::deserialize().

+ Here is the call graph for this function:

The documentation for this class was generated from the following file: