Reply To: Manipulating populations with postprocessors on GPU
› Forums › OpenLB › General Topics › Manipulating populations with postprocessors on GPU › Reply To: Manipulating populations with postprocessors on GPU
Hi Adrian,
Here are the contents of the config.mk file currently setup for debugging (the same problem persists with -O3 optimization):
CXX := nvcc
CC := nvcc
CXXFLAGS := -O0
CXXFLAGS += -g -G -std=c++20 --forward-unknown-to-host-compiler
PARALLEL_MODE := NONE
PLATFORMS := CPU_SISD GPU_CUDA
# for e.g. RTX 30* (Ampere), see table in <code>rules.mk</code> for other options
CUDA_ARCH := 90
FLOATING_POINT_TYPE := float
USE_EMBEDDED_DEPENDENCIES := ON
Below is the code for the entire post processor. The post processor itself is added in a function called “setShearPeriodic” which is very similar to the setTurbulentWallModel function for the implementation of the turbulent wall model post processor. I am using the same portion of code in setTurbulentWallModel to compute the surface normal vectors before adding the post processor.
#ifndef SHEAR_PERIODIC_POST_PROCESSOR_H
#define SHEAR_PERIODIC_POST_PROCESSOR_H
#include "core/util.h"
#include <math.h>
namespace olb {
template <typename CELL, typename V = typename CELL::value_t, typename DESCRIPTOR = typename CELL::descriptor_t>
void interpolateDF(CELL& cell, Vector<V,DESCRIPTOR::d> distance, V f_tilde[DESCRIPTOR::q]) any_platform{
Vector<V,DESCRIPTOR::d> point1;
for( int iD = 0; iD < DESCRIPTOR::d; iD++ ) {
point1[iD] = util::floor(distance[iD]);
}
V q = distance[0] - point1[0];
Vector<V,DESCRIPTOR::d> point2 = point1;
point2[0] += 1;
auto f_x1 = cell.neighbor(point1);
auto f_x2 = cell.neighbor(point2);
for(int iPop = 0; iPop < DESCRIPTOR::q; iPop++ ) {
f_tilde[iPop] = f_x1[iPop]*(V{1}-q) + f_x2[iPop]*q;
}
};
//======================================================================
// ========== Shear Periodic (Lees-Edwards) BC PostProcessor =========//
//======================================================================
class shearPeriodicPostProcessor {
public:
static constexpr OperatorScope scope = OperatorScope::PerCellWithParameters;
using parameters = meta::list<descriptors::LATTICE_TIME>;
int getPriority() const {
return 1;
}
template <typename CELL, typename PARAMETERS, typename V = typename CELL::value_t>
void apply(CELL& cell, PARAMETERS& parameters) any_platform {
// Step 1: Find location x_m (y2)
using DESCRIPTOR = typename CELL::descriptor_t;
int iT = parameters.template get<descriptors::LATTICE_TIME>();
auto extents = cell.template getField<DOMAIN_EXTENT>();
auto latticeShear = cell.template getField<LATTICE_SHEAR>();
auto y1 = cell.template getField<descriptors::Y1>();
Vector<V,DESCRIPTOR::d> latticeR = cell.template getField<descriptors::LOCATION>();
V y1Norm = util::norm<DESCRIPTOR::d>(y1);
Vector<V,DESCRIPTOR::d> normal;
Vector<V,DESCRIPTOR::d> tangent;
for ( int iD = 0; iD < DESCRIPTOR::d; iD++ ) {
normal[iD] = y1[iD] / y1Norm;
}
// This only works when shear is du/dy
tangent[0] = normal[1];
tangent[1] = 0.;
tangent[2] = 0.;
// Note that this only works for my exact case
V x_m = fmod(3 + latticeR[0] + tangent[0]*latticeShear*extents[1]*iT,extents[0] + 6) - 3;
if (x_m < -3){
x_m += extents[0] + 6;
}
x_m -= latticeR[0];
Vector<V,DESCRIPTOR::d> dx_m;
dx_m[0] = x_m;
dx_m[1] = 0.;
dx_m[2] = 0.;
Vector<V,DESCRIPTOR::d> distance = extents[1]*normal + dx_m;
V f_tilde[DESCRIPTOR::q] = {0.};
// Step 2: interpolate value f_tilde
interpolateDF(cell, distance, f_tilde);
// Step 3: Galiliean Transform of f_tilde -> f_xm
V drho = V(0.); // density from the temporary DF
Vector<V,DESCRIPTOR::d> u_xm; // u from the temporary DF
for (int iPop = 0; iPop < DESCRIPTOR::q; ++iPop){
drho += f_tilde[iPop];
u_xm += f_tilde[iPop]*descriptors::c<DESCRIPTOR>(iPop);
}
u_xm /= 1;
Vector<V,DESCRIPTOR::d> u_xt = u_xm - latticeShear*(extents[1]+1)*tangent;
auto uDotU_xm = util::dotProduct(u_xm,u_xm);
auto uDotU_xt = util::dotProduct(u_xt,u_xt);
auto invCs2 = descriptors::invCs2<V,DESCRIPTOR>();
// Equilibrium DFs
V f_eq_xm = V(0.);
V f_eq_xt = V(0.);
V cDotU_xm= V(0.);
V cDotU_xt= V(0.);
V t = V(0.);
for (int iPop = 0; iPop < DESCRIPTOR::q; iPop++){
Vector<V,DESCRIPTOR::d> c = descriptors::c<DESCRIPTOR>(iPop);
t = descriptors::t<V,DESCRIPTOR>(iPop);
cDotU_xm = util::dotProduct(c,u_xm);
cDotU_xt = util::dotProduct(c,u_xt);
f_eq_xm = drho * t +
1 * t * (cDotU_xm + util::pow(cDotU_xm,2)*(0.5 * invCs2) - uDotU_xm * 0.5)*invCs2;
f_eq_xt = drho * t +
1 * t * (cDotU_xt + util::pow(cDotU_xt,2)*(0.5 * invCs2) - uDotU_xt * 0.5)*invCs2;
// Step 4: Copy f to positions x_t
cell.neighbor(-1 * normal)[iPop] = f_tilde[iPop] - f_eq_xm + f_eq_xt;
}
}
};
}
#endif
Please let me know if you need any more portions of the code and thank you for the help!
-jmctighe
