Skip to content

Reply To: Manipulating populations with postprocessors on GPU

Due to recent bot attacks we have changed the sign-up process. If you want to participate in our forum, first register on this website and then send a message via our contact form.

Forums OpenLB General Topics Manipulating populations with postprocessors on GPU Reply To: Manipulating populations with postprocessors on GPU

#10937
jmctighe
Participant

Hi Adrian,

Here are the contents of the config.mk file currently setup for debugging (the same problem persists with -O3 optimization):

CXX             := nvcc
CC              := nvcc

CXXFLAGS        := -O0
CXXFLAGS        += -g -G -std=c++20 --forward-unknown-to-host-compiler

PARALLEL_MODE   := NONE

PLATFORMS       := CPU_SISD GPU_CUDA

# for e.g. RTX 30* (Ampere), see table in <code>rules.mk</code> for other options
CUDA_ARCH       := 90

FLOATING_POINT_TYPE := float

USE_EMBEDDED_DEPENDENCIES := ON

Below is the code for the entire post processor. The post processor itself is added in a function called “setShearPeriodic” which is very similar to the setTurbulentWallModel function for the implementation of the turbulent wall model post processor. I am using the same portion of code in setTurbulentWallModel to compute the surface normal vectors before adding the post processor.

#ifndef SHEAR_PERIODIC_POST_PROCESSOR_H
#define SHEAR_PERIODIC_POST_PROCESSOR_H

#include "core/util.h"
#include <math.h>

namespace olb {

  template <typename CELL, typename V = typename CELL::value_t, typename DESCRIPTOR = typename CELL::descriptor_t>
  void interpolateDF(CELL& cell, Vector<V,DESCRIPTOR::d> distance, V f_tilde[DESCRIPTOR::q]) any_platform{
    Vector<V,DESCRIPTOR::d> point1;
    for( int iD = 0; iD < DESCRIPTOR::d; iD++ ) {
      point1[iD] = util::floor(distance[iD]);
    }
    V q = distance[0] - point1[0];
    Vector<V,DESCRIPTOR::d> point2 = point1;
    point2[0] += 1;
    auto f_x1 = cell.neighbor(point1);
    auto f_x2 = cell.neighbor(point2);

    for(int iPop = 0; iPop < DESCRIPTOR::q; iPop++ ) {
      f_tilde[iPop] = f_x1[iPop]*(V{1}-q) + f_x2[iPop]*q;
    }
    
    
  };
  
//======================================================================
// ========== Shear Periodic (Lees-Edwards) BC PostProcessor =========//
//======================================================================
class shearPeriodicPostProcessor {
public:
  static constexpr OperatorScope scope = OperatorScope::PerCellWithParameters;
  using parameters = meta::list<descriptors::LATTICE_TIME>;

  int getPriority() const {
    return 1;
  }

  template <typename CELL, typename PARAMETERS, typename V = typename CELL::value_t>
  void apply(CELL& cell, PARAMETERS& parameters) any_platform {
    // Step 1: Find location x_m (y2)
    using DESCRIPTOR = typename CELL::descriptor_t;
    int iT = parameters.template get<descriptors::LATTICE_TIME>();
    auto extents = cell.template getField<DOMAIN_EXTENT>();
    auto latticeShear = cell.template getField<LATTICE_SHEAR>();
    auto y1 = cell.template getField<descriptors::Y1>();
    Vector<V,DESCRIPTOR::d> latticeR = cell.template getField<descriptors::LOCATION>();

    V y1Norm = util::norm<DESCRIPTOR::d>(y1);
    Vector<V,DESCRIPTOR::d> normal;
    Vector<V,DESCRIPTOR::d> tangent;
    for ( int iD = 0; iD < DESCRIPTOR::d; iD++ ) {
      normal[iD] = y1[iD] / y1Norm;
    }
    // This only works when shear is du/dy
    tangent[0] = normal[1];
    tangent[1] = 0.;
    tangent[2] = 0.;
    // Note that this only works for my exact case
    V x_m = fmod(3 + latticeR[0] + tangent[0]*latticeShear*extents[1]*iT,extents[0] + 6) - 3;
    if (x_m < -3){
      x_m += extents[0] + 6;
    }
    x_m -= latticeR[0];
    Vector<V,DESCRIPTOR::d> dx_m;
    dx_m[0] = x_m;
    dx_m[1] = 0.;
    dx_m[2] = 0.;
    Vector<V,DESCRIPTOR::d> distance = extents[1]*normal + dx_m;
    V f_tilde[DESCRIPTOR::q] = {0.};
    // Step 2: interpolate value f_tilde
    interpolateDF(cell, distance, f_tilde);
    
    // Step 3: Galiliean Transform of f_tilde -> f_xm
    V drho = V(0.); // density from the temporary DF    
    Vector<V,DESCRIPTOR::d> u_xm; // u from the temporary DF

    for (int iPop = 0; iPop < DESCRIPTOR::q; ++iPop){
      drho += f_tilde[iPop];
      u_xm += f_tilde[iPop]*descriptors::c<DESCRIPTOR>(iPop);
    }
    u_xm /= 1; 
    Vector<V,DESCRIPTOR::d> u_xt = u_xm - latticeShear*(extents[1]+1)*tangent;

    auto uDotU_xm = util::dotProduct(u_xm,u_xm);
    auto uDotU_xt = util::dotProduct(u_xt,u_xt);
    auto invCs2 = descriptors::invCs2<V,DESCRIPTOR>();
    // Equilibrium DFs
    V f_eq_xm = V(0.);
    V f_eq_xt = V(0.);

    V cDotU_xm= V(0.);
    V cDotU_xt= V(0.);
    V t = V(0.);
            
    for (int iPop = 0; iPop < DESCRIPTOR::q; iPop++){
      Vector<V,DESCRIPTOR::d> c = descriptors::c<DESCRIPTOR>(iPop);
      t = descriptors::t<V,DESCRIPTOR>(iPop);
      cDotU_xm = util::dotProduct(c,u_xm);
      cDotU_xt = util::dotProduct(c,u_xt);
      f_eq_xm = drho * t + 
                1 * t * (cDotU_xm + util::pow(cDotU_xm,2)*(0.5 * invCs2) - uDotU_xm * 0.5)*invCs2;
      f_eq_xt = drho * t + 
                1 * t * (cDotU_xt + util::pow(cDotU_xt,2)*(0.5 * invCs2) - uDotU_xt * 0.5)*invCs2;    
      // Step 4: Copy f to positions x_t
      cell.neighbor(-1 * normal)[iPop] = f_tilde[iPop] - f_eq_xm + f_eq_xt;
    }
  }
};

}
#endif

Please let me know if you need any more portions of the code and thank you for the help!
-jmctighe