Reply To: Problem while running code with GPU but running well using CPU
› Forums › OpenLB › Bug Reports › Problem while running code with GPU but running well using CPU › Reply To: Problem while running code with GPU but running well using CPU
This is the code I have modified
/* Lattice Boltzmann sample, written in C++, using the OpenLB
* library
*
* Copyright (C) 2006-2021 Nicolas Hafen, Mathias J. Krause
* E-mail contact: info@openlb.net
* The most recent release of OpenLB can be downloaded at
* <http://www.openlb.net/>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the Free
* Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
/* settlingCube3d.cpp:
* The case examines the settling of a cubical silica particle
* under the influence of gravity.
* The object is surrounded by water in a rectangular domain
* limited by no-slip boundary conditions.
* For the calculation of forces an DNS approach is chosen
* which also leads to a back-coupling of the particle on the fluid,
* inducing a flow.
*
* The simulation is based on the homogenised lattice Boltzmann approach
* (HLBM) introduced in “Particle flow simulations with homogenised
* lattice Boltzmann methods” by Krause et al.
* and extended in “Towards the simulation of arbitrarily shaped 3D particles
* using a homogenised lattice Boltzmann method” by Trunk et al.
* for the simulation of 3D particles.
*
* This example demonstrates the usage of HLBM in the OpenLB framework.
* To improve parallel performance, the particle decomposition scheme
* described in 10.48550/arXiv.2312.14172 is used.
*/
#include “olb3D.h”
#include “olb3D.hh” // use generic version only!
using namespace olb;
using namespace olb::descriptors;
using namespace olb::graphics;
using namespace olb::util;
using namespace olb::particles;
using namespace olb::particles::dynamics;
using namespace std;
using T = FLOATING_POINT_TYPE;
// Define lattice type
typedef PorousParticleD3Q19Descriptor DESCRIPTOR;
// Define particleType
#ifdef PARALLEL_MODE_MPI
// Particle decomposition improves parallel performance
typedef ResolvedDecomposedParticle3D PARTICLETYPE;
#else
typedef ResolvedParticle3D PARTICLETYPE;
#endif
#define WriteVTK
#define WriteGnuPlot
//std::string gnuplotFilename = “gnuplot.dat”;
// Discretization Settings
int res = 800;
T const charLatticeVelocity =0.03;
// Time Settings
T const maxPhysT = 0.15; // max. simulation time in s
T const iTwrite = 0.001; // write out intervall in s
// Domain Settings
T const lengthX = 0.1016;
T const lengthY = 0.1016;
T const lengthZ = 0.1016;
// Fluid Settings
T const physDensity = 1000;
T const physViscosity = 1E-6;
T const sphered = 0.00635/2;
//Particle Settings
T centerX = lengthX*.5;
T centerY =0.0079268;
T centerZ = lengthZ*.5;
T const sphereDensity = 4000;
Vector<T,3> sphereCenter = {centerX,centerY,centerZ};
// External acceleration: gravity
Vector<T,3> externalAcceleration = {.0, .0, -9.81};
// Particle settings: ensure initial velocity is zero
Vector<T,3> sphereVelocity = {0., -0.07884, 0.07884};
// Characteristic Quantities./s
T const charPhysLength = lengthX;
T const charPhysVelocity = 0.2; // Assumed maximal velocity
// Prepare geometry
void prepareGeometry(UnitConverter<T,DESCRIPTOR> const& converter,
SuperGeometry<T,3>& superGeometry)
{
OstreamManager clout(std::cout, “prepareGeometry”);
clout << “Prepare Geometry …” << std::endl;
superGeometry.rename(0, 2);
superGeometry.rename(2, 1, {1, 1, 1});
superGeometry.clean();
superGeometry.innerClean();
superGeometry.checkForErrors();
superGeometry.getStatistics().print();
clout << “Prepare Geometry … OK” << std::endl;
return;
}
// Set up the geometry of the simulation
void prepareLattice(
SuperLatticeGpu<T, DESCRIPTOR> sLattice(superGeometry);
, UnitConverter<T,DESCRIPTOR> const& converter,
SuperGeometry<T,3>& superGeometry)
{
OstreamManager clout(std::cout, “prepareLattice”); //output information control
clout << “Prepare Lattice …” << std::endl; //out prepare information
clout << “setting Velocity Boundaries …” << std::endl; //velocity boundaries
/// Material=0 –>do nothing
sLattice.defineDynamics<PorousParticleBGKdynamics>(superGeometry, 1); //dynamics module
setBounceBackBoundary(sLattice, superGeometry, 2); // bounce boundary
sLattice.setParameter<descriptors::OMEGA>(converter.getLatticeRelaxationFrequency());//relax OMEGA
{
auto& communicator = sLattice.getCommunicator(stage::PostPostProcess());
communicator.requestFields<POROSITY,VELOCITY_NUMERATOR,VELOCITY_DENOMINATOR>();//kongxilv sudufenzi,fenmu
communicator.requestOverlap(sLattice.getOverlap());//overlap area
communicator.exchangeRequests();
}
clout << “Prepare Lattice … OK” << std::endl; //papare ok
}
//Set Boundary Values
void setBoundaryValues(SuperLattice<T, DESCRIPTOR>& sLattice,
UnitConverter<T,DESCRIPTOR> const& converter, int iT,
SuperGeometry<T,3>& superGeometry)
{
OstreamManager clout(std::cout, “setBoundaryValues”);
if (iT == 0) {
AnalyticalConst3D<T, T> zero(0.);
AnalyticalConst3D<T, T> one(1.);
sLattice.defineField<descriptors::POROSITY>(superGeometry.getMaterialIndicator({0,1,2}), one);
// Set initial condition
AnalyticalConst3D<T, T> ux(0.);
AnalyticalConst3D<T, T> uy(0.);
AnalyticalConst3D<T, T> uz(0.);
AnalyticalConst3D<T, T> rho(1.);
AnalyticalComposed3D<T, T> u(ux, uy, uz);
//Initialize all values of distribution functions to their local equilibrium
sLattice.defineRhoU(superGeometry, 1, rho, u);
sLattice.iniEquilibrium(superGeometry, 1, rho, u);
// Make the lattice ready for simulation
sLattice.initialize();
}
}
/// Computes the pressure drop between the voxels before and after the cylinder
void getResults(SuperLattice<T, DESCRIPTOR>& sLattice,
UnitConverter<T,DESCRIPTOR> const& converter, int iT,
SuperGeometry<T,3>& superGeometry, Timer<T>& timer,
XParticleSystem<T, PARTICLETYPE>& xParticleSystem )
{
OstreamManager clout(std::cout, “getResults”); //information control
#ifdef WriteVTK
SuperVTMwriter3D<T> vtkWriter(“sedimentation”);
SuperLatticePhysVelocity3D<T, DESCRIPTOR> velocity(sLattice, converter);
SuperLatticePhysPressure3D<T, DESCRIPTOR> pressure(sLattice, converter);
SuperLatticePhysExternalPorosity3D<T, DESCRIPTOR> externalPor(sLattice, converter);
vtkWriter.addFunctor(velocity);
vtkWriter.addFunctor(pressure);
vtkWriter.addFunctor(externalPor);
if (iT == 0) {
SuperLatticeGeometry3D<T, DESCRIPTOR> geometry(sLattice, superGeometry);
SuperLatticeCuboid3D<T, DESCRIPTOR> cuboid(sLattice);
SuperLatticeRank3D<T, DESCRIPTOR> rank(sLattice);
vtkWriter.write(geometry);
vtkWriter.write(cuboid);
vtkWriter.write(rank);
vtkWriter.createMasterFile();
}
if (iT % converter.getLatticeTime(iTwrite) == 0) {
vtkWriter.write(iT);
}
#endif
#ifdef PARALLEL_MODE_MPI
communication::forParticlesInSuperParticleSystem<
T, PARTICLETYPE, conditions::valid_particle_centres>(
xParticleSystem,
[&](Particle<T, PARTICLETYPE>& particle,
ParticleSystem<T, PARTICLETYPE>& particleSystem, int globiC) {
io::printResolvedParticleInfo(particle);
});
#else
for (std::size_t iP=0; iP<xParticleSystem.size(); ++iP) {
auto particle = xParticleSystem.get(iP);
// io::printResolvedParticleInfo(particle);
}
#endif
} // 函数结束的正确闭包
int main(int argc, char* argv[])
{
/// === 1st Step: Initialization ===
olbInit(&argc, &argv);
singleton::directories().setOutputDir(“./tmp/”);
OstreamManager clout(std::cout, “main”);
// Define restitution coefficient
T restitutionCoefficient1 = 0.87;
T restitutionCoefficient2 = 0.8;
clout << “Restitution coefficient: ” << restitutionCoefficient1 << std::endl;
// Set bounce to 0
T bounce = 0.0;
clout << “Bounce: ” << bounce << std::endl;
// Calculate cell-wall position
T cellWallPosition = centerY – sphered;
clout << “Cell-wall position: ” << cellWallPosition << std::endl;
// New flag to track bounce
// bool hasBounced = false; // Initially, the particle has not bounced
UnitConverterFromResolutionAndLatticeVelocity<T,DESCRIPTOR> converter(
(int) res, //resolution
( T ) charLatticeVelocity, //charLatticeVelocity
( T ) charPhysLength, //charPhysLength
( T ) charPhysVelocity, //charPhysVelocity
( T ) physViscosity, //physViscosity
( T ) physDensity //physDensity
);
converter.print();
/// === 2rd Step: Prepare Geometry ===
/// Instantiation of a cuboidGeometry with weights
Vector<T,3> origin( 0. );
Vector<T,3> extend( lengthX, lengthY, lengthZ );
IndicatorCuboid3D<T> cuboid(extend, origin);
#ifdef PARALLEL_MODE_MPI
CuboidGeometry3D<T> cuboidGeometry(cuboid, converter.getConversionFactorLength(), singleton::mpi().getSize());
#else
CuboidGeometry3D<T> cuboidGeometry(cuboid, converter.getConversionFactorLength(), 7);
#endif
cuboidGeometry.print();
HeuristicLoadBalancer<T> loadBalancer(cuboidGeometry);
SuperGeometry<T,3> superGeometry(cuboidGeometry, loadBalancer, 2);
prepareGeometry(converter, superGeometry);
/// === 3rd Step: Prepare Lattice ===
SuperLattice<T, DESCRIPTOR> sLattice(superGeometry);
// Prepare lattice
prepareLattice(sLattice, converter, superGeometry);
// Create ParticleSystem
#ifdef PARALLEL_MODE_MPI
SuperParticleSystem<T,PARTICLETYPE> particleSystem(superGeometry);
#else
ParticleSystem<T,PARTICLETYPE> particleSystem;
#endif
//Create particle manager handling coupling, gravity and particle dynamics
ParticleManager<T,DESCRIPTOR,PARTICLETYPE> particleManager(
particleSystem, superGeometry, sLattice, converter, externalAcceleration);
// Create and assign resolved particle dynamics
particleSystem.defineDynamics<
VerletParticleDynamics<T,PARTICLETYPE>>();
// Calculate particle quantities
T epsilon = 0.5*converter.getConversionFactorLength();
Vector<T,3> sphereExtend( sphered );
// Create Particle 1
creators::addResolvedSphere3D(particleSystem, sphereCenter, sphered, epsilon, sphereDensity);
auto particle = particleSystem.get(0);
particle.template setField<MOBILITY,VELOCITY>(sphereVelocity);
// Check ParticleSystem
particleSystem.checkForErrors();
/// === 4th Step: Main Loop with Timer ===
Timer<T> timer(converter.getLatticeTime(maxPhysT), superGeometry.getStatistics().getNvoxel());
timer.start();
/// === 5th Step: Definition of Initial and Boundary Conditions ===
setBoundaryValues(sLattice, converter, 0, superGeometry);
clout << “MaxIT: ” << converter.getLatticeTime(maxPhysT) << std::endl;
std::ofstream outputFile(“particle_data.dat”);
if (!outputFile.is_open()) {
std::cerr << “Failed to open the file for writing.” << std::endl;
return 1;
}
// Define the total physical time for uniform motion
T uniformMotionTime = 0.02; // 0.02 seconds of uniform motion
// Adjust the time step to ensure that the total motion time is consistent across resolutions
T adjustedTimeStep = uniformMotionTime / (converter.getPhysTime(iTwrite)); // Time adjustment factor based on lattice time step
// Initialize the elapsed time and velocity for uniform motion
T elapsedTime = 0.0;
Vector<T, 3> constantVelocity = sphereVelocity; // 水平和垂直速度分别为0.12
size_t writeStep = converter.getLatticeTime(iTwrite);
if (writeStep < 1) writeStep = 1; // 避免 mod 0 错误
// 确保 tau > 0.5
T tau = converter.getLatticeRelaxationFrequency();
if (tau <= 0.5) {
clout << “Warning: Tau is too small! Current Tau: ” << tau << std::endl;
exit(1);
}
// 确保 Ma < 0.1
T cs = 1.0 / sqrt(3.0);
T maxMach = charLatticeVelocity / cs;
if (maxMach >= 0.1) {
clout << “Warning: Ma = ” << maxMach << ” is too large! Reduce charLatticeVelocity.” << std::endl;
exit(1);
}
// Modify the main loop to ensure uniform motion during the specified time
for (std::size_t iT = 0; iT < converter.getLatticeTime(maxPhysT) + 10; ++iT) {
particleManager.execute<
couple_lattice_to_particles<T, DESCRIPTOR, PARTICLETYPE>, // 计算流体对粒子的力
#ifdef PARALLEL_MODE_MPI
communicate_surface_force<T, PARTICLETYPE>,
#endif
//apply_hydrodynamic_force<T, PARTICLETYPE>, // 应用流体阻力
process_dynamics<T, PARTICLETYPE>,
#ifdef PARALLEL_MODE_MPI
update_particle_core_distribution<T, PARTICLETYPE>,
#endif
couple_particles_to_lattice<T, DESCRIPTOR, PARTICLETYPE>
>();
//if (iT % converter.getLatticeTime(iTwrite) == 0) {
if (iT % 1 == 0) {
if (singleton::mpi().getRank() == 0) {
elapsedTime = converter.getPhysTime(iT);
if (elapsedTime <= uniformMotionTime) {
auto particle = particleSystem.get(0); // 获取第一个粒子
Vector<T, 3> position = particle.template getField<GENERAL, POSITION>();
Vector<T, 3> velocity = constantVelocity; // 设定匀速运动速度
// Update the particle’s position with the adjusted time step
// position += velocity * adjustedTimeStep; // Adjusting the step for consistent physical time
particle.template setField<GENERAL, POSITION>(position);
particle.template setField<MOBILITY, VELOCITY>(velocity);
elapsedTime += adjustedTimeStep; // Update elapsed time with adjusted step
}
// Execute particle manager
auto particle = particleSystem.get(0); // 从粒子系统中获取粒子
Vector<T, 3> position = particle.template getField<GENERAL, POSITION>();
Vector<T, 3> velocity = particle.template getField<MOBILITY, VELOCITY>();
T bottomPosition = position[1] – sphered;
T epsilon = 2e-4; // 容差值
if (bottomPosition <= epsilon) { // 外层判断:检测是否碰到墙壁
if (velocity[1] < 0 && bounce < 1) { // 内层判断:速度方向向下,且未达到最大反弹次数
// 打印碰撞前的信息
std::cout << “Collision detected!” << std::endl;
std::cout << “Velocity before bounce: ” << velocity[1]
<< “, Position before bounce: ” << position[1] << std::endl;
// 反转速度并应用回弹系数
velocity[1] = -velocity[1] * restitutionCoefficient1;
velocity[2] = velocity[2] * restitutionCoefficient2;
// 更新粒子的位置和速度
particle.template setField<GENERAL, POSITION>(position);
particle.template setField<MOBILITY, VELOCITY>(velocity);
bounce += 1;
// 打印碰撞后的信息
std::cout << “New velocity after bounce: ” << velocity[1]
<< “, Position after bounce: ” << position[1] << std::endl;
std::cout << “Bounce count: ” << bounce << std::endl;
}
}
if (bounce > 0 && velocity[1] < 0.03) { // 检查水平速度是否小于 0.05
velocity[1] *= 2; // 将水平速度乘以 2
particle.template setField<MOBILITY, VELOCITY>(velocity); // 更新速度
}
// 输出当前时间和粒子的位置、速度
std::cout << “Time: ” << converter.getPhysTime(iT)
<< ” Current position: ” << position[1] << “, ” << position[2]
<< “, Velocity: ” << velocity[1] << “, ” << velocity[2] << std::endl;
Vector<T, 3> force = particle.template getField<FORCING, FORCE>();
Vector<T, 3> acceleration = particle.template getField<MOBILITY, ACCELERATION_STRD>();
// 输出粒子数据到文件
outputFile << converter.getPhysTime(iT) << ” ”
<< position[1] << ” ” << position[2] << ” ”
<< velocity[1] << ” ” << velocity[2] << ” ”
<< acceleration[1] << ” ” << acceleration[2] << ” ”
<< force[1] << ” ” << force[2] << std::endl;
}
}
// Get Results
getResults(sLattice, converter, iT, superGeometry, timer, particleSystem);
// Collide and stream
sLattice.collideAndStreamGpu();
}
outputFile.close();
timer.stop();
timer.printSummary();
}
