24#ifndef SIMD_PACK_512_H_
25#define SIMD_PACK_512_H_
38template <
typename T>
class Mask;
39template <
typename T>
class Pack;
48 static constexpr unsigned storage_size = 8;
53 for (
unsigned j=1; j < storage_size; ++j) {
54 mask |= value[j] << j;
59 Mask(
bool b0,
bool b1,
bool b2,
bool b3,
bool b4,
bool b5,
bool b6,
bool b7):
60 _reg(std::uint16_t(b0 | b1<<1 | b2<<2 | b3<<3 | b4<<4 | b5<<5 | b6<<6 | b7<<7)) { }
63 _reg(_load_mask16(reinterpret_cast<std::uint16_t*>(ptr))) { }
66 Mask(ptr + iCell / storage_size) { }
78 return _knot_mask8(_reg);
83 const std::uint8_t* value =
reinterpret_cast<const std::uint8_t*
>(&_reg);
95 static constexpr unsigned storage_size = 16;
100 for (
unsigned j=1; j < storage_size; ++j) {
101 mask |= value[j] << j;
107 _reg(_load_mask16(ptr)) { }
110 Mask(ptr + iCell / storage_size) { }
122 return _knot_mask16(_reg);
125 operator bool()
const
127 const std::uint16_t* value =
reinterpret_cast<const std::uint16_t*
>(&_reg);
128 return value[0] != 0;
134class Pack<double> :
public SimdBase {
142 static constexpr std::size_t size = 8;
150 Pack(_mm512_set1_pd(val)) { }
153 Pack(static_cast<double>(val)) { }
156 Pack(static_cast<double>(val)) { }
158 Pack(
double a,
double b,
double c,
double d,
double e,
double f,
double g,
double h):
159 Pack(_mm512_set_pd(h,g,f,e,d,c,b,a)) { }
162 Pack(_mm512_loadu_pd(ptr)) { }
165 Pack(_mm512_i32gather_pd(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(idx)), ptr, sizeof(double))) { }
180 return reinterpret_cast<const double*
>(&_reg)[i];
185 return Pack(_mm512_add_pd(_reg, rhs));
190 _reg = _mm512_add_pd(_reg, rhs);
196 return Pack(_mm512_sub_pd(_reg, rhs));
201 _reg = _mm512_sub_pd(_reg, rhs);
207 return Pack(_mm512_mul_pd(_reg, rhs));
212 _reg = _mm512_mul_pd(_reg, rhs);
218 return Pack(_mm512_div_pd(_reg, rhs));
223 _reg = _mm512_div_pd(_reg, rhs);
229 return *
this *
Pack(-1);
234 return _mm512_sqrt_pd(_reg);
239class Pack<float> :
public SimdBase {
247 static constexpr std::size_t size = 16;
255 Pack(_mm512_set1_ps(val)) { }
258 Pack(static_cast<float>(val)) { }
261 Pack(static_cast<float>(val)) { }
264 Pack(static_cast<float>(val)) { }
266 Pack(
float a,
float b,
float c,
float d,
float e,
float f,
float g,
float h,
float i,
float j,
float k,
float l,
float m,
float n,
float o,
float p):
267 Pack(_mm512_set_ps(p,o,n,m,l,k,j,i,h,g,f,e,d,c,b,a)) { }
270 Pack(_mm512_loadu_ps(ptr)) { }
273 Pack(_mm512_i32gather_ps(_mm512_loadu_si512(reinterpret_cast<const __m512i*>(idx)), ptr, sizeof(float))) { }
288 return reinterpret_cast<const float*
>(&_reg)[i];
293 return Pack(_mm512_add_ps(_reg, rhs));
298 _reg = _mm512_add_ps(_reg, rhs);
304 return Pack(_mm512_sub_ps(_reg, rhs));
309 _reg = _mm512_sub_ps(_reg, rhs);
315 return Pack(_mm512_mul_ps(_reg, rhs));
320 _reg = _mm512_mul_ps(_reg, rhs);
326 return Pack(_mm512_div_ps(_reg, rhs));
331 _reg = _mm512_div_ps(_reg, rhs);
337 return *
this *
Pack(-1);
342 return _mm512_sqrt_ps(_reg);
348Pack<T>
pow(Pack<T> base, Pack<T> exp)
350 if constexpr (std::is_same_v<T,double>) {
351 return _mm512_pow_pd(base, exp);
353 return _mm512_pow_ps(base, exp);
358Pack<T>
min(Pack<T> rhs, Pack<T> lhs)
360 if constexpr (std::is_same_v<T,double>) {
361 return _mm512_min_pd(rhs, lhs);
363 return _mm512_min_ps(rhs, lhs);
368Pack<T>
max(Pack<T> rhs, Pack<T> lhs)
370 if constexpr (std::is_same_v<T,double>) {
371 return _mm512_max_pd(rhs, lhs);
373 return _mm512_max_ps(rhs, lhs);
378Pack<T>
fabs(Pack<T> x)
380 if constexpr (std::is_same_v<T,double>) {
381 return _mm512_abs_pd(x);
383 return _mm512_abs_ps(x);
388void maskstore(T* target, Mask<T> mask, Pack<T> value);
393 _mm512_mask_storeu_pd(target, mask, value);
399 _mm512_mask_storeu_ps(target, mask, value);
404void store(T* target, Pack<T> value);
409 _mm512_storeu_pd(target, value);
415 _mm512_storeu_ps(target, value);
420void store(T* target, Pack<T> value,
const typename Pack<T>::index_t* indices);
425 _mm512_i32scatter_pd(target, _mm256_loadu_si256(
reinterpret_cast<const __m256i*
>(indices)), value,
sizeof(
double));
432 _mm512_i32scatter_ps(target, _mm512_loadu_si512(
reinterpret_cast<const __m512i*
>(indices)), value,
sizeof(
float));
Mask(storage_t *ptr, std::size_t iCell)
static storage_t encode(bool *value)
Mask(bool b0, bool b1, bool b2, bool b3, bool b4, bool b5, bool b6, bool b7)
Mask(storage_t *ptr, std::size_t iCell)
static storage_t encode(bool *value)
Pack & operator+=(Pack rhs)
Pack operator/=(Pack rhs)
Pack operator/(Pack rhs) const
Pack operator+(Pack rhs) const
double operator[](unsigned i) const
Pack operator-=(Pack rhs)
Pack operator-(Pack rhs) const
Pack & operator=(Pack rhs)
Pack(const double *ptr, const index_t *idx)
Pack operator*(Pack rhs) const
Pack & operator*=(Pack rhs)
Pack(double a, double b, double c, double d, double e, double f, double g, double h)
Pack operator*(Pack rhs) const
Pack & operator*=(Pack rhs)
Pack & operator+=(Pack rhs)
Pack operator+(Pack rhs) const
Pack & operator-=(Pack rhs)
Pack operator/(Pack rhs) const
Pack operator-(Pack rhs) const
Pack(float a, float b, float c, float d, float e, float f, float g, float h, float i, float j, float k, float l, float m, float n, float o, float p)
Pack(const float *ptr, const index_t *idx)
Pack & operator/=(Pack rhs)
Pack & operator=(Pack rhs)
float operator[](unsigned i) const
void maskstore< double >(double *target, Mask< double > mask, Pack< double > value)
Pack< T > pow(Pack< T > base, Pack< T > exp)
void store< double >(double *target, Pack< double > value)
Pack< T > min(Pack< T > rhs, Pack< T > lhs)
Pack< T > max(Pack< T > rhs, Pack< T > lhs)
void maskstore< float >(float *target, Mask< float > mask, Pack< float > value)
void store(T *target, Pack< T > value)
void store< float >(float *target, Pack< float > value)
Pack< T > fabs(Pack< T > x)
void maskstore(T *target, Mask< T > mask, Pack< T > value)
Top level namespace for all of OpenLB.