24#ifndef SIMD_PACK_256_H_
25#define SIMD_PACK_256_H_
/// Forward declaration of the Mask class template, parameterised on the
/// element type T. No primary definition is visible in this part of the file.
38template <
typename T>
class Mask;
/// Forward declaration of the Pack class template, parameterised on the
/// element type T. No primary definition is visible in this part of the file.
39template <
typename T>
class Pack;
48 static constexpr unsigned storage_size = 1;
55 return value ? true_v : false_v;
60 return encode(*value);
/// Builds the mask register from four boolean flags.
///
/// Every flag is converted with encode() before it is stored. The encoded
/// values are passed to _mm256_set_epi64x in reverse order (d, c, b, a)
/// because that intrinsic expects the highest 64-bit element first; as a
/// result `a` occupies element 0 and `d` occupies element 3.
///
/// \param a flag for element 0
/// \param b flag for element 1
/// \param c flag for element 2
/// \param d flag for element 3
63 Mask(
bool a,
bool b,
bool c,
bool d):
64 _reg(_mm256_set_epi64x(encode(d),encode(c),encode(b),encode(a))) { }
/// Builds the mask register from four raw 64-bit values.
///
/// Unlike the boolean overload, the values are stored as given without
/// passing through encode(). They are handed to _mm256_set_epi64x in reverse
/// order (d, c, b, a) since the intrinsic takes the highest element first,
/// so `a` occupies element 0 and `d` occupies element 3.
///
/// NOTE(review): callers presumably pass values that are already in the
/// true_v / false_v encoding -- confirm against call sites.
66 Mask(std::uint64_t a, std::uint64_t b, std::uint64_t c, std::uint64_t d):
67 _reg(_mm256_set_epi64x(d,c,b,a)) { }
70 _reg(_mm256_loadu_si256(reinterpret_cast<__m256i*>(ptr))) { }
76 Mask(ptr[0],ptr[1],ptr[2],ptr[3]) { }
78 Mask(
bool* ptr, std::size_t iCell):
91 return _mm256_sub_epi64(_mm256_set1_epi64x(true_v), _reg);
96 const std::uint64_t* values =
reinterpret_cast<const std::uint64_t*
>(&_reg);
97 return values[0] == true_v
98 || values[1] == true_v
99 || values[2] == true_v
100 || values[3] == true_v;
111 static constexpr unsigned storage_size = 1;
118 return value ? true_v : false_v;
123 return encode(*value);
/// Builds the mask register from eight boolean flags.
///
/// Every flag is converted with encode() before it is stored. The encoded
/// values are passed to _mm256_set_epi32 in reverse order (h ... a) because
/// that intrinsic expects the highest 32-bit element first; as a result `a`
/// occupies element 0 and `h` occupies element 7.
///
/// \param a flag for element 0
/// \param b flag for element 1
/// \param c flag for element 2
/// \param d flag for element 3
/// \param e flag for element 4
/// \param f flag for element 5
/// \param g flag for element 6
/// \param h flag for element 7
126 Mask(
bool a,
bool b,
bool c,
bool d,
bool e,
bool f,
bool g,
bool h):
127 _reg(_mm256_set_epi32(encode(h),encode(g),encode(f),encode(e),encode(d),encode(c),encode(b),encode(a))) { }
130 _reg(_mm256_loadu_si256(reinterpret_cast<__m256i*>(ptr))) { }
133 Mask(ptr + iCell) { }
136 Mask(ptr[0],ptr[1],ptr[2],ptr[3],ptr[4],ptr[5],ptr[6],ptr[7]) { }
/// Builds the mask from boolean flags located at an offset into an array.
///
/// Delegates to the constructor taking a single `bool*`, passing `ptr`
/// advanced by `iCell` elements. Presumably that overload reads eight
/// consecutive flags starting at the given address -- verify against its
/// definition.
///
/// \param ptr   base address of the boolean flags
/// \param iCell element offset added to `ptr` before delegating
138 Mask(
bool* ptr, std::size_t iCell):
139 Mask(ptr + iCell) { }
151 return _mm256_sub_epi32(_mm256_set1_epi32(true_v), _reg);
154 operator bool()
const
156 const std::uint32_t* values =
reinterpret_cast<const std::uint32_t*
>(&_reg);
157 return values[0] == true_v
158 || values[1] == true_v
159 || values[2] == true_v
160 || values[3] == true_v
161 || values[4] == true_v
162 || values[5] == true_v
163 || values[6] == true_v
164 || values[7] == true_v;
/// Declaration of the Pack class template, parameterised on the element
/// type T. Only the declaration is present at this point in the listing.
168template <
typename T>
class Pack;
179 static constexpr std::size_t size = 4;
187 Pack(_mm256_set1_pd(val)) { }
190 Pack(static_cast<double>(val)) { }
193 Pack(static_cast<double>(val)) { }
/// Builds a pack from four individual double values.
///
/// The values are combined with _mm256_set_pd and the result is forwarded to
/// another Pack constructor (presumably the one accepting the register type
/// directly). _mm256_set_pd takes the highest element first, hence the
/// reversed argument order (d, c, b, a): `a` becomes element 0 and `d`
/// becomes element 3.
///
/// \param a value for element 0
/// \param b value for element 1
/// \param c value for element 2
/// \param d value for element 3
195 Pack(
double a,
double b,
double c,
double d):
196 Pack(_mm256_set_pd(d,c,b,a)) { }
199 Pack(_mm256_loadu_pd(ptr)) { }
202 Pack(_mm256_i32gather_pd(ptr, _mm_loadu_si128(reinterpret_cast<const __m128i*>(idx)), sizeof(double))) { }
217 return reinterpret_cast<const double*
>(&_reg)[i];
222 return reinterpret_cast<double*
>(&_reg)[i];
227 return Pack(_mm256_add_pd(_reg, rhs));
232 _reg = _mm256_add_pd(_reg, rhs);
238 return Pack(_mm256_sub_pd(_reg, rhs));
243 _reg = _mm256_sub_pd(_reg, rhs);
249 return Pack(_mm256_mul_pd(_reg, rhs));
254 _reg = _mm256_mul_pd(_reg, rhs);
260 return Pack(_mm256_div_pd(_reg, rhs));
265 _reg = _mm256_div_pd(_reg, rhs);
271 return *
this *
Pack(-1);
276 return _mm256_sqrt_pd(_reg);
289 static constexpr std::size_t size = 8;
297 Pack(_mm256_set1_ps(val)) { }
300 Pack(static_cast<float>(val)) { }
303 Pack(static_cast<float>(val)) { }
306 Pack(static_cast<float>(val)) { }
/// Builds a pack from eight individual float values.
///
/// The values are combined with _mm256_set_ps and the result is forwarded to
/// another Pack constructor (presumably the one accepting the register type
/// directly). _mm256_set_ps takes the highest element first, hence the
/// reversed argument order (h ... a): `a` becomes element 0 and `h` becomes
/// element 7.
///
/// \param a value for element 0
/// \param b value for element 1
/// \param c value for element 2
/// \param d value for element 3
/// \param e value for element 4
/// \param f value for element 5
/// \param g value for element 6
/// \param h value for element 7
308 Pack(
float a,
float b,
float c,
float d,
float e,
float f,
float g,
float h):
309 Pack(_mm256_set_ps(h,g,f,e,d,c,b,a)) { }
312 Pack(_mm256_loadu_ps(ptr)) { }
315 Pack(_mm256_i32gather_ps(ptr, _mm256_loadu_si256(reinterpret_cast<const __m256i*>(idx)), sizeof(float))) { }
330 return reinterpret_cast<const float*
>(&_reg)[i];
335 return reinterpret_cast<float*
>(&_reg)[i];
340 return Pack(_mm256_add_ps(_reg, rhs));
345 _reg = _mm256_add_ps(_reg, rhs);
351 return Pack(_mm256_sub_ps(_reg, rhs));
356 _reg = _mm256_sub_ps(_reg, rhs);
362 return Pack(_mm256_mul_ps(_reg, rhs));
367 _reg = _mm256_mul_ps(_reg, rhs);
373 return Pack(_mm256_div_ps(_reg, rhs));
378 _reg = _mm256_div_ps(_reg, rhs);
384 return *
this *
Pack(-1);
389 return _mm256_sqrt_ps(_reg);
399 for (
unsigned i=0; i < Pack<T>::size; ++i) {
408 if constexpr (std::is_same_v<T,double>) {
409 return _mm256_min_pd(rhs, lhs);
411 return _mm256_min_ps(rhs, lhs);
418 if constexpr (std::is_same_v<T,double>) {
419 return _mm256_max_pd(rhs, lhs);
421 return _mm256_max_ps(rhs, lhs);
437 _mm256_maskstore_pd(target, mask, value);
443 _mm256_maskstore_ps(target, mask, value);
453 _mm256_storeu_pd(target, value);
459 _mm256_storeu_ps(target, value);
470 _mm256_i32scatter_pd(target, _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(indices)), value,
sizeof(
double));
473 for (
unsigned i=0; i < simd::Pack<double>::size; ++i) {
474 target[indices[i]] = reg[i];
483 _mm256_i32scatter_ps(target, _mm256_loadu_si256(
reinterpret_cast<const __m256i*
>(indices)), value,
sizeof(
float));
486 for (
unsigned i=0; i < simd::Pack<float>::size; ++i) {
487 target[indices[i]] = reg[i];
Mask(storage_t *ptr, std::size_t iCell)
Mask(bool *ptr, std::size_t iCell)
static storage_t encode(bool *value)
static storage_t encode(bool value)
Mask(std::uint64_t a, std::uint64_t b, std::uint64_t c, std::uint64_t d)
Mask(bool a, bool b, bool c, bool d)
Mask(bool a, bool b, bool c, bool d, bool e, bool f, bool g, bool h)
static storage_t encode(bool value)
Mask(bool *ptr, std::size_t iCell)
Mask(storage_t *ptr, std::size_t iCell)
static storage_t encode(bool *value)
Pack & operator+=(Pack rhs)
Pack operator/(Pack rhs) const
Pack operator+(Pack rhs) const
double operator[](unsigned i) const
Pack operator-=(Pack rhs)
Pack operator-(Pack rhs) const
Pack & operator=(Pack rhs)
Pack(const double *ptr, const index_t *idx)
Pack & operator/=(Pack rhs)
Pack operator*(Pack rhs) const
Pack(double a, double b, double c, double d)
Pack & operator*=(Pack rhs)
double & operator[](unsigned i)
Pack operator*(Pack rhs) const
Pack & operator*=(Pack rhs)
Pack & operator+=(Pack rhs)
Pack operator+(Pack rhs) const
Pack & operator-=(Pack rhs)
Pack operator/(Pack rhs) const
float & operator[](unsigned i)
Pack operator-(Pack rhs) const
Pack(const float *ptr, const index_t *idx)
Pack & operator/=(Pack rhs)
Pack(float a, float b, float c, float d, float e, float f, float g, float h)
Pack & operator=(Pack rhs)
float operator[](unsigned i) const
void maskstore< double >(double *target, Mask< double > mask, Pack< double > value)
Pack< T > pow(Pack< T > base, Pack< T > exp)
void store< double >(double *target, Pack< double > value)
Pack< T > min(Pack< T > rhs, Pack< T > lhs)
Pack< T > max(Pack< T > rhs, Pack< T > lhs)
void maskstore< float >(float *target, Mask< float > mask, Pack< float > value)
void store(T *target, Pack< T > value)
void store< float >(float *target, Pack< float > value)
Pack< T > fabs(Pack< T > x)
void maskstore(T *target, Mask< T > mask, Pack< T > value)
cpu::simd::Pack< T > pow(cpu::simd::Pack< T > base, cpu::simd::Pack< T > exp)
Top level namespace for all of OpenLB.