OpenLB 1.7
Loading...
Searching...
No Matches
256.h
Go to the documentation of this file.
1/* This file is part of the OpenLB library
2 *
3 * Copyright (C) 2021 Adrian Kummerlaender
4 * E-mail contact: info@openlb.net
5 * The most recent release of OpenLB can be downloaded at
6 * <http://www.openlb.net/>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 2
11 * of the License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public
19 * License along with this program; if not, write to the Free
20 * Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22*/
23
24#ifndef SIMD_PACK_256_H_
25#define SIMD_PACK_256_H_
26
27#include <immintrin.h>
28
29#include <cstdint>
30#include <type_traits>
31
32namespace olb {
33
34namespace cpu {
35
36namespace simd {
37
38template <typename T> class Mask;
39template <typename T> class Pack;
40
41template <>
42class Mask<double> {
43private:
44 __m256i _reg;
45
46public:
47 using storage_t = std::uint64_t;
48 static constexpr unsigned storage_size = 1;
49
50 static constexpr storage_t true_v = 1l << 63;
51 static constexpr storage_t false_v = 0l;
52
53 static storage_t encode(bool value)
54 {
55 return value ? true_v : false_v;
56 }
57
58 static storage_t encode(bool* value)
59 {
60 return encode(*value);
61 }
62
63 Mask(bool a, bool b, bool c, bool d):
64 _reg(_mm256_set_epi64x(encode(d),encode(c),encode(b),encode(a))) { }
65
66 Mask(std::uint64_t a, std::uint64_t b, std::uint64_t c, std::uint64_t d):
67 _reg(_mm256_set_epi64x(d,c,b,a)) { }
68
69 Mask(std::uint64_t* ptr):
70 _reg(_mm256_loadu_si256(reinterpret_cast<__m256i*>(ptr))) { }
71
72 Mask(storage_t* ptr, std::size_t iCell):
73 Mask(ptr + iCell) { }
74
75 Mask(bool* ptr):
76 Mask(ptr[0],ptr[1],ptr[2],ptr[3]) { }
77
78 Mask(bool* ptr, std::size_t iCell):
79 Mask(ptr + iCell) { }
80
81 Mask(__m256i reg):
82 _reg(reg) { }
83
84 operator __m256i()
85 {
86 return _reg;
87 }
88
89 __m256i neg() const
90 {
91 return _mm256_sub_epi64(_mm256_set1_epi64x(true_v), _reg);
92 }
93
94 operator bool() const
95 {
96 const std::uint64_t* values = reinterpret_cast<const std::uint64_t*>(&_reg);
97 return values[0] == true_v
98 || values[1] == true_v
99 || values[2] == true_v
100 || values[3] == true_v;
101 }
102};
103
104template <>
105class Mask<float> {
106private:
107 __m256i _reg;
108
109public:
110 using storage_t = std::uint32_t;
111 static constexpr unsigned storage_size = 1;
112
113 static constexpr storage_t true_v = 1 << 31;
114 static constexpr storage_t false_v = 0;
115
116 static storage_t encode(bool value)
117 {
118 return value ? true_v : false_v;
119 }
120
121 static storage_t encode(bool* value)
122 {
123 return encode(*value);
124 }
125
126 Mask(bool a, bool b, bool c, bool d, bool e, bool f, bool g, bool h):
127 _reg(_mm256_set_epi32(encode(h),encode(g),encode(f),encode(e),encode(d),encode(c),encode(b),encode(a))) { }
128
130 _reg(_mm256_loadu_si256(reinterpret_cast<__m256i*>(ptr))) { }
131
132 Mask(storage_t* ptr, std::size_t iCell):
133 Mask(ptr + iCell) { }
134
135 Mask(bool* ptr):
136 Mask(ptr[0],ptr[1],ptr[2],ptr[3],ptr[4],ptr[5],ptr[6],ptr[7]) { }
137
138 Mask(bool* ptr, std::size_t iCell):
139 Mask(ptr + iCell) { }
140
141 Mask(__m256i reg):
142 _reg(reg) { }
143
144 operator __m256i()
145 {
146 return _reg;
147 }
148
149 __m256i neg() const
150 {
151 return _mm256_sub_epi32(_mm256_set1_epi32(true_v), _reg);
152 }
153
154 operator bool() const
155 {
156 const std::uint32_t* values = reinterpret_cast<const std::uint32_t*>(&_reg);
157 return values[0] == true_v
158 || values[1] == true_v
159 || values[2] == true_v
160 || values[3] == true_v
161 || values[4] == true_v
162 || values[5] == true_v
163 || values[6] == true_v
164 || values[7] == true_v;
165 }
166};
167
168template <typename T> class Pack;
169
170template <>
171class Pack<double> : public SimdBase {
172private:
173 __m256d _reg;
174
175public:
177 using index_t = std::uint32_t;
178
179 static constexpr std::size_t size = 4;
180
181 Pack() = default;
182
183 Pack(__m256d reg):
184 _reg(reg) { }
185
186 Pack(double val):
187 Pack(_mm256_set1_pd(val)) { }
188
189 Pack(int val):
190 Pack(static_cast<double>(val)) { }
191
192 Pack(std::size_t val):
193 Pack(static_cast<double>(val)) { }
194
195 Pack(double a, double b, double c, double d):
196 Pack(_mm256_set_pd(d,c,b,a)) { }
197
198 Pack(const double* ptr):
199 Pack(_mm256_loadu_pd(ptr)) { }
200
201 Pack(const double* ptr, const index_t* idx):
202 Pack(_mm256_i32gather_pd(ptr, _mm_loadu_si128(reinterpret_cast<const __m128i*>(idx)), sizeof(double))) { }
203
204 operator __m256d()
205 {
206 return _reg;
207 }
208
210 {
211 _reg = rhs._reg;
212 return *this;
213 }
214
215 double operator[](unsigned i) const
216 {
217 return reinterpret_cast<const double*>(&_reg)[i];
218 }
219
220 double& operator[](unsigned i)
221 {
222 return reinterpret_cast<double*>(&_reg)[i];
223 }
224
225 Pack operator+(Pack rhs) const
226 {
227 return Pack(_mm256_add_pd(_reg, rhs));
228 }
229
231 {
232 _reg = _mm256_add_pd(_reg, rhs);
233 return *this;
234 }
235
236 Pack operator-(Pack rhs) const
237 {
238 return Pack(_mm256_sub_pd(_reg, rhs));
239 }
240
242 {
243 _reg = _mm256_sub_pd(_reg, rhs);
244 return *this;
245 }
246
247 Pack operator*(Pack rhs) const
248 {
249 return Pack(_mm256_mul_pd(_reg, rhs));
250 }
251
253 {
254 _reg = _mm256_mul_pd(_reg, rhs);
255 return *this;
256 }
257
258 Pack operator/(Pack rhs) const
259 {
260 return Pack(_mm256_div_pd(_reg, rhs));
261 }
262
264 {
265 _reg = _mm256_div_pd(_reg, rhs);
266 return *this;
267 }
268
270 {
271 return *this * Pack(-1);
272 }
273
274 Pack sqrt() const
275 {
276 return _mm256_sqrt_pd(_reg);
277 }
278};
279
280template <>
281class Pack<float> : public SimdBase {
282private:
283 __m256 _reg;
284
285public:
287 using index_t = std::uint32_t;
288
289 static constexpr std::size_t size = 8;
290
291 Pack() = default;
292
293 Pack(__m256 reg):
294 _reg(reg) { }
295
296 Pack(float val):
297 Pack(_mm256_set1_ps(val)) { }
298
299 Pack(double val):
300 Pack(static_cast<float>(val)) { }
301
302 Pack(int val):
303 Pack(static_cast<float>(val)) { }
304
305 Pack(std::size_t val):
306 Pack(static_cast<float>(val)) { }
307
308 Pack(float a, float b, float c, float d, float e, float f, float g, float h):
309 Pack(_mm256_set_ps(h,g,f,e,d,c,b,a)) { }
310
311 Pack(const float* ptr):
312 Pack(_mm256_loadu_ps(ptr)) { }
313
314 Pack(const float* ptr, const index_t* idx):
315 Pack(_mm256_i32gather_ps(ptr, _mm256_loadu_si256(reinterpret_cast<const __m256i*>(idx)), sizeof(float))) { }
316
317 operator __m256()
318 {
319 return _reg;
320 }
321
323 {
324 _reg = rhs._reg;
325 return *this;
326 }
327
328 float operator[](unsigned i) const
329 {
330 return reinterpret_cast<const float*>(&_reg)[i];
331 }
332
333 float& operator[](unsigned i)
334 {
335 return reinterpret_cast<float*>(&_reg)[i];
336 }
337
338 Pack operator+(Pack rhs) const
339 {
340 return Pack(_mm256_add_ps(_reg, rhs));
341 }
342
344 {
345 _reg = _mm256_add_ps(_reg, rhs);
346 return *this;
347 }
348
349 Pack operator-(Pack rhs) const
350 {
351 return Pack(_mm256_sub_ps(_reg, rhs));
352 }
353
355 {
356 _reg = _mm256_sub_ps(_reg, rhs);
357 return *this;
358 }
359
360 Pack operator*(Pack rhs) const
361 {
362 return Pack(_mm256_mul_ps(_reg, rhs));
363 }
364
366 {
367 _reg = _mm256_mul_ps(_reg, rhs);
368 return *this;
369 }
370
371 Pack operator/(Pack rhs) const
372 {
373 return Pack(_mm256_div_ps(_reg, rhs));
374 }
375
377 {
378 _reg = _mm256_div_ps(_reg, rhs);
379 return *this;
380 }
381
383 {
384 return *this * Pack(-1);
385 }
386
387 __m256 sqrt()
388 {
389 return _mm256_sqrt_ps(_reg);
390 }
391};
392
393
394template <typename T>
396{
397 // TODO: Replace by more efficient implementation
398 Pack<T> result;
399 for (unsigned i=0; i < Pack<T>::size; ++i) {
400 result[i] = util::pow(base[i], exp[i]);
401 }
402 return result;
403}
404
405template <typename T>
407{
408 if constexpr (std::is_same_v<T,double>) {
409 return _mm256_min_pd(rhs, lhs);
410 } else {
411 return _mm256_min_ps(rhs, lhs);
412 }
413}
414
415template <typename T>
417{
418 if constexpr (std::is_same_v<T,double>) {
419 return _mm256_max_pd(rhs, lhs);
420 } else {
421 return _mm256_max_ps(rhs, lhs);
422 }
423}
424
425template <typename T>
427{
428 return max(x, -x);
429}
430
431template <typename T>
432void maskstore(T* target, Mask<T> mask, Pack<T> value);
433
434template <>
435void maskstore<double>(double* target, Mask<double> mask, Pack<double> value)
436{
437 _mm256_maskstore_pd(target, mask, value);
438}
439
440template <>
441void maskstore<float>(float* target, Mask<float> mask, Pack<float> value)
442{
443 _mm256_maskstore_ps(target, mask, value);
444}
445
446
447template <typename T>
448void store(T* target, Pack<T> value);
449
450template <>
451void store<double>(double* target, Pack<double> value)
452{
453 _mm256_storeu_pd(target, value);
454}
455
456template <>
457void store<float>(float* target, Pack<float> value)
458{
459 _mm256_storeu_ps(target, value);
460}
461
462
463template <typename T>
464void store(T* target, Pack<T> value, const typename Pack<T>::index_t* indices);
465
466template <>
467void store<double>(double* target, Pack<double> value, const Pack<double>::index_t* indices)
468{
469#ifdef __AVX512F__
470 _mm256_i32scatter_pd(target, _mm_loadu_si128(reinterpret_cast<const __m128i*>(indices)), value, sizeof(double));
471#else
472 __m256d reg = value;
473 for (unsigned i=0; i < simd::Pack<double>::size; ++i) {
474 target[indices[i]] = reg[i];
475 }
476#endif
477}
478
479template <>
480void store<float>(float* target, Pack<float> value, const Pack<float>::index_t* indices)
481{
482#ifdef __AVX512F__
483 _mm256_i32scatter_ps(target, _mm256_loadu_si256(reinterpret_cast<const __m256i*>(indices)), value, sizeof(float));
484#else
485 __m256 reg = value;
486 for (unsigned i=0; i < simd::Pack<float>::size; ++i) {
487 target[indices[i]] = reg[i];
488 }
489#endif
490}
491
492}
493
494}
495
496}
497
498#endif
Mask(storage_t *ptr, std::size_t iCell)
Definition 256.h:72
Mask(bool *ptr, std::size_t iCell)
Definition 256.h:78
__m256i neg() const
Definition 256.h:89
Mask(std::uint64_t *ptr)
Definition 256.h:69
static storage_t encode(bool *value)
Definition 256.h:58
static storage_t encode(bool value)
Definition 256.h:53
Mask(std::uint64_t a, std::uint64_t b, std::uint64_t c, std::uint64_t d)
Definition 256.h:66
Mask(bool a, bool b, bool c, bool d)
Definition 256.h:63
std::uint64_t storage_t
Definition 256.h:47
Mask(bool a, bool b, bool c, bool d, bool e, bool f, bool g, bool h)
Definition 256.h:126
static storage_t encode(bool value)
Definition 256.h:116
std::uint32_t storage_t
Definition 256.h:110
Mask(bool *ptr, std::size_t iCell)
Definition 256.h:138
Mask(storage_t *ptr, std::size_t iCell)
Definition 256.h:132
Mask(storage_t *ptr)
Definition 256.h:129
static storage_t encode(bool *value)
Definition 256.h:121
__m256i neg() const
Definition 256.h:149
Pack & operator+=(Pack rhs)
Definition 256.h:230
Pack operator/(Pack rhs) const
Definition 256.h:258
Pack operator+(Pack rhs) const
Definition 256.h:225
double operator[](unsigned i) const
Definition 256.h:215
Pack operator-=(Pack rhs)
Definition 256.h:241
Pack operator-(Pack rhs) const
Definition 256.h:236
Pack & operator=(Pack rhs)
Definition 256.h:209
Pack(std::size_t val)
Definition 256.h:192
Pack(const double *ptr, const index_t *idx)
Definition 256.h:201
Pack & operator/=(Pack rhs)
Definition 256.h:263
std::uint32_t index_t
Definition 256.h:177
Pack(const double *ptr)
Definition 256.h:198
Pack operator*(Pack rhs) const
Definition 256.h:247
Pack(double a, double b, double c, double d)
Definition 256.h:195
Pack & operator*=(Pack rhs)
Definition 256.h:252
double & operator[](unsigned i)
Definition 256.h:220
Pack operator*(Pack rhs) const
Definition 256.h:360
Pack & operator*=(Pack rhs)
Definition 256.h:365
Pack operator-() const
Definition 256.h:382
Pack & operator+=(Pack rhs)
Definition 256.h:343
Pack operator+(Pack rhs) const
Definition 256.h:338
Pack(std::size_t val)
Definition 256.h:305
Pack(const float *ptr)
Definition 256.h:311
Pack & operator-=(Pack rhs)
Definition 256.h:354
Pack operator/(Pack rhs) const
Definition 256.h:371
float & operator[](unsigned i)
Definition 256.h:333
Pack operator-(Pack rhs) const
Definition 256.h:349
Pack(const float *ptr, const index_t *idx)
Definition 256.h:314
Pack & operator/=(Pack rhs)
Definition 256.h:376
Pack(float a, float b, float c, float d, float e, float f, float g, float h)
Definition 256.h:308
Pack & operator=(Pack rhs)
Definition 256.h:322
float operator[](unsigned i) const
Definition 256.h:328
std::uint32_t index_t
Definition 256.h:287
void maskstore< double >(double *target, Mask< double > mask, Pack< double > value)
Definition 256.h:435
Pack< T > pow(Pack< T > base, Pack< T > exp)
Definition 256.h:395
void store< double >(double *target, Pack< double > value)
Definition 256.h:451
Pack< T > min(Pack< T > rhs, Pack< T > lhs)
Definition 256.h:406
Pack< T > max(Pack< T > rhs, Pack< T > lhs)
Definition 256.h:416
void maskstore< float >(float *target, Mask< float > mask, Pack< float > value)
Definition 256.h:441
void store(T *target, Pack< T > value)
void store< float >(float *target, Pack< float > value)
Definition 256.h:457
Pack< T > fabs(Pack< T > x)
Definition 256.h:426
void maskstore(T *target, Mask< T > mask, Pack< T > value)
cpu::simd::Pack< T > pow(cpu::simd::Pack< T > base, cpu::simd::Pack< T > exp)
Definition pack.h:112
Top level namespace for all of OpenLB.