OpenLB 1.7
Loading...
Searching...
No Matches
512.h
Go to the documentation of this file.
1/* This file is part of the OpenLB library
2 *
3 * Copyright (C) 2021 Adrian Kummerlaender
4 * E-mail contact: info@openlb.net
5 * The most recent release of OpenLB can be downloaded at
6 * <http://www.openlb.net/>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 2
11 * of the License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public
19 * License along with this program; if not, write to the Free
20 * Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22*/
23
24#ifndef SIMD_PACK_512_H_
25#define SIMD_PACK_512_H_
26
27#include <immintrin.h>
28
29#include <cstdint>
30#include <type_traits>
31
32namespace olb {
33
34namespace cpu {
35
36namespace simd {
37
38template <typename T> class Mask;
39template <typename T> class Pack;
40
41template <>
42class Mask<double> {
43private:
44 __mmask8 _reg;
45
46public:
47 using storage_t = std::uint8_t;
48 static constexpr unsigned storage_size = 8;
49
50 static storage_t encode(bool* value)
51 {
52 storage_t mask = value[0];
53 for (unsigned j=1; j < storage_size; ++j) {
54 mask |= value[j] << j;
55 }
56 return mask;
57 }
58
59 Mask(bool b0, bool b1, bool b2, bool b3, bool b4, bool b5, bool b6, bool b7):
60 _reg(std::uint16_t(b0 | b1<<1 | b2<<2 | b3<<3 | b4<<4 | b5<<5 | b6<<6 | b7<<7)) { }
61
62 Mask(std::uint8_t* ptr):
63 _reg(_load_mask16(reinterpret_cast<std::uint16_t*>(ptr))) { }
64
65 Mask(storage_t* ptr, std::size_t iCell):
66 Mask(ptr + iCell / storage_size) { }
67
68 Mask(__mmask8 reg):
69 _reg(reg) { }
70
71 operator __mmask8()
72 {
73 return _reg;
74 }
75
76 __mmask8 neg() const
77 {
78 return _knot_mask8(_reg);
79 }
80
81 operator bool() const
82 {
83 const std::uint8_t* value = reinterpret_cast<const std::uint8_t*>(&_reg);
84 return value[0] != 0;
85 }
86};
87
88template <>
89class Mask<float> {
90private:
91 __mmask16 _reg;
92
93public:
94 using storage_t = std::uint16_t;
95 static constexpr unsigned storage_size = 16;
96
97 static storage_t encode(bool* value)
98 {
99 storage_t mask = value[0];
100 for (unsigned j=1; j < storage_size; ++j) {
101 mask |= value[j] << j;
102 }
103 return mask;
104 }
105
106 Mask(std::uint16_t* ptr):
107 _reg(_load_mask16(ptr)) { }
108
109 Mask(storage_t* ptr, std::size_t iCell):
110 Mask(ptr + iCell / storage_size) { }
111
112 Mask(__mmask16 reg):
113 _reg(reg) { }
114
115 operator __mmask16()
116 {
117 return _reg;
118 }
119
120 __mmask16 neg() const
121 {
122 return _knot_mask16(_reg);
123 }
124
125 operator bool() const
126 {
127 const std::uint16_t* value = reinterpret_cast<const std::uint16_t*>(&_reg);
128 return value[0] != 0;
129 }
130};
131
132
133template <>
134class Pack<double> : public SimdBase {
135private:
136 __m512d _reg;
137
138public:
140 using index_t = std::uint32_t;
141
142 static constexpr std::size_t size = 8;
143
144 Pack() = default;
145
146 Pack(__m512d reg):
147 _reg(reg) { }
148
149 Pack(double val):
150 Pack(_mm512_set1_pd(val)) { }
151
152 Pack(int val):
153 Pack(static_cast<double>(val)) { }
154
155 Pack(std::size_t val):
156 Pack(static_cast<double>(val)) { }
157
158 Pack(double a, double b, double c, double d, double e, double f, double g, double h):
159 Pack(_mm512_set_pd(h,g,f,e,d,c,b,a)) { }
160
161 Pack(const double* ptr):
162 Pack(_mm512_loadu_pd(ptr)) { }
163
164 Pack(const double* ptr, const index_t* idx):
165 Pack(_mm512_i32gather_pd(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(idx)), ptr, sizeof(double))) { }
166
167 operator __m512d()
168 {
169 return _reg;
170 }
171
173 {
174 _reg = rhs._reg;
175 return *this;
176 }
177
178 double operator[](unsigned i) const
179 {
180 return reinterpret_cast<const double*>(&_reg)[i];
181 }
182
183 Pack operator+(Pack rhs) const
184 {
185 return Pack(_mm512_add_pd(_reg, rhs));
186 }
187
189 {
190 _reg = _mm512_add_pd(_reg, rhs);
191 return *this;
192 }
193
194 Pack operator-(Pack rhs) const
195 {
196 return Pack(_mm512_sub_pd(_reg, rhs));
197 }
198
200 {
201 _reg = _mm512_sub_pd(_reg, rhs);
202 return *this;
203 }
204
205 Pack operator*(Pack rhs) const
206 {
207 return Pack(_mm512_mul_pd(_reg, rhs));
208 }
209
211 {
212 _reg = _mm512_mul_pd(_reg, rhs);
213 return *this;
214 }
215
216 Pack operator/(Pack rhs) const
217 {
218 return Pack(_mm512_div_pd(_reg, rhs));
219 }
220
222 {
223 _reg = _mm512_div_pd(_reg, rhs);
224 return *this;
225 }
226
228 {
229 return *this * Pack(-1);
230 }
231
232 Pack sqrt() const
233 {
234 return _mm512_sqrt_pd(_reg);
235 }
236};
237
238template <>
239class Pack<float> : public SimdBase {
240private:
241 __m512 _reg;
242
243public:
245 using index_t = std::uint32_t;
246
247 static constexpr std::size_t size = 16;
248
249 Pack() = default;
250
251 Pack(__m512 reg):
252 _reg(reg) { }
253
254 Pack(float val):
255 Pack(_mm512_set1_ps(val)) { }
256
257 Pack(double val):
258 Pack(static_cast<float>(val)) { }
259
260 Pack(int val):
261 Pack(static_cast<float>(val)) { }
262
263 Pack(std::size_t val):
264 Pack(static_cast<float>(val)) { }
265
266 Pack(float a, float b, float c, float d, float e, float f, float g, float h, float i, float j, float k, float l, float m, float n, float o, float p):
267 Pack(_mm512_set_ps(p,o,n,m,l,k,j,i,h,g,f,e,d,c,b,a)) { }
268
269 Pack(const float* ptr):
270 Pack(_mm512_loadu_ps(ptr)) { }
271
272 Pack(const float* ptr, const index_t* idx):
273 Pack(_mm512_i32gather_ps(_mm512_loadu_si512(reinterpret_cast<const __m512i*>(idx)), ptr, sizeof(float))) { }
274
275 operator __m512()
276 {
277 return _reg;
278 }
279
281 {
282 _reg = rhs._reg;
283 return *this;
284 }
285
286 float operator[](unsigned i) const
287 {
288 return reinterpret_cast<const float*>(&_reg)[i];
289 }
290
291 Pack operator+(Pack rhs) const
292 {
293 return Pack(_mm512_add_ps(_reg, rhs));
294 }
295
297 {
298 _reg = _mm512_add_ps(_reg, rhs);
299 return *this;
300 }
301
302 Pack operator-(Pack rhs) const
303 {
304 return Pack(_mm512_sub_ps(_reg, rhs));
305 }
306
308 {
309 _reg = _mm512_sub_ps(_reg, rhs);
310 return *this;
311 }
312
313 Pack operator*(Pack rhs) const
314 {
315 return Pack(_mm512_mul_ps(_reg, rhs));
316 }
317
319 {
320 _reg = _mm512_mul_ps(_reg, rhs);
321 return *this;
322 }
323
324 Pack operator/(Pack rhs) const
325 {
326 return Pack(_mm512_div_ps(_reg, rhs));
327 }
328
330 {
331 _reg = _mm512_div_ps(_reg, rhs);
332 return *this;
333 }
334
336 {
337 return *this * Pack(-1);
338 }
339
340 __m512 sqrt() const
341 {
342 return _mm512_sqrt_ps(_reg);
343 }
344};
345
346
347template <typename T>
348Pack<T> pow(Pack<T> base, Pack<T> exp)
349{
350 if constexpr (std::is_same_v<T,double>) {
351 return _mm512_pow_pd(base, exp);
352 } else {
353 return _mm512_pow_ps(base, exp);
354 }
355}
356
357template <typename T>
358Pack<T> min(Pack<T> rhs, Pack<T> lhs)
359{
360 if constexpr (std::is_same_v<T,double>) {
361 return _mm512_min_pd(rhs, lhs);
362 } else {
363 return _mm512_min_ps(rhs, lhs);
364 }
365}
366
367template <typename T>
368Pack<T> max(Pack<T> rhs, Pack<T> lhs)
369{
370 if constexpr (std::is_same_v<T,double>) {
371 return _mm512_max_pd(rhs, lhs);
372 } else {
373 return _mm512_max_ps(rhs, lhs);
374 }
375}
376
377template <typename T>
378Pack<T> fabs(Pack<T> x)
379{
380 if constexpr (std::is_same_v<T,double>) {
381 return _mm512_abs_pd(x);
382 } else {
383 return _mm512_abs_ps(x);
384 }
385}
386
387template <typename T>
388void maskstore(T* target, Mask<T> mask, Pack<T> value);
389
390template <>
391void maskstore<double>(double* target, Mask<double> mask, Pack<double> value)
392{
393 _mm512_mask_storeu_pd(target, mask, value);
394}
395
396template <>
397void maskstore<float>(float* target, Mask<float> mask, Pack<float> value)
398{
399 _mm512_mask_storeu_ps(target, mask, value);
400}
401
402
403template <typename T>
404void store(T* target, Pack<T> value);
405
406template <>
407void store<double>(double* target, Pack<double> value)
408{
409 _mm512_storeu_pd(target, value);
410}
411
412template <>
413void store<float>(float* target, Pack<float> value)
414{
415 _mm512_storeu_ps(target, value);
416}
417
418
419template <typename T>
420void store(T* target, Pack<T> value, const typename Pack<T>::index_t* indices);
421
422template <>
423void store<double>(double* target, Pack<double> value, const Pack<double>::index_t* indices)
424{
425 _mm512_i32scatter_pd(target, _mm256_loadu_si256(reinterpret_cast<const __m256i*>(indices)), value, sizeof(double));
426}
427
428
429template <>
430void store<float>(float* target, Pack<float> value, const Pack<float>::index_t* indices)
431{
432 _mm512_i32scatter_ps(target, _mm512_loadu_si512(reinterpret_cast<const __m512i*>(indices)), value, sizeof(float));
433}
434
435}
436
437}
438
439}
440
441#endif
Mask(storage_t *ptr, std::size_t iCell)
Definition 512.h:65
static storage_t encode(bool *value)
Definition 512.h:50
__mmask8 neg() const
Definition 512.h:76
Mask(bool b0, bool b1, bool b2, bool b3, bool b4, bool b5, bool b6, bool b7)
Definition 512.h:59
std::uint64_t storage_t
Definition 256.h:47
Mask(std::uint8_t *ptr)
Definition 512.h:62
Mask(__mmask16 reg)
Definition 512.h:112
std::uint32_t storage_t
Definition 256.h:110
Mask(std::uint16_t *ptr)
Definition 512.h:106
__mmask16 neg() const
Definition 512.h:120
Mask(storage_t *ptr, std::size_t iCell)
Definition 512.h:109
static storage_t encode(bool *value)
Definition 512.h:97
Pack & operator+=(Pack rhs)
Definition 512.h:188
Pack operator/=(Pack rhs)
Definition 512.h:221
Pack operator/(Pack rhs) const
Definition 512.h:216
Pack operator+(Pack rhs) const
Definition 512.h:183
double operator[](unsigned i) const
Definition 512.h:178
Pack operator-=(Pack rhs)
Definition 512.h:199
Pack operator-(Pack rhs) const
Definition 512.h:194
Pack & operator=(Pack rhs)
Definition 512.h:172
Pack(std::size_t val)
Definition 512.h:155
Pack(const double *ptr, const index_t *idx)
Definition 512.h:164
std::uint32_t index_t
Definition 256.h:177
Pack(const double *ptr)
Definition 512.h:161
Pack operator*(Pack rhs) const
Definition 512.h:205
Pack & operator*=(Pack rhs)
Definition 512.h:210
Pack(double a, double b, double c, double d, double e, double f, double g, double h)
Definition 512.h:158
Pack operator*(Pack rhs) const
Definition 512.h:313
Pack & operator*=(Pack rhs)
Definition 512.h:318
Pack operator-() const
Definition 512.h:335
Pack & operator+=(Pack rhs)
Definition 512.h:296
Pack operator+(Pack rhs) const
Definition 512.h:291
Pack(std::size_t val)
Definition 512.h:263
Pack(const float *ptr)
Definition 512.h:269
Pack & operator-=(Pack rhs)
Definition 512.h:307
Pack operator/(Pack rhs) const
Definition 512.h:324
Pack operator-(Pack rhs) const
Definition 512.h:302
Pack(float a, float b, float c, float d, float e, float f, float g, float h, float i, float j, float k, float l, float m, float n, float o, float p)
Definition 512.h:266
Pack(const float *ptr, const index_t *idx)
Definition 512.h:272
Pack & operator/=(Pack rhs)
Definition 512.h:329
__m512 sqrt() const
Definition 512.h:340
Pack & operator=(Pack rhs)
Definition 512.h:280
float operator[](unsigned i) const
Definition 512.h:286
std::uint32_t index_t
Definition 256.h:287
void maskstore< double >(double *target, Mask< double > mask, Pack< double > value)
Definition 256.h:435
Pack< T > pow(Pack< T > base, Pack< T > exp)
Definition 256.h:395
void store< double >(double *target, Pack< double > value)
Definition 256.h:451
Pack< T > min(Pack< T > rhs, Pack< T > lhs)
Definition 256.h:406
Pack< T > max(Pack< T > rhs, Pack< T > lhs)
Definition 256.h:416
void maskstore< float >(float *target, Mask< float > mask, Pack< float > value)
Definition 256.h:441
void store(T *target, Pack< T > value)
void store< float >(float *target, Pack< float > value)
Definition 256.h:457
Pack< T > fabs(Pack< T > x)
Definition 256.h:426
void maskstore(T *target, Mask< T > mask, Pack< T > value)
Top level namespace for all of OpenLB.