Nektar++
Functions
Vmath::SIMD Namespace Reference

Functions

template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vadd (const size_t n, const T *x, const T *y, T *z)
 Add vector z = x + y. More...
 
template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vmul (const size_t n, const T *x, const T *y, T *z)
 Multiply vector z = x * y. More...
 
template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vvtvp (const size_t n, const T *w, const T *x, const T *y, T *z)
 vvtvp (vector times vector plus vector): z = w*x + y More...
 
template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vvtvm (const size_t n, const T *w, const T *x, const T *y, T *z)
 vvtvm (vector times vector minus vector): z = w*x - y More...
 
template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vvtvvtp (const size_t n, const T *v, const T *w, const T *x, const T *y, T *z)
 vvtvvtp (vector times vector plus vector times vector): More...
 
template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vvtvvtm (const size_t n, const T *v, const T *w, const T *x, const T *y, T *z)
 vvtvvtm (vector times vector minus vector times vector): More...
 
template<class T , class I , typename = typename std::enable_if<std::is_floating_point<T>::value && std::is_integral<I>::value>::type>
void Gathr (const I n, const T *x, const I *y, T *z)
 Gather vector z[i] = x[y[i]]. More...
 

Function Documentation

◆ Gathr()

template<class T , class I , typename = typename std::enable_if<std::is_floating_point<T>::value && std::is_integral<I>::value>::type>
void Vmath::SIMD::Gathr ( const I  n,
const T *  x,
const I *  y,
T *  z 
)

Gather vector z[i] = x[y[i]].

Definition at line 474 of file VmathSIMD.hpp.

475{
476 using namespace tinysimd;
477 using vec_t = simd<T>;
478 using vec_t_i = simd<I, vec_t::width>;
479
480 I cnt = n;
481 // Unroll 4x Vectorized loop
482 while (cnt >= 4 * vec_t::width)
483 {
484 // load index
485 vec_t_i yChunk0, yChunk1, yChunk2, yChunk3;
486 yChunk0.load(y, is_not_aligned);
487 yChunk1.load(y + vec_t_i::width, is_not_aligned);
488 yChunk2.load(y + 2 * vec_t_i::width, is_not_aligned);
489 yChunk3.load(y + 3 * vec_t_i::width, is_not_aligned);
490
491 // z = x[y[i]]
492 vec_t zChunk0, zChunk1, zChunk2, zChunk3;
493 zChunk0.gather(x, yChunk0);
494 zChunk1.gather(x, yChunk1);
495 zChunk2.gather(x, yChunk2);
496 zChunk3.gather(x, yChunk3);
497
498 // store
499 zChunk0.store(z, is_not_aligned);
500 zChunk1.store(z + vec_t_i::width, is_not_aligned);
501 zChunk2.store(z + 2 * vec_t_i::width, is_not_aligned);
502 zChunk3.store(z + 3 * vec_t_i::width, is_not_aligned);
503
504 // update pointers
505 y += 4 * vec_t_i::width;
506 z += 4 * vec_t::width;
507 cnt -= 4 * vec_t::width;
508 }
509
510 // Unroll 2x Vectorized loop
511 while (cnt >= 2 * vec_t::width)
512 {
513 // load index
514 vec_t_i yChunk0, yChunk1;
515 yChunk0.load(y, is_not_aligned);
516 yChunk1.load(y + vec_t_i::width, is_not_aligned);
517
518 // z = x[y[i]]
519 vec_t zChunk0, zChunk1;
520 zChunk0.gather(x, yChunk0);
521 zChunk1.gather(x, yChunk1);
522
523 // store
524 zChunk0.store(z, is_not_aligned);
525 zChunk1.store(z + vec_t_i::width, is_not_aligned);
526
527 // update pointers
528 y += 2 * vec_t_i::width;
529 z += 2 * vec_t::width;
530 cnt -= 2 * vec_t::width;
531 }
532
533 // Vectorized loop
534 while (cnt >= vec_t::width)
535 {
536 // load index
537 vec_t_i yChunk;
538 yChunk.load(y, is_not_aligned);
539
540 // z = x[y[i]]
541 vec_t zChunk;
542 zChunk.gather(x, yChunk);
543
544 // store
545 zChunk.store(z, is_not_aligned);
546
547 // update pointers
548 y += vec_t_i::width;
549 z += vec_t::width;
550 cnt -= vec_t::width;
551 }
552
553 // spillover loop
554 while (cnt)
555 {
556 // z = x[y[i]]
557 *z = *(x + *y);
558 // update pointers
559 ++y;
560 ++z;
561 --cnt;
562 }
563}
simd< NekDouble > vec_t
std::vector< double > z(NPUPPER)
tinysimd::simd< NekDouble > vec_t
static constexpr struct tinysimd::is_not_aligned_t is_not_aligned
typename abi< ScalarType, width >::type simd
Definition: tinysimd.hpp:80

References tinysimd::is_not_aligned, and Nektar::UnitTests::z().

Referenced by Nektar::VmathSIMDUnitTests::BOOST_AUTO_TEST_CASE(), Vmath::Gathr(), and main().

◆ Vadd()

template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vmath::SIMD::Vadd ( const size_t  n,
const T *  x,
const T *  y,
T *  z 
)

Add vector z = x + y.

Definition at line 49 of file VmathSIMD.hpp.

50{
51 using namespace tinysimd;
52 using vec_t = simd<T>;
53
54 size_t cnt = n;
55 // Vectorized loop unroll 4x
56 while (cnt >= 4 * vec_t::width)
57 {
58 // load
59 vec_t yChunk0, yChunk1, yChunk2, yChunk3;
60 yChunk0.load(y, is_not_aligned);
61 yChunk1.load(y + vec_t::width, is_not_aligned);
62 yChunk2.load(y + 2 * vec_t::width, is_not_aligned);
63 yChunk3.load(y + 3 * vec_t::width, is_not_aligned);
64
65 vec_t xChunk0, xChunk1, xChunk2, xChunk3;
66 xChunk0.load(x, is_not_aligned);
67 xChunk1.load(x + vec_t::width, is_not_aligned);
68 xChunk2.load(x + 2 * vec_t::width, is_not_aligned);
69 xChunk3.load(x + 3 * vec_t::width, is_not_aligned);
70
71 // z = x + y
72 vec_t zChunk0 = xChunk0 + yChunk0;
73 vec_t zChunk1 = xChunk1 + yChunk1;
74 vec_t zChunk2 = xChunk2 + yChunk2;
75 vec_t zChunk3 = xChunk3 + yChunk3;
76
77 // store
78 zChunk0.store(z, is_not_aligned);
79 zChunk1.store(z + vec_t::width, is_not_aligned);
80 zChunk2.store(z + 2 * vec_t::width, is_not_aligned);
81 zChunk3.store(z + 3 * vec_t::width, is_not_aligned);
82
83 // update pointers
84 x += 4 * vec_t::width;
85 y += 4 * vec_t::width;
86 z += 4 * vec_t::width;
87 cnt -= 4 * vec_t::width;
88 }
89
90 // Vectorized loop unroll 2x
91 while (cnt >= 2 * vec_t::width)
92 {
93 // load
94 vec_t yChunk0, yChunk1;
95 yChunk0.load(y, is_not_aligned);
96 yChunk1.load(y + vec_t::width, is_not_aligned);
97
98 vec_t xChunk0, xChunk1;
99 xChunk0.load(x, is_not_aligned);
100 xChunk1.load(x + vec_t::width, is_not_aligned);
101
102 // z = x + y
103 vec_t zChunk0 = xChunk0 + yChunk0;
104 vec_t zChunk1 = xChunk1 + yChunk1;
105
106 // store
107 zChunk0.store(z, is_not_aligned);
108 zChunk1.store(z + vec_t::width, is_not_aligned);
109
110 // update pointers
111 x += 2 * vec_t::width;
112 y += 2 * vec_t::width;
113 z += 2 * vec_t::width;
114 cnt -= 2 * vec_t::width;
115 }
116
117 // Vectorized loop
118 while (cnt >= vec_t::width)
119 {
120 // load
121 vec_t yChunk;
122 yChunk.load(y, is_not_aligned);
123 vec_t xChunk;
124 xChunk.load(x, is_not_aligned);
125
126 // z = x + y
127 vec_t zChunk = xChunk + yChunk;
128
129 // store
130 zChunk.store(z, is_not_aligned);
131
132 // update pointers
133 x += vec_t::width;
134 y += vec_t::width;
135 z += vec_t::width;
136 cnt -= vec_t::width;
137 }
138
139 // spillover loop
140 while (cnt)
141 {
142 // z = x + y;
143 *z = (*x) + (*y);
144 // update pointers
145 ++x;
146 ++y;
147 ++z;
148 --cnt;
149 }
150}

References tinysimd::is_not_aligned, and Nektar::UnitTests::z().

Referenced by Nektar::VmathSIMDUnitTests::BOOST_AUTO_TEST_CASE(), and Vmath::Vadd().

◆ Vmul()

template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vmath::SIMD::Vmul ( const size_t  n,
const T *  x,
const T *  y,
T *  z 
)

Multiply vector z = x * y.

Definition at line 155 of file VmathSIMD.hpp.

156{
157 using namespace tinysimd;
158 using vec_t = simd<T>;
159
160 size_t cnt = n;
161 // Vectorized loop unroll 4x
162 while (cnt >= 4 * vec_t::width)
163 {
164 // load
165 vec_t yChunk0, yChunk1, yChunk2, yChunk3;
166 yChunk0.load(y, is_not_aligned);
167 yChunk1.load(y + vec_t::width, is_not_aligned);
168 yChunk2.load(y + 2 * vec_t::width, is_not_aligned);
169 yChunk3.load(y + 3 * vec_t::width, is_not_aligned);
170
171 vec_t xChunk0, xChunk1, xChunk2, xChunk3;
172 xChunk0.load(x, is_not_aligned);
173 xChunk1.load(x + vec_t::width, is_not_aligned);
174 xChunk2.load(x + 2 * vec_t::width, is_not_aligned);
175 xChunk3.load(x + 3 * vec_t::width, is_not_aligned);
176
177 // z = x * y
178 vec_t zChunk0 = xChunk0 * yChunk0;
179 vec_t zChunk1 = xChunk1 * yChunk1;
180 vec_t zChunk2 = xChunk2 * yChunk2;
181 vec_t zChunk3 = xChunk3 * yChunk3;
182
183 // store
184 zChunk0.store(z, is_not_aligned);
185 zChunk1.store(z + vec_t::width, is_not_aligned);
186 zChunk2.store(z + 2 * vec_t::width, is_not_aligned);
187 zChunk3.store(z + 3 * vec_t::width, is_not_aligned);
188
189 // update pointers
190 x += 4 * vec_t::width;
191 y += 4 * vec_t::width;
192 z += 4 * vec_t::width;
193 cnt -= 4 * vec_t::width;
194 }
195
196 // Vectorized loop unroll 2x
197 while (cnt >= 2 * vec_t::width)
198 {
199 // load
200 vec_t yChunk0, yChunk1;
201 yChunk0.load(y, is_not_aligned);
202 yChunk1.load(y + vec_t::width, is_not_aligned);
203
204 vec_t xChunk0, xChunk1;
205 xChunk0.load(x, is_not_aligned);
206 xChunk1.load(x + vec_t::width, is_not_aligned);
207
208 // z = x * y
209 vec_t zChunk0 = xChunk0 * yChunk0;
210 vec_t zChunk1 = xChunk1 * yChunk1;
211
212 // store
213 zChunk0.store(z, is_not_aligned);
214 zChunk1.store(z + vec_t::width, is_not_aligned);
215
216 // update pointers
217 x += 2 * vec_t::width;
218 y += 2 * vec_t::width;
219 z += 2 * vec_t::width;
220 cnt -= 2 * vec_t::width;
221 }
222
223 // Vectorized loop
224 while (cnt >= vec_t::width)
225 {
226 // load
227 vec_t yChunk;
228 yChunk.load(y, is_not_aligned);
229 vec_t xChunk;
230 xChunk.load(x, is_not_aligned);
231
232 // z = x * y
233 vec_t zChunk = xChunk * yChunk;
234
235 // store
236 zChunk.store(z, is_not_aligned);
237
238 // update pointers
239 x += vec_t::width;
240 y += vec_t::width;
241 z += vec_t::width;
242 cnt -= vec_t::width;
243 }
244
245 // spillover loop
246 while (cnt)
247 {
248 // z = x * y;
249 *z = (*x) * (*y);
250 // update pointers
251 ++x;
252 ++y;
253 ++z;
254 --cnt;
255 }
256}

References tinysimd::is_not_aligned, and Nektar::UnitTests::z().

Referenced by Nektar::VmathSIMDUnitTests::BOOST_AUTO_TEST_CASE(), and Vmath::Vmul().

◆ Vvtvm()

template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vmath::SIMD::Vvtvm ( const size_t  n,
const T *  w,
const T *  x,
const T *  y,
T *  z 
)

vvtvm (vector times vector minus vector): z = w*x - y

Definition at line 309 of file VmathSIMD.hpp.

310{
311 using namespace tinysimd;
312 using vec_t = simd<T>;
313
314 size_t cnt = n;
315 // Vectorized loop
316 while (cnt >= vec_t::width)
317 {
318 // load
319 vec_t wChunk;
320 wChunk.load(w, is_not_aligned);
321 vec_t yChunk;
322 yChunk.load(y, is_not_aligned);
323 vec_t xChunk;
324 xChunk.load(x, is_not_aligned);
325
326 // z = w * x - y
327 vec_t zChunk = wChunk * xChunk - yChunk;
328
329 // store
330 zChunk.store(z, is_not_aligned);
331
332 // update pointers
333 w += vec_t::width;
334 x += vec_t::width;
335 y += vec_t::width;
336 z += vec_t::width;
337 cnt -= vec_t::width;
338 }
339
340 // spillover loop
341 while (cnt)
342 {
343 // z = w * x - y;
344 *z = (*w) * (*x) - (*y);
345 // update pointers
346 ++w;
347 ++x;
348 ++y;
349 ++z;
350 --cnt;
351 }
352}
std::vector< double > w(NPUPPER)

References tinysimd::is_not_aligned, Nektar::UnitTests::w(), and Nektar::UnitTests::z().

Referenced by Vmath::Vvtvm().

◆ Vvtvp()

template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vmath::SIMD::Vvtvp ( const size_t  n,
const T *  w,
const T *  x,
const T *  y,
T *  z 
)

vvtvp (vector times vector plus vector): z = w*x + y

Definition at line 261 of file VmathSIMD.hpp.

262{
263 using namespace tinysimd;
264 using vec_t = simd<T>;
265
266 size_t cnt = n;
267 // Vectorized loop
268 while (cnt >= vec_t::width)
269 {
270 // load
271 vec_t wChunk;
272 wChunk.load(w, is_not_aligned);
273 vec_t yChunk;
274 yChunk.load(y, is_not_aligned);
275 vec_t xChunk;
276 xChunk.load(x, is_not_aligned);
277
278 // z = w * x + y
279 vec_t zChunk = wChunk * xChunk + yChunk;
280
281 // store
282 zChunk.store(z, is_not_aligned);
283
284 // update pointers
285 w += vec_t::width;
286 x += vec_t::width;
287 y += vec_t::width;
288 z += vec_t::width;
289 cnt -= vec_t::width;
290 }
291
292 // spillover loop
293 while (cnt)
294 {
295 // z = w * x + y;
296 *z = (*w) * (*x) + (*y);
297 // update pointers
298 ++w;
299 ++x;
300 ++y;
301 ++z;
302 --cnt;
303 }
304}

References tinysimd::is_not_aligned, Nektar::UnitTests::w(), and Nektar::UnitTests::z().

Referenced by Nektar::VmathSIMDUnitTests::BOOST_AUTO_TEST_CASE(), and Vmath::Vvtvp().

◆ Vvtvvtm()

template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vmath::SIMD::Vvtvvtm ( const size_t  n,
const T *  v,
const T *  w,
const T *  x,
const T *  y,
T *  z 
)
inline

vvtvvtm (vector times vector minus vector times vector):

Definition at line 416 of file VmathSIMD.hpp.

418{
419 using namespace tinysimd;
420 using vec_t = simd<T>;
421
422 size_t cnt = n;
423 // Vectorized loop
424 while (cnt >= vec_t::width)
425 {
426 // load
427 vec_t vChunk;
428 vChunk.load(v, is_not_aligned);
429 vec_t wChunk;
430 wChunk.load(w, is_not_aligned);
431 vec_t yChunk;
432 yChunk.load(y, is_not_aligned);
433 vec_t xChunk;
434 xChunk.load(x, is_not_aligned);
435
436 // z = v * w + x * y;
437 vec_t z1Chunk = vChunk * wChunk;
438 vec_t z2Chunk = xChunk * yChunk;
439 vec_t zChunk = z1Chunk - z2Chunk;
440
441 // store
442 zChunk.store(z, is_not_aligned);
443
444 // update pointers
445 v += vec_t::width;
446 w += vec_t::width;
447 x += vec_t::width;
448 y += vec_t::width;
449 z += vec_t::width;
450 cnt -= vec_t::width;
451 }
452
453 // spillover loop
454 while (cnt)
455 {
456 // z = v * w + x * y;
457 T z1 = (*v) * (*w);
458 T z2 = (*x) * (*y);
459 *z = z1 - z2;
460 // update pointers
461 ++v;
462 ++w;
463 ++x;
464 ++y;
465 ++z;
466 --cnt;
467 }
468}

References tinysimd::is_not_aligned, Nektar::UnitTests::w(), and Nektar::UnitTests::z().

Referenced by Vmath::Vvtvvtm().

◆ Vvtvvtp()

template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vmath::SIMD::Vvtvvtp ( const size_t  n,
const T *  v,
const T *  w,
const T *  x,
const T *  y,
T *  z 
)
inline

vvtvvtp (vector times vector plus vector times vector):

Definition at line 358 of file VmathSIMD.hpp.

360{
361 using namespace tinysimd;
362 using vec_t = simd<T>;
363
364 size_t cnt = n;
365 // Vectorized loop
366 while (cnt >= vec_t::width)
367 {
368 // load
369 vec_t vChunk;
370 vChunk.load(v, is_not_aligned);
371 vec_t wChunk;
372 wChunk.load(w, is_not_aligned);
373 vec_t yChunk;
374 yChunk.load(y, is_not_aligned);
375 vec_t xChunk;
376 xChunk.load(x, is_not_aligned);
377
378 // z = v * w + x * y;
379 vec_t z1Chunk = vChunk * wChunk;
380 vec_t z2Chunk = xChunk * yChunk;
381 vec_t zChunk = z1Chunk + z2Chunk;
382
383 // store
384 zChunk.store(z, is_not_aligned);
385
386 // update pointers
387 v += vec_t::width;
388 w += vec_t::width;
389 x += vec_t::width;
390 y += vec_t::width;
391 z += vec_t::width;
392 cnt -= vec_t::width;
393 }
394
395 // spillover loop
396 while (cnt)
397 {
398 // z = v * w + x * y;
399 T z1 = (*v) * (*w);
400 T z2 = (*x) * (*y);
401 *z = z1 + z2;
402 // update pointers
403 ++v;
404 ++w;
405 ++x;
406 ++y;
407 ++z;
408 --cnt;
409 }
410}

References tinysimd::is_not_aligned, Nektar::UnitTests::w(), and Nektar::UnitTests::z().

Referenced by Nektar::VmathSIMDUnitTests::BOOST_AUTO_TEST_CASE(), and Vmath::Vvtvvtp().