Nektar++
Functions
Vmath::SIMD Namespace Reference

Functions

template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vadd (const size_t n, const T *x, const T *y, T *z)
 Add vector z = x + y. More...
 
template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vmul (const size_t n, const T *x, const T *y, T *z)
 Multiply vector z = x * y. More...
 
template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vvtvp (const size_t n, const T *w, const T *x, const T *y, T *z)
 vvtvp (vector times vector plus vector): z = w*x + y More...
 
template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vvtvm (const size_t n, const T *w, const T *x, const T *y, T *z)
 vvtvm (vector times vector minus vector): z = w*x - y More...
 
template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vvtvvtp (const size_t n, const T *v, const T *w, const T *x, const T *y, T *z)
 vvtvvtp (vector times vector plus vector times vector): More...
 
template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vvtvvtm (const size_t n, const T *v, const T *w, const T *x, const T *y, T *z)
 vvtvvtm (vector times vector minus vector times vector): More...
 
template<class T , class I , typename = typename std::enable_if<std::is_floating_point<T>::value && std::is_integral<I>::value>::type>
void Gathr (const I n, const T *x, const I *y, T *z)
 Gather vector z[i] = x[y[i]]. More...
 

Function Documentation

◆ Gathr()

template<class T , class I , typename = typename std::enable_if<std::is_floating_point<T>::value && std::is_integral<I>::value>::type>
void Vmath::SIMD::Gathr ( const I  n,
const T *  x,
const I *  y,
T *  z 
)

Gather vector z[i] = x[y[i]].

Definition at line 472 of file VmathSIMD.hpp.

473{
474 using namespace tinysimd;
475 using vec_t = simd<T>;
476 using vec_t_i = simd<I, vec_t::width>;
477
478 I cnt = n;
479 // Unroll 4x Vectorized loop
480 while (cnt >= 4 * vec_t::width)
481 {
482 // load index
483 vec_t_i yChunk0, yChunk1, yChunk2, yChunk3;
484 yChunk0.load(y, is_not_aligned);
485 yChunk1.load(y + vec_t_i::width, is_not_aligned);
486 yChunk2.load(y + 2 * vec_t_i::width, is_not_aligned);
487 yChunk3.load(y + 3 * vec_t_i::width, is_not_aligned);
488
489 // z = x[y[i]]
490 vec_t zChunk0, zChunk1, zChunk2, zChunk3;
491 zChunk0.gather(x, yChunk0);
492 zChunk1.gather(x, yChunk1);
493 zChunk2.gather(x, yChunk2);
494 zChunk3.gather(x, yChunk3);
495
496 // store
497 zChunk0.store(z, is_not_aligned);
498 zChunk1.store(z + vec_t_i::width, is_not_aligned);
499 zChunk2.store(z + 2 * vec_t_i::width, is_not_aligned);
500 zChunk3.store(z + 3 * vec_t_i::width, is_not_aligned);
501
502 // update pointers
503 y += 4 * vec_t_i::width;
504 z += 4 * vec_t::width;
505 cnt -= 4 * vec_t::width;
506 }
507
508 // Unroll 2x Vectorized loop
509 while (cnt >= 2 * vec_t::width)
510 {
511 // load index
512 vec_t_i yChunk0, yChunk1;
513 yChunk0.load(y, is_not_aligned);
514 yChunk1.load(y + vec_t_i::width, is_not_aligned);
515
516 // z = x[y[i]]
517 vec_t zChunk0, zChunk1;
518 zChunk0.gather(x, yChunk0);
519 zChunk1.gather(x, yChunk1);
520
521 // store
522 zChunk0.store(z, is_not_aligned);
523 zChunk1.store(z + vec_t_i::width, is_not_aligned);
524
525 // update pointers
526 y += 2 * vec_t_i::width;
527 z += 2 * vec_t::width;
528 cnt -= 2 * vec_t::width;
529 }
530
531 // Vectorized loop
532 while (cnt >= vec_t::width)
533 {
534 // load index
535 vec_t_i yChunk;
536 yChunk.load(y, is_not_aligned);
537
538 // z = x[y[i]]
539 vec_t zChunk;
540 zChunk.gather(x, yChunk);
541
542 // store
543 zChunk.store(z, is_not_aligned);
544
545 // update pointers
546 y += vec_t_i::width;
547 z += vec_t::width;
548 cnt -= vec_t::width;
549 }
550
551 // spillover loop
552 while (cnt)
553 {
554 // z = x[y[i]]
555 *z = *(x + *y);
556 // update pointers
557 ++y;
558 ++z;
559 --cnt;
560 }
561}
simd< NekDouble > vec_t
std::vector< double > z(NPUPPER)
tinysimd::simd< NekDouble > vec_t
static constexpr struct tinysimd::is_not_aligned_t is_not_aligned
typename abi< ScalarType, width >::type simd
Definition: tinysimd.hpp:80

References tinysimd::is_not_aligned, and Nektar::UnitTests::z().

Referenced by Nektar::VmathSIMDUnitTests::BOOST_AUTO_TEST_CASE(), Vmath::Gathr(), and main().

◆ Vadd()

template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vmath::SIMD::Vadd ( const size_t  n,
const T *  x,
const T *  y,
T *  z 
)

Add vector z = x + y.

Definition at line 47 of file VmathSIMD.hpp.

48{
49 using namespace tinysimd;
50 using vec_t = simd<T>;
51
52 size_t cnt = n;
53 // Vectorized loop unroll 4x
54 while (cnt >= 4 * vec_t::width)
55 {
56 // load
57 vec_t yChunk0, yChunk1, yChunk2, yChunk3;
58 yChunk0.load(y, is_not_aligned);
59 yChunk1.load(y + vec_t::width, is_not_aligned);
60 yChunk2.load(y + 2 * vec_t::width, is_not_aligned);
61 yChunk3.load(y + 3 * vec_t::width, is_not_aligned);
62
63 vec_t xChunk0, xChunk1, xChunk2, xChunk3;
64 xChunk0.load(x, is_not_aligned);
65 xChunk1.load(x + vec_t::width, is_not_aligned);
66 xChunk2.load(x + 2 * vec_t::width, is_not_aligned);
67 xChunk3.load(x + 3 * vec_t::width, is_not_aligned);
68
69 // z = x + y
70 vec_t zChunk0 = xChunk0 + yChunk0;
71 vec_t zChunk1 = xChunk1 + yChunk1;
72 vec_t zChunk2 = xChunk2 + yChunk2;
73 vec_t zChunk3 = xChunk3 + yChunk3;
74
75 // store
76 zChunk0.store(z, is_not_aligned);
77 zChunk1.store(z + vec_t::width, is_not_aligned);
78 zChunk2.store(z + 2 * vec_t::width, is_not_aligned);
79 zChunk3.store(z + 3 * vec_t::width, is_not_aligned);
80
81 // update pointers
82 x += 4 * vec_t::width;
83 y += 4 * vec_t::width;
84 z += 4 * vec_t::width;
85 cnt -= 4 * vec_t::width;
86 }
87
88 // Vectorized loop unroll 2x
89 while (cnt >= 2 * vec_t::width)
90 {
91 // load
92 vec_t yChunk0, yChunk1;
93 yChunk0.load(y, is_not_aligned);
94 yChunk1.load(y + vec_t::width, is_not_aligned);
95
96 vec_t xChunk0, xChunk1;
97 xChunk0.load(x, is_not_aligned);
98 xChunk1.load(x + vec_t::width, is_not_aligned);
99
100 // z = x + y
101 vec_t zChunk0 = xChunk0 + yChunk0;
102 vec_t zChunk1 = xChunk1 + yChunk1;
103
104 // store
105 zChunk0.store(z, is_not_aligned);
106 zChunk1.store(z + vec_t::width, is_not_aligned);
107
108 // update pointers
109 x += 2 * vec_t::width;
110 y += 2 * vec_t::width;
111 z += 2 * vec_t::width;
112 cnt -= 2 * vec_t::width;
113 }
114
115 // Vectorized loop
116 while (cnt >= vec_t::width)
117 {
118 // load
119 vec_t yChunk;
120 yChunk.load(y, is_not_aligned);
121 vec_t xChunk;
122 xChunk.load(x, is_not_aligned);
123
124 // z = x + y
125 vec_t zChunk = xChunk + yChunk;
126
127 // store
128 zChunk.store(z, is_not_aligned);
129
130 // update pointers
131 x += vec_t::width;
132 y += vec_t::width;
133 z += vec_t::width;
134 cnt -= vec_t::width;
135 }
136
137 // spillover loop
138 while (cnt)
139 {
140 // z = x + y;
141 *z = (*x) + (*y);
142 // update pointers
143 ++x;
144 ++y;
145 ++z;
146 --cnt;
147 }
148}

References tinysimd::is_not_aligned, and Nektar::UnitTests::z().

Referenced by Nektar::VmathSIMDUnitTests::BOOST_AUTO_TEST_CASE(), and Vmath::Vadd().

◆ Vmul()

template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vmath::SIMD::Vmul ( const size_t  n,
const T *  x,
const T *  y,
T *  z 
)

Multiply vector z = x * y.

Definition at line 153 of file VmathSIMD.hpp.

154{
155 using namespace tinysimd;
156 using vec_t = simd<T>;
157
158 size_t cnt = n;
159 // Vectorized loop unroll 4x
160 while (cnt >= 4 * vec_t::width)
161 {
162 // load
163 vec_t yChunk0, yChunk1, yChunk2, yChunk3;
164 yChunk0.load(y, is_not_aligned);
165 yChunk1.load(y + vec_t::width, is_not_aligned);
166 yChunk2.load(y + 2 * vec_t::width, is_not_aligned);
167 yChunk3.load(y + 3 * vec_t::width, is_not_aligned);
168
169 vec_t xChunk0, xChunk1, xChunk2, xChunk3;
170 xChunk0.load(x, is_not_aligned);
171 xChunk1.load(x + vec_t::width, is_not_aligned);
172 xChunk2.load(x + 2 * vec_t::width, is_not_aligned);
173 xChunk3.load(x + 3 * vec_t::width, is_not_aligned);
174
175 // z = x * y
176 vec_t zChunk0 = xChunk0 * yChunk0;
177 vec_t zChunk1 = xChunk1 * yChunk1;
178 vec_t zChunk2 = xChunk2 * yChunk2;
179 vec_t zChunk3 = xChunk3 * yChunk3;
180
181 // store
182 zChunk0.store(z, is_not_aligned);
183 zChunk1.store(z + vec_t::width, is_not_aligned);
184 zChunk2.store(z + 2 * vec_t::width, is_not_aligned);
185 zChunk3.store(z + 3 * vec_t::width, is_not_aligned);
186
187 // update pointers
188 x += 4 * vec_t::width;
189 y += 4 * vec_t::width;
190 z += 4 * vec_t::width;
191 cnt -= 4 * vec_t::width;
192 }
193
194 // Vectorized loop unroll 2x
195 while (cnt >= 2 * vec_t::width)
196 {
197 // load
198 vec_t yChunk0, yChunk1;
199 yChunk0.load(y, is_not_aligned);
200 yChunk1.load(y + vec_t::width, is_not_aligned);
201
202 vec_t xChunk0, xChunk1;
203 xChunk0.load(x, is_not_aligned);
204 xChunk1.load(x + vec_t::width, is_not_aligned);
205
206 // z = x * y
207 vec_t zChunk0 = xChunk0 * yChunk0;
208 vec_t zChunk1 = xChunk1 * yChunk1;
209
210 // store
211 zChunk0.store(z, is_not_aligned);
212 zChunk1.store(z + vec_t::width, is_not_aligned);
213
214 // update pointers
215 x += 2 * vec_t::width;
216 y += 2 * vec_t::width;
217 z += 2 * vec_t::width;
218 cnt -= 2 * vec_t::width;
219 }
220
221 // Vectorized loop
222 while (cnt >= vec_t::width)
223 {
224 // load
225 vec_t yChunk;
226 yChunk.load(y, is_not_aligned);
227 vec_t xChunk;
228 xChunk.load(x, is_not_aligned);
229
230 // z = x * y
231 vec_t zChunk = xChunk * yChunk;
232
233 // store
234 zChunk.store(z, is_not_aligned);
235
236 // update pointers
237 x += vec_t::width;
238 y += vec_t::width;
239 z += vec_t::width;
240 cnt -= vec_t::width;
241 }
242
243 // spillover loop
244 while (cnt)
245 {
246 // z = x * y;
247 *z = (*x) * (*y);
248 // update pointers
249 ++x;
250 ++y;
251 ++z;
252 --cnt;
253 }
254}

References tinysimd::is_not_aligned, and Nektar::UnitTests::z().

Referenced by Nektar::VmathSIMDUnitTests::BOOST_AUTO_TEST_CASE(), and Vmath::Vmul().

◆ Vvtvm()

template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vmath::SIMD::Vvtvm ( const size_t  n,
const T *  w,
const T *  x,
const T *  y,
T *  z 
)

vvtvm (vector times vector minus vector): z = w*x - y

Definition at line 307 of file VmathSIMD.hpp.

308{
309 using namespace tinysimd;
310 using vec_t = simd<T>;
311
312 size_t cnt = n;
313 // Vectorized loop
314 while (cnt >= vec_t::width)
315 {
316 // load
317 vec_t wChunk;
318 wChunk.load(w, is_not_aligned);
319 vec_t yChunk;
320 yChunk.load(y, is_not_aligned);
321 vec_t xChunk;
322 xChunk.load(x, is_not_aligned);
323
324 // z = w * x - y
325 vec_t zChunk = wChunk * xChunk - yChunk;
326
327 // store
328 zChunk.store(z, is_not_aligned);
329
330 // update pointers
331 w += vec_t::width;
332 x += vec_t::width;
333 y += vec_t::width;
334 z += vec_t::width;
335 cnt -= vec_t::width;
336 }
337
338 // spillover loop
339 while (cnt)
340 {
341 // z = w * x - y;
342 *z = (*w) * (*x) - (*y);
343 // update pointers
344 ++w;
345 ++x;
346 ++y;
347 ++z;
348 --cnt;
349 }
350}
std::vector< double > w(NPUPPER)

References tinysimd::is_not_aligned, Nektar::UnitTests::w(), and Nektar::UnitTests::z().

Referenced by Vmath::Vvtvm().

◆ Vvtvp()

template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vmath::SIMD::Vvtvp ( const size_t  n,
const T *  w,
const T *  x,
const T *  y,
T *  z 
)

vvtvp (vector times vector plus vector): z = w*x + y

Definition at line 259 of file VmathSIMD.hpp.

260{
261 using namespace tinysimd;
262 using vec_t = simd<T>;
263
264 size_t cnt = n;
265 // Vectorized loop
266 while (cnt >= vec_t::width)
267 {
268 // load
269 vec_t wChunk;
270 wChunk.load(w, is_not_aligned);
271 vec_t yChunk;
272 yChunk.load(y, is_not_aligned);
273 vec_t xChunk;
274 xChunk.load(x, is_not_aligned);
275
276 // z = w * x + y
277 vec_t zChunk = wChunk * xChunk + yChunk;
278
279 // store
280 zChunk.store(z, is_not_aligned);
281
282 // update pointers
283 w += vec_t::width;
284 x += vec_t::width;
285 y += vec_t::width;
286 z += vec_t::width;
287 cnt -= vec_t::width;
288 }
289
290 // spillover loop
291 while (cnt)
292 {
293 // z = w * x + y;
294 *z = (*w) * (*x) + (*y);
295 // update pointers
296 ++w;
297 ++x;
298 ++y;
299 ++z;
300 --cnt;
301 }
302}

References tinysimd::is_not_aligned, Nektar::UnitTests::w(), and Nektar::UnitTests::z().

Referenced by Nektar::VmathSIMDUnitTests::BOOST_AUTO_TEST_CASE(), and Vmath::Vvtvp().

◆ Vvtvvtm()

template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vmath::SIMD::Vvtvvtm ( const size_t  n,
const T *  v,
const T *  w,
const T *  x,
const T *  y,
T *  z 
)
inline

vvtvvtm (vector times vector minus vector times vector):

Definition at line 414 of file VmathSIMD.hpp.

416{
417 using namespace tinysimd;
418 using vec_t = simd<T>;
419
420 size_t cnt = n;
421 // Vectorized loop
422 while (cnt >= vec_t::width)
423 {
424 // load
425 vec_t vChunk;
426 vChunk.load(v, is_not_aligned);
427 vec_t wChunk;
428 wChunk.load(w, is_not_aligned);
429 vec_t yChunk;
430 yChunk.load(y, is_not_aligned);
431 vec_t xChunk;
432 xChunk.load(x, is_not_aligned);
433
434 // z = v * w + x * y;
435 vec_t z1Chunk = vChunk * wChunk;
436 vec_t z2Chunk = xChunk * yChunk;
437 vec_t zChunk = z1Chunk - z2Chunk;
438
439 // store
440 zChunk.store(z, is_not_aligned);
441
442 // update pointers
443 v += vec_t::width;
444 w += vec_t::width;
445 x += vec_t::width;
446 y += vec_t::width;
447 z += vec_t::width;
448 cnt -= vec_t::width;
449 }
450
451 // spillover loop
452 while (cnt)
453 {
454 // z = v * w + x * y;
455 T z1 = (*v) * (*w);
456 T z2 = (*x) * (*y);
457 *z = z1 - z2;
458 // update pointers
459 ++v;
460 ++w;
461 ++x;
462 ++y;
463 ++z;
464 --cnt;
465 }
466}

References tinysimd::is_not_aligned, Nektar::UnitTests::w(), and Nektar::UnitTests::z().

Referenced by Vmath::Vvtvvtm().

◆ Vvtvvtp()

template<class T , typename = typename std::enable_if< std::is_floating_point<T>::value>::type>
void Vmath::SIMD::Vvtvvtp ( const size_t  n,
const T *  v,
const T *  w,
const T *  x,
const T *  y,
T *  z 
)
inline

vvtvvtp (vector times vector plus vector times vector):

Definition at line 356 of file VmathSIMD.hpp.

358{
359 using namespace tinysimd;
360 using vec_t = simd<T>;
361
362 size_t cnt = n;
363 // Vectorized loop
364 while (cnt >= vec_t::width)
365 {
366 // load
367 vec_t vChunk;
368 vChunk.load(v, is_not_aligned);
369 vec_t wChunk;
370 wChunk.load(w, is_not_aligned);
371 vec_t yChunk;
372 yChunk.load(y, is_not_aligned);
373 vec_t xChunk;
374 xChunk.load(x, is_not_aligned);
375
376 // z = v * w + x * y;
377 vec_t z1Chunk = vChunk * wChunk;
378 vec_t z2Chunk = xChunk * yChunk;
379 vec_t zChunk = z1Chunk + z2Chunk;
380
381 // store
382 zChunk.store(z, is_not_aligned);
383
384 // update pointers
385 v += vec_t::width;
386 w += vec_t::width;
387 x += vec_t::width;
388 y += vec_t::width;
389 z += vec_t::width;
390 cnt -= vec_t::width;
391 }
392
393 // spillover loop
394 while (cnt)
395 {
396 // z = v * w + x * y;
397 T z1 = (*v) * (*w);
398 T z2 = (*x) * (*y);
399 *z = z1 + z2;
400 // update pointers
401 ++v;
402 ++w;
403 ++x;
404 ++y;
405 ++z;
406 --cnt;
407 }
408}

References tinysimd::is_not_aligned, Nektar::UnitTests::w(), and Nektar::UnitTests::z().

Referenced by Nektar::VmathSIMDUnitTests::BOOST_AUTO_TEST_CASE(), and Vmath::Vvtvvtp().