Nektar++
TestSimdLibDouble.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: TestSimdLibDouble.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// Permission is hereby granted, free of charge, to any person obtaining a
14// copy of this software and associated documentation files (the "Software"),
15// to deal in the Software without restriction, including without limitation
16// the rights to use, copy, modify, merge, publish, distribute, sublicense,
17// and/or sell copies of the Software, and to permit persons to whom the
18// Software is furnished to do so, subject to the following conditions:
19//
// The above copyright notice and this permission notice shall be included
21// in all copies or substantial portions of the Software.
22//
23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29// DEALINGS IN THE SOFTWARE.
30//
31// Description:
32//
33///////////////////////////////////////////////////////////////////////////////
34
37
#include <LibUtilities/SimdLib/io.hpp>
#include <LibUtilities/SimdLib/tinysimd.hpp>

#include <boost/core/ignore_unused.hpp>
#include <boost/test/tools/floating_point_comparison.hpp>
#include <boost/test/unit_test.hpp>

#include <array>
#include <cmath>
#include <cstdint>
#include <iostream>
45
46// define types in use and number of lanes
47#define NUM_LANES_64BITS 1
48#define ALIGNMENT 8
49#define USING_SCALAR
50#if defined(__x86_64__)
51#if defined(__AVX512F__) && defined(NEKTAR_ENABLE_SIMD_AVX512)
52#define USING_AVX512
53#undef NUM_LANES_64BITS
54#define NUM_LANES_64BITS 8
55#undef ALIGNMENT
56#define ALIGNMENT 64
57#undef USING_SCALAR
58#elif defined(__AVX2__) && defined(NEKTAR_ENABLE_SIMD_AVX2)
59#define USING_AVX2
60#undef NUM_LANES_64BITS
61#define NUM_LANES_64BITS 4
62#undef ALIGNMENT
63#define ALIGNMENT 32
64#undef USING_SCALAR
65#elif defined(__SSE2__) && defined(NEKTAR_ENABLE_SIMD_SSE2)
66#define USING_SSE2
67#undef NUM_LANES_64BITS
68#define NUM_LANES_64BITS 2
69#undef ALIGNMENT
70#define ALIGNMENT 16
71#undef USING_SCALAR
72#endif
73#endif
74#if defined(__ARM_FEATURE_SVE) && defined(NEKTAR_ENABLE_SIMD_SVE)
75#define USING_SVE
76#undef NUM_LANES_64BITS
77#define NUM_LANES_64BITS __ARM_FEATURE_SVE_BITS / 64
78#undef ALIGNMENT
79#define ALIGNMENT __ARM_FEATURE_SVE_BITS / 8
80#undef USING_SCALAR
81#endif
82
83namespace Nektar
84{
85namespace SimdLibTests
86{
87using namespace tinysimd;
89
90BOOST_AUTO_TEST_CASE(SimdLibDouble_width_alignment)
91{
92 std::size_t width, alignment;
93
94#if defined(USING_SCALAR)
95 std::cout << "scalar double" << std::endl;
96#endif
97#if defined(USING_AVX2)
98 std::cout << "avx2 double" << std::endl;
99#endif
100#if defined(USING_AVX512)
101 std::cout << "avx512 double" << std::endl;
102#endif
103#if defined(USING_SVE)
104 std::cout << "sve double" << std::endl;
105#endif
106
107 // double
108 width = simd<double>::width;
109 alignment = simd<double>::alignment;
110 BOOST_CHECK_EQUAL(width, NUM_LANES_64BITS);
111 BOOST_CHECK_EQUAL(alignment, ALIGNMENT);
112 // std::int32_t index forcing # of lanes
115 BOOST_CHECK_EQUAL(width, NUM_LANES_64BITS);
116 // std::int64_t index forcing # of lanes
119 BOOST_CHECK_EQUAL(width, NUM_LANES_64BITS);
120 BOOST_CHECK_EQUAL(alignment, ALIGNMENT);
121 // std::int64_t default index
124 BOOST_CHECK_EQUAL(width, NUM_LANES_64BITS);
125 BOOST_CHECK_EQUAL(alignment, ALIGNMENT);
126}
127
// Verifies the detection traits used internally by tinysimd: the
// has_* helpers spot the static members every SIMD wrapper exposes,
// and the is_vector* predicates classify wrapper vs. plain scalar.
BOOST_AUTO_TEST_CASE(SimdLibDouble_type_traits)
{
    {
        using namespace details;

        // A plain double has none of the wrapper's static members.
        BOOST_CHECK_EQUAL(has_width<double>::value, false);
        BOOST_CHECK_EQUAL(has_width<vec_t>::value, true);

        BOOST_CHECK_EQUAL(has_alignment<double>::value, false);
        BOOST_CHECK_EQUAL(has_alignment<vec_t>::value, true);

        BOOST_CHECK_EQUAL(has_scalarType<double>::value, false);
        BOOST_CHECK_EQUAL(has_scalarType<vec_t>::value, true);
    }

    // is_vector accepts any SIMD wrapper type, never a raw scalar.
    BOOST_CHECK_EQUAL(is_vector<double>::value, false);
    BOOST_CHECK_EQUAL(is_vector<vec_t>::value, true);

    // is_vector_floating_point additionally requires a floating-point
    // scalar type, so an integer vector must be rejected.
    BOOST_CHECK_EQUAL(is_vector_floating_point<double>::value, false);
    BOOST_CHECK_EQUAL(is_vector_floating_point<simd<int>>::value, false);
    BOOST_CHECK_EQUAL(is_vector_floating_point<vec_t>::value, true);
}
150
151BOOST_AUTO_TEST_CASE(SimdLibDouble_mem_size)
152{
153 BOOST_CHECK_EQUAL(sizeof(vec_t), sizeof(double) * vec_t::width);
154}
155
// Compile-time smoke test: every supported construction form of vec_t
// must be available. No runtime assertions — building is the test.
BOOST_AUTO_TEST_CASE(SimdLibDouble_ctors)
{
    vec_t avec1; // default construction

    vec_t::scalarType ascalar = 0;
    vec_t avec2(ascalar);  // scalar broadcast, parentheses
    vec_t avec3{ascalar};  // scalar broadcast, braces
    vec_t avec4 = ascalar; // implicit conversion from scalar

    vec_t avec5(avec2); // copy construction, parentheses
    vec_t avec6{avec4}; // copy construction, braces

    vec_t avec7(avec2._data); // from another wrapper's raw data member
    vec_t avec8{avec2._data};

    vec_t::vectorType anative; // from the backend-native vector type
    vec_t avec9(anative);
    vec_t avec10{anative};

    boost::ignore_unused(avec1, avec3, avec5, avec6, avec7, avec8, avec9,
                         avec10);
}
178
179BOOST_AUTO_TEST_CASE(SimdLibDouble_load)
180{
181 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
182 {}}; // double brace to deal with gcc 4.8.5 ...
183 vec_t avec;
184 avec.load(ascalararr.data());
185}
186
// Loads by treating an aligned scalar buffer directly as a vec_t.
// NOTE(review): the reinterpret_cast relies on vec_t being layout-
// compatible with an array of `width` doubles (see
// SimdLibDouble_mem_size); strict-aliasing caveat applies.
BOOST_AUTO_TEST_CASE(SimdLibDouble_load_implicit)
{
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    vec_t avec;
    avec = *(reinterpret_cast<vec_t *>(ascalararr.data()));
}
194
195BOOST_AUTO_TEST_CASE(SimdLibDouble_load_aligned)
196{
197 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
198 {}}; // double brace to deal with gcc 4.8.5 ...
199 vec_t avec;
200 avec.load(ascalararr.data(), is_aligned);
201}
202
203BOOST_AUTO_TEST_CASE(SimdLibDouble_load_unaligned)
204{
205 std::array<double, vec_t::width> ascalararr{
206 {}}; // double brace to deal with gcc 4.8.5 ...
207 vec_t avec;
208 avec.load(ascalararr.data(), is_not_aligned);
209}
210
211BOOST_AUTO_TEST_CASE(SimdLibInt64_load)
212{
213 alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
214 ascalararr{{}}; // double brace to deal with gcc 4.8.5 ...
216 aindex.load(ascalararr.data());
217}
218
219BOOST_AUTO_TEST_CASE(SimdLibInt64_load_aligned)
220{
221 alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
222 ascalararr{{}}; // double brace to deal with gcc 4.8.5 ...
224 aindex.load(ascalararr.data(), is_aligned);
225}
226
227BOOST_AUTO_TEST_CASE(SimdLibInt64_load_unaligned)
228{
229 std::array<std::uint64_t, vec_t::width> ascalararr{
230 {}}; // double brace to deal with gcc 4.8.5 ...
232 aindex.load(ascalararr.data(), is_not_aligned);
233}
234
235BOOST_AUTO_TEST_CASE(SimdLibInt32_load)
236{
237 alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
238 ascalararr{{}}; // double brace to deal with gcc 4.8.5 ...
240 aindex.load(ascalararr.data());
241}
242
243BOOST_AUTO_TEST_CASE(SimdLibInt32_load_aligned)
244{
245 alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
246 ascalararr{{}}; // double brace to deal with gcc 4.8.5 ...
248 aindex.load(ascalararr.data(), is_aligned);
249}
250
251BOOST_AUTO_TEST_CASE(SimdLibInt32_load_unaligned)
252{
253 std::array<std::uint32_t, vec_t::width> ascalararr{
254 {}}; // double brace to deal with gcc 4.8.5 ...
256 aindex.load(ascalararr.data(), is_not_aligned);
257}
258
259BOOST_AUTO_TEST_CASE(SimdLibDouble_store)
260{
261 double val = 4.0;
262 vec_t avec(val);
263 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
264 {}}; // double brace to deal with gcc 4.8.5 ...
265 avec.store(ascalararr.data());
266
267 for (size_t i = 0; i < vec_t::width; ++i)
268 {
269 BOOST_CHECK_EQUAL(ascalararr[i], val);
270 }
271}
272
273BOOST_AUTO_TEST_CASE(SimdLibDouble_store_aligned)
274{
275 double val = 4.0;
276 vec_t avec(val);
277 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
278 {}}; // double brace to deal with gcc 4.8.5 ...
279 avec.store(ascalararr.data(), is_aligned);
280
281 for (size_t i = 0; i < vec_t::width; ++i)
282 {
283 BOOST_CHECK_EQUAL(ascalararr[i], val);
284 }
285}
286
287BOOST_AUTO_TEST_CASE(SimdLibDouble_store_unaligned)
288{
289 double val = 4.0;
290 vec_t avec(val);
291 std::array<double, vec_t::width> ascalararr{
292 {}}; // double brace to deal with gcc 4.8.5 ...
293 avec.store(ascalararr.data(), is_not_aligned);
294
295 for (size_t i = 0; i < vec_t::width; ++i)
296 {
297 BOOST_CHECK_EQUAL(ascalararr[i], val);
298 }
299}
300
301BOOST_AUTO_TEST_CASE(SimdLibDouble_store_non_temporal)
302{
303 double val = 4.0;
304 vec_t avec(val);
305 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
306 {}}; // double brace to deal with gcc 4.8.5 ...
307 avec.store(ascalararr.data(), is_not_reused);
308
309 for (size_t i = 0; i < vec_t::width; ++i)
310 {
311 BOOST_CHECK_EQUAL(ascalararr[i], val);
312 }
313}
314
315BOOST_AUTO_TEST_CASE(SimdLibDouble_broadcast)
316{
317 vec_t::scalarType ascalar{3.333};
318 vec_t avec;
319 avec.broadcast(ascalar);
320}
321
322BOOST_AUTO_TEST_CASE(SimdLibDouble_subscript_assign_read)
323{
324 vec_t avec;
325 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
326 {}}; // double brace to deal with gcc 4.8.5 ...
327
328 for (size_t i = 0; i < vec_t::width; ++i)
329 {
330 ascalararr[i] = i;
331 }
332
333 for (size_t i = 0; i < vec_t::width; ++i)
334 {
335 avec[i] = ascalararr[i];
336 }
337
338 for (size_t i = 0; i < vec_t::width; ++i)
339 {
340 BOOST_CHECK_EQUAL(ascalararr[i], avec[i]);
341 }
342}
343
344BOOST_AUTO_TEST_CASE(SimdLibDouble_gather64)
345{
346 vec_t avec;
347 using index_t = simd<size_t>;
348 index_t aindexvec;
349
350 // create and fill index
351 std::array<size_t, vec_t::width> aindex;
352 aindex[0] = 0;
353 if (vec_t::width > 2)
354 {
355 aindex[1] = 3;
356 aindex[2] = 5;
357 aindex[3] = 6;
358 }
359 if (vec_t::width > 4)
360 {
361 aindex[4] = 8;
362 aindex[5] = 15;
363 aindex[6] = 16;
364 aindex[7] = 20;
365 }
366
367 // load index
368 aindexvec.load(aindex.data(), is_not_aligned);
369
370 // create and fill scalar array
371 constexpr size_t scalarArraySize = 32;
372 std::array<double, scalarArraySize> ascalararr;
373 for (size_t i = 0; i < scalarArraySize; ++i)
374 {
375 ascalararr[i] = i;
376 }
377
378 avec.gather(ascalararr.data(), aindexvec);
379
380 // check
381 for (size_t i = 0; i < vec_t::width; ++i)
382 {
383 BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
384 }
385}
386
387BOOST_AUTO_TEST_CASE(SimdLibDouble_gather32)
388{
389 vec_t avec;
390 using index_t = simd<std::uint32_t, vec_t::width>;
391 index_t aindexvec;
392
393 // create and fill index
394 std::array<std::uint32_t, vec_t::width> aindex;
395 aindex[0] = 0;
396 if (vec_t::width > 2)
397 {
398 aindex[1] = 3;
399 aindex[2] = 5;
400 aindex[3] = 6;
401 }
402 if (vec_t::width > 4)
403 {
404 aindex[4] = 8;
405 aindex[5] = 15;
406 aindex[6] = 16;
407 aindex[7] = 20;
408 }
409
410 // load index
411 aindexvec.load(aindex.data(), is_not_aligned);
412
413 // create and fill scalar array
414 constexpr size_t scalarArraySize = 32;
415 std::array<double, scalarArraySize> ascalararr;
416 for (size_t i = 0; i < scalarArraySize; ++i)
417 {
418 ascalararr[i] = i;
419 }
420
421 avec.gather(ascalararr.data(), aindexvec);
422
423 // check
424 for (size_t i = 0; i < vec_t::width; ++i)
425 {
426 BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
427 }
428}
429
// scatter() with a 64-bit index vector: lane i of the vector must land
// at ascalararr[aindex[i]].
BOOST_AUTO_TEST_CASE(SimdLibDouble_scatter64)
{
    vec_t avec;
    using index_t = simd<size_t>;
    index_t aindexvec;

    // create and fill index; each width bucket is guarded so narrower
    // builds never touch lanes they do not have
    std::array<size_t, vec_t::width> aindex;
    aindex[0] = 1;
    if (vec_t::width > 1)
    {
        aindex[1] = 3;
    }
    if (vec_t::width > 2)
    {
        aindex[2] = 5;
        aindex[3] = 6;
    }
    if (vec_t::width > 4)
    {
        aindex[4] = 8;
        aindex[5] = 15;
        aindex[6] = 20;
        aindex[7] = 30;
    }

    // load index
    aindexvec.load(aindex.data(), is_not_aligned);

    // create scalar array; deliberately left uninitialised — only the
    // scattered-to slots are compared below
    constexpr size_t scalarArraySize = 32;
    std::array<double, scalarArraySize> ascalararr;

    // fill vector with distinct per-lane values
    alignas(vec_t::alignment) std::array<double, vec_t::width> avecarr{{}};
    avecarr[0] = 10;
    if (vec_t::width > 1)
    {
        avecarr[1] = 9;
    }

    if (vec_t::width > 2)
    {
        avecarr[2] = 8;
        avecarr[3] = 7;
    }
    if (vec_t::width > 4)
    {
        avecarr[4] = 4;
        avecarr[5] = 3;
        avecarr[6] = 2;
        avecarr[7] = 1;
    }
    avec.load(avecarr.data());

    avec.scatter(ascalararr.data(), aindexvec);

    // check: each lane must have arrived at its indexed slot
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(avec[i], ascalararr[aindex[i]]);
    }
}
493
494BOOST_AUTO_TEST_CASE(SimdLibDouble_add_unary)
495{
496 double val1 = -4.0;
497 double val2 = 2.0;
498 vec_t res(val1);
499 vec_t avec(val2);
500 res += avec;
501 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
502 {}}; // double brace to deal with gcc 4.8.5 ...
503 res.store(ascalararr.data());
504
505 for (size_t i = 0; i < vec_t::width; ++i)
506 {
507 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
508 }
509}
510
511BOOST_AUTO_TEST_CASE(SimdLibDouble_sub_unary)
512{
513 double val1 = -4.0;
514 double val2 = 2.0;
515 vec_t res(val1);
516 vec_t avec(val2);
517 res -= avec;
518 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
519 {}}; // double brace to deal with gcc 4.8.5 ...
520 res.store(ascalararr.data());
521
522 for (size_t i = 0; i < vec_t::width; ++i)
523 {
524 BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
525 }
526}
527
528BOOST_AUTO_TEST_CASE(SimdLibDouble_mul_unary)
529{
530 double val1 = -4.0;
531 double val2 = 2.0;
532 vec_t res(val1);
533 vec_t avec(val2);
534 res *= avec;
535 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
536 {}}; // double brace to deal with gcc 4.8.5 ...
537 res.store(ascalararr.data());
538
539 for (size_t i = 0; i < vec_t::width; ++i)
540 {
541 BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
542 }
543}
544
545BOOST_AUTO_TEST_CASE(SimdLibDouble_div_unary)
546{
547 double val1 = -4.0;
548 double val2 = 2.0;
549 vec_t res(val1);
550 vec_t avec(val2);
551 res /= avec;
552 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
553 {}}; // double brace to deal with gcc 4.8.5 ...
554 res.store(ascalararr.data());
555
556 for (size_t i = 0; i < vec_t::width; ++i)
557 {
558 BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
559 }
560}
561
562BOOST_AUTO_TEST_CASE(SimdLibDouble_add_binary)
563{
564 double val1 = -4.0;
565 double val2 = 2.5;
566 vec_t avec1(val1);
567 vec_t avec2(val2);
568 vec_t res = avec1 + avec2;
569 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
570 {}}; // double brace to deal with gcc 4.8.5 ...
571 res.store(ascalararr.data());
572
573 for (size_t i = 0; i < vec_t::width; ++i)
574 {
575 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
576 }
577}
578
579BOOST_AUTO_TEST_CASE(SimdLibDouble_sub_binary)
580{
581 double val1 = -4.0;
582 double val2 = 2.5;
583 vec_t avec1(val1);
584 vec_t avec2(val2);
585 vec_t res = avec1 - avec2;
586 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
587 {}}; // double brace to deal with gcc 4.8.5 ...
588 res.store(ascalararr.data());
589
590 for (size_t i = 0; i < vec_t::width; ++i)
591 {
592 BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
593 }
594}
595
596BOOST_AUTO_TEST_CASE(SimdLibDouble_mul_binary)
597{
598 double val1 = -4.0;
599 double val2 = 2.5;
600 vec_t avec1(val1);
601 vec_t avec2(val2);
602 vec_t res = avec1 * avec2;
603 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
604 {}}; // double brace to deal with gcc 4.8.5 ...
605 res.store(ascalararr.data());
606
607 for (size_t i = 0; i < vec_t::width; ++i)
608 {
609 BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
610 }
611}
612
613BOOST_AUTO_TEST_CASE(SimdLibDouble_div_binary)
614{
615 double val1 = -4.0;
616 double val2 = 2.5;
617 vec_t avec1(val1);
618 vec_t avec2(val2);
619 vec_t res = avec1 / avec2;
620 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
621 {}}; // double brace to deal with gcc 4.8.5 ...
622 res.store(ascalararr.data());
623
624 for (size_t i = 0; i < vec_t::width; ++i)
625 {
626 BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
627 }
628}
629
630BOOST_AUTO_TEST_CASE(SimdLibDouble_add_mul)
631{
632 double val1 = -4.0;
633 double val2 = 1.5;
634 double val3 = 5.0;
635 vec_t avec1(val1);
636 vec_t avec2(val2);
637 vec_t avec3(val3);
638 vec_t res = avec1 + avec2 * avec3;
639 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
640 {}}; // double brace to deal with gcc 4.8.5 ...
641 res.store(ascalararr.data());
642
643 for (size_t i = 0; i < vec_t::width; ++i)
644 {
645 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
646 }
647}
648
649BOOST_AUTO_TEST_CASE(SimdLibDouble_fused_add_mul)
650{
651 double val1 = -4.0;
652 double val2 = 1.5;
653 double val3 = 5.0;
654 vec_t avec1(val1);
655 vec_t avec2(val2);
656 vec_t avec3(val3);
657 avec1.fma(avec2, avec3);
658 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
659 {}}; // double brace to deal with gcc 4.8.5 ...
660 avec1.store(ascalararr.data());
661
662 for (size_t i = 0; i < vec_t::width; ++i)
663 {
664 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
665 }
666}
667
668BOOST_AUTO_TEST_CASE(SimdLibDouble_sqrt)
669{
670 double val = 4.0;
671 vec_t avec(val);
672 vec_t asqrt = sqrt(avec);
673 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
674 {}}; // double brace to deal with gcc 4.8.5 ...
675 asqrt.store(ascalararr.data());
676
677 for (size_t i = 0; i < vec_t::width; ++i)
678 {
679 BOOST_CHECK_EQUAL(ascalararr[i], std::sqrt(val));
680 }
681}
682
683BOOST_AUTO_TEST_CASE(SimdLibDouble_abs)
684{
685 double val = -4.0;
686 vec_t avec(val);
687 vec_t aabs = abs(avec);
688 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
689 {}}; // double brace to deal with gcc 4.8.5 ...
690 aabs.store(ascalararr.data());
691
692 for (size_t i = 0; i < vec_t::width; ++i)
693 {
694 BOOST_CHECK_EQUAL(ascalararr[i], std::abs(val));
695 }
696}
697
698BOOST_AUTO_TEST_CASE(SimdLibDouble_log)
699{
700 double val = 4.0;
701 vec_t avec(val);
702 vec_t alog = log(avec);
703 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
704 {}}; // double brace to deal with gcc 4.8.5 ...
705 alog.store(ascalararr.data());
706
707 for (size_t i = 0; i < vec_t::width; ++i)
708 {
709 BOOST_CHECK_EQUAL(ascalararr[i], std::log(val));
710 }
711}
712
713BOOST_AUTO_TEST_CASE(SimdLibDouble_greater)
714{
715 double aval = 4.0;
716 vec_t avec(aval);
717 using mask_t = simd<bool, vec_t::width>;
718 mask_t amask;
719
720 amask = avec > avec;
721 // check
722 alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
723 ascalararr{{}}; // double brace to deal with gcc 4.8.5 ...
724 amask.store(ascalararr.data());
725 for (size_t i = 0; i < vec_t::width; ++i)
726 {
727 // type conversion make lvalue in rvalue, needed pre-c++17
728 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint64_t)mask_t::false_v);
729 }
730
731 double bval = 3.0;
732 vec_t bvec(bval);
733
734 amask = avec > bvec;
735 // check
736 amask.store(ascalararr.data());
737 for (size_t i = 0; i < vec_t::width; ++i)
738 {
739 // type conversion make lvalue in rvalue, needed pre-c++17
740 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint64_t)mask_t::true_v);
741 }
742
743 double cval = 5.0;
744 vec_t cvec(cval);
745
746 amask = avec > cvec;
747 // check
748 amask.store(ascalararr.data());
749 for (size_t i = 0; i < vec_t::width; ++i)
750 {
751 // type conversion make lvalue in rvalue, needed pre-c++17
752 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint64_t)mask_t::false_v);
753 }
754
755 if (vec_t::width == 4)
756 {
757 alignas(vec_t::alignment) std::array<double, 4> ascalararr2{
758 {1.0, 2.0, 3.0, 4.0}}; // double brace to deal with gcc 4.8.5 ...
759 double dval = 2.0;
760 vec_t dvec(dval);
761 vec_t evec;
762 evec.load(ascalararr2.data());
763
764 amask = dvec > evec;
765 // check
766 for (size_t i = 0; i < vec_t::width; ++i)
767 {
768 BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]),
769 dval > ascalararr2[i]);
770 }
771 }
772
773 if (vec_t::width == 8)
774 {
775 alignas(vec_t::alignment) std::array<double, 8> ascalararr2{
776 {1.0, 2.0, 3.0, 4.0, 3.0, 2.0,
777 1.0}}; // double brace to deal with gcc 4.8.5 ...
778 double dval = 2.0;
779 vec_t dvec(dval);
780 vec_t evec;
781 evec.load(ascalararr2.data());
782
783 amask = dvec > evec;
784 // check
785 for (size_t i = 0; i < vec_t::width; ++i)
786 {
787 BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]),
788 dval > ascalararr2[i]);
789 }
790 }
791}
792
793BOOST_AUTO_TEST_CASE(SimdLibDouble_logic_and)
794{
795 double aval = 4.0;
796 vec_t avec(aval);
797 using mask_t = simd<bool, vec_t::width>;
798 mask_t amask;
799
800 alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
801 ascalararr{{}}; // double brace to deal with gcc 4.8.5 ...
802 for (size_t i = 0; i < vec_t::width; ++i)
803 {
804 ascalararr[i] = mask_t::true_v;
805 }
806 amask.load(ascalararr.data());
807
808 // check
809 BOOST_CHECK_EQUAL(amask && false, false);
810 BOOST_CHECK_EQUAL(amask && true, true);
811
812 for (size_t i = 0; i < vec_t::width; ++i)
813 {
814 ascalararr[i] = mask_t::false_v;
815 }
816 amask.load(ascalararr.data());
817
818 // check
819 BOOST_CHECK_EQUAL(amask && false, false);
820 BOOST_CHECK_EQUAL(amask && true, false);
821
822 if (vec_t::width > 1)
823 {
824 ascalararr[0] = mask_t::true_v;
825 // check
826 BOOST_CHECK_EQUAL(amask && false, false);
827 BOOST_CHECK_EQUAL(amask && true, false);
828 }
829}
830
// Round-trips data through load_interleave / deinterleave_store one
// width-sized block at a time, adding a per-dof offset in vector form,
// then verifies the scalar layout saw exactly that offset.
BOOST_AUTO_TEST_CASE(SimdLibDouble_load_interleave_unload)
{
    constexpr size_t nDof{5};
    // no padding in load_interleave deinterleave_store
    constexpr size_t nEle{vec_t::width * 5};
    constexpr size_t nDofBlock = nDof * vec_t::width;

    // scalar layout: nEle elements, each with nDof consecutive dofs,
    // filled with the running index so every slot is distinct
    constexpr size_t size{nDof * nEle};
    std::array<double, size> dofScalarArr{{}};
    for (size_t i = 0; i < size; ++i)
    {
        dofScalarArr[i] = i;
    }

    // number of blocks
    size_t nBlock = nEle / vec_t::width;

    // aligned vector (one vec_t per dof)
    std::vector<vec_t, allocator<vec_t>> dofVectorArr(nDof);

    double *dataPtr = dofScalarArr.data();
    // loop over blocks vec_t::width elements at the time
    for (size_t b = 0; b < nBlock; ++b)
    {
        // load: transpose width elements' dofs into nDof vectors
        load_interleave(dataPtr, nDof, dofVectorArr);

        // manipulate each block: add the dof index to every lane
        for (size_t j = 0; j < nDof; ++j)
        {
            dofVectorArr[j] = dofVectorArr[j] + j;
        }

        // store back in the original (de-interleaved) scalar layout
        deinterleave_store(dofVectorArr, nDof, dataPtr);
        dataPtr += nDofBlock; // advance to the next block
    }

    // check: slot i held value i, dof j added j, so expect i + j
    for (size_t b = 0, i = 0; b < nBlock; ++b)
    {
        for (size_t j = 0; j < nDof; ++j, ++i)
        {
            BOOST_CHECK_EQUAL(dofScalarArr[i], i + j);
        }
    }
}
878
879BOOST_AUTO_TEST_CASE(SimdLibDouble_io)
880{
881 vec_t avec(3.14);
882 std::cout << avec << std::endl;
883}
884
885} // namespace SimdLibTests
886} // namespace Nektar
#define ALIGNMENT
#define NUM_LANES_64BITS
The above copyright notice and this permission notice shall be included.
BOOST_AUTO_TEST_CASE(SimdLibDouble_width_alignment)
The above copyright notice and this permission notice shall be included.
Definition: CoupledSolver.h:2
void load_interleave(const T *in, size_t dataLen, std::vector< scalarT< T >, allocator< scalarT< T > > > &out)
Definition: scalar.hpp:309
scalarT< T > abs(scalarT< T > in)
Definition: scalar.hpp:298
static constexpr struct tinysimd::is_aligned_t is_aligned
static constexpr struct tinysimd::is_not_aligned_t is_not_aligned
typename abi< ScalarType, width >::type simd
Definition: tinysimd.hpp:80
scalarT< T > log(scalarT< T > in)
Definition: scalar.hpp:303
scalarT< T > sqrt(scalarT< T > in)
Definition: scalar.hpp:294
static constexpr struct tinysimd::is_not_reused_t is_not_reused
void deinterleave_store(const std::vector< scalarT< T >, allocator< scalarT< T > > > &in, size_t dataLen, T *out)
Definition: scalar.hpp:319