Nektar++
Loading...
Searching...
No Matches
TestSimdLibDouble.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: TestSimdLibDouble.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// Permission is hereby granted, free of charge, to any person obtaining a
14// copy of this software and associated documentation files (the "Software"),
15// to deal in the Software without restriction, including without limitation
16// the rights to use, copy, modify, merge, publish, distribute, sublicense,
17// and/or sell copies of the Software, and to permit persons to whom the
18// Software is furnished to do so, subject to the following conditions:
19//
20// The above copyright notice and this permission notice shall be included
21// in all copies or substantial portions of the Software.
22//
23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29// DEALINGS IN THE SOFTWARE.
30//
31// Description:
32//
33///////////////////////////////////////////////////////////////////////////////
34
37
38#include <boost/test/tools/floating_point_comparison.hpp>
39#include <boost/test/unit_test.hpp>
40
41#include <array>
42#include <cmath>
43#include <iostream>
44
// Define the expected number of 64-bit lanes and the expected alignment (in
// bytes) for the instruction set selected at compile time. The scalar
// fallback is assumed first and overridden by the widest enabled extension.
#define NUM_LANES_64BITS 1
#define ALIGNMENT 8
#define USING_SCALAR
#if defined(__x86_64__)
#if defined(__AVX512F__) && defined(NEKTAR_ENABLE_SIMD_AVX512)
#define USING_AVX512
#undef NUM_LANES_64BITS
#define NUM_LANES_64BITS 8
#undef ALIGNMENT
#define ALIGNMENT 64
#undef USING_SCALAR
#elif defined(__AVX2__) && defined(NEKTAR_ENABLE_SIMD_AVX2)
#define USING_AVX2
#undef NUM_LANES_64BITS
#define NUM_LANES_64BITS 4
#undef ALIGNMENT
#define ALIGNMENT 32
#undef USING_SCALAR
#elif defined(__SSE2__) && defined(NEKTAR_ENABLE_SIMD_SSE2)
#define USING_SSE2
#undef NUM_LANES_64BITS
#define NUM_LANES_64BITS 2
#undef ALIGNMENT
#define ALIGNMENT 16
#undef USING_SCALAR
#endif
#endif
#if defined(__ARM_FEATURE_SVE) && defined(NEKTAR_ENABLE_SIMD_SVE)
#define USING_SVE
// The expansions are parenthesised so that the macros behave as single
// values inside any surrounding expression.
#undef NUM_LANES_64BITS
#define NUM_LANES_64BITS (__ARM_FEATURE_SVE_BITS / 64)
#undef ALIGNMENT
#define ALIGNMENT (__ARM_FEATURE_SVE_BITS / 8)
#undef USING_SCALAR
#endif
81
83{
84using namespace tinysimd;
88
89BOOST_AUTO_TEST_CASE(SimdLibDouble_width_alignment)
90{
91 std::size_t width, alignment;
92
93#if defined(USING_SCALAR)
94 std::cout << "scalar double" << std::endl;
95#endif
96#if defined(USING_SSE2)
97 std::cout << "sse2 double" << std::endl;
98#endif
99#if defined(USING_AVX2)
100 std::cout << "avx2 double" << std::endl;
101#endif
102#if defined(USING_AVX512)
103 std::cout << "avx512 double" << std::endl;
104#endif
105#if defined(USING_SVE)
106 std::cout << "sve double" << std::endl;
107#endif
108
109 // double
110 width = simd<double>::width;
111 alignment = simd<double>::alignment;
112 BOOST_CHECK_EQUAL(width, NUM_LANES_64BITS);
113 BOOST_CHECK_EQUAL(alignment, ALIGNMENT);
114 // std::int32_t index forcing # of lanes
117 BOOST_CHECK_EQUAL(width, NUM_LANES_64BITS);
118 // std::int64_t index forcing # of lanes
121 BOOST_CHECK_EQUAL(width, NUM_LANES_64BITS);
122 BOOST_CHECK_EQUAL(alignment, ALIGNMENT);
123 // std::int64_t default index
126 BOOST_CHECK_EQUAL(width, NUM_LANES_64BITS);
127 BOOST_CHECK_EQUAL(alignment, ALIGNMENT);
128}
129
130BOOST_AUTO_TEST_CASE(SimdLibDouble_type_traits)
131{
132 {
133 using namespace details;
134
135 BOOST_CHECK_EQUAL(has_width<double>::value, false);
136 BOOST_CHECK_EQUAL(has_width<vec_t>::value, true);
137
138 BOOST_CHECK_EQUAL(has_alignment<double>::value, false);
139 BOOST_CHECK_EQUAL(has_alignment<vec_t>::value, true);
140
141 BOOST_CHECK_EQUAL(has_scalarType<double>::value, false);
142 BOOST_CHECK_EQUAL(has_scalarType<vec_t>::value, true);
143 }
144
145 BOOST_CHECK_EQUAL(is_vector_v<double>, false);
146 BOOST_CHECK_EQUAL(is_vector_v<vec_t>, true);
147
148 BOOST_CHECK_EQUAL(is_vector_floating_point_v<double>, false);
149 BOOST_CHECK_EQUAL(is_vector_floating_point_v<simd<int>>, false);
150 BOOST_CHECK_EQUAL(is_vector_floating_point_v<vec_t>, true);
151}
152
153BOOST_AUTO_TEST_CASE(SimdLibDouble_mem_size)
154{
155 BOOST_CHECK_EQUAL(sizeof(vec_t), sizeof(double) * vec_t::width);
156}
157
158BOOST_AUTO_TEST_CASE(SimdLibDouble_ctors)
159{
160 [[maybe_unused]] vec_t avec1;
161
162 vec_t::scalarType ascalar = 0;
163 vec_t avec2(ascalar);
164 [[maybe_unused]] vec_t avec3{ascalar};
165 vec_t avec4 = ascalar;
166
167 [[maybe_unused]] vec_t avec5(avec2);
168 [[maybe_unused]] vec_t avec6{avec4};
169
170 [[maybe_unused]] vec_t avec7(avec2._data);
171 [[maybe_unused]] vec_t avec8{avec2._data};
172
173 vec_t::vectorType anative;
174 [[maybe_unused]] vec_t avec9(anative);
175 [[maybe_unused]] vec_t avec10{anative};
176}
177
178BOOST_AUTO_TEST_CASE(SimdLibDouble_load)
179{
180 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
181 {}}; // double brace to deal with gcc 4.8.5 ...
182 vec_t avec;
183 avec.load(ascalararr.data());
184}
185
186BOOST_AUTO_TEST_CASE(SimdLibDouble_load_implicit)
187{
188 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
189 {}}; // double brace to deal with gcc 4.8.5 ...
190 vec_t avec;
191 avec = *(reinterpret_cast<vec_t *>(ascalararr.data()));
192}
193
194BOOST_AUTO_TEST_CASE(SimdLibDouble_load_aligned)
195{
196 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
197 {}}; // double brace to deal with gcc 4.8.5 ...
198 vec_t avec;
199 avec.load(ascalararr.data(), is_aligned);
200}
201
202BOOST_AUTO_TEST_CASE(SimdLibDouble_load_unaligned)
203{
204 std::array<double, vec_t::width> ascalararr{
205 {}}; // double brace to deal with gcc 4.8.5 ...
206 vec_t avec;
207 avec.load(ascalararr.data(), is_not_aligned);
208}
209
210BOOST_AUTO_TEST_CASE(SimdLibInt64_load)
211{
212 alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
213 ascalararr{{}}; // double brace to deal with gcc 4.8.5 ...
215 aindex.load(ascalararr.data());
216}
217
218BOOST_AUTO_TEST_CASE(SimdLibInt64_load_aligned)
219{
220 alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
221 ascalararr{{}}; // double brace to deal with gcc 4.8.5 ...
223 aindex.load(ascalararr.data(), is_aligned);
224}
225
226BOOST_AUTO_TEST_CASE(SimdLibInt64_load_unaligned)
227{
228 std::array<std::uint64_t, vec_t::width> ascalararr{
229 {}}; // double brace to deal with gcc 4.8.5 ...
231 aindex.load(ascalararr.data(), is_not_aligned);
232}
233
234BOOST_AUTO_TEST_CASE(SimdLibInt32_load)
235{
236 alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
237 ascalararr{{}}; // double brace to deal with gcc 4.8.5 ...
239 aindex.load(ascalararr.data());
240}
241
242BOOST_AUTO_TEST_CASE(SimdLibInt32_load_aligned)
243{
244 alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
245 ascalararr{{}}; // double brace to deal with gcc 4.8.5 ...
247 aindex.load(ascalararr.data(), is_aligned);
248}
249
250BOOST_AUTO_TEST_CASE(SimdLibInt32_load_unaligned)
251{
252 std::array<std::uint32_t, vec_t::width> ascalararr{
253 {}}; // double brace to deal with gcc 4.8.5 ...
255 aindex.load(ascalararr.data(), is_not_aligned);
256}
257
258BOOST_AUTO_TEST_CASE(SimdLibDouble_store)
259{
260 double val = 4.0;
261 vec_t avec(val);
262 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
263 {}}; // double brace to deal with gcc 4.8.5 ...
264 avec.store(ascalararr.data());
265
266 for (size_t i = 0; i < vec_t::width; ++i)
267 {
268 BOOST_CHECK_EQUAL(ascalararr[i], val);
269 }
270}
271
272BOOST_AUTO_TEST_CASE(SimdLibDouble_store_aligned)
273{
274 double val = 4.0;
275 vec_t avec(val);
276 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
277 {}}; // double brace to deal with gcc 4.8.5 ...
278 avec.store(ascalararr.data(), is_aligned);
279
280 for (size_t i = 0; i < vec_t::width; ++i)
281 {
282 BOOST_CHECK_EQUAL(ascalararr[i], val);
283 }
284}
285
286BOOST_AUTO_TEST_CASE(SimdLibDouble_store_unaligned)
287{
288 double val = 4.0;
289 vec_t avec(val);
290 std::array<double, vec_t::width> ascalararr{
291 {}}; // double brace to deal with gcc 4.8.5 ...
292 avec.store(ascalararr.data(), is_not_aligned);
293
294 for (size_t i = 0; i < vec_t::width; ++i)
295 {
296 BOOST_CHECK_EQUAL(ascalararr[i], val);
297 }
298}
299
300BOOST_AUTO_TEST_CASE(SimdLibDouble_store_non_temporal)
301{
302 double val = 4.0;
303 vec_t avec(val);
304 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
305 {}}; // double brace to deal with gcc 4.8.5 ...
306 avec.store(ascalararr.data(), is_not_reused);
307
308 for (size_t i = 0; i < vec_t::width; ++i)
309 {
310 BOOST_CHECK_EQUAL(ascalararr[i], val);
311 }
312}
313
314BOOST_AUTO_TEST_CASE(SimdLibDouble_broadcast)
315{
316 vec_t::scalarType ascalar{3.333};
317 vec_t avec;
318 avec.broadcast(ascalar);
319}
320
321BOOST_AUTO_TEST_CASE(SimdLibDouble_subscript_assign_read)
322{
323 vec_t avec;
324 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
325 {}}; // double brace to deal with gcc 4.8.5 ...
326
327 for (size_t i = 0; i < vec_t::width; ++i)
328 {
329 ascalararr[i] = i;
330 }
331
332 for (size_t i = 0; i < vec_t::width; ++i)
333 {
334 avec[i] = ascalararr[i];
335 }
336
337 for (size_t i = 0; i < vec_t::width; ++i)
338 {
339 BOOST_CHECK_EQUAL(ascalararr[i], avec[i]);
340 }
341}
342
343BOOST_AUTO_TEST_CASE(SimdLibDouble_gather64)
344{
345 vec_t avec;
346 using index_t = simd<size_t>;
347 index_t aindexvec;
348
349 // create and fill index
350 std::array<size_t, vec_t::width> aindex;
351 aindex[0] = 0;
352 if (vec_t::width > 2)
353 {
354 aindex[1] = 3;
355 aindex[2] = 5;
356 aindex[3] = 6;
357 }
358 if (vec_t::width > 4)
359 {
360 aindex[4] = 8;
361 aindex[5] = 15;
362 aindex[6] = 16;
363 aindex[7] = 20;
364 }
365
366 // load index
367 aindexvec.load(aindex.data(), is_not_aligned);
368
369 // create and fill scalar array
370 constexpr size_t scalarArraySize = 32;
371 std::array<double, scalarArraySize> ascalararr;
372 for (size_t i = 0; i < scalarArraySize; ++i)
373 {
374 ascalararr[i] = i;
375 }
376
377 avec.gather(ascalararr.data(), aindexvec);
378
379 // check
380 for (size_t i = 0; i < vec_t::width; ++i)
381 {
382 BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
383 }
384}
385
386BOOST_AUTO_TEST_CASE(SimdLibDouble_gather32)
387{
388 vec_t avec;
389 using index_t = simd<std::uint32_t, vec_t::width>;
390 index_t aindexvec;
391
392 // create and fill index
393 std::array<std::uint32_t, vec_t::width> aindex;
394 aindex[0] = 0;
395 if (vec_t::width > 2)
396 {
397 aindex[1] = 3;
398 aindex[2] = 5;
399 aindex[3] = 6;
400 }
401 if (vec_t::width > 4)
402 {
403 aindex[4] = 8;
404 aindex[5] = 15;
405 aindex[6] = 16;
406 aindex[7] = 20;
407 }
408
409 // load index
410 aindexvec.load(aindex.data(), is_not_aligned);
411
412 // create and fill scalar array
413 constexpr size_t scalarArraySize = 32;
414 std::array<double, scalarArraySize> ascalararr;
415 for (size_t i = 0; i < scalarArraySize; ++i)
416 {
417 ascalararr[i] = i;
418 }
419
420 avec.gather(ascalararr.data(), aindexvec);
421
422 // check
423 for (size_t i = 0; i < vec_t::width; ++i)
424 {
425 BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
426 }
427}
428
429BOOST_AUTO_TEST_CASE(SimdLibDouble_scatter64)
430{
431 vec_t avec;
432 using index_t = simd<size_t>;
433 index_t aindexvec;
434
435 // create and fill index
436 std::array<size_t, vec_t::width> aindex;
437 aindex[0] = 1;
438 if (vec_t::width > 1)
439 {
440 aindex[1] = 3;
441 }
442 if (vec_t::width > 2)
443 {
444 aindex[2] = 5;
445 aindex[3] = 6;
446 }
447 if (vec_t::width > 4)
448 {
449 aindex[4] = 8;
450 aindex[5] = 15;
451 aindex[6] = 20;
452 aindex[7] = 30;
453 }
454
455 // load index
456 aindexvec.load(aindex.data(), is_not_aligned);
457
458 // create scalar array
459 constexpr size_t scalarArraySize = 32;
460 std::array<double, scalarArraySize> ascalararr;
461
462 // fill vector
463 alignas(vec_t::alignment) std::array<double, vec_t::width> avecarr{{}};
464 avecarr[0] = 10;
465 if (vec_t::width > 1)
466 {
467 avecarr[1] = 9;
468 }
469
470 if (vec_t::width > 2)
471 {
472 avecarr[2] = 8;
473 avecarr[3] = 7;
474 }
475 if (vec_t::width > 4)
476 {
477 avecarr[4] = 4;
478 avecarr[5] = 3;
479 avecarr[6] = 2;
480 avecarr[7] = 1;
481 }
482 avec.load(avecarr.data());
483
484 avec.scatter(ascalararr.data(), aindexvec);
485
486 // check
487 for (size_t i = 0; i < vec_t::width; ++i)
488 {
489 BOOST_CHECK_EQUAL(avec[i], ascalararr[aindex[i]]);
490 }
491}
492
493BOOST_AUTO_TEST_CASE(SimdLibDouble_add_unary)
494{
495 double val1 = -4.0;
496 double val2 = 2.0;
497 vec_t res(val1);
498 vec_t avec(val2);
499 res += avec;
500 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
501 {}}; // double brace to deal with gcc 4.8.5 ...
502 res.store(ascalararr.data());
503
504 for (size_t i = 0; i < vec_t::width; ++i)
505 {
506 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
507 }
508}
509
510BOOST_AUTO_TEST_CASE(SimdLibDouble_sub_unary)
511{
512 double val1 = -4.0;
513 double val2 = 2.0;
514 vec_t res(val1);
515 vec_t avec(val2);
516 res -= avec;
517 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
518 {}}; // double brace to deal with gcc 4.8.5 ...
519 res.store(ascalararr.data());
520
521 for (size_t i = 0; i < vec_t::width; ++i)
522 {
523 BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
524 }
525}
526
527BOOST_AUTO_TEST_CASE(SimdLibDouble_mul_unary)
528{
529 double val1 = -4.0;
530 double val2 = 2.0;
531 vec_t res(val1);
532 vec_t avec(val2);
533 res *= avec;
534 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
535 {}}; // double brace to deal with gcc 4.8.5 ...
536 res.store(ascalararr.data());
537
538 for (size_t i = 0; i < vec_t::width; ++i)
539 {
540 BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
541 }
542}
543
544BOOST_AUTO_TEST_CASE(SimdLibDouble_div_unary)
545{
546 double val1 = -4.0;
547 double val2 = 2.0;
548 vec_t res(val1);
549 vec_t avec(val2);
550 res /= avec;
551 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
552 {}}; // double brace to deal with gcc 4.8.5 ...
553 res.store(ascalararr.data());
554
555 for (size_t i = 0; i < vec_t::width; ++i)
556 {
557 BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
558 }
559}
560
561BOOST_AUTO_TEST_CASE(SimdLibDouble_add_binary)
562{
563 double val1 = -4.0;
564 double val2 = 2.5;
565 vec_t avec1(val1);
566 vec_t avec2(val2);
567 vec_t res = avec1 + avec2;
568 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
569 {}}; // double brace to deal with gcc 4.8.5 ...
570 res.store(ascalararr.data());
571
572 for (size_t i = 0; i < vec_t::width; ++i)
573 {
574 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
575 }
576}
577
578BOOST_AUTO_TEST_CASE(SimdLibDouble_sub_binary)
579{
580 double val1 = -4.0;
581 double val2 = 2.5;
582 vec_t avec1(val1);
583 vec_t avec2(val2);
584 vec_t res = avec1 - avec2;
585 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
586 {}}; // double brace to deal with gcc 4.8.5 ...
587 res.store(ascalararr.data());
588
589 for (size_t i = 0; i < vec_t::width; ++i)
590 {
591 BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
592 }
593}
594
595BOOST_AUTO_TEST_CASE(SimdLibDouble_mul_binary)
596{
597 double val1 = -4.0;
598 double val2 = 2.5;
599 vec_t avec1(val1);
600 vec_t avec2(val2);
601 vec_t res = avec1 * avec2;
602 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
603 {}}; // double brace to deal with gcc 4.8.5 ...
604 res.store(ascalararr.data());
605
606 for (size_t i = 0; i < vec_t::width; ++i)
607 {
608 BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
609 }
610}
611
612BOOST_AUTO_TEST_CASE(SimdLibDouble_div_binary)
613{
614 double val1 = -4.0;
615 double val2 = 2.5;
616 vec_t avec1(val1);
617 vec_t avec2(val2);
618 vec_t res = avec1 / avec2;
619 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
620 {}}; // double brace to deal with gcc 4.8.5 ...
621 res.store(ascalararr.data());
622
623 for (size_t i = 0; i < vec_t::width; ++i)
624 {
625 BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
626 }
627}
628
629BOOST_AUTO_TEST_CASE(SimdLibDouble_add_mul)
630{
631 double val1 = -4.0;
632 double val2 = 1.5;
633 double val3 = 5.0;
634 vec_t avec1(val1);
635 vec_t avec2(val2);
636 vec_t avec3(val3);
637 vec_t res = avec1 + avec2 * avec3;
638 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
639 {}}; // double brace to deal with gcc 4.8.5 ...
640 res.store(ascalararr.data());
641
642 for (size_t i = 0; i < vec_t::width; ++i)
643 {
644 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
645 }
646}
647
648BOOST_AUTO_TEST_CASE(SimdLibDouble_fused_add_mul)
649{
650 double val1 = -4.0;
651 double val2 = 1.5;
652 double val3 = 5.0;
653 vec_t avec1(val1);
654 vec_t avec2(val2);
655 vec_t avec3(val3);
656 avec1.fma(avec2, avec3);
657 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
658 {}}; // double brace to deal with gcc 4.8.5 ...
659 avec1.store(ascalararr.data());
660
661 for (size_t i = 0; i < vec_t::width; ++i)
662 {
663 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
664 }
665}
666
667BOOST_AUTO_TEST_CASE(SimdLibDouble_sqrt)
668{
669 double val = 4.0;
670 vec_t avec(val);
671 vec_t asqrt = sqrt(avec);
672 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
673 {}}; // double brace to deal with gcc 4.8.5 ...
674 asqrt.store(ascalararr.data());
675
676 for (size_t i = 0; i < vec_t::width; ++i)
677 {
678 BOOST_CHECK_EQUAL(ascalararr[i], std::sqrt(val));
679 }
680}
681
682BOOST_AUTO_TEST_CASE(SimdLibDouble_abs)
683{
684 double val = -4.0;
685 vec_t avec(val);
686 vec_t aabs = abs(avec);
687 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
688 {}}; // double brace to deal with gcc 4.8.5 ...
689 aabs.store(ascalararr.data());
690
691 for (size_t i = 0; i < vec_t::width; ++i)
692 {
693 BOOST_CHECK_EQUAL(ascalararr[i], std::abs(val));
694 }
695}
696
697BOOST_AUTO_TEST_CASE(SimdLibDouble_log)
698{
699 double val = 4.0;
700 vec_t avec(val);
701 vec_t alog = log(avec);
702 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
703 {}}; // double brace to deal with gcc 4.8.5 ...
704 alog.store(ascalararr.data());
705
706 for (size_t i = 0; i < vec_t::width; ++i)
707 {
708 BOOST_CHECK_EQUAL(ascalararr[i], std::log(val));
709 }
710}
711
712BOOST_AUTO_TEST_CASE(SimdLibDouble_greater)
713{
714 double aval = 4.0;
715 vec_t avec(aval);
716 using mask_t = simd<bool, vec_t::width>;
717 mask_t amask;
718
719 amask = avec > avec;
720 // check
721 alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
722 ascalararr{{}}; // double brace to deal with gcc 4.8.5 ...
723 amask.store(ascalararr.data());
724 for (size_t i = 0; i < vec_t::width; ++i)
725 {
726 // type conversion make lvalue in rvalue, needed pre-c++17
727 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint64_t)mask_t::false_v);
728 }
729
730 double bval = 3.0;
731 vec_t bvec(bval);
732
733 amask = avec > bvec;
734 // check
735 amask.store(ascalararr.data());
736 for (size_t i = 0; i < vec_t::width; ++i)
737 {
738 // type conversion make lvalue in rvalue, needed pre-c++17
739 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint64_t)mask_t::true_v);
740 }
741
742 double cval = 5.0;
743 vec_t cvec(cval);
744
745 amask = avec > cvec;
746 // check
747 amask.store(ascalararr.data());
748 for (size_t i = 0; i < vec_t::width; ++i)
749 {
750 // type conversion make lvalue in rvalue, needed pre-c++17
751 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint64_t)mask_t::false_v);
752 }
753
754 if (vec_t::width == 4)
755 {
756 alignas(vec_t::alignment) std::array<double, 4> ascalararr2{
757 {1.0, 2.0, 3.0, 4.0}}; // double brace to deal with gcc 4.8.5 ...
758 double dval = 2.0;
759 vec_t dvec(dval);
760 vec_t evec;
761 evec.load(ascalararr2.data());
762
763 amask = dvec > evec;
764 // check
765 for (size_t i = 0; i < vec_t::width; ++i)
766 {
767 BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]),
768 dval > ascalararr2[i]);
769 }
770 }
771
772 if (vec_t::width == 8)
773 {
774 alignas(vec_t::alignment) std::array<double, 8> ascalararr2{
775 {1.0, 2.0, 3.0, 4.0, 3.0, 2.0,
776 1.0}}; // double brace to deal with gcc 4.8.5 ...
777 double dval = 2.0;
778 vec_t dvec(dval);
779 vec_t evec;
780 evec.load(ascalararr2.data());
781
782 amask = dvec > evec;
783 // check
784 for (size_t i = 0; i < vec_t::width; ++i)
785 {
786 BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]),
787 dval > ascalararr2[i]);
788 }
789 }
790}
791
792BOOST_AUTO_TEST_CASE(SimdLibDouble_logic_and)
793{
794 double aval = 4.0;
795 vec_t avec(aval);
796 using mask_t = simd<bool, vec_t::width>;
797 mask_t amask;
798
799 alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
800 ascalararr{{}}; // double brace to deal with gcc 4.8.5 ...
801 for (size_t i = 0; i < vec_t::width; ++i)
802 {
803 ascalararr[i] = mask_t::true_v;
804 }
805 amask.load(ascalararr.data());
806
807 // check
808 BOOST_CHECK_EQUAL(amask && false, false);
809 BOOST_CHECK_EQUAL(amask && true, true);
810
811 for (size_t i = 0; i < vec_t::width; ++i)
812 {
813 ascalararr[i] = mask_t::false_v;
814 }
815 amask.load(ascalararr.data());
816
817 // check
818 BOOST_CHECK_EQUAL(amask && false, false);
819 BOOST_CHECK_EQUAL(amask && true, false);
820
821 if (vec_t::width > 1)
822 {
823 ascalararr[0] = mask_t::true_v;
824 // check
825 BOOST_CHECK_EQUAL(amask && false, false);
826 BOOST_CHECK_EQUAL(amask && true, false);
827 }
828}
829
830BOOST_AUTO_TEST_CASE(SimdLibDouble_load_interleave_unload)
831{
832 constexpr size_t nDof{5};
833 // no padding in load_interleave deinterleave_store
834 constexpr size_t nEle{vec_t::width * 5};
835 constexpr size_t nDofBlock = nDof * vec_t::width;
836
837 constexpr size_t size{nDof * nEle};
838 std::array<double, size> dofScalarArr{{}};
839 for (size_t i = 0; i < size; ++i)
840 {
841 dofScalarArr[i] = i;
842 }
843
844 // number of blocks
845 size_t nBlock = nEle / vec_t::width;
846
847 // aligned vector
848 std::vector<vec_t, allocator<vec_t>> dofVectorArr(nDof);
849
850 double *dataPtr = dofScalarArr.data();
851 // loop over blocks vec_t::width elements at the time
852 for (size_t b = 0; b < nBlock; ++b)
853 {
854 // load
855 load_interleave(dataPtr, nDof, dofVectorArr);
856
857 // manipulate each block
858 for (size_t j = 0; j < nDof; ++j)
859 {
860 dofVectorArr[j] = dofVectorArr[j] + j;
861 }
862
863 // store
864 deinterleave_store(dofVectorArr, nDof, dataPtr);
865 dataPtr += nDofBlock;
866 }
867
868 // check
869 for (size_t b = 0, i = 0; b < nBlock; ++b)
870 {
871 for (size_t j = 0; j < nDof; ++j, ++i)
872 {
873 BOOST_CHECK_EQUAL(dofScalarArr[i], i + j);
874 }
875 }
876}
877
878BOOST_AUTO_TEST_CASE(SimdLibDouble_longsimd_width)
879{
880 BOOST_CHECK_EQUAL(default_long_vec_t::width,
881 vec_t::width * tinysimd::PACKMULTIPLIER);
882
883 BOOST_CHECK_EQUAL(long_vec_t::width, 3 * vec_t::width);
884 BOOST_CHECK_EQUAL(long_vec_t::alignment, vec_t::alignment);
885 BOOST_CHECK_EQUAL(sizeof(long_vec_t), sizeof(double) * long_vec_t::width);
886 BOOST_CHECK_EQUAL(is_vector_floating_point_v<long_vec_t>, true);
887}
888
889BOOST_AUTO_TEST_CASE(SimdLibDouble_longsimd_mixed_arithmetic)
890{
891 alignas(long_vec_t::alignment) std::array<double, long_vec_t::width>
892 lhsData{{}};
893 alignas(vec_t::alignment) std::array<double, vec_t::width> rhsData{{}};
894
895 for (size_t i = 0; i < lhsData.size(); ++i)
896 {
897 lhsData[i] = static_cast<double>(i + 1);
898 }
899 for (size_t i = 0; i < rhsData.size(); ++i)
900 {
901 rhsData[i] = static_cast<double>(10 * (i + 1));
902 }
903
904 long_vec_t lhs;
905 lhs.load(lhsData.data(), is_aligned);
906
907 vec_t rhs;
908 rhs.load(rhsData.data(), is_aligned);
909
910 auto sum = lhs + rhs;
911 auto reverseSum = rhs + lhs;
912 auto scalarRight = lhs + 2.5;
913 auto scalarLeft = 100.0 - lhs;
914 auto scaled = lhs * 2.0;
915
916 long_vec_t acc(1.0);
917 acc.fma(lhs, rhs);
918
919 alignas(long_vec_t::alignment) std::array<double, long_vec_t::width> tmp{
920 {}};
921 sum.store(tmp.data(), is_aligned);
922 for (size_t i = 0; i < tmp.size(); ++i)
923 {
924 const auto chunkLane = i % vec_t::width;
925 BOOST_CHECK_EQUAL(tmp[i], lhsData[i] + rhsData[chunkLane]);
926 }
927
928 reverseSum.store(tmp.data(), is_aligned);
929 for (size_t i = 0; i < tmp.size(); ++i)
930 {
931 const auto chunkLane = i % vec_t::width;
932 BOOST_CHECK_EQUAL(tmp[i], lhsData[i] + rhsData[chunkLane]);
933 }
934
935 scalarRight.store(tmp.data(), is_aligned);
936 for (size_t i = 0; i < tmp.size(); ++i)
937 {
938 BOOST_CHECK_EQUAL(tmp[i], lhsData[i] + 2.5);
939 }
940
941 scalarLeft.store(tmp.data(), is_aligned);
942 for (size_t i = 0; i < tmp.size(); ++i)
943 {
944 BOOST_CHECK_EQUAL(tmp[i], 100.0 - lhsData[i]);
945 }
946
947 scaled.store(tmp.data(), is_aligned);
948 for (size_t i = 0; i < tmp.size(); ++i)
949 {
950 BOOST_CHECK_EQUAL(tmp[i], lhsData[i] * 2.0);
951 }
952
953 acc.store(tmp.data(), is_aligned);
954 for (size_t i = 0; i < tmp.size(); ++i)
955 {
956 const auto chunkLane = i % vec_t::width;
957 BOOST_CHECK_EQUAL(tmp[i], 1.0 + lhsData[i] * rhsData[chunkLane]);
958 }
959}
960
961BOOST_AUTO_TEST_CASE(SimdLibDouble_longsimd_interleave_helpers)
962{
963 {
964 constexpr size_t nDof = 5;
965 constexpr size_t nBlocks = 3;
966 constexpr size_t nEle = long_vec_t::width * nBlocks;
967 constexpr size_t blockSize = nDof * long_vec_t::width;
968 constexpr size_t size = nDof * nEle;
969
970 std::array<double, size> data{{}};
971 for (size_t i = 0; i < size; ++i)
972 {
973 data[i] = static_cast<double>(i);
974 }
975
976 std::vector<long_vec_t, allocator<long_vec_t>> work(nDof);
977
978 for (size_t block = 0; block < nBlocks; ++block)
979 {
980 double *dataPtr = data.data() + block * blockSize;
981 load_interleave(dataPtr, nDof, work);
982 for (size_t j = 0; j < nDof; ++j)
983 {
984 work[j] += static_cast<double>(j + 1);
985 }
986 deinterleave_store(work, nDof, dataPtr);
987 }
988
989 for (size_t block = 0; block < nBlocks; ++block)
990 {
991 for (size_t lane = 0; lane < long_vec_t::width; ++lane)
992 {
993 for (size_t dof = 0; dof < nDof; ++dof)
994 {
995 const auto idx = block * blockSize + lane * nDof + dof;
996 BOOST_CHECK_EQUAL(data[idx],
997 static_cast<double>(idx + dof + 1));
998 }
999 }
1000 }
1001 }
1002
1003 {
1004 constexpr size_t nDof = 4;
1005 constexpr size_t skipPads = long_vec_t::width > 1 ? 1 : 0;
1006 constexpr size_t activeWidth = long_vec_t::width - skipPads;
1007 constexpr size_t unalignedSize = nDof * long_vec_t::width;
1008 constexpr size_t paddedSize = nDof * activeWidth;
1009
1010 std::array<double, unalignedSize> data{{}};
1011 for (size_t i = 0; i < data.size(); ++i)
1012 {
1013 data[i] = static_cast<double>(100 + i);
1014 }
1015
1016 std::vector<long_vec_t, allocator<long_vec_t>> work(nDof);
1017 load_unalign_interleave(data.data(), nDof, work);
1018 for (size_t j = 0; j < nDof; ++j)
1019 {
1020 work[j] += static_cast<double>(2 * j + 1);
1021 }
1022 std::array<double, unalignedSize> out{{}};
1023 deinterleave_unalign_store(work, nDof, out.data());
1024
1025 for (size_t lane = 0; lane < long_vec_t::width; ++lane)
1026 {
1027 for (size_t dof = 0; dof < nDof; ++dof)
1028 {
1029 const auto idx = lane * nDof + dof;
1030 BOOST_CHECK_EQUAL(out[idx],
1031 data[idx] + static_cast<double>(2 * dof + 1));
1032 }
1033 }
1034
1035 std::array<double, paddedSize> padded{{}};
1036 for (size_t i = 0; i < padded.size(); ++i)
1037 {
1038 padded[i] = static_cast<double>(200 + i);
1039 }
1040
1041 load_unalign_interleave_skipPads(padded.data(), nDof, skipPads, work);
1042 for (auto &value : work)
1043 {
1044 value += 3.0;
1045 }
1046 std::array<double, paddedSize> paddedOut{{}};
1047 deinterleave_unalign_store_skipPads(work, nDof, skipPads,
1048 paddedOut.data());
1049
1050 for (size_t lane = 0; lane < activeWidth; ++lane)
1051 {
1052 for (size_t dof = 0; dof < nDof; ++dof)
1053 {
1054 const auto idx = lane * nDof + dof;
1055 BOOST_CHECK_EQUAL(paddedOut[idx], padded[idx] + 3.0);
1056 }
1057 }
1058 }
1059}
1060
1061BOOST_AUTO_TEST_CASE(SimdLibDouble_io)
1062{
1063 vec_t avec(3.14);
1064 std::cout << avec << std::endl;
1065}
1066
1067} // namespace Nektar::SimdLibTests
#define ALIGNMENT
#define NUM_LANES_64BITS
The above copyright notice and this permission notice shall be included.
longsimd< double, 3 *vec_t::width > long_vec_t
BOOST_AUTO_TEST_CASE(SimdLibDouble_width_alignment)
longsimd< double > default_long_vec_t
void load_interleave(const T *in, const size_t dataLen, std::vector< scalarT< T >, allocator< scalarT< T > > > &out)
Definition scalar.hpp:338
constexpr bool is_vector_floating_point_v
Definition traits.hpp:173
scalarT< T > abs(scalarT< T > in)
Definition scalar.hpp:295
static constexpr unsigned int PACKMULTIPLIER
Definition tinysimd.hpp:99
void deinterleave_unalign_store(const std::vector< scalarT< T >, allocator< scalarT< T > > > &in, const size_t dataLen, T *out)
Definition scalar.hpp:348
static constexpr struct tinysimd::is_aligned_t is_aligned
static constexpr struct tinysimd::is_not_aligned_t is_not_aligned
typename abi< ScalarType, width >::type simd
Definition tinysimd.hpp:132
scalarT< T > log(scalarT< T > in)
Definition scalar.hpp:310
void load_unalign_interleave(const T *in, const size_t dataLen, std::vector< scalarT< T >, allocator< scalarT< T > > > &out)
Definition scalar.hpp:316
void deinterleave_store(const std::vector< scalarT< T >, allocator< scalarT< T > > > &in, const size_t dataLen, T *out)
Definition scalar.hpp:370
void deinterleave_unalign_store_skipPads(const std::vector< scalarT< T >, allocator< scalarT< T > > > &in, const size_t dataLen, const size_t skipPads, T *out)
Definition scalar.hpp:359
scalarT< T > sqrt(scalarT< T > in)
Definition scalar.hpp:290
void load_unalign_interleave_skipPads(const T *in, const size_t dataLen, const size_t skipPads, std::vector< scalarT< T >, allocator< scalarT< T > > > &out)
Definition scalar.hpp:327
static constexpr struct tinysimd::is_not_reused_t is_not_reused
typename details::longsimd_selector< ScalarType, width, abi >::type longsimd
Definition tinysimd.hpp:485