Nektar++
TestSimdLibSingle.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: TestSimdLibSingle.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// Permission is hereby granted, free of charge, to any person obtaining a
14// copy of this software and associated documentation files (the "Software"),
15// to deal in the Software without restriction, including without limitation
16// the rights to use, copy, modify, merge, publish, distribute, sublicense,
17// and/or sell copies of the Software, and to permit persons to whom the
18// Software is furnished to do so, subject to the following conditions:
19//
20// The above copyright notice and this permission notice shall be included
21// in all copies or substantial portions of the Software.
22//
23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29// DEALINGS IN THE SOFTWARE.
30//
31// Description:
32//
33///////////////////////////////////////////////////////////////////////////////
34
37
38#include <boost/core/ignore_unused.hpp>
39#include <boost/test/unit_test.hpp>
40
41#include <array>
42#include <cmath>
43#include <iostream>
44
// Define the instruction set in use, the number of 32-bit lanes and the
// alignment value that the tests below expect from simd<float>.
//
// The scalar fallback is assumed first; each enabled extension then replaces
// the three definitions (USING_*, NUM_LANES_32BITS, ALIGNMENT).
#define NUM_LANES_32BITS 1
#define ALIGNMENT 4
#define USING_SCALAR
#if defined(__x86_64__)
#if defined(__AVX512F__) && defined(NEKTAR_ENABLE_SIMD_AVX512)
#define USING_AVX512
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS 16
#undef ALIGNMENT
#define ALIGNMENT 64
#undef USING_SCALAR
#elif defined(__AVX2__) && defined(NEKTAR_ENABLE_SIMD_AVX2)
#define USING_AVX2
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS 8
#undef ALIGNMENT
#define ALIGNMENT 32
#undef USING_SCALAR
#elif defined(__SSE2__) && defined(NEKTAR_ENABLE_SIMD_SSE2)
#define USING_SSE2
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS 4
#undef ALIGNMENT
// NOTE(review): SSE2 registers are 128 bits (16 bytes) wide, yet 64 is
// expected here -- confirm against simd<float>::alignment for this backend.
#define ALIGNMENT 64
#undef USING_SCALAR
#endif
#endif
#if defined(__ARM_FEATURE_SVE) && defined(NEKTAR_ENABLE_SIMD_SVE)
#define USING_SVE
// The expansions are parenthesised so that the macros remain a single operand
// wherever they are substituted.
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS (__ARM_FEATURE_SVE_BITS / 32)
#undef ALIGNMENT
#define ALIGNMENT (__ARM_FEATURE_SVE_BITS / 4)
#undef USING_SCALAR
#endif
81
82namespace Nektar
83{
84namespace SimdLibTests
85{
86using namespace tinysimd;
87using vec_t = simd<float>;
88
89BOOST_AUTO_TEST_CASE(SimdLibSingle_width_alignment)
90{
91 std::size_t width, alignment;
92
93#if defined(USING_SCALAR)
94 std::cout << "scalar float" << std::endl;
95#endif
96#if defined(USING_AVX2)
97 std::cout << "avx2 float" << std::endl;
98#endif
99#if defined(USING_AVX512)
100 std::cout << "avx512 float" << std::endl;
101#endif
102#if defined(USING_SVE)
103 std::cout << "sve float" << std::endl;
104#endif
105
106 // float
107 width = simd<float>::width;
108 alignment = simd<float>::alignment;
109 BOOST_CHECK_EQUAL(width, NUM_LANES_32BITS);
110 BOOST_CHECK_EQUAL(alignment, ALIGNMENT);
111 // std::int32_t index forcing # of lanes
114 BOOST_CHECK_EQUAL(width, NUM_LANES_32BITS);
115 BOOST_CHECK_EQUAL(alignment, ALIGNMENT);
116 // std::int32_t default index
119 BOOST_CHECK_EQUAL(width, NUM_LANES_32BITS);
120 BOOST_CHECK_EQUAL(alignment, ALIGNMENT);
121}
122
123BOOST_AUTO_TEST_CASE(SimdLibFloat_type_traits)
124{
125 using namespace details;
126
127 BOOST_CHECK_EQUAL(has_width<float>::value, false);
128 BOOST_CHECK_EQUAL(has_width<vec_t>::value, true);
129
130 BOOST_CHECK_EQUAL(has_alignment<float>::value, false);
131 BOOST_CHECK_EQUAL(has_alignment<vec_t>::value, true);
132
133 BOOST_CHECK_EQUAL(has_scalarType<float>::value, false);
134 BOOST_CHECK_EQUAL(has_scalarType<vec_t>::value, true);
135
136 BOOST_CHECK_EQUAL(is_vector<float>::value, false);
137 BOOST_CHECK_EQUAL(is_vector<vec_t>::value, true);
138
139 BOOST_CHECK_EQUAL(is_vector_floating_point<float>::value, false);
140 BOOST_CHECK_EQUAL(is_vector_floating_point<simd<int>>::value, false);
141 BOOST_CHECK_EQUAL(is_vector_floating_point<vec_t>::value, true);
142}
143
144BOOST_AUTO_TEST_CASE(SimdLibFloat_mem_size)
145{
146 BOOST_CHECK_EQUAL(sizeof(vec_t), sizeof(float) * vec_t::width);
147}
148
149BOOST_AUTO_TEST_CASE(SimdLibFloat_ctors)
150{
151 vec_t avec1;
152
153 vec_t::scalarType ascalar = 0;
154 vec_t avec2(ascalar);
155 vec_t avec3{ascalar};
156 vec_t avec4 = ascalar;
157
158 vec_t avec5(avec2);
159 vec_t avec6{avec4};
160
161 vec_t avec7(avec2._data);
162 vec_t avec8{avec2._data};
163
164 vec_t::vectorType anative;
165 vec_t avec9(anative);
166 vec_t avec10{anative};
167
168 boost::ignore_unused(avec1, avec3, avec5, avec6, avec7, avec8, avec9,
169 avec10);
170}
171
172BOOST_AUTO_TEST_CASE(SimdLibFloat_load)
173{
174 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
175 {}}; // double brace to deal with gcc 4.8.5 ...
176 vec_t avec;
177 avec.load(ascalararr.data());
178}
179
180BOOST_AUTO_TEST_CASE(SimdLibFloat_load_implicit)
181{
182 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
183 {}}; // double brace to deal with gcc 4.8.5 ...
184 vec_t avec;
185 avec = *(reinterpret_cast<vec_t *>(ascalararr.data()));
186}
187
188BOOST_AUTO_TEST_CASE(SimdLibFloat_load_aligned)
189{
190 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
191 {}}; // double brace to deal with gcc 4.8.5 ...
192 vec_t avec;
193 avec.load(ascalararr.data(), is_aligned);
194}
195
196BOOST_AUTO_TEST_CASE(SimdLibFloat_load_unaligned)
197{
198 std::array<float, vec_t::width> ascalararr{
199 {}}; // double brace to deal with gcc 4.8.5 ...
200 vec_t avec;
201 avec.load(ascalararr.data(), is_not_aligned);
202}
203
204BOOST_AUTO_TEST_CASE(SimdLibFloat_store)
205{
206 float val = 4.0;
207 vec_t avec(val);
208 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
209 {}}; // float brace to deal with gcc 4.8.5 ...
210 avec.store(ascalararr.data());
211
212 for (size_t i = 0; i < vec_t::width; ++i)
213 {
214 BOOST_CHECK_EQUAL(ascalararr[i], val);
215 }
216}
217
218BOOST_AUTO_TEST_CASE(SimdLibFloat_store_aligned)
219{
220 float val = 4.0;
221 vec_t avec(val);
222 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
223 {}}; // float brace to deal with gcc 4.8.5 ...
224 avec.store(ascalararr.data(), is_aligned);
225
226 for (size_t i = 0; i < vec_t::width; ++i)
227 {
228 BOOST_CHECK_EQUAL(ascalararr[i], val);
229 }
230}
231
232BOOST_AUTO_TEST_CASE(SimdLibFloat_store_unaligned)
233{
234 float val = 4.0;
235 vec_t avec(val);
236 std::array<float, vec_t::width> ascalararr{
237 {}}; // float brace to deal with gcc 4.8.5 ...
238 avec.store(ascalararr.data(), is_not_aligned);
239
240 for (size_t i = 0; i < vec_t::width; ++i)
241 {
242 BOOST_CHECK_EQUAL(ascalararr[i], val);
243 }
244}
245
246BOOST_AUTO_TEST_CASE(SimdLibFloat_store_non_temporal)
247{
248 float val = 4.0;
249 vec_t avec(val);
250 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
251 {}}; // float brace to deal with gcc 4.8.5 ...
252 avec.store(ascalararr.data(), is_not_reused);
253
254 for (size_t i = 0; i < vec_t::width; ++i)
255 {
256 BOOST_CHECK_EQUAL(ascalararr[i], val);
257 }
258}
259
260BOOST_AUTO_TEST_CASE(SimdLibSingle_broadcast)
261{
262 // explictly float literal for
263 // Visual Studio 14 2015 Win64 compiler bug
264 vec_t::scalarType ascalar{3.333f};
265 vec_t avec;
266 avec.broadcast(ascalar);
267}
268
269BOOST_AUTO_TEST_CASE(SimdLibSingle_subscript_assign_read)
270{
271 vec_t avec;
272 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
273 {}}; // double brace to deal with gcc 4.8.5 ...
274
275 for (size_t i = 0; i < vec_t::width; ++i)
276 {
277 ascalararr[i] = i;
278 }
279
280 for (size_t i = 0; i < vec_t::width; ++i)
281 {
282 avec[i] = ascalararr[i];
283 }
284
285 for (size_t i = 0; i < vec_t::width; ++i)
286 {
287 BOOST_CHECK_EQUAL(ascalararr[i], avec[i]);
288 }
289}
290
291BOOST_AUTO_TEST_CASE(SimdLibFloat_gather32)
292{
293 vec_t avec;
294 using index_t = simd<std::uint32_t>;
295 index_t aindexvec;
296
297 // create and fill index
298 std::array<std::uint32_t, vec_t::width> aindex;
299 aindex[0] = 0;
300 if (vec_t::width > 2)
301 {
302 aindex[1] = 3;
303 aindex[2] = 5;
304 aindex[3] = 6;
305 }
306 if (vec_t::width > 4)
307 {
308 aindex[4] = 8;
309 aindex[5] = 15;
310 aindex[6] = 20;
311 aindex[7] = 23;
312 }
313 if (vec_t::width > 8)
314 {
315 aindex[8] = 24;
316 aindex[9] = 28;
317 aindex[10] = 33;
318 aindex[11] = 40;
319 aindex[12] = 41;
320 aindex[13] = 45;
321 aindex[14] = 60;
322 aindex[15] = 61;
323 }
324
325 // load index
326 aindexvec.load(aindex.data(), is_not_aligned);
327
328 // create and fill scalar array
329 constexpr size_t scalarArraySize = 64;
330 std::array<float, scalarArraySize> ascalararr;
331 for (size_t i = 0; i < scalarArraySize; ++i)
332 {
333 ascalararr[i] = i;
334 }
335
336 avec.gather(ascalararr.data(), aindexvec);
337
338 // check
339 for (size_t i = 0; i < vec_t::width; ++i)
340 {
341 BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
342 }
343}
344
345BOOST_AUTO_TEST_CASE(SimdLibFloat_scatter32)
346{
347 vec_t avec;
348 using index_t = simd<std::uint32_t>;
349 index_t aindexvec;
350
351 // create and fill index
352 std::array<std::uint32_t, vec_t::width> aindex;
353 aindex[0] = 1;
354 if (vec_t::width > 1)
355 {
356 aindex[1] = 3;
357 }
358 if (vec_t::width > 2)
359 {
360 aindex[2] = 5;
361 aindex[3] = 6;
362 }
363 if (vec_t::width > 4)
364 {
365 aindex[4] = 8;
366 aindex[5] = 15;
367 aindex[6] = 20;
368 aindex[7] = 30;
369 }
370 if (vec_t::width > 8)
371 {
372 aindex[8] = 31;
373 aindex[9] = 32;
374 aindex[10] = 35;
375 aindex[11] = 40;
376 aindex[12] = 41;
377 aindex[13] = 45;
378 aindex[14] = 60;
379 aindex[15] = 61;
380 }
381
382 // load index
383 aindexvec.load(aindex.data(), is_not_aligned);
384
385 // create scalar array
386 constexpr size_t scalarArraySize = 64;
387 std::array<float, scalarArraySize> ascalararr;
388
389 // fill vector
390 alignas(vec_t::alignment) std::array<float, vec_t::width> avecarr{{}};
391 avecarr[0] = 10;
392 if (vec_t::width > 1)
393 {
394 avecarr[1] = 9;
395 }
396
397 if (vec_t::width > 2)
398 {
399 avec[2] = 8;
400 avec[3] = 7;
401 }
402 if (vec_t::width > 4)
403 {
404 avec[4] = 4;
405 avec[5] = 3;
406 avec[6] = 2;
407 avec[7] = 1;
408 }
409 avec.load(avecarr.data());
410
411 avec.scatter(ascalararr.data(), aindexvec);
412
413 // check
414 for (size_t i = 0; i < vec_t::width; ++i)
415 {
416 BOOST_CHECK_EQUAL(avec[i], ascalararr[aindex[i]]);
417 }
418}
419
420BOOST_AUTO_TEST_CASE(SimdLibFloat_add_unary)
421{
422 float val1 = -4.0;
423 float val2 = 2.0;
424 vec_t res(val1);
425 vec_t avec(val2);
426 res += avec;
427 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
428 {}}; // float brace to deal with gcc 4.8.5 ...
429 res.store(ascalararr.data());
430
431 for (size_t i = 0; i < vec_t::width; ++i)
432 {
433 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
434 }
435}
436
437BOOST_AUTO_TEST_CASE(SimdLibFloat_sub_unary)
438{
439 float val1 = -4.0;
440 float val2 = 2.0;
441 vec_t res(val1);
442 vec_t avec(val2);
443 res -= avec;
444 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
445 {}}; // float brace to deal with gcc 4.8.5 ...
446 res.store(ascalararr.data());
447
448 for (size_t i = 0; i < vec_t::width; ++i)
449 {
450 BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
451 }
452}
453
454BOOST_AUTO_TEST_CASE(SimdLibFloat_mul_unary)
455{
456 float val1 = -4.0;
457 float val2 = 2.0;
458 vec_t res(val1);
459 vec_t avec(val2);
460 res *= avec;
461 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
462 {}}; // float brace to deal with gcc 4.8.5 ...
463 res.store(ascalararr.data());
464
465 for (size_t i = 0; i < vec_t::width; ++i)
466 {
467 BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
468 }
469}
470
471BOOST_AUTO_TEST_CASE(SimdLibFloat_div_unary)
472{
473 float val1 = -4.0;
474 float val2 = 2.0;
475 vec_t res(val1);
476 vec_t avec(val2);
477 res /= avec;
478 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
479 {}}; // float brace to deal with gcc 4.8.5 ...
480 res.store(ascalararr.data());
481
482 for (size_t i = 0; i < vec_t::width; ++i)
483 {
484 BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
485 }
486}
487
488BOOST_AUTO_TEST_CASE(SimdLibFloat_add_binary)
489{
490 float val1 = -4.0;
491 float val2 = 2.5;
492 vec_t avec1(val1);
493 vec_t avec2(val2);
494 vec_t res = avec1 + avec2;
495 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
496 {}}; // double brace to deal with gcc 4.8.5 ...
497 res.store(ascalararr.data());
498
499 for (size_t i = 0; i < vec_t::width; ++i)
500 {
501 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
502 }
503}
504
505BOOST_AUTO_TEST_CASE(SimdLibFloat_sub_binary)
506{
507 float val1 = -4.0;
508 float val2 = 2.5;
509 vec_t avec1(val1);
510 vec_t avec2(val2);
511 vec_t res = avec1 - avec2;
512 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
513 {}}; // double brace to deal with gcc 4.8.5 ...
514 res.store(ascalararr.data());
515
516 for (size_t i = 0; i < vec_t::width; ++i)
517 {
518 BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
519 }
520}
521
522BOOST_AUTO_TEST_CASE(SimdLibFloat_mul_binary)
523{
524 float val1 = -4.0;
525 float val2 = 2.5;
526 vec_t avec1(val1);
527 vec_t avec2(val2);
528 vec_t res = avec1 * avec2;
529 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
530 {}}; // double brace to deal with gcc 4.8.5 ...
531 res.store(ascalararr.data());
532
533 for (size_t i = 0; i < vec_t::width; ++i)
534 {
535 BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
536 }
537}
538
539BOOST_AUTO_TEST_CASE(SimdLibFloat_div_binary)
540{
541 float val1 = -4.0;
542 float val2 = 2.5;
543 vec_t avec1(val1);
544 vec_t avec2(val2);
545 vec_t res = avec1 / avec2;
546 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
547 {}}; // double brace to deal with gcc 4.8.5 ...
548 res.store(ascalararr.data());
549
550 for (size_t i = 0; i < vec_t::width; ++i)
551 {
552 BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
553 }
554}
555
556BOOST_AUTO_TEST_CASE(SimdLibFloat_add_mul)
557{
558 float val1 = -4.0;
559 float val2 = 2.0;
560 float val3 = 2.0;
561 vec_t avec1(val1);
562 vec_t avec2(val2);
563 vec_t avec3(val3);
564 vec_t res = avec1 + avec2 * avec3;
565 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
566 {}}; // float brace to deal with gcc 4.8.5 ...
567 res.store(ascalararr.data());
568
569 for (size_t i = 0; i < vec_t::width; ++i)
570 {
571 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 + val3);
572 }
573}
574
575BOOST_AUTO_TEST_CASE(SimdLibFloat_fused_add_mul)
576{
577 float val1 = -4.0;
578 float val2 = 1.5;
579 float val3 = 5.0;
580 vec_t avec1(val1);
581 vec_t avec2(val2);
582 vec_t avec3(val3);
583 avec1.fma(avec2, avec3);
584 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
585 {}}; // double brace to deal with gcc 4.8.5 ...
586 avec1.store(ascalararr.data());
587
588 for (size_t i = 0; i < vec_t::width; ++i)
589 {
590 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
591 }
592}
593
594BOOST_AUTO_TEST_CASE(SimdLibFloat_sqrt)
595{
596 float val = 4.0;
597 vec_t avec(val);
598 vec_t asqrt = sqrt(avec);
599 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
600 {}}; // float brace to deal with gcc 4.8.5 ...
601 asqrt.store(ascalararr.data());
602
603 for (size_t i = 0; i < vec_t::width; ++i)
604 {
605 BOOST_CHECK_EQUAL(ascalararr[i], std::sqrt(val));
606 }
607}
608
609BOOST_AUTO_TEST_CASE(SimdLibFloat_abs)
610{
611 float val = -4.0;
612 vec_t avec(val);
613 vec_t aabs = abs(avec);
614 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
615 {}}; // float brace to deal with gcc 4.8.5 ...
616 aabs.store(ascalararr.data());
617
618 for (size_t i = 0; i < vec_t::width; ++i)
619 {
620 BOOST_CHECK_EQUAL(ascalararr[i], std::abs(val));
621 }
622}
623
624BOOST_AUTO_TEST_CASE(SimdLibFloat_log)
625{
626 float val = 4.0;
627 vec_t avec(val);
628 vec_t alog = log(avec);
629 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
630 {}}; // float brace to deal with gcc 4.8.5 ...
631 alog.store(ascalararr.data());
632
633 for (size_t i = 0; i < vec_t::width; ++i)
634 {
635 BOOST_CHECK_EQUAL(ascalararr[i], std::log(val));
636 }
637}
638
639BOOST_AUTO_TEST_CASE(SimdLibFloat_greater)
640{
641 float aval = 4.0;
642 vec_t avec(aval);
643 using mask_t = simd<bool, vec_t::width>;
644 mask_t amask;
645
646 amask = avec > avec;
647 // check
648 alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
649 ascalararr{{}}; // float brace to deal with gcc 4.8.5 ...
650 amask.store(ascalararr.data());
651 for (size_t i = 0; i < vec_t::width; ++i)
652 {
653 // type conversion make lvalue in rvalue, needed pre-c++17
654 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::false_v);
655 }
656
657 float bval = 3.0;
658 vec_t bvec(bval);
659
660 amask = avec > bvec;
661 // check
662 amask.store(ascalararr.data());
663 for (size_t i = 0; i < vec_t::width; ++i)
664 {
665 // type conversion make lvalue in rvalue, needed pre-c++17
666 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::true_v);
667 }
668
669 float cval = 5.0;
670 vec_t cvec(cval);
671
672 amask = avec > cvec;
673 // check
674 amask.store(ascalararr.data());
675 for (size_t i = 0; i < vec_t::width; ++i)
676 {
677 // type conversion make lvalue in rvalue, needed pre-c++17
678 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::false_v);
679 }
680
681 if (vec_t::width == 4)
682 {
683 alignas(vec_t::alignment) std::array<float, 4> ascalararr2{
684 {1.0, 2.0, 3.0, 4.0}}; // float brace to deal with gcc 4.8.5 ...
685 float dval = 2.0;
686 vec_t dvec(dval);
687 vec_t evec;
688 evec.load(ascalararr2.data());
689
690 amask = dvec > evec;
691 // check
692 for (size_t i = 0; i < vec_t::width; ++i)
693 {
694 BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]), dvec[i] > evec[i]);
695 }
696 }
697
698 if (vec_t::width == 8)
699 {
700 alignas(vec_t::alignment) std::array<float, 8> ascalararr2{
701 {1.0, 2.0, 3.0, 4.0, 3.0, 2.0,
702 1.0}}; // double brace to deal with gcc 4.8.5 ...
703 float dval = 2.0;
704 vec_t dvec(dval);
705 vec_t evec;
706 evec.load(ascalararr2.data());
707
708 amask = dvec > evec;
709 // check
710 for (size_t i = 0; i < vec_t::width; ++i)
711 {
712 BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]),
713 dval > ascalararr2[i]);
714 }
715 }
716}
717
718BOOST_AUTO_TEST_CASE(SimdLibFloat_logic_and)
719{
720 float aval = 4.0;
721 vec_t avec(aval);
722 using mask_t = simd<bool, vec_t::width>;
723 mask_t amask;
724
725 alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
726 ascalararr{{}}; // float brace to deal with gcc 4.8.5 ...
727 for (size_t i = 0; i < vec_t::width; ++i)
728 {
729 ascalararr[i] = mask_t::true_v;
730 }
731 amask.load(ascalararr.data());
732
733 // check
734 BOOST_CHECK_EQUAL(amask && false, false);
735 BOOST_CHECK_EQUAL(amask && true, true);
736
737 for (size_t i = 0; i < vec_t::width; ++i)
738 {
739 ascalararr[i] = mask_t::false_v;
740 }
741 amask.load(ascalararr.data());
742
743 // check
744 BOOST_CHECK_EQUAL(amask && false, false);
745 BOOST_CHECK_EQUAL(amask && true, false);
746
747 if (vec_t::width > 1)
748 {
749 ascalararr[0] = mask_t::true_v;
750 // check
751 BOOST_CHECK_EQUAL(amask && false, false);
752 BOOST_CHECK_EQUAL(amask && true, false);
753 }
754}
755
756BOOST_AUTO_TEST_CASE(SimdLibFloat_load_interleave_unload)
757{
758 constexpr size_t nDof{5};
759 // no padding in load_interleave deinterleave_store
760 constexpr size_t nEle{vec_t::width * 5};
761 constexpr size_t nDofBlock = nDof * vec_t::width;
762
763 constexpr size_t size{nDof * nEle};
764 std::array<float, size> dofScalarArr{{}};
765 for (size_t i = 0; i < size; ++i)
766 {
767 dofScalarArr[i] = i;
768 }
769
770 // number of blocks
771 size_t nBlock = nEle / vec_t::width;
772
773 // aligned vector
774 std::vector<vec_t, allocator<vec_t>> dofVectorArr(nDof);
775
776 float *dataPtr = dofScalarArr.data();
777 // loop over blocks vec_t::width elements at the time
778 for (size_t b = 0; b < nBlock; ++b)
779 {
780 // load
781 load_interleave(dataPtr, nDof, dofVectorArr);
782
783 // manipulate each block
784 for (size_t j = 0; j < nDof; ++j)
785 {
786 dofVectorArr[j] = dofVectorArr[j] + j;
787 }
788
789 // store
790 deinterleave_store(dofVectorArr, nDof, dataPtr);
791 dataPtr += nDofBlock;
792 }
793
794 // check
795 for (size_t b = 0, i = 0; b < nBlock; ++b)
796 {
797 for (size_t j = 0; j < nDof; ++j, ++i)
798 {
799 BOOST_CHECK_EQUAL(dofScalarArr[i], i + j);
800 }
801 }
802}
803
804BOOST_AUTO_TEST_CASE(SimdLibFloat_io)
805{
806 vec_t avec(3.14);
807 std::cout << avec << std::endl;
808}
809
810} // namespace SimdLibTests
811} // namespace Nektar
#define NUM_LANES_32BITS
The above copyright notice and this permission notice shall be included.
#define ALIGNMENT
BOOST_AUTO_TEST_CASE(SimdLibDouble_width_alignment)
The above copyright notice and this permission notice shall be included.
Definition: CoupledSolver.h:2
void load_interleave(const T *in, size_t dataLen, std::vector< scalarT< T >, allocator< scalarT< T > > > &out)
Definition: scalar.hpp:309
scalarT< T > abs(scalarT< T > in)
Definition: scalar.hpp:298
static constexpr struct tinysimd::is_aligned_t is_aligned
static constexpr struct tinysimd::is_not_aligned_t is_not_aligned
typename abi< ScalarType, width >::type simd
Definition: tinysimd.hpp:80
scalarT< T > log(scalarT< T > in)
Definition: scalar.hpp:303
scalarT< T > sqrt(scalarT< T > in)
Definition: scalar.hpp:294
static constexpr struct tinysimd::is_not_reused_t is_not_reused
void deinterleave_store(const std::vector< scalarT< T >, allocator< scalarT< T > > > &in, size_t dataLen, T *out)
Definition: scalar.hpp:319