Nektar++
TestSimdLibSingle.cpp
///////////////////////////////////////////////////////////////////////////////
//
// File: TestSimdLibSingle.cpp
//
// For more information, please see: http://www.nektar.info
//
// The MIT License
//
// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
// Department of Aeronautics, Imperial College London (UK), and Scientific
// Computing and Imaging Institute, University of Utah (USA).
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//
// Description: Unit tests for the tinysimd SIMD wrapper with a single
// precision (float) scalar type.
//
///////////////////////////////////////////////////////////////////////////////

#include <LibUtilities/SimdLib/io.hpp>
#include <LibUtilities/SimdLib/tinysimd.hpp>

#include <boost/test/unit_test.hpp>

#include <array>
#include <cmath>
#include <iostream>

// define types in use and number of lanes
#define NUM_LANES_32BITS 1
#define ALIGNMENT 4
#define USING_SCALAR
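// NUM_LANES_32BITS and ALIGNMENT record the width and alignment expected of
// tinysimd::simd<float> for the instruction set selected below; the tests
// compare them against the values the library reports at compile time.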
#if defined(__x86_64__)
#if defined(__AVX512F__) && defined(NEKTAR_ENABLE_SIMD_AVX512)
#define USING_AVX512
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS 16
#undef ALIGNMENT
#define ALIGNMENT 64
#undef USING_SCALAR
#elif defined(__AVX2__) && defined(NEKTAR_ENABLE_SIMD_AVX2)
#define USING_AVX2
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS 8
#undef ALIGNMENT
#define ALIGNMENT 32
#undef USING_SCALAR
#elif defined(__SSE2__) && defined(NEKTAR_ENABLE_SIMD_SSE2)
#define USING_SSE2
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS 4
#undef ALIGNMENT
#define ALIGNMENT 16 // 128-bit SSE2 registers: 4 lanes of 32 bits
#undef USING_SCALAR
#endif
#endif
#if defined(__ARM_FEATURE_SVE) && defined(NEKTAR_ENABLE_SIMD_SVE)
#define USING_SVE
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS __ARM_FEATURE_SVE_BITS / 32
#undef ALIGNMENT
#define ALIGNMENT __ARM_FEATURE_SVE_BITS / 4
#undef USING_SCALAR
#endif

namespace Nektar::SimdLibTests
{
using namespace tinysimd;
using vec_t = simd<float>;

BOOST_AUTO_TEST_CASE(SimdLibSingle_width_alignment)
{
    std::size_t width, alignment;

#if defined(USING_SCALAR)
    std::cout << "scalar float" << std::endl;
#endif
#if defined(USING_AVX2)
    std::cout << "avx2 float" << std::endl;
#endif
#if defined(USING_AVX512)
    std::cout << "avx512 float" << std::endl;
#endif
#if defined(USING_SVE)
    std::cout << "sve float" << std::endl;
#endif

    // float
    width     = simd<float>::width;
    alignment = simd<float>::alignment;
    BOOST_CHECK_EQUAL(width, NUM_LANES_32BITS);
    BOOST_CHECK_EQUAL(alignment, ALIGNMENT);
    // std::int32_t index forcing # of lanes
    width     = simd<std::int32_t, vec_t::width>::width;
    alignment = simd<std::int32_t, vec_t::width>::alignment;
    BOOST_CHECK_EQUAL(width, NUM_LANES_32BITS);
    BOOST_CHECK_EQUAL(alignment, ALIGNMENT);
    // std::int32_t default index
    width     = simd<std::int32_t>::width;
    alignment = simd<std::int32_t>::alignment;
    BOOST_CHECK_EQUAL(width, NUM_LANES_32BITS);
    BOOST_CHECK_EQUAL(alignment, ALIGNMENT);
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_type_traits)
{
    using namespace details;

    BOOST_CHECK_EQUAL(has_width<float>::value, false);
    BOOST_CHECK_EQUAL(has_width<vec_t>::value, true);

    BOOST_CHECK_EQUAL(has_alignment<float>::value, false);
    BOOST_CHECK_EQUAL(has_alignment<vec_t>::value, true);

    BOOST_CHECK_EQUAL(has_scalarType<float>::value, false);
    BOOST_CHECK_EQUAL(has_scalarType<vec_t>::value, true);

    BOOST_CHECK_EQUAL(is_vector<float>::value, false);
    BOOST_CHECK_EQUAL(is_vector<vec_t>::value, true);

    BOOST_CHECK_EQUAL(is_vector_floating_point<float>::value, false);
    BOOST_CHECK_EQUAL(is_vector_floating_point<simd<int>>::value, false);
    BOOST_CHECK_EQUAL(is_vector_floating_point<vec_t>::value, true);
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_mem_size)
{
    BOOST_CHECK_EQUAL(sizeof(vec_t), sizeof(float) * vec_t::width);
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_ctors)
{
    [[maybe_unused]] vec_t avec1;

    vec_t::scalarType ascalar = 0;
    vec_t avec2(ascalar);
    [[maybe_unused]] vec_t avec3{ascalar};
    vec_t avec4 = ascalar;

    [[maybe_unused]] vec_t avec5(avec2);
    [[maybe_unused]] vec_t avec6{avec4};

    [[maybe_unused]] vec_t avec7(avec2._data);
    [[maybe_unused]] vec_t avec8{avec2._data};

    vec_t::vectorType anative;
    [[maybe_unused]] vec_t avec9(anative);
    [[maybe_unused]] vec_t avec10{anative};
}

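// The tests below exercise the load/store API: default, explicitly aligned
// (is_aligned), unaligned (is_not_aligned) and non-temporal (is_not_reused)
// variants, plus an implicit load through pointer reinterpretation.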
BOOST_AUTO_TEST_CASE(SimdLibFloat_load)
{
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    vec_t avec;
    avec.load(ascalararr.data());
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_load_implicit)
{
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    vec_t avec;
    avec = *(reinterpret_cast<vec_t *>(ascalararr.data()));
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_load_aligned)
{
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    vec_t avec;
    avec.load(ascalararr.data(), is_aligned);
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_load_unaligned)
{
    std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    vec_t avec;
    avec.load(ascalararr.data(), is_not_aligned);
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_store)
{
    float val = 4.0;
    vec_t avec(val);
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    avec.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_store_aligned)
{
    float val = 4.0;
    vec_t avec(val);
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    avec.store(ascalararr.data(), is_aligned);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_store_unaligned)
{
    float val = 4.0;
    vec_t avec(val);
    std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    avec.store(ascalararr.data(), is_not_aligned);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_store_non_temporal)
{
    float val = 4.0;
    vec_t avec(val);
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    avec.store(ascalararr.data(), is_not_reused);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibSingle_broadcast)
{
    // explicitly use a float literal to work around a
    // Visual Studio 14 2015 Win64 compiler bug
    vec_t::scalarType ascalar{3.333f};
    vec_t avec;
    avec.broadcast(ascalar);
}

BOOST_AUTO_TEST_CASE(SimdLibSingle_subscript_assign_read)
{
    vec_t avec;
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        ascalararr[i] = i;
    }

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        avec[i] = ascalararr[i];
    }

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], avec[i]);
    }
}

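// gather/scatter read and write float values through a simd<std::uint32_t>
// vector of indices into a plain scalar array.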
BOOST_AUTO_TEST_CASE(SimdLibFloat_gather32)
{
    vec_t avec;
    using index_t = simd<std::uint32_t>;
    index_t aindexvec;

    // create and fill index
    std::array<std::uint32_t, vec_t::width> aindex;
    aindex[0] = 0;
    if (vec_t::width > 2)
    {
        aindex[1] = 3;
        aindex[2] = 5;
        aindex[3] = 6;
    }
    if (vec_t::width > 4)
    {
        aindex[4] = 8;
        aindex[5] = 15;
        aindex[6] = 20;
        aindex[7] = 23;
    }
    if (vec_t::width > 8)
    {
        aindex[8]  = 24;
        aindex[9]  = 28;
        aindex[10] = 33;
        aindex[11] = 40;
        aindex[12] = 41;
        aindex[13] = 45;
        aindex[14] = 60;
        aindex[15] = 61;
    }

    // load index
    aindexvec.load(aindex.data(), is_not_aligned);

    // create and fill scalar array
    constexpr size_t scalarArraySize = 64;
    std::array<float, scalarArraySize> ascalararr;
    for (size_t i = 0; i < scalarArraySize; ++i)
    {
        ascalararr[i] = i;
    }

    avec.gather(ascalararr.data(), aindexvec);

    // check
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_scatter32)
{
    vec_t avec;
    using index_t = simd<std::uint32_t>;
    index_t aindexvec;

    // create and fill index
    std::array<std::uint32_t, vec_t::width> aindex;
    aindex[0] = 1;
    if (vec_t::width > 1)
    {
        aindex[1] = 3;
    }
    if (vec_t::width > 2)
    {
        aindex[2] = 5;
        aindex[3] = 6;
    }
    if (vec_t::width > 4)
    {
        aindex[4] = 8;
        aindex[5] = 15;
        aindex[6] = 20;
        aindex[7] = 30;
    }
    if (vec_t::width > 8)
    {
        aindex[8]  = 31;
        aindex[9]  = 32;
        aindex[10] = 35;
        aindex[11] = 40;
        aindex[12] = 41;
        aindex[13] = 45;
        aindex[14] = 60;
        aindex[15] = 61;
    }

    // load index
    aindexvec.load(aindex.data(), is_not_aligned);

    // create scalar array
    constexpr size_t scalarArraySize = 64;
    std::array<float, scalarArraySize> ascalararr;

    // fill the source values in a scalar buffer, then load them into avec
    alignas(vec_t::alignment) std::array<float, vec_t::width> avecarr{{}};
    avecarr[0] = 10;
    if (vec_t::width > 1)
    {
        avecarr[1] = 9;
    }

    if (vec_t::width > 2)
    {
        avecarr[2] = 8;
        avecarr[3] = 7;
    }
    if (vec_t::width > 4)
    {
        avecarr[4] = 4;
        avecarr[5] = 3;
        avecarr[6] = 2;
        avecarr[7] = 1;
    }
    avec.load(avecarr.data());

    avec.scatter(ascalararr.data(), aindexvec);

    // check
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(avec[i], ascalararr[aindex[i]]);
    }
}

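// Arithmetic coverage: compound assignment (+=, -=, *=, /=), the binary
// operators, an expression mixing + and *, and the fused multiply-add
// avec1.fma(avec2, avec3), which computes avec1 + avec2 * avec3.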
BOOST_AUTO_TEST_CASE(SimdLibFloat_add_unary)
{
    float val1 = -4.0;
    float val2 = 2.0;
    vec_t res(val1);
    vec_t avec(val2);
    res += avec;
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_sub_unary)
{
    float val1 = -4.0;
    float val2 = 2.0;
    vec_t res(val1);
    vec_t avec(val2);
    res -= avec;
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_mul_unary)
{
    float val1 = -4.0;
    float val2 = 2.0;
    vec_t res(val1);
    vec_t avec(val2);
    res *= avec;
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_div_unary)
{
    float val1 = -4.0;
    float val2 = 2.0;
    vec_t res(val1);
    vec_t avec(val2);
    res /= avec;
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_add_binary)
{
    float val1 = -4.0;
    float val2 = 2.5;
    vec_t avec1(val1);
    vec_t avec2(val2);
    vec_t res = avec1 + avec2;
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_sub_binary)
{
    float val1 = -4.0;
    float val2 = 2.5;
    vec_t avec1(val1);
    vec_t avec2(val2);
    vec_t res = avec1 - avec2;
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_mul_binary)
{
    float val1 = -4.0;
    float val2 = 2.5;
    vec_t avec1(val1);
    vec_t avec2(val2);
    vec_t res = avec1 * avec2;
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_div_binary)
{
    float val1 = -4.0;
    float val2 = 2.5;
    vec_t avec1(val1);
    vec_t avec2(val2);
    vec_t res = avec1 / avec2;
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_add_mul)
{
    float val1 = -4.0;
    float val2 = 2.0;
    float val3 = 2.0;
    vec_t avec1(val1);
    vec_t avec2(val2);
    vec_t avec3(val3);
    vec_t res = avec1 + avec2 * avec3;
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_fused_add_mul)
{
    float val1 = -4.0;
    float val2 = 1.5;
    float val3 = 5.0;
    vec_t avec1(val1);
    vec_t avec2(val2);
    vec_t avec3(val3);
    avec1.fma(avec2, avec3);
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    avec1.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
    }
}

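// Elementwise math functions; each lane is checked against the corresponding
// std:: scalar result.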
BOOST_AUTO_TEST_CASE(SimdLibFloat_sqrt)
{
    float val = 4.0;
    vec_t avec(val);
    vec_t asqrt = sqrt(avec);
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    asqrt.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], std::sqrt(val));
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_abs)
{
    float val = -4.0;
    vec_t avec(val);
    vec_t aabs = abs(avec);
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    aabs.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], std::abs(val));
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_log)
{
    float val = 4.0;
    vec_t avec(val);
    vec_t alog = log(avec);
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // double brace to deal with gcc 4.8.5 ...
    alog.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], std::log(val));
    }
}

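// Comparison and logical tests: operator> returns a simd<bool, vec_t::width>
// mask whose lanes hold mask_t::true_v or mask_t::false_v; operator&& with a
// bool is checked to yield true only when every lane of the mask is true.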
BOOST_AUTO_TEST_CASE(SimdLibFloat_greater)
{
    float aval = 4.0;
    vec_t avec(aval);
    using mask_t = simd<bool, vec_t::width>;
    mask_t amask;

    amask = avec > avec;
    // check
    alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
        ascalararr{{}}; // double brace to deal with gcc 4.8.5 ...
    amask.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        // the cast turns the lvalue into an rvalue; needed pre-C++17
        BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::false_v);
    }

    float bval = 3.0;
    vec_t bvec(bval);

    amask = avec > bvec;
    // check
    amask.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        // the cast turns the lvalue into an rvalue; needed pre-C++17
        BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::true_v);
    }

    float cval = 5.0;
    vec_t cvec(cval);

    amask = avec > cvec;
    // check
    amask.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        // the cast turns the lvalue into an rvalue; needed pre-C++17
        BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::false_v);
    }

    if (vec_t::width == 4)
    {
        alignas(vec_t::alignment) std::array<float, 4> ascalararr2{
            {1.0, 2.0, 3.0, 4.0}}; // double brace to deal with gcc 4.8.5 ...
        float dval = 2.0;
        vec_t dvec(dval);
        vec_t evec;
        evec.load(ascalararr2.data());

        amask = dvec > evec;
        // check
        for (size_t i = 0; i < vec_t::width; ++i)
        {
            BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]), dvec[i] > evec[i]);
        }
    }

    if (vec_t::width == 8)
    {
        alignas(vec_t::alignment) std::array<float, 8> ascalararr2{
            {1.0, 2.0, 3.0, 4.0, 3.0, 2.0,
             1.0}}; // double brace to deal with gcc 4.8.5 ...
        // only seven initializers are given; the eighth element is
        // value-initialized to 0.0f
        float dval = 2.0;
        vec_t dvec(dval);
        vec_t evec;
        evec.load(ascalararr2.data());

        amask = dvec > evec;
        // check
        for (size_t i = 0; i < vec_t::width; ++i)
        {
            BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]),
                              dval > ascalararr2[i]);
        }
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_logic_and)
{
    float aval = 4.0;
    vec_t avec(aval);
    using mask_t = simd<bool, vec_t::width>;
    mask_t amask;

    alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
        ascalararr{{}}; // double brace to deal with gcc 4.8.5 ...
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        ascalararr[i] = mask_t::true_v;
    }
    amask.load(ascalararr.data());

    // check
    BOOST_CHECK_EQUAL(amask && false, false);
    BOOST_CHECK_EQUAL(amask && true, true);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        ascalararr[i] = mask_t::false_v;
    }
    amask.load(ascalararr.data());

    // check
    BOOST_CHECK_EQUAL(amask && false, false);
    BOOST_CHECK_EQUAL(amask && true, false);

    if (vec_t::width > 1)
    {
        // a mixed mask (one true lane, the rest false) must still combine
        // with true to give false
        ascalararr[0] = mask_t::true_v;
        amask.load(ascalararr.data());
        // check
        BOOST_CHECK_EQUAL(amask && false, false);
        BOOST_CHECK_EQUAL(amask && true, false);
    }
}

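// load_interleave transposes a block of vec_t::width elements, each storing
// nDof contiguous values, into one vector per degree of freedom;
// deinterleave_store performs the inverse transposition.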
BOOST_AUTO_TEST_CASE(SimdLibFloat_load_interleave_unload)
{
    constexpr size_t nDof{5};
    // no padding in load_interleave / deinterleave_store
    constexpr size_t nEle{vec_t::width * 5};
    constexpr size_t nDofBlock = nDof * vec_t::width;

    constexpr size_t size{nDof * nEle};
    std::array<float, size> dofScalarArr{{}};
    for (size_t i = 0; i < size; ++i)
    {
        dofScalarArr[i] = i;
    }

    // number of blocks
    size_t nBlock = nEle / vec_t::width;

    // aligned vector
    std::vector<vec_t, allocator<vec_t>> dofVectorArr(nDof);

    float *dataPtr = dofScalarArr.data();
    // loop over blocks, vec_t::width elements at a time
    for (size_t b = 0; b < nBlock; ++b)
    {
        // load
        load_interleave(dataPtr, nDof, dofVectorArr);

        // manipulate each block
        for (size_t j = 0; j < nDof; ++j)
        {
            dofVectorArr[j] = dofVectorArr[j] + j;
        }

        // store
        deinterleave_store(dofVectorArr, nDof, dataPtr);
        dataPtr += nDofBlock;
    }

    // check
    for (size_t b = 0, i = 0; b < nBlock; ++b)
    {
        for (size_t j = 0; j < nDof; ++j, ++i)
        {
            BOOST_CHECK_EQUAL(dofScalarArr[i], i + j);
        }
    }
}

BOOST_AUTO_TEST_CASE(SimdLibFloat_io)
{
    vec_t avec(3.14f);
    std::cout << avec << std::endl;
}

} // namespace Nektar::SimdLibTests