Nektar++
Loading...
Searching...
No Matches
TestSimdLibSingle.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: TestSimdLibSingle.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// Permission is hereby granted, free of charge, to any person obtaining a
14// copy of this software and associated documentation files (the "Software"),
15// to deal in the Software without restriction, including without limitation
16// the rights to use, copy, modify, merge, publish, distribute, sublicense,
17// and/or sell copies of the Software, and to permit persons to whom the
18// Software is furnished to do so, subject to the following conditions:
19//
20// The above copyright notice and this permission notice shall be included
21// in all copies or substantial portions of the Software.
22//
23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29// DEALINGS IN THE SOFTWARE.
30//
31// Description:
32//
33///////////////////////////////////////////////////////////////////////////////
34
37
38#include <boost/test/unit_test.hpp>
39
40#include <array>
41#include <cmath>
42#include <iostream>
43
// Expected values of simd<float>::width (NUM_LANES_32BITS) and
// simd<float>::alignment (ALIGNMENT) for the backend selected at compile time.
// The scalar fallback is defined first and overridden below when an extension
// is both available on the target and enabled through NEKTAR_ENABLE_SIMD_*.
// Exactly one USING_* macro is left defined for the x86 chain.
#define NUM_LANES_32BITS 1
#define ALIGNMENT 4
#define USING_SCALAR
#if defined(__x86_64__)
#if defined(__AVX512F__) && defined(NEKTAR_ENABLE_SIMD_AVX512)
#define USING_AVX512
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS 16
#undef ALIGNMENT
#define ALIGNMENT 64
#undef USING_SCALAR
#elif defined(__AVX2__) && defined(NEKTAR_ENABLE_SIMD_AVX2)
#define USING_AVX2
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS 8
#undef ALIGNMENT
#define ALIGNMENT 32
#undef USING_SCALAR
#elif defined(__SSE2__) && defined(NEKTAR_ENABLE_SIMD_SSE2)
#define USING_SSE2
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS 4
#undef ALIGNMENT
// NOTE(review): 64 breaks the lanes * sizeof(float) pattern of the AVX2 and
// AVX512 branches (which would give 16 here) -- confirm against the SSE2
// backend before changing.
#define ALIGNMENT 64
#undef USING_SCALAR
#endif
#endif
#if defined(__ARM_FEATURE_SVE) && defined(NEKTAR_ENABLE_SIMD_SVE)
#define USING_SVE
#undef NUM_LANES_32BITS
// Parenthesised so the expansion stays a single operand wherever it is used.
#define NUM_LANES_32BITS (__ARM_FEATURE_SVE_BITS / 32)
#undef ALIGNMENT
#define ALIGNMENT (__ARM_FEATURE_SVE_BITS / 4)
#undef USING_SCALAR
#endif
80
82{
83using namespace tinysimd;
84using vec_t = simd<float>;
86
87BOOST_AUTO_TEST_CASE(SimdLibSingle_width_alignment)
88{
89 std::size_t width, alignment;
90
91#if defined(USING_SCALAR)
92 std::cout << "scalar float" << std::endl;
93#endif
94#if defined(USING_AVX2)
95 std::cout << "avx2 float" << std::endl;
96#endif
97#if defined(USING_AVX512)
98 std::cout << "avx512 float" << std::endl;
99#endif
100#if defined(USING_SVE)
101 std::cout << "sve float" << std::endl;
102#endif
103
104 // float
105 width = simd<float>::width;
106 alignment = simd<float>::alignment;
107 BOOST_CHECK_EQUAL(width, NUM_LANES_32BITS);
108 BOOST_CHECK_EQUAL(alignment, ALIGNMENT);
109 // std::int32_t index forcing # of lanes
112 BOOST_CHECK_EQUAL(width, NUM_LANES_32BITS);
113 BOOST_CHECK_EQUAL(alignment, ALIGNMENT);
114 // std::int32_t default index
117 BOOST_CHECK_EQUAL(width, NUM_LANES_32BITS);
118 BOOST_CHECK_EQUAL(alignment, ALIGNMENT);
119}
120
121BOOST_AUTO_TEST_CASE(SimdLibFloat_type_traits)
122{
123 using namespace details;
124
125 BOOST_CHECK_EQUAL(has_width<float>::value, false);
126 BOOST_CHECK_EQUAL(has_width<vec_t>::value, true);
127
128 BOOST_CHECK_EQUAL(has_alignment<float>::value, false);
129 BOOST_CHECK_EQUAL(has_alignment<vec_t>::value, true);
130
131 BOOST_CHECK_EQUAL(has_scalarType<float>::value, false);
132 BOOST_CHECK_EQUAL(has_scalarType<vec_t>::value, true);
133
134 BOOST_CHECK_EQUAL(is_vector_v<float>, false);
135 BOOST_CHECK_EQUAL(is_vector_v<vec_t>, true);
136
137 BOOST_CHECK_EQUAL(is_vector_floating_point_v<float>, false);
138 BOOST_CHECK_EQUAL(is_vector_floating_point_v<simd<int>>, false);
139 BOOST_CHECK_EQUAL(is_vector_floating_point_v<vec_t>, true);
140}
141
142BOOST_AUTO_TEST_CASE(SimdLibFloat_mem_size)
143{
144 BOOST_CHECK_EQUAL(sizeof(vec_t), sizeof(float) * vec_t::width);
145}
146
147BOOST_AUTO_TEST_CASE(SimdLibFloat_ctors)
148{
149 [[maybe_unused]] vec_t avec1;
150
151 vec_t::scalarType ascalar = 0;
152 vec_t avec2(ascalar);
153 [[maybe_unused]] vec_t avec3{ascalar};
154 vec_t avec4 = ascalar;
155
156 [[maybe_unused]] vec_t avec5(avec2);
157 [[maybe_unused]] vec_t avec6{avec4};
158
159 [[maybe_unused]] vec_t avec7(avec2._data);
160 [[maybe_unused]] vec_t avec8{avec2._data};
161
162 vec_t::vectorType anative;
163 [[maybe_unused]] vec_t avec9(anative);
164 [[maybe_unused]] vec_t avec10{anative};
165}
166
167BOOST_AUTO_TEST_CASE(SimdLibFloat_load)
168{
169 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
170 {}}; // double brace to deal with gcc 4.8.5 ...
171 vec_t avec;
172 avec.load(ascalararr.data());
173}
174
175BOOST_AUTO_TEST_CASE(SimdLibFloat_load_implicit)
176{
177 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
178 {}}; // double brace to deal with gcc 4.8.5 ...
179 vec_t avec;
180 avec = *(reinterpret_cast<vec_t *>(ascalararr.data()));
181}
182
183BOOST_AUTO_TEST_CASE(SimdLibFloat_load_aligned)
184{
185 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
186 {}}; // double brace to deal with gcc 4.8.5 ...
187 vec_t avec;
188 avec.load(ascalararr.data(), is_aligned);
189}
190
191BOOST_AUTO_TEST_CASE(SimdLibFloat_load_unaligned)
192{
193 std::array<float, vec_t::width> ascalararr{
194 {}}; // double brace to deal with gcc 4.8.5 ...
195 vec_t avec;
196 avec.load(ascalararr.data(), is_not_aligned);
197}
198
199BOOST_AUTO_TEST_CASE(SimdLibFloat_store)
200{
201 float val = 4.0;
202 vec_t avec(val);
203 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
204 {}}; // float brace to deal with gcc 4.8.5 ...
205 avec.store(ascalararr.data());
206
207 for (size_t i = 0; i < vec_t::width; ++i)
208 {
209 BOOST_CHECK_EQUAL(ascalararr[i], val);
210 }
211}
212
213BOOST_AUTO_TEST_CASE(SimdLibFloat_store_aligned)
214{
215 float val = 4.0;
216 vec_t avec(val);
217 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
218 {}}; // float brace to deal with gcc 4.8.5 ...
219 avec.store(ascalararr.data(), is_aligned);
220
221 for (size_t i = 0; i < vec_t::width; ++i)
222 {
223 BOOST_CHECK_EQUAL(ascalararr[i], val);
224 }
225}
226
227BOOST_AUTO_TEST_CASE(SimdLibFloat_store_unaligned)
228{
229 float val = 4.0;
230 vec_t avec(val);
231 std::array<float, vec_t::width> ascalararr{
232 {}}; // float brace to deal with gcc 4.8.5 ...
233 avec.store(ascalararr.data(), is_not_aligned);
234
235 for (size_t i = 0; i < vec_t::width; ++i)
236 {
237 BOOST_CHECK_EQUAL(ascalararr[i], val);
238 }
239}
240
241BOOST_AUTO_TEST_CASE(SimdLibFloat_store_non_temporal)
242{
243 float val = 4.0;
244 vec_t avec(val);
245 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
246 {}}; // float brace to deal with gcc 4.8.5 ...
247 avec.store(ascalararr.data(), is_not_reused);
248
249 for (size_t i = 0; i < vec_t::width; ++i)
250 {
251 BOOST_CHECK_EQUAL(ascalararr[i], val);
252 }
253}
254
255BOOST_AUTO_TEST_CASE(SimdLibSingle_broadcast)
256{
257 // explictly float literal for
258 // Visual Studio 14 2015 Win64 compiler bug
259 vec_t::scalarType ascalar{3.333f};
260 vec_t avec;
261 avec.broadcast(ascalar);
262}
263
264BOOST_AUTO_TEST_CASE(SimdLibSingle_subscript_assign_read)
265{
266 vec_t avec;
267 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
268 {}}; // double brace to deal with gcc 4.8.5 ...
269
270 for (size_t i = 0; i < vec_t::width; ++i)
271 {
272 ascalararr[i] = i;
273 }
274
275 for (size_t i = 0; i < vec_t::width; ++i)
276 {
277 avec[i] = ascalararr[i];
278 }
279
280 for (size_t i = 0; i < vec_t::width; ++i)
281 {
282 BOOST_CHECK_EQUAL(ascalararr[i], avec[i]);
283 }
284}
285
286BOOST_AUTO_TEST_CASE(SimdLibFloat_gather32)
287{
288 vec_t avec;
289 using index_t = simd<std::uint32_t>;
290 index_t aindexvec;
291
292 // create and fill index
293 std::array<std::uint32_t, vec_t::width> aindex;
294 aindex[0] = 0;
295 if (vec_t::width > 2)
296 {
297 aindex[1] = 3;
298 aindex[2] = 5;
299 aindex[3] = 6;
300 }
301 if (vec_t::width > 4)
302 {
303 aindex[4] = 8;
304 aindex[5] = 15;
305 aindex[6] = 20;
306 aindex[7] = 23;
307 }
308 if (vec_t::width > 8)
309 {
310 aindex[8] = 24;
311 aindex[9] = 28;
312 aindex[10] = 33;
313 aindex[11] = 40;
314 aindex[12] = 41;
315 aindex[13] = 45;
316 aindex[14] = 60;
317 aindex[15] = 61;
318 }
319
320 // load index
321 aindexvec.load(aindex.data(), is_not_aligned);
322
323 // create and fill scalar array
324 constexpr size_t scalarArraySize = 64;
325 std::array<float, scalarArraySize> ascalararr;
326 for (size_t i = 0; i < scalarArraySize; ++i)
327 {
328 ascalararr[i] = i;
329 }
330
331 avec.gather(ascalararr.data(), aindexvec);
332
333 // check
334 for (size_t i = 0; i < vec_t::width; ++i)
335 {
336 BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
337 }
338}
339
340BOOST_AUTO_TEST_CASE(SimdLibFloat_scatter32)
341{
342 vec_t avec;
343 using index_t = simd<std::uint32_t>;
344 index_t aindexvec;
345
346 // create and fill index
347 std::array<std::uint32_t, vec_t::width> aindex;
348 aindex[0] = 1;
349 if (vec_t::width > 1)
350 {
351 aindex[1] = 3;
352 }
353 if (vec_t::width > 2)
354 {
355 aindex[2] = 5;
356 aindex[3] = 6;
357 }
358 if (vec_t::width > 4)
359 {
360 aindex[4] = 8;
361 aindex[5] = 15;
362 aindex[6] = 20;
363 aindex[7] = 30;
364 }
365 if (vec_t::width > 8)
366 {
367 aindex[8] = 31;
368 aindex[9] = 32;
369 aindex[10] = 35;
370 aindex[11] = 40;
371 aindex[12] = 41;
372 aindex[13] = 45;
373 aindex[14] = 60;
374 aindex[15] = 61;
375 }
376
377 // load index
378 aindexvec.load(aindex.data(), is_not_aligned);
379
380 // create scalar array
381 constexpr size_t scalarArraySize = 64;
382 std::array<float, scalarArraySize> ascalararr;
383
384 // fill vector
385 alignas(vec_t::alignment) std::array<float, vec_t::width> avecarr{{}};
386 avecarr[0] = 10;
387 if (vec_t::width > 1)
388 {
389 avecarr[1] = 9;
390 }
391
392 if (vec_t::width > 2)
393 {
394 avec[2] = 8;
395 avec[3] = 7;
396 }
397 if (vec_t::width > 4)
398 {
399 avec[4] = 4;
400 avec[5] = 3;
401 avec[6] = 2;
402 avec[7] = 1;
403 }
404 avec.load(avecarr.data());
405
406 avec.scatter(ascalararr.data(), aindexvec);
407
408 // check
409 for (size_t i = 0; i < vec_t::width; ++i)
410 {
411 BOOST_CHECK_EQUAL(avec[i], ascalararr[aindex[i]]);
412 }
413}
414
415BOOST_AUTO_TEST_CASE(SimdLibFloat_add_unary)
416{
417 float val1 = -4.0;
418 float val2 = 2.0;
419 vec_t res(val1);
420 vec_t avec(val2);
421 res += avec;
422 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
423 {}}; // float brace to deal with gcc 4.8.5 ...
424 res.store(ascalararr.data());
425
426 for (size_t i = 0; i < vec_t::width; ++i)
427 {
428 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
429 }
430}
431
432BOOST_AUTO_TEST_CASE(SimdLibFloat_sub_unary)
433{
434 float val1 = -4.0;
435 float val2 = 2.0;
436 vec_t res(val1);
437 vec_t avec(val2);
438 res -= avec;
439 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
440 {}}; // float brace to deal with gcc 4.8.5 ...
441 res.store(ascalararr.data());
442
443 for (size_t i = 0; i < vec_t::width; ++i)
444 {
445 BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
446 }
447}
448
449BOOST_AUTO_TEST_CASE(SimdLibFloat_mul_unary)
450{
451 float val1 = -4.0;
452 float val2 = 2.0;
453 vec_t res(val1);
454 vec_t avec(val2);
455 res *= avec;
456 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
457 {}}; // float brace to deal with gcc 4.8.5 ...
458 res.store(ascalararr.data());
459
460 for (size_t i = 0; i < vec_t::width; ++i)
461 {
462 BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
463 }
464}
465
466BOOST_AUTO_TEST_CASE(SimdLibFloat_div_unary)
467{
468 float val1 = -4.0;
469 float val2 = 2.0;
470 vec_t res(val1);
471 vec_t avec(val2);
472 res /= avec;
473 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
474 {}}; // float brace to deal with gcc 4.8.5 ...
475 res.store(ascalararr.data());
476
477 for (size_t i = 0; i < vec_t::width; ++i)
478 {
479 BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
480 }
481}
482
483BOOST_AUTO_TEST_CASE(SimdLibFloat_add_binary)
484{
485 float val1 = -4.0;
486 float val2 = 2.5;
487 vec_t avec1(val1);
488 vec_t avec2(val2);
489 vec_t res = avec1 + avec2;
490 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
491 {}}; // double brace to deal with gcc 4.8.5 ...
492 res.store(ascalararr.data());
493
494 for (size_t i = 0; i < vec_t::width; ++i)
495 {
496 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
497 }
498}
499
500BOOST_AUTO_TEST_CASE(SimdLibFloat_sub_binary)
501{
502 float val1 = -4.0;
503 float val2 = 2.5;
504 vec_t avec1(val1);
505 vec_t avec2(val2);
506 vec_t res = avec1 - avec2;
507 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
508 {}}; // double brace to deal with gcc 4.8.5 ...
509 res.store(ascalararr.data());
510
511 for (size_t i = 0; i < vec_t::width; ++i)
512 {
513 BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
514 }
515}
516
517BOOST_AUTO_TEST_CASE(SimdLibFloat_mul_binary)
518{
519 float val1 = -4.0;
520 float val2 = 2.5;
521 vec_t avec1(val1);
522 vec_t avec2(val2);
523 vec_t res = avec1 * avec2;
524 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
525 {}}; // double brace to deal with gcc 4.8.5 ...
526 res.store(ascalararr.data());
527
528 for (size_t i = 0; i < vec_t::width; ++i)
529 {
530 BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
531 }
532}
533
534BOOST_AUTO_TEST_CASE(SimdLibFloat_div_binary)
535{
536 float val1 = -4.0;
537 float val2 = 2.5;
538 vec_t avec1(val1);
539 vec_t avec2(val2);
540 vec_t res = avec1 / avec2;
541 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
542 {}}; // double brace to deal with gcc 4.8.5 ...
543 res.store(ascalararr.data());
544
545 for (size_t i = 0; i < vec_t::width; ++i)
546 {
547 BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
548 }
549}
550
551BOOST_AUTO_TEST_CASE(SimdLibFloat_add_mul)
552{
553 float val1 = -4.0;
554 float val2 = 2.0;
555 float val3 = 2.0;
556 vec_t avec1(val1);
557 vec_t avec2(val2);
558 vec_t avec3(val3);
559 vec_t res = avec1 + avec2 * avec3;
560 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
561 {}}; // float brace to deal with gcc 4.8.5 ...
562 res.store(ascalararr.data());
563
564 for (size_t i = 0; i < vec_t::width; ++i)
565 {
566 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 + val3);
567 }
568}
569
570BOOST_AUTO_TEST_CASE(SimdLibFloat_fused_add_mul)
571{
572 float val1 = -4.0;
573 float val2 = 1.5;
574 float val3 = 5.0;
575 vec_t avec1(val1);
576 vec_t avec2(val2);
577 vec_t avec3(val3);
578 avec1.fma(avec2, avec3);
579 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
580 {}}; // double brace to deal with gcc 4.8.5 ...
581 avec1.store(ascalararr.data());
582
583 for (size_t i = 0; i < vec_t::width; ++i)
584 {
585 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
586 }
587}
588
589BOOST_AUTO_TEST_CASE(SimdLibFloat_sqrt)
590{
591 float val = 4.0;
592 vec_t avec(val);
593 vec_t asqrt = sqrt(avec);
594 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
595 {}}; // float brace to deal with gcc 4.8.5 ...
596 asqrt.store(ascalararr.data());
597
598 for (size_t i = 0; i < vec_t::width; ++i)
599 {
600 BOOST_CHECK_EQUAL(ascalararr[i], std::sqrt(val));
601 }
602}
603
604BOOST_AUTO_TEST_CASE(SimdLibFloat_abs)
605{
606 float val = -4.0;
607 vec_t avec(val);
608 vec_t aabs = abs(avec);
609 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
610 {}}; // float brace to deal with gcc 4.8.5 ...
611 aabs.store(ascalararr.data());
612
613 for (size_t i = 0; i < vec_t::width; ++i)
614 {
615 BOOST_CHECK_EQUAL(ascalararr[i], std::abs(val));
616 }
617}
618
619BOOST_AUTO_TEST_CASE(SimdLibFloat_log)
620{
621 float val = 4.0;
622 vec_t avec(val);
623 vec_t alog = log(avec);
624 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
625 {}}; // float brace to deal with gcc 4.8.5 ...
626 alog.store(ascalararr.data());
627
628 for (size_t i = 0; i < vec_t::width; ++i)
629 {
630 BOOST_CHECK_EQUAL(ascalararr[i], std::log(val));
631 }
632}
633
634BOOST_AUTO_TEST_CASE(SimdLibFloat_greater)
635{
636 float aval = 4.0;
637 vec_t avec(aval);
638 using mask_t = simd<bool, vec_t::width>;
639 mask_t amask;
640
641 amask = avec > avec;
642 // check
643 alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
644 ascalararr{{}}; // float brace to deal with gcc 4.8.5 ...
645 amask.store(ascalararr.data());
646 for (size_t i = 0; i < vec_t::width; ++i)
647 {
648 // type conversion make lvalue in rvalue, needed pre-c++17
649 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::false_v);
650 }
651
652 float bval = 3.0;
653 vec_t bvec(bval);
654
655 amask = avec > bvec;
656 // check
657 amask.store(ascalararr.data());
658 for (size_t i = 0; i < vec_t::width; ++i)
659 {
660 // type conversion make lvalue in rvalue, needed pre-c++17
661 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::true_v);
662 }
663
664 float cval = 5.0;
665 vec_t cvec(cval);
666
667 amask = avec > cvec;
668 // check
669 amask.store(ascalararr.data());
670 for (size_t i = 0; i < vec_t::width; ++i)
671 {
672 // type conversion make lvalue in rvalue, needed pre-c++17
673 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::false_v);
674 }
675
676 if (vec_t::width == 4)
677 {
678 alignas(vec_t::alignment) std::array<float, 4> ascalararr2{
679 {1.0, 2.0, 3.0, 4.0}}; // float brace to deal with gcc 4.8.5 ...
680 float dval = 2.0;
681 vec_t dvec(dval);
682 vec_t evec;
683 evec.load(ascalararr2.data());
684
685 amask = dvec > evec;
686 // check
687 for (size_t i = 0; i < vec_t::width; ++i)
688 {
689 BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]), dvec[i] > evec[i]);
690 }
691 }
692
693 if (vec_t::width == 8)
694 {
695 alignas(vec_t::alignment) std::array<float, 8> ascalararr2{
696 {1.0, 2.0, 3.0, 4.0, 3.0, 2.0,
697 1.0}}; // double brace to deal with gcc 4.8.5 ...
698 float dval = 2.0;
699 vec_t dvec(dval);
700 vec_t evec;
701 evec.load(ascalararr2.data());
702
703 amask = dvec > evec;
704 // check
705 for (size_t i = 0; i < vec_t::width; ++i)
706 {
707 BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]),
708 dval > ascalararr2[i]);
709 }
710 }
711}
712
713BOOST_AUTO_TEST_CASE(SimdLibFloat_logic_and)
714{
715 float aval = 4.0;
716 vec_t avec(aval);
717 using mask_t = simd<bool, vec_t::width>;
718 mask_t amask;
719
720 alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
721 ascalararr{{}}; // float brace to deal with gcc 4.8.5 ...
722 for (size_t i = 0; i < vec_t::width; ++i)
723 {
724 ascalararr[i] = mask_t::true_v;
725 }
726 amask.load(ascalararr.data());
727
728 // check
729 BOOST_CHECK_EQUAL(amask && false, false);
730 BOOST_CHECK_EQUAL(amask && true, true);
731
732 for (size_t i = 0; i < vec_t::width; ++i)
733 {
734 ascalararr[i] = mask_t::false_v;
735 }
736 amask.load(ascalararr.data());
737
738 // check
739 BOOST_CHECK_EQUAL(amask && false, false);
740 BOOST_CHECK_EQUAL(amask && true, false);
741
742 if (vec_t::width > 1)
743 {
744 ascalararr[0] = mask_t::true_v;
745 // check
746 BOOST_CHECK_EQUAL(amask && false, false);
747 BOOST_CHECK_EQUAL(amask && true, false);
748 }
749}
750
751BOOST_AUTO_TEST_CASE(SimdLibFloat_load_interleave_unload)
752{
753 constexpr size_t nDof{5};
754 // no padding in load_interleave deinterleave_store
755 constexpr size_t nEle{vec_t::width * 5};
756 constexpr size_t nDofBlock = nDof * vec_t::width;
757
758 constexpr size_t size{nDof * nEle};
759 std::array<float, size> dofScalarArr{{}};
760 for (size_t i = 0; i < size; ++i)
761 {
762 dofScalarArr[i] = i;
763 }
764
765 // number of blocks
766 size_t nBlock = nEle / vec_t::width;
767
768 // aligned vector
769 std::vector<vec_t, allocator<vec_t>> dofVectorArr(nDof);
770
771 float *dataPtr = dofScalarArr.data();
772 // loop over blocks vec_t::width elements at the time
773 for (size_t b = 0; b < nBlock; ++b)
774 {
775 // load
776 load_interleave(dataPtr, nDof, dofVectorArr);
777
778 // manipulate each block
779 for (size_t j = 0; j < nDof; ++j)
780 {
781 dofVectorArr[j] = dofVectorArr[j] + j;
782 }
783
784 // store
785 deinterleave_store(dofVectorArr, nDof, dataPtr);
786 dataPtr += nDofBlock;
787 }
788
789 // check
790 for (size_t b = 0, i = 0; b < nBlock; ++b)
791 {
792 for (size_t j = 0; j < nDof; ++j, ++i)
793 {
794 BOOST_CHECK_EQUAL(dofScalarArr[i], i + j);
795 }
796 }
797}
798
799BOOST_AUTO_TEST_CASE(SimdLibFloat_longsimd_mixed_arithmetic)
800{
801 alignas(long_vec_t::alignment) std::array<float, long_vec_t::width> lhsData{
802 {}};
803 alignas(vec_t::alignment) std::array<float, vec_t::width> rhsData{{}};
804
805 for (size_t i = 0; i < lhsData.size(); ++i)
806 {
807 lhsData[i] = static_cast<float>(i + 1);
808 }
809 for (size_t i = 0; i < rhsData.size(); ++i)
810 {
811 rhsData[i] = static_cast<float>(i + 1);
812 }
813
814 long_vec_t lhs;
815 lhs.load(lhsData.data(), is_aligned);
816
817 vec_t rhs;
818 rhs.load(rhsData.data(), is_aligned);
819
820 auto mixed = lhs + rhs;
821 mixed += rhs;
822 mixed += 2.0f;
823
824 alignas(long_vec_t::alignment) std::array<float, long_vec_t::width> tmp{{}};
825 mixed.store(tmp.data(), is_aligned);
826 for (size_t i = 0; i < tmp.size(); ++i)
827 {
828 const auto chunkLane = i % vec_t::width;
829 BOOST_CHECK_EQUAL(tmp[i],
830 lhsData[i] + 2.0f * rhsData[chunkLane] + 2.0f);
831 }
832}
833
834BOOST_AUTO_TEST_CASE(SimdLibFloat_longsimd_interleave_helpers)
835{
836 constexpr size_t nDof = 3;
837 constexpr size_t nBlocks = 2;
838 constexpr size_t nEle = long_vec_t::width * nBlocks;
839 constexpr size_t blockSize = nDof * long_vec_t::width;
840 constexpr size_t size = nDof * nEle;
841
842 std::array<float, size> data{{}};
843 for (size_t i = 0; i < size; ++i)
844 {
845 data[i] = static_cast<float>(i);
846 }
847
848 std::vector<long_vec_t, allocator<long_vec_t>> work(nDof);
849
850 for (size_t block = 0; block < nBlocks; ++block)
851 {
852 float *dataPtr = data.data() + block * blockSize;
853 load_interleave(dataPtr, nDof, work);
854 for (size_t j = 0; j < nDof; ++j)
855 {
856 work[j] += static_cast<float>(j + 1);
857 }
858 deinterleave_store(work, nDof, dataPtr);
859 }
860
861 for (size_t block = 0; block < nBlocks; ++block)
862 {
863 for (size_t lane = 0; lane < long_vec_t::width; ++lane)
864 {
865 for (size_t dof = 0; dof < nDof; ++dof)
866 {
867 const auto idx = block * blockSize + lane * nDof + dof;
868 BOOST_CHECK_EQUAL(data[idx], static_cast<float>(idx + dof + 1));
869 }
870 }
871 }
872}
873
874BOOST_AUTO_TEST_CASE(SimdLibFloat_io)
875{
876 vec_t avec(3.14);
877 std::cout << avec << std::endl;
878}
879
880} // namespace Nektar::SimdLibTests
#define ALIGNMENT
#define NUM_LANES_32BITS
The above copyright notice and this permission notice shall be included.
longsimd< double, 3 *vec_t::width > long_vec_t
BOOST_AUTO_TEST_CASE(SimdLibDouble_width_alignment)
void load_interleave(const T *in, const size_t dataLen, std::vector< scalarT< T >, allocator< scalarT< T > > > &out)
Definition scalar.hpp:338
constexpr bool is_vector_floating_point_v
Definition traits.hpp:173
scalarT< T > abs(scalarT< T > in)
Definition scalar.hpp:295
static constexpr struct tinysimd::is_aligned_t is_aligned
static constexpr struct tinysimd::is_not_aligned_t is_not_aligned
typename abi< ScalarType, width >::type simd
Definition tinysimd.hpp:132
scalarT< T > log(scalarT< T > in)
Definition scalar.hpp:310
void deinterleave_store(const std::vector< scalarT< T >, allocator< scalarT< T > > > &in, const size_t dataLen, T *out)
Definition scalar.hpp:370
scalarT< T > sqrt(scalarT< T > in)
Definition scalar.hpp:290
static constexpr struct tinysimd::is_not_reused_t is_not_reused
typename details::longsimd_selector< ScalarType, width, abi >::type longsimd
Definition tinysimd.hpp:485