#include <boost/core/ignore_unused.hpp>
#include <boost/test/unit_test.hpp>

#include <array>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>
#define NUM_LANES_32BITS 1
#if defined(__x86_64__)
#if defined(__AVX512F__) && defined(NEKTAR_ENABLE_SIMD_AVX512)
#define USING_AVX512
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS 16
#elif defined(__AVX2__) && defined(NEKTAR_ENABLE_SIMD_AVX2)
#define USING_AVX2
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS 8
#elif defined(__SSE2__) && defined(NEKTAR_ENABLE_SIMD_SSE2)
#define USING_SSE2
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS 4
#endif
#endif

#if defined(__ARM_FEATURE_SVE) && defined(NEKTAR_ENABLE_SIMD_SVE)
#define USING_SVE
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS __ARM_FEATURE_SVE_BITS / 32
#define ALIGNMENT __ARM_FEATURE_SVE_BITS / 4
#endif
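// Exactly one branch above wins: AVX512 packs sixteen 32-bit lanes, AVX2
// eight, SSE2 four, SVE takes its lane count from __ARM_FEATURE_SVE_BITS,
// and the scalar fallback keeps the default of one. A compile-time
// cross-check could read (a sketch; vec_t, assumed to be
// tinysimd::simd<float>, is the alias exercised by the tests below):
//
//     static_assert(vec_t::width == NUM_LANES_32BITS,
//                   "lane count must match the selected extension");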
    std::size_t width, alignment;

#if defined(USING_SCALAR)
    std::cout << "scalar float" << std::endl;
#endif
#if defined(USING_AVX2)
    std::cout << "avx2 float" << std::endl;
#endif
#if defined(USING_AVX512)
    std::cout << "avx512 float" << std::endl;
#endif
#if defined(USING_SVE)
    std::cout << "sve float" << std::endl;
#endif
    using namespace details;

    BOOST_CHECK_EQUAL(has_width<float>::value, false);
    BOOST_CHECK_EQUAL(has_width<vec_t>::value, true);

    BOOST_CHECK_EQUAL(has_alignment<float>::value, false);
    BOOST_CHECK_EQUAL(has_alignment<vec_t>::value, true);

    BOOST_CHECK_EQUAL(has_scalarType<float>::value, false);
    BOOST_CHECK_EQUAL(has_scalarType<vec_t>::value, true);
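    // has_width, has_alignment and has_scalarType are member-detection
    // traits from the details namespace: they report whether a type exposes
    // the static members a vector type must provide, so plain float fails
    // all three and vec_t passes. The idiom looks roughly like this (a
    // sketch, not the library's exact code):
    //
    //     template <class T, class = std::void_t<>>
    //     struct has_width : std::false_type {};
    //     template <class T>
    //     struct has_width<T, std::void_t<decltype(T::width)>>
    //         : std::true_type {};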
    BOOST_CHECK_EQUAL(sizeof(vec_t), sizeof(float) * vec_t::width);
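    // vec_t occupies exactly width floats with no padding, which is what
    // keeps the reinterpret_cast load below and the pointer arithmetic in
    // the interleave test within bounds.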
    vec_t avec1;              // default
    vec_t::scalarType ascalar = 0;
    vec_t avec2(ascalar);     // from scalar
    vec_t avec3{ascalar};
    vec_t avec4 = ascalar;

    vec_t avec5(avec2);       // copy
    vec_t avec6{avec2};
    vec_t avec7(avec2._data); // from the underlying data member
    vec_t avec8{avec2._data};

    vec_t::vectorType anative;
    vec_t avec9(anative);     // from the native vector type
    vec_t avec10{anative};

    boost::ignore_unused(avec1, avec3, avec5, avec6, avec7, avec8, avec9,
                         avec10);
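    // vec_t wraps one native register: vec_t::vectorType is the backend
    // type (e.g. __m256 under AVX2), exposed as the public member _data, so
    // a vector can be built from a scalar broadcast, a copy, the raw data
    // member, or the native type itself.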
    // default load (assumes aligned data)
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
        {}}; // float arrays are zero-initialized
    avec.load(ascalararr.data());

    // load by casting the array to a vec_t
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    avec = *(reinterpret_cast<vec_t *>(ascalararr.data()));

    // load with an explicit alignment guarantee
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    avec.load(ascalararr.data(), is_aligned);

    // load from storage with no alignment guarantee
    std::array<float, vec_t::width> ascalararr{{}};
    avec.load(ascalararr.data(), is_not_aligned);
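    // The second argument is a dispatch tag, not a runtime flag:
    // tinysimd::is_aligned promises the pointer meets vec_t::alignment,
    // is_not_aligned selects the unaligned code path, and the default
    // assumes aligned data.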
    // default store
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    avec.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }

    // store with an explicit alignment guarantee
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    avec.store(ascalararr.data(), is_aligned);
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }

    // store to storage with no alignment guarantee
    std::array<float, vec_t::width> ascalararr{{}};
    avec.store(ascalararr.data(), is_not_aligned);
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }

    // store data that is not expected to be read again soon
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    avec.store(ascalararr.data(), is_not_reused);
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }
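    // is_not_reused requests a streaming (non-temporal) store where the
    // backend supports it, bypassing the cache for write-once data; the
    // result is bitwise identical, which is all this check can verify.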
    vec_t::scalarType ascalar{3.333f};
    avec.broadcast(ascalar);
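    // broadcast() replicates the scalar into every lane, equivalent to
    // constructing vec_t from a scalar; a sketch of how to verify it:
    //
    //     alignas(vec_t::alignment) std::array<float, vec_t::width> out{{}};
    //     avec.store(out.data());
    //     for (size_t i = 0; i < vec_t::width; ++i)
    //         BOOST_CHECK_EQUAL(out[i], ascalar);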
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};

    // fill the scalar array with distinct values (values illustrative)
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        ascalararr[i] = i;
    }

    // write each lane through operator[]
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        avec[i] = ascalararr[i];
    }

    // read each lane back through operator[]
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], avec[i]);
    }
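    // operator[] gives read and write access to single lanes, which keeps
    // these checks simple; in production loops, per-lane access defeats the
    // point of packing vec_t::width values into one register.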
    // index vector with scattered, in-bounds offsets (values illustrative;
    // the possible float widths here are 1, 4, 8 and 16)
    std::array<std::uint32_t, vec_t::width> aindex;
    aindex[0] = 1;
    if (vec_t::width > 2)
        aindex[1] = 3, aindex[2] = 5, aindex[3] = 6;
    if (vec_t::width > 4)
        for (size_t i = 4; i < 8; ++i)
            aindex[i] = 2 * i + 1;
    if (vec_t::width > 8)
        for (size_t i = 8; i < vec_t::width; ++i)
            aindex[i] = 3 * i;

    // source array with distinct values
    constexpr size_t scalarArraySize = 64;
    std::array<float, scalarArraySize> ascalararr;
    for (size_t i = 0; i < scalarArraySize; ++i)
    {
        ascalararr[i] = i;
    }

    avec.gather(ascalararr.data(), aindexvec);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
    }
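    // gather() pulls one float per lane from arbitrary offsets: lane i
    // receives ascalararr[aindex[i]]. aindexvec is an index vector loaded
    // from aindex; a sketch of its setup (types assumed):
    //
    //     simd<std::uint32_t, vec_t::width> aindexvec;
    //     aindexvec.load(aindex.data(), is_not_aligned);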
    // index vector, as in the gather test (values illustrative)
    std::array<std::uint32_t, vec_t::width> aindex;
    aindex[0] = 2;
    if (vec_t::width > 1)
        aindex[1] = 5;
    if (vec_t::width > 2)
        aindex[2] = 6, aindex[3] = 9;
    if (vec_t::width > 4)
        for (size_t i = 4; i < 8; ++i)
            aindex[i] = 4 * i;
    if (vec_t::width > 8)
        for (size_t i = 8; i < vec_t::width; ++i)
            aindex[i] = 4 * i + 1;

    constexpr size_t scalarArraySize = 64;
    std::array<float, scalarArraySize> ascalararr; // scatter destination

    // source vector with distinct lane values (values illustrative)
    alignas(vec_t::alignment) std::array<float, vec_t::width> avecarr{{}};
    avecarr[0] = 10;
    if (vec_t::width > 1)
        avecarr[1] = 9;
    if (vec_t::width > 2)
        avecarr[2] = 8, avecarr[3] = 7;
    if (vec_t::width > 4)
        for (size_t i = 4; i < 8; ++i)
            avecarr[i] = 8 - i;
    avec.load(avecarr.data());

    avec.scatter(ascalararr.data(), aindexvec);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(avec[i], ascalararr[aindex[i]]);
    }
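    // scatter() is the inverse of gather(): lane i is written to
    // ascalararr[aindex[i]]. The indices must be distinct, since two lanes
    // targeting one slot would collide and leave only one value behind.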
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
    }

    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
    }

    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
    }

    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
    }
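    // The four checks above exercise the compound-assignment forms; the
    // setup has the shape (a sketch):
    //
    //     vec_t avec1(val1), avec2(val2);
    //     vec_t res = avec1;
    //     res += avec2; // and likewise -=, *=, /=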
    vec_t res = avec1 + avec2;
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
    }

    vec_t res = avec1 - avec2;
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
    }

    vec_t res = avec1 * avec2;
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
    }

    vec_t res = avec1 / avec2;
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
    }
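    // Binary operators return a fresh vec_t; since every lane of avec1
    // holds val1 and every lane of avec2 holds val2, each result lane must
    // equal the scalar result exactly (the same IEEE operation, lane by
    // lane).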
    vec_t res = avec1 + avec2 * avec3;
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
    }
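    // Written as an expression, a + b * c leaves any fusion to the
    // compiler; the explicit fma() member below requests the fused form
    // directly, computing this = this + a * b.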
    avec1.fma(avec2, avec3);
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    avec1.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
    }
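    // Hardware FMA rounds once rather than twice, so an exact-equality
    // check against val1 + val2 * val3 relies on operands where both
    // roundings agree, or on the compiler contracting the scalar reference
    // the same way.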
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    asqrt.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], std::sqrt(val));
    }

    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    aabs.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], std::abs(val));
    }

    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    alog.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], std::log(val));
    }
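    // sqrt, abs and log are elementwise free functions returning a new
    // vector; the setup has the shape (a sketch):
    //
    //     vec_t avec(val);
    //     vec_t asqrt = sqrt(avec);
    //     vec_t aabs  = abs(avec);
    //     vec_t alog  = log(avec);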
    alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
        ascalararr{{}};

    // comparison yielding all-false lanes
    amask.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::false_v);
    }

    // comparison yielding all-true lanes
    amask.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::true_v);
    }

    // comparison yielding all-false lanes again
    amask.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::false_v);
    }
    if (vec_t::width == 4)
    {
        alignas(vec_t::alignment) std::array<float, 4> ascalararr2{
            {1.0, 2.0, 3.0, 4.0}};

        evec.load(ascalararr2.data());

        for (size_t i = 0; i < vec_t::width; ++i)
        {
            BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]), dvec[i] > evec[i]);
        }
    }
    if (vec_t::width == 8)
    {
        alignas(vec_t::alignment) std::array<float, 8> ascalararr2{
            {1.0, 2.0, 3.0, 4.0, 3.0, 2.0, 1.0, 0.0}}; // last two assumed

        evec.load(ascalararr2.data());

        for (size_t i = 0; i < vec_t::width; ++i)
        {
            BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]),
                              dval > ascalararr2[i]);
        }
    }
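    // Comparison operators return a mask vector (mask_t, the library's mask
    // type for vec_t): each lane holds mask_t::true_v (all bits set) or
    // mask_t::false_v (zero), and a lane converts to bool via amask[i].
    // The comparison driving the checks above has the shape (a sketch):
    //
    //     mask_t amask = dvec > evec; // lane-wise greater-than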
    alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
        ascalararr{{}};

    // all-true mask
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        ascalararr[i] = mask_t::true_v;
    }
    amask.load(ascalararr.data());

    BOOST_CHECK_EQUAL(amask && false, false);
    BOOST_CHECK_EQUAL(amask && true, true);

    // all-false mask
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        ascalararr[i] = mask_t::false_v;
    }
    amask.load(ascalararr.data());

    BOOST_CHECK_EQUAL(amask && false, false);
    BOOST_CHECK_EQUAL(amask && true, false);

    // mixed mask: a single true lane is not enough
    if (vec_t::width > 1)
    {
        ascalararr[0] = mask_t::true_v;
        amask.load(ascalararr.data());

        BOOST_CHECK_EQUAL(amask && false, false);
        BOOST_CHECK_EQUAL(amask && true, false);
    }
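    // Conjunction with a bool collapses the mask to a scalar: amask && true
    // is true only when every lane is set, which is why the mixed mask
    // above still compares false. Roughly (a sketch):
    //
    //     bool all = true;
    //     for (size_t i = 0; i < vec_t::width; ++i)
    //         all = all && static_cast<bool>(amask[i]);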
    constexpr size_t nDof{5};
    // no padding: nEle is a multiple of the vector width
    constexpr size_t nEle{vec_t::width * 5};
    constexpr size_t nDofBlock = nDof * vec_t::width;

    constexpr size_t size{nDof * nEle};
    std::array<float, size> dofScalarArr{{}};
    for (size_t i = 0; i < size; ++i)
    {
        dofScalarArr[i] = i; // distinct values so the checks can track dofs
    }

    size_t nBlock = nEle / vec_t::width;

    std::vector<vec_t, allocator<vec_t>> dofVectorArr(nDof);

    float *dataPtr = dofScalarArr.data();
    for (size_t b = 0; b < nBlock; ++b)
    {
        // transpose one block: lane l of dofVectorArr[j] receives dof j of
        // element l
        load_interleave(dataPtr, nDof, dofVectorArr);

        // manipulate each dof: add the dof index
        for (size_t j = 0; j < nDof; ++j)
        {
            dofVectorArr[j] = dofVectorArr[j] + j;
        }

        // transpose back and store
        deinterleave_store(dofVectorArr, nDof, dataPtr);
        dataPtr += nDofBlock;
    }

    for (size_t b = 0, i = 0; b < nBlock; ++b)
    {
        for (size_t j = 0; j < nDof; ++j, ++i)
        {
            BOOST_CHECK_EQUAL(dofScalarArr[i], i + j);
        }
    }
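    // For width 4 and nDof 5 a block is 20 contiguous floats stored
    // element-major (e0d0 e0d1 ... e0d4, e1d0 ...); load_interleave
    // transposes this AoS layout into one vec_t per dof (SoA), and
    // deinterleave_store is its exact inverse, so after adding j to dof j
    // every scalar entry equals its original value plus its dof index.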
    std::cout << avec << std::endl;
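    // operator<< streams every lane of avec, which is useful when a failing
    // lane needs to be identified by eye.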