#include <LibUtilities/SimdLib/tinysimd.hpp>

#include <boost/test/unit_test.hpp>

#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>
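// Select the number of 32-bit lanes for the widest enabled instruction set:
// scalar fallback = 1, SSE2 = 4, AVX2 = 8, AVX512 = 16; SVE derives it from
// the compile-time vector length __ARM_FEATURE_SVE_BITS.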
#define USING_SCALAR
#define NUM_LANES_32BITS 1
#if defined(__x86_64__)
#if defined(__AVX512F__) && defined(NEKTAR_ENABLE_SIMD_AVX512)
#define USING_AVX512
#undef USING_SCALAR
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS 16
#elif defined(__AVX2__) && defined(NEKTAR_ENABLE_SIMD_AVX2)
#define USING_AVX2
#undef USING_SCALAR
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS 8
#elif defined(__SSE2__) && defined(NEKTAR_ENABLE_SIMD_SSE2)
#define USING_SSE2
#undef USING_SCALAR
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS 4
#endif
#endif
#if defined(__ARM_FEATURE_SVE) && defined(NEKTAR_ENABLE_SIMD_SVE)
#define USING_SVE
#undef USING_SCALAR
#undef NUM_LANES_32BITS
#define NUM_LANES_32BITS __ARM_FEATURE_SVE_BITS / 32
#define ALIGNMENT __ARM_FEATURE_SVE_BITS / 4
#endif

// all tests run on the single-precision SIMD vector type
using namespace tinysimd;
using vec_t = simd<float>;
BOOST_AUTO_TEST_CASE(SimdLibSingle_width_alignment)
{
    std::size_t width, alignment;

#if defined(USING_SCALAR)
    std::cout << "scalar float" << std::endl;
#endif
#if defined(USING_AVX2)
    std::cout << "avx2 float" << std::endl;
#endif
#if defined(USING_AVX512)
    std::cout << "avx512 float" << std::endl;
#endif
#if defined(USING_SVE)
    std::cout << "sve float" << std::endl;
#endif

    // float
    width     = vec_t::width;
    alignment = vec_t::alignment;
    BOOST_CHECK_EQUAL(width, NUM_LANES_32BITS);
    BOOST_CHECK(alignment >= sizeof(float)); // exact value is backend-specific
}
BOOST_AUTO_TEST_CASE(SimdLibSingle_type_traits)
{
    using namespace details;

    BOOST_CHECK_EQUAL(has_width<float>::value, false);
    BOOST_CHECK_EQUAL(has_width<vec_t>::value, true);

    BOOST_CHECK_EQUAL(has_alignment<float>::value, false);
    BOOST_CHECK_EQUAL(has_alignment<vec_t>::value, true);

    BOOST_CHECK_EQUAL(has_scalarType<float>::value, false);
    BOOST_CHECK_EQUAL(has_scalarType<vec_t>::value, true);
}
BOOST_AUTO_TEST_CASE(SimdLibSingle_mem_size)
{
    BOOST_CHECK_EQUAL(sizeof(vec_t), sizeof(float) * vec_t::width);
}
BOOST_AUTO_TEST_CASE(SimdLibSingle_ctors)
{
    [[maybe_unused]] vec_t avec1;

    vec_t::scalarType ascalar = 0;
    vec_t avec2(ascalar);
    [[maybe_unused]] vec_t avec3{ascalar};
    vec_t avec4 = ascalar;

    [[maybe_unused]] vec_t avec5(avec2);
    [[maybe_unused]] vec_t avec6{avec4};

    [[maybe_unused]] vec_t avec7(avec2._data);
    [[maybe_unused]] vec_t avec8{avec2._data};

    vec_t::vectorType anative;
    [[maybe_unused]] vec_t avec9(anative);
    [[maybe_unused]] vec_t avec10{anative};
}
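// avec7/avec8 construct from another vector's raw backing store (_data), and
// avec9/avec10 from the native register type (vec_t::vectorType); these are
// compile-only smoke tests.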
BOOST_AUTO_TEST_CASE(SimdLibSingle_load)
{
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    vec_t avec;

    // default load
    avec.load(ascalararr.data());

    // implicit load by casting an aligned, vector-sized buffer
    avec = *(reinterpret_cast<vec_t *>(ascalararr.data()));

    // loads with explicit alignment tags
    avec.load(ascalararr.data(), is_aligned);
    avec.load(ascalararr.data(), is_not_aligned);
}
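// is_aligned / is_not_aligned are compile-time dispatch tags: the aligned
// variants may use aligned vector moves, while the unaligned variants are
// safe for any address.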
BOOST_AUTO_TEST_CASE(SimdLibSingle_store)
{
    float val = 4.0;
    vec_t avec(val);
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};

    // default store
    avec.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }

    // explicitly aligned store
    avec.store(ascalararr.data(), is_aligned);
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }

    // unaligned-safe store
    avec.store(ascalararr.data(), is_not_aligned);
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }

    // non-temporal store
    avec.store(ascalararr.data(), is_not_reused);
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }
}
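// is_not_reused requests a streaming (non-temporal) store where the backend
// supports one, hinting that the data will not be read again soon and need
// not displace cached data.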
BOOST_AUTO_TEST_CASE(SimdLibSingle_broadcast)
{
    vec_t::scalarType ascalar{3.333f};
    vec_t avec;
    avec.broadcast(ascalar); // smoke test: fills every lane with ascalar
}
BOOST_AUTO_TEST_CASE(SimdLibSingle_subscript_assign_read)
{
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        ascalararr[i] = i;
    }

    vec_t avec;
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        avec[i] = ascalararr[i];
    }

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], avec[i]);
    }
}
BOOST_AUTO_TEST_CASE(SimdLibSingle_gather32)
{
    vec_t avec;
    using index_t = simd<std::uint32_t>;
    index_t aindexvec;

    // create and fill the index (values arbitrary but in range)
    std::array<std::uint32_t, vec_t::width> aindex;
    aindex[0] = 0;
    if (vec_t::width > 2)
    {
        aindex[1] = 3;
        aindex[2] = 5;
        aindex[3] = 6;
    }
    if (vec_t::width > 4)
    {
        aindex[4] = 8;
        aindex[5] = 15;
        aindex[6] = 16;
        aindex[7] = 20;
    }
    if (vec_t::width > 8)
    {
        for (size_t i = 8; i < vec_t::width; ++i)
        {
            aindex[i] = 2 * i + 1;
        }
    }

    // load the index
    aindexvec.load(aindex.data(), is_not_aligned);

    // create and fill the scalar array
    constexpr size_t scalarArraySize = 64;
    std::array<float, scalarArraySize> ascalararr;
    for (size_t i = 0; i < scalarArraySize; ++i)
    {
        ascalararr[i] = i;
    }

    avec.gather(ascalararr.data(), aindexvec);

    // check
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
    }
}
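// Gather reads non-contiguous data in one call: lane i of the vector receives
// ascalararr[aindex[i]].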
BOOST_AUTO_TEST_CASE(SimdLibSingle_scatter32)
{
    vec_t avec;
    using index_t = simd<std::uint32_t>;
    index_t aindexvec;

    // create and fill the index (values arbitrary but unique and in range)
    std::array<std::uint32_t, vec_t::width> aindex;
    aindex[0] = 1;
    if (vec_t::width > 1)
    {
        aindex[1] = 3;
    }
    if (vec_t::width > 2)
    {
        aindex[2] = 5;
        aindex[3] = 6;
    }
    if (vec_t::width > 4)
    {
        aindex[4] = 8;
        aindex[5] = 15;
        aindex[6] = 20;
        aindex[7] = 30;
    }
    if (vec_t::width > 8)
    {
        for (size_t i = 8; i < vec_t::width; ++i)
        {
            aindex[i] = 2 * i + 1;
        }
    }

    // load the index
    aindexvec.load(aindex.data(), is_not_aligned);

    // scalar array to scatter into
    constexpr size_t scalarArraySize = 64;
    std::array<float, scalarArraySize> ascalararr;

    // fill the vector lanes
    alignas(vec_t::alignment) std::array<float, vec_t::width> avecarr{{}};
    avecarr[0] = 10;
    if (vec_t::width > 1)
    {
        avecarr[1] = 9;
    }
    if (vec_t::width > 2)
    {
        avecarr[2] = 8;
        avecarr[3] = 7;
    }
    if (vec_t::width > 4)
    {
        avecarr[4] = 4;
        avecarr[5] = 3;
        avecarr[6] = 2;
        avecarr[7] = 1;
    }
    avec.load(avecarr.data());

    avec.scatter(ascalararr.data(), aindexvec);

    // check
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(avec[i], ascalararr[aindex[i]]);
    }
}
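// Scatter is the inverse of gather: it writes lane i to ascalararr[aindex[i]]
// in a single call.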
BOOST_AUTO_TEST_CASE(SimdLibSingle_arithmetic_unary)
{
    float val1 = -4.0;
    float val2 = 2.0;
    vec_t avec(val2);
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};

    vec_t res(val1);
    res += avec;
    res.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
    }

    res = vec_t(val1);
    res -= avec;
    res.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
    }

    res = vec_t(val1);
    res *= avec;
    res.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
    }

    res = vec_t(val1);
    res /= avec;
    res.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
    }
}
BOOST_AUTO_TEST_CASE(SimdLibSingle_arithmetic_binary)
{
    float val1 = -4.0;
    float val2 = 2.0;
    vec_t avec1(val1);
    vec_t avec2(val2);
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};

    vec_t res = avec1 + avec2;
    res.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
    }

    res = avec1 - avec2;
    res.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
    }

    res = avec1 * avec2;
    res.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
    }

    res = avec1 / avec2;
    res.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
    }
}
BOOST_AUTO_TEST_CASE(SimdLibSingle_add_mul)
{
    // values chosen so the fused and unfused results agree exactly
    float val1 = -4.0;
    float val2 = 1.5;
    float val3 = 5.0;
    vec_t avec1(val1);
    vec_t avec2(val2);
    vec_t avec3(val3);
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};

    vec_t res = avec1 + avec2 * avec3;
    res.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
    }

    // fused multiply-add: avec1 += avec2 * avec3
    avec1.fma(avec2, avec3);
    avec1.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
    }
}
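// Illustrative sketch (not part of the test suite): the load/fma/store
// primitives exercised above compose into a vectorised y = a * x + y kernel.
// The helper name vaxpy and the requirement that n be a multiple of
// vec_t::width are assumptions of this sketch.
inline void vaxpy(float a, const float *x, float *y, size_t n)
{
    vec_t avec(a);
    for (size_t i = 0; i < n; i += vec_t::width)
    {
        vec_t xvec, yvec;
        xvec.load(x + i, is_not_aligned); // lanes x[i .. i+width)
        yvec.load(y + i, is_not_aligned); // lanes y[i .. i+width)
        yvec.fma(avec, xvec);             // yvec += avec * xvec
        yvec.store(y + i, is_not_aligned);
    }
}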
BOOST_AUTO_TEST_CASE(SimdLibSingle_sqrt_abs_log)
{
    alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{{}};

    float val = 4.0;
    vec_t avec(val);
    vec_t asqrt = sqrt(avec);
    asqrt.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], std::sqrt(val));
    }

    val  = -4.0;
    avec = vec_t(val);
    vec_t aabs = abs(avec);
    aabs.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], std::abs(val));
    }

    val  = 4.0;
    avec = vec_t(val);
    vec_t alog = log(avec);
    alog.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], std::log(val));
    }
}
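// The elementwise maths functions are validated lane-by-lane against their
// scalar std:: counterparts on a broadcast input, so every lane must match
// std::sqrt, std::abs and std::log exactly.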
BOOST_AUTO_TEST_CASE(SimdLibSingle_greater)
{
    float aval = 4.0;
    vec_t avec(aval);
    using mask_t = simd<bool, vec_t::width>;
    mask_t amask;

    amask = avec > avec;
    // check: no lane is greater than itself
    alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
        ascalararr{{}};
    amask.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        // the cast avoids odr-use of the static constexpr member pre-C++17
        BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::false_v);
    }

    float bval = 3.0; // smaller, so every lane compares greater
    vec_t bvec(bval);
    amask = avec > bvec;
    amask.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::true_v);
    }

    float cval = 5.0; // larger, so no lane compares greater
    vec_t cvec(cval);
    amask = avec > cvec;
    amask.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::false_v);
    }

    if (vec_t::width == 4)
    {
        alignas(vec_t::alignment) std::array<float, 4> ascalararr2{
            {1.0, 2.0, 3.0, 4.0}};
        float dval = 2.0;
        vec_t dvec(dval);
        vec_t evec;
        evec.load(ascalararr2.data());

        amask = dvec > evec;
        for (size_t i = 0; i < vec_t::width; ++i)
        {
            BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]), dvec[i] > evec[i]);
        }
    }

    if (vec_t::width == 8)
    {
        // arbitrary per-lane values to exercise mixed comparison results
        alignas(vec_t::alignment) std::array<float, 8> ascalararr2{
            {1.0, 2.0, 3.0, 4.0, 3.0, 2.0, 1.0, 4.0}};
        float dval = 2.0;
        vec_t dvec(dval);
        vec_t evec;
        evec.load(ascalararr2.data());

        amask = dvec > evec;
        for (size_t i = 0; i < vec_t::width; ++i)
        {
            BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]),
                              dval > ascalararr2[i]);
        }
    }
}
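// Comparisons produce a mask vector; when stored, each lane holds the integer
// pattern mask_t::true_v or mask_t::false_v.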
BOOST_AUTO_TEST_CASE(SimdLibSingle_logic_and)
{
    using mask_t = simd<bool, vec_t::width>;
    mask_t amask;

    alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
        ascalararr{{}};
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        ascalararr[i] = mask_t::true_v;
    }
    amask.load(ascalararr.data());

    // all lanes true
    BOOST_CHECK_EQUAL(amask && false, false);
    BOOST_CHECK_EQUAL(amask && true, true);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        ascalararr[i] = mask_t::false_v;
    }
    amask.load(ascalararr.data());

    // all lanes false
    BOOST_CHECK_EQUAL(amask && false, false);
    BOOST_CHECK_EQUAL(amask && true, false);

    if (vec_t::width > 1)
    {
        // mixed lanes: one true lane is not enough
        ascalararr[0] = mask_t::true_v;
        amask.load(ascalararr.data());

        BOOST_CHECK_EQUAL(amask && false, false);
        BOOST_CHECK_EQUAL(amask && true, false);
    }
}
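// operator&& reduces the mask horizontally: (amask && b) is true only when
// every lane of amask is true and b is true, as the mixed-lane case above
// shows.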
BOOST_AUTO_TEST_CASE(SimdLibSingle_load_interleave_unload)
{
    constexpr size_t nDof{5};
    // no padding in load_interleave/deinterleave_store
    constexpr size_t nEle{vec_t::width * 5};
    constexpr size_t nDofBlock = nDof * vec_t::width;

    constexpr size_t size{nDof * nEle};
    std::array<float, size> dofScalarArr{{}};
    for (size_t i = 0; i < size; ++i)
    {
        dofScalarArr[i] = i;
    }

    // number of blocks
    size_t nBlock = nEle / vec_t::width;

    // aligned vector of SIMD registers
    std::vector<vec_t, allocator<vec_t>> dofVectorArr(nDof);

    float *dataPtr = dofScalarArr.data();
    // loop over blocks, vec_t::width elements at a time
    for (size_t b = 0; b < nBlock; ++b)
    {
        // load
        load_interleave(dataPtr, nDof, dofVectorArr);

        // manipulate each degree of freedom
        for (size_t j = 0; j < nDof; ++j)
        {
            dofVectorArr[j] = dofVectorArr[j] + j;
        }

        // store
        deinterleave_store(dofVectorArr, nDof, dataPtr);
        dataPtr += nDofBlock;
    }

    // check
    for (size_t b = 0, i = 0; b < nBlock; ++b)
    {
        for (size_t j = 0; j < nDof; ++j, ++i)
        {
            BOOST_CHECK_EQUAL(dofScalarArr[i], i + j);
        }
    }
}
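// load_interleave transposes a block of nDof * vec_t::width scalars into nDof
// vectors (one lane per element of the block); deinterleave_store writes them
// back in the original layout. This lets a kernel update whole blocks of
// elements one degree of freedom at a time, as the loop above does.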
BOOST_AUTO_TEST_CASE(SimdLibSingle_io)
{
    vec_t avec(3.14f); // value is illustrative
    std::cout << avec << std::endl;
}