#include <boost/core/ignore_unused.hpp>
#include <boost/test/tools/floating_point_comparison.hpp>
#include <boost/test/unit_test.hpp>
#define NUM_LANES_64BITS 1

#if defined(__x86_64__)
#if defined(__AVX512F__) && defined(NEKTAR_ENABLE_SIMD_AVX512)
#undef NUM_LANES_64BITS
#define NUM_LANES_64BITS 8
#elif defined(__AVX2__) && defined(NEKTAR_ENABLE_SIMD_AVX2)
#undef NUM_LANES_64BITS
#define NUM_LANES_64BITS 4
#elif defined(__SSE2__) && defined(NEKTAR_ENABLE_SIMD_SSE2)
#undef NUM_LANES_64BITS
#define NUM_LANES_64BITS 2
#endif
#endif

#if defined(__ARM_FEATURE_SVE) && defined(NEKTAR_ENABLE_SIMD_SVE)
#undef NUM_LANES_64BITS
#define NUM_LANES_64BITS (__ARM_FEATURE_SVE_BITS / 64)
#define ALIGNMENT (__ARM_FEATURE_SVE_BITS / 8)
#endif
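// Note (descriptive): __ARM_FEATURE_SVE_BITS is only defined when compiling
// with a fixed SVE vector length (e.g. -msve-vector-bits=512); that is what
// allows the runtime-sized SVE registers to be treated here as a fixed-width
// type with __ARM_FEATURE_SVE_BITS / 64 double-precision lanes.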
    std::size_t width, alignment;

#if defined(USING_SCALAR)
    std::cout << "scalar double" << std::endl;
#endif
#if defined(USING_AVX2)
    std::cout << "avx2 double" << std::endl;
#endif
#if defined(USING_AVX512)
    std::cout << "avx512 double" << std::endl;
#endif
#if defined(USING_SVE)
    std::cout << "sve double" << std::endl;
#endif
    using namespace details;

    BOOST_CHECK_EQUAL(has_width<double>::value, false);
    BOOST_CHECK_EQUAL(has_width<vec_t>::value, true);

    BOOST_CHECK_EQUAL(has_alignment<double>::value, false);
    BOOST_CHECK_EQUAL(has_alignment<vec_t>::value, true);

    BOOST_CHECK_EQUAL(has_scalarType<double>::value, false);
    BOOST_CHECK_EQUAL(has_scalarType<vec_t>::value, true);
    BOOST_CHECK_EQUAL(sizeof(vec_t), sizeof(double) * vec_t::width);
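    // Compile-time corollary of the runtime check above (illustrative):
    // vec_t packs exactly width doubles with no padding.
    static_assert(sizeof(vec_t) == sizeof(double) * vec_t::width,
                  "vec_t must pack vec_t::width doubles contiguously");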
    vec_t::scalarType ascalar = 0;
    vec_t avec2(ascalar);
    vec_t avec3{ascalar};
    vec_t avec4 = ascalar;

    vec_t avec7(avec2._data);
    vec_t avec8{avec2._data};

    vec_t::vectorType anative;
    vec_t avec9(anative);
    vec_t avec10{anative};

    boost::ignore_unused(avec1, avec3, avec5, avec6, avec7, avec8, avec9,
                         avec10);
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    avec.load(ascalararr.data());
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    avec = *(reinterpret_cast<vec_t *>(ascalararr.data()));
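    // Illustrative alternative (assumes the tag-dispatch load overloads that
    // the is_aligned/is_not_aligned tags in tinysimd suggest): the alignment
    // guarantee can be stated explicitly instead of reinterpret-casting:
    avec.load(ascalararr.data(), is_aligned);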
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};

    std::array<double, vec_t::width> ascalararr{{}};
    alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
        ascalararr{{}};
    aindex.load(ascalararr.data());
    alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
        ascalararr{{}};

    std::array<std::uint64_t, vec_t::width> ascalararr{{}};
    alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
        ascalararr{{}};
    aindex.load(ascalararr.data());
    alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
        ascalararr{{}};

    std::array<std::uint32_t, vec_t::width> ascalararr{{}};
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    avec.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    avec.store(ascalararr.data(), is_aligned);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }
    std::array<double, vec_t::width> ascalararr{{}};
    avec.store(ascalararr.data(), is_not_aligned);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    avec.store(ascalararr.data(), is_not_reused);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }
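    // The store tests above differ only in the hint passed (tags as declared
    // in tinysimd): no tag, is_aligned, is_not_aligned, and is_not_reused, a
    // streaming store that may bypass the cache when the backend supports it;
    // the lane values written are identical in all cases.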
    vec_t::scalarType ascalar{3.333};
    vec_t avec;
    avec.broadcast(ascalar);
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        ascalararr[i] = i;
    }

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        avec[i] = ascalararr[i];
    }

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], avec[i]);
    }
    // create and fill index (arbitrary non-contiguous entries, all < 32)
    std::array<size_t, vec_t::width> aindex;
    aindex[0] = 0;
    if (vec_t::width > 2)
    {
        aindex[1] = 3;
        aindex[2] = 5;
        aindex[3] = 6;
    }
    if (vec_t::width > 4)
    {
        aindex[4] = 8;
        aindex[5] = 15;
        aindex[6] = 16;
        aindex[7] = 20;
    }
    aindexvec.load(aindex.data(), is_not_aligned);

    // create and fill scalar array
    constexpr size_t scalarArraySize = 32;
    std::array<double, scalarArraySize> ascalararr;
    for (size_t i = 0; i < scalarArraySize; ++i)
    {
        ascalararr[i] = i;
    }

    avec.gather(ascalararr.data(), aindexvec);

    // check
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
    }
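    // Scalar-equivalent reading of the gather above (descriptive only): for
    // each lane i, avec[i] = ascalararr[aindex[i]], i.e. an indexed read from
    // non-contiguous memory locations into one vector register.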
    // create and fill index (arbitrary non-contiguous entries, all < 32)
    std::array<std::uint32_t, vec_t::width> aindex;
    aindex[0] = 0;
    if (vec_t::width > 2)
    {
        aindex[1] = 3;
        aindex[2] = 5;
        aindex[3] = 6;
    }
    if (vec_t::width > 4)
    {
        aindex[4] = 8;
        aindex[5] = 15;
        aindex[6] = 16;
        aindex[7] = 20;
    }
    aindexvec.load(aindex.data(), is_not_aligned);

    // create and fill scalar array
    constexpr size_t scalarArraySize = 32;
    std::array<double, scalarArraySize> ascalararr;
    for (size_t i = 0; i < scalarArraySize; ++i)
    {
        ascalararr[i] = i;
    }

    avec.gather(ascalararr.data(), aindexvec);

    // check
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
    }
    // create and fill index (arbitrary distinct entries, all < 32)
    std::array<size_t, vec_t::width> aindex;
    aindex[0] = 0;
    if (vec_t::width > 1)
    {
        aindex[1] = 3;
    }
    if (vec_t::width > 2)
    {
        aindex[2] = 5;
        aindex[3] = 6;
    }
    if (vec_t::width > 4)
    {
        aindex[4] = 8;
        aindex[5] = 15;
        aindex[6] = 20;
        aindex[7] = 30;
    }
    aindexvec.load(aindex.data(), is_not_aligned);

    constexpr size_t scalarArraySize = 32;
    std::array<double, scalarArraySize> ascalararr;

    // fill vector with lane indices [0, 1, ..., width - 1]
    alignas(vec_t::alignment) std::array<double, vec_t::width> avecarr{{}};
    avecarr[0] = 0;
    if (vec_t::width > 1)
    {
        avecarr[1] = 1;
    }
    if (vec_t::width > 2)
    {
        avecarr[2] = 2;
        avecarr[3] = 3;
    }
    if (vec_t::width > 4)
    {
        avecarr[4] = 4;
        avecarr[5] = 5;
        avecarr[6] = 6;
        avecarr[7] = 7;
    }
    avec.load(avecarr.data());

    avec.scatter(ascalararr.data(), aindexvec);

    // check
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(avec[i], ascalararr[aindex[i]]);
    }
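    // Scalar-equivalent reading of the scatter above (descriptive only): for
    // each lane i, ascalararr[aindex[i]] = avec[i], the indexed-write mirror
    // image of gather.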
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
    }
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
    }
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
    }
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
    }
    vec_t res = avec1 + avec2;
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
    }
    vec_t res = avec1 - avec2;
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
    }
    vec_t res = avec1 * avec2;
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
    }
    vec_t res = avec1 / avec2;
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
    }
    vec_t res = avec1 + avec2 * avec3;
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
    }
    avec1.fma(avec2, avec3);
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    avec1.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
    }
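    // Grounded in the two checks above: avec1.fma(avec2, avec3) updates avec1
    // in place as avec1 + avec2 * avec3, matching the operator expression
    // tested before it; backends with hardware FMA may fuse this into a
    // single instruction.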
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    asqrt.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], std::sqrt(val));
    }
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    aabs.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], std::abs(val));
    }
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    alog.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], std::log(val));
    }
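    // Usage sketch (free functions as called above): sqrt, abs and log act
    // lane-wise on a vec_t and compose like their std:: counterparts, e.g.
    // for strictly positive input: vec_t y = log(sqrt(abs(avec)));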
    // equal lanes: expect an all-false mask
    amask = avec > avec;
    alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
        ascalararr{{}};
    amask.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        // cast avoids odr-use of the constexpr mask constant
        BOOST_CHECK_EQUAL(ascalararr[i], (std::uint64_t)mask_t::false_v);
    }

    // compare against smaller lanes: expect an all-true mask
    vec_t bvec = avec - vec_t(1.0);
    amask      = avec > bvec;
    amask.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], (std::uint64_t)mask_t::true_v);
    }

    // compare against larger lanes: expect an all-false mask
    bvec  = avec + vec_t(1.0);
    amask = avec > bvec;
    amask.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], (std::uint64_t)mask_t::false_v);
    }
    if (vec_t::width == 4)
    {
        alignas(vec_t::alignment) std::array<double, 4> ascalararr2{
            {1.0, 2.0, 3.0, 4.0}};
        double dval{2.0};
        vec_t evec;
        evec.load(ascalararr2.data());
        amask = vec_t(dval) > evec;
        // check each lane against the scalar comparison
        for (size_t i = 0; i < vec_t::width; ++i)
        {
            BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]),
                              dval > ascalararr2[i]);
        }
    }
    if (vec_t::width == 8)
    {
        alignas(vec_t::alignment) std::array<double, 8> ascalararr2{
            {1.0, 2.0, 3.0, 4.0, 3.0, 2.0, 1.0, 0.0}};
        double dval{2.0};
        vec_t evec;
        evec.load(ascalararr2.data());
        amask = vec_t(dval) > evec;
        // check each lane against the scalar comparison
        for (size_t i = 0; i < vec_t::width; ++i)
        {
            BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]),
                              dval > ascalararr2[i]);
        }
    }
    alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
        ascalararr{{}};
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        ascalararr[i] = mask_t::true_v;
    }
    amask.load(ascalararr.data());

    // check all-true mask
    BOOST_CHECK_EQUAL(amask && false, false);
    BOOST_CHECK_EQUAL(amask && true, true);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        ascalararr[i] = mask_t::false_v;
    }
    amask.load(ascalararr.data());

    // check all-false mask
    BOOST_CHECK_EQUAL(amask && false, false);
    BOOST_CHECK_EQUAL(amask && true, false);
    if (vec_t::width > 1)
    {
        ascalararr[0] = mask_t::true_v;
        amask.load(ascalararr.data());

        // a single true lane must not satisfy the reduction
        BOOST_CHECK_EQUAL(amask && false, false);
        BOOST_CHECK_EQUAL(amask && true, false);
    }
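    // Semantics grounded in the checks above: operator&& reduces the mask to
    // a single bool, so (amask && true) holds only when every lane is set;
    // the width > 1 case shows that one true lane is not sufficient.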
    constexpr size_t nDof{5};
    // no padding in load_interleave/deinterleave_store
    constexpr size_t nEle{vec_t::width * 5};
    constexpr size_t nDofBlock = nDof * vec_t::width;

    constexpr size_t size{nDof * nEle};
    std::array<double, size> dofScalarArr{{}};
    for (size_t i = 0; i < size; ++i)
    {
        dofScalarArr[i] = i;
    }

    // number of blocks
    size_t nBlock = nEle / vec_t::width;

    // aligned vector of vec_t
    std::vector<vec_t, allocator<vec_t>> dofVectorArr(nDof);

    double *dataPtr = dofScalarArr.data();
    // loop over blocks, vec_t::width elements at a time
    for (size_t b = 0; b < nBlock; ++b)
    {
        // load and transpose
        load_interleave(dataPtr, nDof, dofVectorArr);

        // manipulate each dof
        for (size_t j = 0; j < nDof; ++j)
        {
            dofVectorArr[j] = dofVectorArr[j] + j;
        }

        // transpose and store
        deinterleave_store(dofVectorArr, nDof, dataPtr);
        dataPtr += nDofBlock;
    }

    // check
    for (size_t b = 0, i = 0; b < nBlock; ++b)
    {
        for (size_t j = 0; j < nDof; ++j, ++i)
        {
            BOOST_CHECK_EQUAL(dofScalarArr[i], i + j);
        }
    }
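    // Shape of the round trip exercised above (signatures as used here):
    //   load_interleave(dataPtr, nDof, dofVectorArr);    // scalars -> vec_t SoA
    //   deinterleave_store(dofVectorArr, nDof, dataPtr); // vec_t SoA -> scalars
    // Each block transposes nDof degrees of freedom across vec_t::width
    // elements, so one vector operation applies a dof to width elements.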
    std::cout << avec << std::endl;