#include <boost/test/tools/floating_point_comparison.hpp>
#include <boost/test/unit_test.hpp>

#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>
// Pick the number of 64-bit lanes to match the widest SIMD backend that is
// both supported by the target and enabled in the build, and record which
// backend is active via a USING_* marker.
#define NUM_LANES_64BITS 1
#define USING_SCALAR
#if defined(__x86_64__)
#if defined(__AVX512F__) && defined(NEKTAR_ENABLE_SIMD_AVX512)
#define USING_AVX512
#undef USING_SCALAR
#undef NUM_LANES_64BITS
#define NUM_LANES_64BITS 8
#elif defined(__AVX2__) && defined(NEKTAR_ENABLE_SIMD_AVX2)
#define USING_AVX2
#undef USING_SCALAR
#undef NUM_LANES_64BITS
#define NUM_LANES_64BITS 4
#elif defined(__SSE2__) && defined(NEKTAR_ENABLE_SIMD_SSE2)
#define USING_SSE2
#undef USING_SCALAR
#undef NUM_LANES_64BITS
#define NUM_LANES_64BITS 2
#endif
#endif
#if defined(__ARM_FEATURE_SVE) && defined(NEKTAR_ENABLE_SIMD_SVE)
#define USING_SVE
#undef USING_SCALAR
#undef NUM_LANES_64BITS
#define NUM_LANES_64BITS (__ARM_FEATURE_SVE_BITS / 64)
#define ALIGNMENT (__ARM_FEATURE_SVE_BITS / 8)
#endif
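
// A minimal setup sketch: the usual Nektar++ include paths for the tinysimd
// headers, plus the aliases the checks below rely on. Most test-case names in
// this excerpt are descriptive placeholders rather than verbatim originals.
#include <LibUtilities/SimdLib/io.hpp>
#include <LibUtilities/SimdLib/tinysimd.hpp>

using namespace tinysimd;
using vec_t = simd<double>; // lane count matches NUM_LANES_64BITS above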
BOOST_AUTO_TEST_CASE(SimdLibDouble_width_alignment)
{
    std::size_t width, alignment;

#if defined(USING_SCALAR)
    std::cout << "scalar double" << std::endl;
#endif
#if defined(USING_AVX2)
    std::cout << "avx2 double" << std::endl;
#endif
#if defined(USING_AVX512)
    std::cout << "avx512 double" << std::endl;
#endif
#if defined(USING_SVE)
    std::cout << "sve double" << std::endl;
#endif

// Type-trait helpers: only SIMD vector types expose width, alignment and
// scalarType members.
BOOST_AUTO_TEST_CASE(SimdLibDouble_type_traits)
{
    using namespace details;

    BOOST_CHECK_EQUAL(has_width<double>::value, false);
    BOOST_CHECK_EQUAL(has_width<vec_t>::value, true);

    BOOST_CHECK_EQUAL(has_alignment<double>::value, false);
    BOOST_CHECK_EQUAL(has_alignment<vec_t>::value, true);

    BOOST_CHECK_EQUAL(has_scalarType<double>::value, false);
    BOOST_CHECK_EQUAL(has_scalarType<vec_t>::value, true);
}

// The vector type must be exactly width doubles with no padding.
BOOST_AUTO_TEST_CASE(SimdLibDouble_size)
{
    BOOST_CHECK_EQUAL(sizeof(vec_t), sizeof(double) * vec_t::width);
}

// Every supported way of constructing a vec_t: default, from a scalar, copy,
// from the implementation member, and from the native vector type.
BOOST_AUTO_TEST_CASE(SimdLibDouble_ctors)
{
    [[maybe_unused]] vec_t avec1;

    vec_t::scalarType ascalar = 0;
    vec_t avec2(ascalar);
    [[maybe_unused]] vec_t avec3{ascalar};
    vec_t avec4 = ascalar;

    [[maybe_unused]] vec_t avec5(avec2);
    [[maybe_unused]] vec_t avec6{avec4};

    [[maybe_unused]] vec_t avec7(avec2._data);
    [[maybe_unused]] vec_t avec8{avec2._data};

    vec_t::vectorType anative;
    [[maybe_unused]] vec_t avec9(anative);
    [[maybe_unused]] vec_t avec10{anative};
}
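
// Note: `_data` is the wrapper's underlying storage member and
// `vec_t::vectorType` the native register type (for example `__m256d` under
// AVX2); both are backend-dependent implementation details.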

// Plain load from an aligned buffer.
BOOST_AUTO_TEST_CASE(SimdLibDouble_load)
{
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}}; // double braces to work around older-compiler aggregate-init
             // warnings
    vec_t avec;
    avec.load(ascalararr.data());
}

// "Implicit" load: reinterpret the aligned scalar array as a vec_t.
BOOST_AUTO_TEST_CASE(SimdLibDouble_load_implicit)
{
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    vec_t avec;
    avec = *(reinterpret_cast<vec_t *>(ascalararr.data()));
}

// Load with the compile-time alignment hint.
BOOST_AUTO_TEST_CASE(SimdLibDouble_load_aligned)
{
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    vec_t avec;
    avec.load(ascalararr.data(), is_aligned);
}

// Load from a potentially unaligned buffer.
BOOST_AUTO_TEST_CASE(SimdLibDouble_load_unaligned)
{
    std::array<double, vec_t::width> ascalararr{{}};
    vec_t avec;
    avec.load(ascalararr.data(), is_not_aligned);
}

// The same three load variants for integer index vectors, first with 64-bit
// and then with 32-bit indices. The index aliases are assumptions of this
// excerpt.
using index64_t = simd<std::uint64_t, vec_t::width>;
using index32_t = simd<std::uint32_t, vec_t::width>;

BOOST_AUTO_TEST_CASE(SimdLibIndex64_load)
{
    alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
        ascalararr{{}};
    index64_t aindex;
    aindex.load(ascalararr.data());

    alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
        ascalararr2{{}};
    aindex.load(ascalararr2.data(), is_aligned);

    std::array<std::uint64_t, vec_t::width> ascalararr3{{}};
    aindex.load(ascalararr3.data(), is_not_aligned);
}

BOOST_AUTO_TEST_CASE(SimdLibIndex32_load)
{
    alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
        ascalararr{{}};
    index32_t aindex;
    aindex.load(ascalararr.data());

    alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
        ascalararr2{{}};
    aindex.load(ascalararr2.data(), is_aligned);

    std::array<std::uint32_t, vec_t::width> ascalararr3{{}};
    aindex.load(ascalararr3.data(), is_not_aligned);
}

// Store and check each lane.
BOOST_AUTO_TEST_CASE(SimdLibDouble_store)
{
    double val = 4.0; // arbitrary placeholder value
    vec_t avec(val);
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    avec.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }
}

// Store with the alignment hint.
BOOST_AUTO_TEST_CASE(SimdLibDouble_store_aligned)
{
    double val = 4.0; // placeholder
    vec_t avec(val);
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    avec.store(ascalararr.data(), is_aligned);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }
}

// Store to a potentially unaligned buffer.
BOOST_AUTO_TEST_CASE(SimdLibDouble_store_unaligned)
{
    double val = 4.0; // placeholder
    vec_t avec(val);
    std::array<double, vec_t::width> ascalararr{{}};
    avec.store(ascalararr.data(), is_not_aligned);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }
}

// Streaming (non-temporal) store: hint that the data will not be reused.
BOOST_AUTO_TEST_CASE(SimdLibDouble_store_non_temporal)
{
    double val = 4.0; // placeholder
    vec_t avec(val);
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    avec.store(ascalararr.data(), is_not_reused);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val);
    }
}

// Broadcast a scalar to all lanes.
BOOST_AUTO_TEST_CASE(SimdLibDouble_broadcast)
{
    vec_t::scalarType ascalar{3.333};
    vec_t avec;
    avec.broadcast(ascalar);
}

// Element access: write each lane through operator[] and read it back.
BOOST_AUTO_TEST_CASE(SimdLibDouble_subscript_assign_read)
{
    vec_t avec;
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        ascalararr[i] = i;
    }

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        avec[i] = ascalararr[i];
    }

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], avec[i]);
    }
}

// Gather doubles through a vector of 64-bit indices. The index values are
// arbitrary in-range placeholders.
BOOST_AUTO_TEST_CASE(SimdLibDouble_gather64)
{
    vec_t avec;
    simd<size_t, vec_t::width> aindexvec;

    // fill the index array with distinct positions inside the scalar array
    std::array<size_t, vec_t::width> aindex;
    aindex[0] = 0;
    if (vec_t::width > 1)
    {
        aindex[1] = 3;
    }
    if (vec_t::width > 2)
    {
        aindex[2] = 5;
        aindex[3] = 6;
    }
    if (vec_t::width > 4)
    {
        aindex[4] = 8;
        aindex[5] = 15;
        aindex[6] = 20;
        aindex[7] = 23;
    }
    aindexvec.load(aindex.data(), is_not_aligned);

    constexpr size_t scalarArraySize = 32;
    std::array<double, scalarArraySize> ascalararr;
    for (size_t i = 0; i < scalarArraySize; ++i)
    {
        ascalararr[i] = i;
    }

    avec.gather(ascalararr.data(), aindexvec);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
    }
}

// The same gather through 32-bit indices.
BOOST_AUTO_TEST_CASE(SimdLibDouble_gather32)
{
    vec_t avec;
    index32_t aindexvec;

    std::array<std::uint32_t, vec_t::width> aindex;
    aindex[0] = 0;
    if (vec_t::width > 1)
    {
        aindex[1] = 3;
    }
    if (vec_t::width > 2)
    {
        aindex[2] = 5;
        aindex[3] = 6;
    }
    if (vec_t::width > 4)
    {
        aindex[4] = 8;
        aindex[5] = 15;
        aindex[6] = 20;
        aindex[7] = 23;
    }
    aindexvec.load(aindex.data(), is_not_aligned);

    constexpr size_t scalarArraySize = 32;
    std::array<double, scalarArraySize> ascalararr;
    for (size_t i = 0; i < scalarArraySize; ++i)
    {
        ascalararr[i] = i;
    }

    avec.gather(ascalararr.data(), aindexvec);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
    }
}

// Scatter lanes back into a scalar array through 64-bit indices. Index and
// lane values are placeholders.
BOOST_AUTO_TEST_CASE(SimdLibDouble_scatter64)
{
    vec_t avec;
    simd<size_t, vec_t::width> aindexvec;

    std::array<size_t, vec_t::width> aindex;
    aindex[0] = 1;
    if (vec_t::width > 1)
    {
        aindex[1] = 3;
    }
    if (vec_t::width > 2)
    {
        aindex[2] = 5;
        aindex[3] = 6;
    }
    if (vec_t::width > 4)
    {
        aindex[4] = 8;
        aindex[5] = 15;
        aindex[6] = 20;
        aindex[7] = 30;
    }
    aindexvec.load(aindex.data(), is_not_aligned);

    constexpr size_t scalarArraySize = 32;
    std::array<double, scalarArraySize> ascalararr;

    // fill the vector lanes, then scatter them
    alignas(vec_t::alignment) std::array<double, vec_t::width> avecarr{{}};
    avecarr[0] = 10;
    if (vec_t::width > 1)
    {
        avecarr[1] = 9;
    }
    if (vec_t::width > 2)
    {
        avecarr[2] = 8;
        avecarr[3] = 7;
    }
    if (vec_t::width > 4)
    {
        avecarr[4] = 6;
        avecarr[5] = 5;
        avecarr[6] = 4;
        avecarr[7] = 3;
    }
    avec.load(avecarr.data());

    avec.scatter(ascalararr.data(), aindexvec);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(avec[i], ascalararr[aindex[i]]);
    }
}

// Compound arithmetic, checked lane-wise. Scalar values are placeholders.
BOOST_AUTO_TEST_CASE(SimdLibDouble_add_unary)
{
    double val1 = -4.0;
    double val2 = 2.5;
    vec_t res(val1);
    vec_t avec(val2);
    res += avec;

    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibDouble_sub_unary)
{
    double val1 = -4.0; // placeholders
    double val2 = 2.5;
    vec_t res(val1);
    vec_t avec(val2);
    res -= avec;

    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibDouble_mul_unary)
{
    double val1 = -4.0; // placeholders
    double val2 = 2.5;
    vec_t res(val1);
    vec_t avec(val2);
    res *= avec;

    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibDouble_div_unary)
{
    double val1 = -4.0; // placeholders
    double val2 = 2.5;
    vec_t res(val1);
    vec_t avec(val2);
    res /= avec;

    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
    }
}

// Binary operators on whole vectors. Scalar values are placeholders.
BOOST_AUTO_TEST_CASE(SimdLibDouble_add_binary)
{
    double val1 = -4.0;
    double val2 = 2.5;
    vec_t avec1(val1);
    vec_t avec2(val2);
    vec_t res = avec1 + avec2;
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibDouble_sub_binary)
{
    double val1 = -4.0; // placeholders
    double val2 = 2.5;
    vec_t avec1(val1);
    vec_t avec2(val2);
    vec_t res = avec1 - avec2;
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibDouble_mul_binary)
{
    double val1 = -4.0; // placeholders
    double val2 = 2.5;
    vec_t avec1(val1);
    vec_t avec2(val2);
    vec_t res = avec1 * avec2;
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
    }
}

BOOST_AUTO_TEST_CASE(SimdLibDouble_div_binary)
{
    double val1 = -4.0; // placeholders
    double val2 = 2.5;
    vec_t avec1(val1);
    vec_t avec2(val2);
    vec_t res = avec1 / avec2;
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
    }
}

// Mixed expression: res = avec1 + avec2 * avec3. Scalar values are
// placeholders.
BOOST_AUTO_TEST_CASE(SimdLibDouble_add_mul)
{
    double val1 = -4.0;
    double val2 = 1.5;
    double val3 = 5.0;
    vec_t avec1(val1);
    vec_t avec2(val2);
    vec_t avec3(val3);
    vec_t res = avec1 + avec2 * avec3;
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    res.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
    }
}

// Fused multiply-add: avec1 = avec1 + avec2 * avec3. The placeholder values
// are exactly representable, so the fused and separately rounded results
// coincide.
BOOST_AUTO_TEST_CASE(SimdLibDouble_fused_add_mul)
{
    double val1 = -4.0;
    double val2 = 1.5;
    double val3 = 5.0;
    vec_t avec1(val1);
    vec_t avec2(val2);
    vec_t avec3(val3);
    avec1.fma(avec2, avec3);
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    avec1.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
    }
}

// Elementwise free functions: sqrt, abs, log.
BOOST_AUTO_TEST_CASE(SimdLibDouble_sqrt)
{
    double val = 4.0; // placeholder
    vec_t avec(val);
    vec_t asqrt = sqrt(avec);
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    asqrt.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], std::sqrt(val));
    }
}

BOOST_AUTO_TEST_CASE(SimdLibDouble_abs)
{
    double val = -4.0; // placeholder
    vec_t avec(val);
    vec_t aabs = abs(avec);
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    aabs.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], std::abs(val));
    }
}

BOOST_AUTO_TEST_CASE(SimdLibDouble_log)
{
    double val = 4.0; // placeholder
    vec_t avec(val);
    vec_t alog = log(avec);
    alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
        {}};
    alog.store(ascalararr.data());

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], std::log(val));
    }
}

// Elementwise greater-than yields a mask; each mask lane holds
// mask_t::true_v or mask_t::false_v. The mask alias and scalar values are
// assumptions of this excerpt.
BOOST_AUTO_TEST_CASE(SimdLibDouble_greater)
{
    using mask_t = simd<bool, vec_t::width>;

    double aval = 4.0;
    vec_t avec(aval);
    mask_t amask;

    amask = avec > avec; // nothing is greater than itself
    alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
        ascalararr{{}};
    amask.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        // the cast turns the static member into an rvalue
        BOOST_CHECK_EQUAL(ascalararr[i], (std::uint64_t)mask_t::false_v);
    }

    double bval = 3.0;
    vec_t bvec(bval);
    amask = avec > bvec;
    amask.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], (std::uint64_t)mask_t::true_v);
    }

    double cval = 5.0;
    vec_t cvec(cval);
    amask = avec > cvec;
    amask.store(ascalararr.data());
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        BOOST_CHECK_EQUAL(ascalararr[i], (std::uint64_t)mask_t::false_v);
    }

    // mixed lanes: compare a constant threshold against an ascending array
    if (vec_t::width == 4)
    {
        alignas(vec_t::alignment) std::array<double, 4> ascalararr2{
            {1.0, 2.0, 3.0, 4.0}};
        double dval = 2.0; // placeholder threshold
        vec_t dvec(dval);
        vec_t evec;
        evec.load(ascalararr2.data());

        amask = dvec > evec;
        for (size_t i = 0; i < vec_t::width; ++i)
        {
            BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]),
                              dval > ascalararr2[i]);
        }
    }

    if (vec_t::width == 8)
    {
        alignas(vec_t::alignment) std::array<double, 8> ascalararr2{
            {1.0, 2.0, 3.0, 4.0, 3.0, 2.0, 1.0, 0.0}}; // last two entries are
                                                       // placeholders
        double dval = 2.0; // placeholder threshold
        vec_t dvec(dval);
        vec_t evec;
        evec.load(ascalararr2.data());

        amask = dvec > evec;
        for (size_t i = 0; i < vec_t::width; ++i)
        {
            BOOST_CHECK_EQUAL(static_cast<bool>(amask[i]),
                              dval > ascalararr2[i]);
        }
    }
}

// Logical reduction of a mask: amask && b.
BOOST_AUTO_TEST_CASE(SimdLibDouble_logic_and)
{
    using mask_t = simd<bool, vec_t::width>;
    mask_t amask;

    alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
        ascalararr{{}};
    for (size_t i = 0; i < vec_t::width; ++i)
    {
        ascalararr[i] = mask_t::true_v;
    }
    amask.load(ascalararr.data());

    // all lanes true
    BOOST_CHECK_EQUAL(amask && false, false);
    BOOST_CHECK_EQUAL(amask && true, true);

    for (size_t i = 0; i < vec_t::width; ++i)
    {
        ascalararr[i] = mask_t::false_v;
    }
    amask.load(ascalararr.data());

    // all lanes false
    BOOST_CHECK_EQUAL(amask && false, false);
    BOOST_CHECK_EQUAL(amask && true, false);

    if (vec_t::width > 1)
    {
        ascalararr[0] = mask_t::true_v;
        amask.load(ascalararr.data());

        // mixed lanes: one true lane is not enough
        BOOST_CHECK_EQUAL(amask && false, false);
        BOOST_CHECK_EQUAL(amask && true, false);
    }
}
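
// Note the semantics exercised above: `amask && b` first reduces the mask
// across lanes (true only if every lane is set) before applying the scalar
// logical-and, so a single true lane among many still yields false.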

// Round-trip an interleaved data set: load nDof vectors per block, add a
// per-dof offset, and store back.
BOOST_AUTO_TEST_CASE(SimdLibDouble_load_interleave_deinterleave)
{
    constexpr size_t nDof{5};
    // no padding: nEle is an exact multiple of the vector width
    constexpr size_t nEle{vec_t::width * 5};
    constexpr size_t nDofBlock = nDof * vec_t::width;

    constexpr size_t size{nDof * nEle};
    std::array<double, size> dofScalarArr{{}};
    for (size_t i = 0; i < size; ++i)
    {
        dofScalarArr[i] = i;
    }

    // number of blocks
    size_t nBlock = nEle / vec_t::width;

    // aligned vector of SIMD vectors
    std::vector<vec_t, allocator<vec_t>> dofVectorArr(nDof);

    double *dataPtr = dofScalarArr.data();
    // loop over blocks, vec_t::width elements at a time
    for (size_t b = 0; b < nBlock; ++b)
    {
        load_interleave(dataPtr, nDof, dofVectorArr);

        // manipulate each dof in the block
        for (size_t j = 0; j < nDof; ++j)
        {
            dofVectorArr[j] = dofVectorArr[j] + j;
        }

        deinterleave_store(dofVectorArr, nDof, dataPtr);
        dataPtr += nDofBlock;
    }

    // every entry must have been incremented by its dof index
    for (size_t b = 0, i = 0; b < nBlock; ++b)
    {
        for (size_t j = 0; j < nDof; ++j, ++i)
        {
            BOOST_CHECK_EQUAL(dofScalarArr[i], i + j);
        }
    }
}
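
// For reference, the interleave helpers used above are rendered as follows in
// the library's generated documentation (scalarT<T> is the scalar-backend
// vector type):
//
//   void load_interleave(const T *in, const size_t dataLen,
//                        std::vector<scalarT<T>, allocator<scalarT<T>>> &out);
//   void deinterleave_store(
//       const std::vector<scalarT<T>, allocator<scalarT<T>>> &in,
//       const size_t dataLen, T *out);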

// Streaming a vector prints all lanes (requires the tinysimd io header).
BOOST_AUTO_TEST_CASE(SimdLibDouble_io)
{
    vec_t avec(3.14); // placeholder value
    std::cout << avec << std::endl;
}