133 using namespace details;
135 BOOST_CHECK_EQUAL(has_width<double>::value,
false);
136 BOOST_CHECK_EQUAL(has_width<vec_t>::value,
true);
138 BOOST_CHECK_EQUAL(has_alignment<double>::value,
false);
139 BOOST_CHECK_EQUAL(has_alignment<vec_t>::value,
true);
141 BOOST_CHECK_EQUAL(has_scalarType<double>::value,
false);
142 BOOST_CHECK_EQUAL(has_scalarType<vec_t>::value,
true);
145 BOOST_CHECK_EQUAL(is_vector_v<double>,
false);
146 BOOST_CHECK_EQUAL(is_vector_v<vec_t>,
true);
148 BOOST_CHECK_EQUAL(is_vector_floating_point_v<double>,
false);
150 BOOST_CHECK_EQUAL(is_vector_floating_point_v<vec_t>,
true);
276 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
280 for (
size_t i = 0; i < vec_t::width; ++i)
282 BOOST_CHECK_EQUAL(ascalararr[i], val);
290 std::array<double, vec_t::width> ascalararr{
294 for (
size_t i = 0; i < vec_t::width; ++i)
296 BOOST_CHECK_EQUAL(ascalararr[i], val);
304 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
308 for (
size_t i = 0; i < vec_t::width; ++i)
310 BOOST_CHECK_EQUAL(ascalararr[i], val);
324 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
327 for (
size_t i = 0; i < vec_t::width; ++i)
332 for (
size_t i = 0; i < vec_t::width; ++i)
334 avec[i] = ascalararr[i];
337 for (
size_t i = 0; i < vec_t::width; ++i)
339 BOOST_CHECK_EQUAL(ascalararr[i], avec[i]);
350 std::array<size_t, vec_t::width> aindex;
352 if (vec_t::width > 2)
358 if (vec_t::width > 4)
370 constexpr size_t scalarArraySize = 32;
371 std::array<double, scalarArraySize> ascalararr;
372 for (
size_t i = 0; i < scalarArraySize; ++i)
377 avec.gather(ascalararr.data(), aindexvec);
380 for (
size_t i = 0; i < vec_t::width; ++i)
382 BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
393 std::array<std::uint32_t, vec_t::width> aindex;
395 if (vec_t::width > 2)
401 if (vec_t::width > 4)
413 constexpr size_t scalarArraySize = 32;
414 std::array<double, scalarArraySize> ascalararr;
415 for (
size_t i = 0; i < scalarArraySize; ++i)
420 avec.gather(ascalararr.data(), aindexvec);
423 for (
size_t i = 0; i < vec_t::width; ++i)
425 BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
436 std::array<size_t, vec_t::width> aindex;
438 if (vec_t::width > 1)
442 if (vec_t::width > 2)
447 if (vec_t::width > 4)
459 constexpr size_t scalarArraySize = 32;
460 std::array<double, scalarArraySize> ascalararr;
463 alignas(vec_t::alignment) std::array<double, vec_t::width> avecarr{{}};
465 if (vec_t::width > 1)
470 if (vec_t::width > 2)
475 if (vec_t::width > 4)
482 avec.load(avecarr.data());
484 avec.scatter(ascalararr.data(), aindexvec);
487 for (
size_t i = 0; i < vec_t::width; ++i)
489 BOOST_CHECK_EQUAL(avec[i], ascalararr[aindex[i]]);
500 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
502 res.store(ascalararr.data());
504 for (
size_t i = 0; i < vec_t::width; ++i)
506 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
517 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
519 res.store(ascalararr.data());
521 for (
size_t i = 0; i < vec_t::width; ++i)
523 BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
534 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
536 res.store(ascalararr.data());
538 for (
size_t i = 0; i < vec_t::width; ++i)
540 BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
551 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
553 res.store(ascalararr.data());
555 for (
size_t i = 0; i < vec_t::width; ++i)
557 BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
567 vec_t res = avec1 + avec2;
568 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
570 res.store(ascalararr.data());
572 for (
size_t i = 0; i < vec_t::width; ++i)
574 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
584 vec_t res = avec1 - avec2;
585 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
587 res.store(ascalararr.data());
589 for (
size_t i = 0; i < vec_t::width; ++i)
591 BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
601 vec_t res = avec1 * avec2;
602 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
604 res.store(ascalararr.data());
606 for (
size_t i = 0; i < vec_t::width; ++i)
608 BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
618 vec_t res = avec1 / avec2;
619 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
621 res.store(ascalararr.data());
623 for (
size_t i = 0; i < vec_t::width; ++i)
625 BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
637 vec_t res = avec1 + avec2 * avec3;
638 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
640 res.store(ascalararr.data());
642 for (
size_t i = 0; i < vec_t::width; ++i)
644 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
656 avec1.fma(avec2, avec3);
657 alignas(vec_t::alignment) std::array<double, vec_t::width> ascalararr{
659 avec1.store(ascalararr.data());
661 for (
size_t i = 0; i < vec_t::width; ++i)
663 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
721 alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
723 amask.store(ascalararr.data());
724 for (
size_t i = 0; i < vec_t::width; ++i)
727 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint64_t)mask_t::false_v);
735 amask.store(ascalararr.data());
736 for (
size_t i = 0; i < vec_t::width; ++i)
739 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint64_t)mask_t::true_v);
747 amask.store(ascalararr.data());
748 for (
size_t i = 0; i < vec_t::width; ++i)
751 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint64_t)mask_t::false_v);
754 if (vec_t::width == 4)
756 alignas(vec_t::alignment) std::array<double, 4> ascalararr2{
757 {1.0, 2.0, 3.0, 4.0}};
761 evec.load(ascalararr2.data());
765 for (
size_t i = 0; i < vec_t::width; ++i)
767 BOOST_CHECK_EQUAL(
static_cast<bool>(amask[i]),
768 dval > ascalararr2[i]);
772 if (vec_t::width == 8)
774 alignas(vec_t::alignment) std::array<double, 8> ascalararr2{
775 {1.0, 2.0, 3.0, 4.0, 3.0, 2.0,
780 evec.load(ascalararr2.data());
784 for (
size_t i = 0; i < vec_t::width; ++i)
786 BOOST_CHECK_EQUAL(
static_cast<bool>(amask[i]),
787 dval > ascalararr2[i]);
799 alignas(vec_t::alignment) std::array<std::uint64_t, vec_t::width>
801 for (
size_t i = 0; i < vec_t::width; ++i)
803 ascalararr[i] = mask_t::true_v;
805 amask.load(ascalararr.data());
808 BOOST_CHECK_EQUAL(amask &&
false,
false);
809 BOOST_CHECK_EQUAL(amask &&
true,
true);
811 for (
size_t i = 0; i < vec_t::width; ++i)
813 ascalararr[i] = mask_t::false_v;
815 amask.load(ascalararr.data());
818 BOOST_CHECK_EQUAL(amask &&
false,
false);
819 BOOST_CHECK_EQUAL(amask &&
true,
false);
821 if (vec_t::width > 1)
823 ascalararr[0] = mask_t::true_v;
825 BOOST_CHECK_EQUAL(amask &&
false,
false);
826 BOOST_CHECK_EQUAL(amask &&
true,
false);
832 constexpr size_t nDof{5};
834 constexpr size_t nEle{vec_t::width * 5};
835 constexpr size_t nDofBlock = nDof * vec_t::width;
837 constexpr size_t size{nDof * nEle};
838 std::array<double, size> dofScalarArr{{}};
839 for (
size_t i = 0; i < size; ++i)
845 size_t nBlock = nEle / vec_t::width;
848 std::vector<vec_t, allocator<vec_t>> dofVectorArr(nDof);
850 double *dataPtr = dofScalarArr.data();
852 for (
size_t b = 0; b < nBlock; ++b)
858 for (
size_t j = 0; j < nDof; ++j)
860 dofVectorArr[j] = dofVectorArr[j] + j;
865 dataPtr += nDofBlock;
869 for (
size_t b = 0, i = 0; b < nBlock; ++b)
871 for (
size_t j = 0; j < nDof; ++j, ++i)
873 BOOST_CHECK_EQUAL(dofScalarArr[i], i + j);
880 BOOST_CHECK_EQUAL(default_long_vec_t::width,
883 BOOST_CHECK_EQUAL(long_vec_t::width, 3 * vec_t::width);
884 BOOST_CHECK_EQUAL(long_vec_t::alignment, vec_t::alignment);
885 BOOST_CHECK_EQUAL(
sizeof(
long_vec_t),
sizeof(
double) * long_vec_t::width);
886 BOOST_CHECK_EQUAL(is_vector_floating_point_v<long_vec_t>,
true);
891 alignas(long_vec_t::alignment) std::array<double, long_vec_t::width>
893 alignas(vec_t::alignment) std::array<double, vec_t::width> rhsData{{}};
895 for (
size_t i = 0; i < lhsData.size(); ++i)
897 lhsData[i] =
static_cast<double>(i + 1);
899 for (
size_t i = 0; i < rhsData.size(); ++i)
901 rhsData[i] =
static_cast<double>(10 * (i + 1));
910 auto sum = lhs + rhs;
911 auto reverseSum = rhs + lhs;
912 auto scalarRight = lhs + 2.5;
913 auto scalarLeft = 100.0 - lhs;
914 auto scaled = lhs * 2.0;
919 alignas(long_vec_t::alignment) std::array<double, long_vec_t::width> tmp{
922 for (
size_t i = 0; i < tmp.size(); ++i)
924 const auto chunkLane = i % vec_t::width;
925 BOOST_CHECK_EQUAL(tmp[i], lhsData[i] + rhsData[chunkLane]);
929 for (
size_t i = 0; i < tmp.size(); ++i)
931 const auto chunkLane = i % vec_t::width;
932 BOOST_CHECK_EQUAL(tmp[i], lhsData[i] + rhsData[chunkLane]);
936 for (
size_t i = 0; i < tmp.size(); ++i)
938 BOOST_CHECK_EQUAL(tmp[i], lhsData[i] + 2.5);
942 for (
size_t i = 0; i < tmp.size(); ++i)
944 BOOST_CHECK_EQUAL(tmp[i], 100.0 - lhsData[i]);
948 for (
size_t i = 0; i < tmp.size(); ++i)
950 BOOST_CHECK_EQUAL(tmp[i], lhsData[i] * 2.0);
954 for (
size_t i = 0; i < tmp.size(); ++i)
956 const auto chunkLane = i % vec_t::width;
957 BOOST_CHECK_EQUAL(tmp[i], 1.0 + lhsData[i] * rhsData[chunkLane]);
964 constexpr size_t nDof = 5;
965 constexpr size_t nBlocks = 3;
966 constexpr size_t nEle = long_vec_t::width * nBlocks;
967 constexpr size_t blockSize = nDof * long_vec_t::width;
968 constexpr size_t size = nDof * nEle;
970 std::array<double, size> data{{}};
971 for (
size_t i = 0; i < size; ++i)
973 data[i] =
static_cast<double>(i);
976 std::vector<long_vec_t, allocator<long_vec_t>> work(nDof);
978 for (
size_t block = 0; block < nBlocks; ++block)
980 double *dataPtr = data.data() + block * blockSize;
982 for (
size_t j = 0; j < nDof; ++j)
984 work[j] +=
static_cast<double>(j + 1);
989 for (
size_t block = 0; block < nBlocks; ++block)
991 for (
size_t lane = 0; lane < long_vec_t::width; ++lane)
993 for (
size_t dof = 0; dof < nDof; ++dof)
995 const auto idx = block * blockSize + lane * nDof + dof;
996 BOOST_CHECK_EQUAL(data[idx],
997 static_cast<double>(idx + dof + 1));
1004 constexpr size_t nDof = 4;
1005 constexpr size_t skipPads = long_vec_t::width > 1 ? 1 : 0;
1006 constexpr size_t activeWidth = long_vec_t::width - skipPads;
1007 constexpr size_t unalignedSize = nDof * long_vec_t::width;
1008 constexpr size_t paddedSize = nDof * activeWidth;
1010 std::array<double, unalignedSize> data{{}};
1011 for (
size_t i = 0; i < data.size(); ++i)
1013 data[i] =
static_cast<double>(100 + i);
1016 std::vector<long_vec_t, allocator<long_vec_t>> work(nDof);
1018 for (
size_t j = 0; j < nDof; ++j)
1020 work[j] +=
static_cast<double>(2 * j + 1);
1022 std::array<double, unalignedSize> out{{}};
1025 for (
size_t lane = 0; lane < long_vec_t::width; ++lane)
1027 for (
size_t dof = 0; dof < nDof; ++dof)
1029 const auto idx = lane * nDof + dof;
1030 BOOST_CHECK_EQUAL(out[idx],
1031 data[idx] +
static_cast<double>(2 * dof + 1));
1035 std::array<double, paddedSize> padded{{}};
1036 for (
size_t i = 0; i < padded.size(); ++i)
1038 padded[i] =
static_cast<double>(200 + i);
1042 for (
auto &value : work)
1046 std::array<double, paddedSize> paddedOut{{}};
1050 for (
size_t lane = 0; lane < activeWidth; ++lane)
1052 for (
size_t dof = 0; dof < nDof; ++dof)
1054 const auto idx = lane * nDof + dof;
1055 BOOST_CHECK_EQUAL(paddedOut[idx], padded[idx] + 3.0);