123 using namespace details;
125 BOOST_CHECK_EQUAL(has_width<float>::value,
false);
126 BOOST_CHECK_EQUAL(has_width<vec_t>::value,
true);
128 BOOST_CHECK_EQUAL(has_alignment<float>::value,
false);
129 BOOST_CHECK_EQUAL(has_alignment<vec_t>::value,
true);
131 BOOST_CHECK_EQUAL(has_scalarType<float>::value,
false);
132 BOOST_CHECK_EQUAL(has_scalarType<vec_t>::value,
true);
134 BOOST_CHECK_EQUAL(is_vector_v<float>,
false);
135 BOOST_CHECK_EQUAL(is_vector_v<vec_t>,
true);
137 BOOST_CHECK_EQUAL(is_vector_floating_point_v<float>,
false);
139 BOOST_CHECK_EQUAL(is_vector_floating_point_v<vec_t>,
true);
217 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
221 for (
size_t i = 0; i < vec_t::width; ++i)
223 BOOST_CHECK_EQUAL(ascalararr[i], val);
231 std::array<float, vec_t::width> ascalararr{
235 for (
size_t i = 0; i < vec_t::width; ++i)
237 BOOST_CHECK_EQUAL(ascalararr[i], val);
245 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
249 for (
size_t i = 0; i < vec_t::width; ++i)
251 BOOST_CHECK_EQUAL(ascalararr[i], val);
267 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
270 for (
size_t i = 0; i < vec_t::width; ++i)
275 for (
size_t i = 0; i < vec_t::width; ++i)
277 avec[i] = ascalararr[i];
280 for (
size_t i = 0; i < vec_t::width; ++i)
282 BOOST_CHECK_EQUAL(ascalararr[i], avec[i]);
293 std::array<std::uint32_t, vec_t::width> aindex;
295 if (vec_t::width > 2)
301 if (vec_t::width > 4)
308 if (vec_t::width > 8)
324 constexpr size_t scalarArraySize = 64;
325 std::array<float, scalarArraySize> ascalararr;
326 for (
size_t i = 0; i < scalarArraySize; ++i)
331 avec.gather(ascalararr.data(), aindexvec);
334 for (
size_t i = 0; i < vec_t::width; ++i)
336 BOOST_CHECK_EQUAL(ascalararr[aindex[i]], avec[i]);
347 std::array<std::uint32_t, vec_t::width> aindex;
349 if (vec_t::width > 1)
353 if (vec_t::width > 2)
358 if (vec_t::width > 4)
365 if (vec_t::width > 8)
381 constexpr size_t scalarArraySize = 64;
382 std::array<float, scalarArraySize> ascalararr;
385 alignas(vec_t::alignment) std::array<float, vec_t::width> avecarr{{}};
387 if (vec_t::width > 1)
392 if (vec_t::width > 2)
397 if (vec_t::width > 4)
404 avec.load(avecarr.data());
406 avec.scatter(ascalararr.data(), aindexvec);
409 for (
size_t i = 0; i < vec_t::width; ++i)
411 BOOST_CHECK_EQUAL(avec[i], ascalararr[aindex[i]]);
489 vec_t res = avec1 + avec2;
490 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
492 res.store(ascalararr.data());
494 for (
size_t i = 0; i < vec_t::width; ++i)
496 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2);
506 vec_t res = avec1 - avec2;
507 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
509 res.store(ascalararr.data());
511 for (
size_t i = 0; i < vec_t::width; ++i)
513 BOOST_CHECK_EQUAL(ascalararr[i], val1 - val2);
523 vec_t res = avec1 * avec2;
524 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
526 res.store(ascalararr.data());
528 for (
size_t i = 0; i < vec_t::width; ++i)
530 BOOST_CHECK_EQUAL(ascalararr[i], val1 * val2);
540 vec_t res = avec1 / avec2;
541 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
543 res.store(ascalararr.data());
545 for (
size_t i = 0; i < vec_t::width; ++i)
547 BOOST_CHECK_EQUAL(ascalararr[i], val1 / val2);
// Combined add-multiply check: every lane of `res` is compared against the
// scalar expression with the same operator structure as the vector expression
// (multiplication binds tighter than addition).
559 vec_t res = avec1 + avec2 * avec3;
560 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
562 res.store(ascalararr.data());
564 for (
size_t i = 0; i < vec_t::width; ++i)
566 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
578 avec1.fma(avec2, avec3);
579 alignas(vec_t::alignment) std::array<float, vec_t::width> ascalararr{
581 avec1.store(ascalararr.data());
583 for (
size_t i = 0; i < vec_t::width; ++i)
585 BOOST_CHECK_EQUAL(ascalararr[i], val1 + val2 * val3);
643 alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
645 amask.store(ascalararr.data());
646 for (
size_t i = 0; i < vec_t::width; ++i)
649 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::false_v);
657 amask.store(ascalararr.data());
658 for (
size_t i = 0; i < vec_t::width; ++i)
661 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::true_v);
669 amask.store(ascalararr.data());
670 for (
size_t i = 0; i < vec_t::width; ++i)
673 BOOST_CHECK_EQUAL(ascalararr[i], (std::uint32_t)mask_t::false_v);
676 if (vec_t::width == 4)
678 alignas(vec_t::alignment) std::array<float, 4> ascalararr2{
679 {1.0, 2.0, 3.0, 4.0}};
683 evec.load(ascalararr2.data());
687 for (
size_t i = 0; i < vec_t::width; ++i)
689 BOOST_CHECK_EQUAL(
static_cast<bool>(amask[i]), dvec[i] > evec[i]);
693 if (vec_t::width == 8)
695 alignas(vec_t::alignment) std::array<float, 8> ascalararr2{
696 {1.0, 2.0, 3.0, 4.0, 3.0, 2.0,
701 evec.load(ascalararr2.data());
705 for (
size_t i = 0; i < vec_t::width; ++i)
707 BOOST_CHECK_EQUAL(
static_cast<bool>(amask[i]),
708 dval > ascalararr2[i]);
720 alignas(vec_t::alignment) std::array<std::uint32_t, vec_t::width>
722 for (
size_t i = 0; i < vec_t::width; ++i)
724 ascalararr[i] = mask_t::true_v;
726 amask.load(ascalararr.data());
729 BOOST_CHECK_EQUAL(amask &&
false,
false);
730 BOOST_CHECK_EQUAL(amask &&
true,
true);
732 for (
size_t i = 0; i < vec_t::width; ++i)
734 ascalararr[i] = mask_t::false_v;
736 amask.load(ascalararr.data());
739 BOOST_CHECK_EQUAL(amask &&
false,
false);
740 BOOST_CHECK_EQUAL(amask &&
true,
false);
742 if (vec_t::width > 1)
744 ascalararr[0] = mask_t::true_v;
746 BOOST_CHECK_EQUAL(amask &&
false,
false);
747 BOOST_CHECK_EQUAL(amask &&
true,
false);
753 constexpr size_t nDof{5};
755 constexpr size_t nEle{vec_t::width * 5};
756 constexpr size_t nDofBlock = nDof * vec_t::width;
758 constexpr size_t size{nDof * nEle};
759 std::array<float, size> dofScalarArr{{}};
760 for (
size_t i = 0; i < size; ++i)
766 size_t nBlock = nEle / vec_t::width;
769 std::vector<vec_t, allocator<vec_t>> dofVectorArr(nDof);
771 float *dataPtr = dofScalarArr.data();
773 for (
size_t b = 0; b < nBlock; ++b)
779 for (
size_t j = 0; j < nDof; ++j)
781 dofVectorArr[j] = dofVectorArr[j] + j;
786 dataPtr += nDofBlock;
790 for (
size_t b = 0, i = 0; b < nBlock; ++b)
792 for (
size_t j = 0; j < nDof; ++j, ++i)
794 BOOST_CHECK_EQUAL(dofScalarArr[i], i + j);
801 alignas(long_vec_t::alignment) std::array<float, long_vec_t::width> lhsData{
803 alignas(vec_t::alignment) std::array<float, vec_t::width> rhsData{{}};
805 for (
size_t i = 0; i < lhsData.size(); ++i)
807 lhsData[i] =
static_cast<float>(i + 1);
809 for (
size_t i = 0; i < rhsData.size(); ++i)
811 rhsData[i] =
static_cast<float>(i + 1);
820 auto mixed = lhs + rhs;
824 alignas(long_vec_t::alignment) std::array<float, long_vec_t::width> tmp{{}};
826 for (
size_t i = 0; i < tmp.size(); ++i)
828 const auto chunkLane = i % vec_t::width;
829 BOOST_CHECK_EQUAL(tmp[i],
830 lhsData[i] + 2.0f * rhsData[chunkLane] + 2.0f);
836 constexpr size_t nDof = 3;
837 constexpr size_t nBlocks = 2;
838 constexpr size_t nEle = long_vec_t::width * nBlocks;
839 constexpr size_t blockSize = nDof * long_vec_t::width;
840 constexpr size_t size = nDof * nEle;
842 std::array<float, size> data{{}};
843 for (
size_t i = 0; i < size; ++i)
845 data[i] =
static_cast<float>(i);
848 std::vector<long_vec_t, allocator<long_vec_t>> work(nDof);
850 for (
size_t block = 0; block < nBlocks; ++block)
852 float *dataPtr = data.data() + block * blockSize;
854 for (
size_t j = 0; j < nDof; ++j)
856 work[j] +=
static_cast<float>(j + 1);
861 for (
size_t block = 0; block < nBlocks; ++block)
863 for (
size_t lane = 0; lane < long_vec_t::width; ++lane)
865 for (
size_t dof = 0; dof < nDof; ++dof)
867 const auto idx = block * blockSize + lane * nDof + dof;
868 BOOST_CHECK_EQUAL(data[idx],
static_cast<float>(idx + dof + 1));