35 #include <boost/core/ignore_unused.hpp>
37 #include <MatrixFreeOps/Operator.hpp>
77 int nPhys = m_stdExp->GetTotPoints();
78 int ntot = m_numElmt * nPhys;
86 for (
int i = 0; i < m_dim; ++i)
88 Diff[i] = wsp + i * ntot;
92 for (
int i = 0; i < m_dim; ++i)
94 Blas::Dgemm(
'N',
'N', m_derivMat[i]->GetRows(), m_numElmt,
95 m_derivMat[i]->GetColumns(), 1.0,
96 m_derivMat[i]->GetRawPtr(), m_derivMat[i]->GetRows(),
97 input.get(), nPhys, 0.0, &Diff[i][0], nPhys);
103 for (
int i = 0; i < m_coordim; ++i)
106 for (
int j = 0; j < m_dim; ++j)
108 Vmath::Vvtvp(ntot, m_derivFac[i * m_dim + j], 1, Diff[j], 1,
109 out[i], 1, out[i], 1);
116 for (
int i = 0; i < m_coordim; ++i)
119 for (
int e = 0; e < m_numElmt; ++e)
121 for (
int j = 0; j < m_dim; ++j)
124 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
125 1, t = out[i] + e * m_nqe, 1);
136 int nPhys = m_stdExp->GetTotPoints();
137 int ntot = m_numElmt * nPhys;
141 for (
int i = 0; i < m_dim; ++i)
143 Diff[i] = wsp + i * ntot;
147 for (
int i = 0; i < m_dim; ++i)
149 Blas::Dgemm(
'N',
'N', m_derivMat[i]->GetRows(), m_numElmt,
150 m_derivMat[i]->GetColumns(), 1.0,
151 m_derivMat[i]->GetRawPtr(), m_derivMat[i]->GetRows(),
152 input.get(), nPhys, 0.0, &Diff[i][0], nPhys);
159 for (
int j = 0; j < m_dim; ++j)
161 Vmath::Vvtvp(ntot, m_derivFac[dir * m_dim + j], 1, Diff[j], 1,
162 output, 1, output, 1);
168 for (
int e = 0; e < m_numElmt; ++e)
170 for (
int j = 0; j < m_dim; ++j)
173 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
174 t = output + e * m_nqe, 1);
181 int coll_phys_offset)
override
183 boost::ignore_unused(factors, coll_phys_offset);
184 ASSERTL0(
false,
"Not valid for this operator.");
197 :
Operator(pCollExp, pGeomData, factors)
201 m_dim = PtsKey.size();
202 m_coordim = pCollExp[0]->GetCoordim();
204 for (
int i = 0; i < m_dim; ++i)
206 nqtot *= PtsKey[i].GetNumPoints();
210 for (
int i = 0; i < m_dim; ++i)
215 for (
int j = 0; j < nqtot; ++j)
219 m_stdExp->PhysDeriv(i, tmp, tmp1);
221 &(m_derivMat[i]->GetPtr())[0] + j * nqtot, 1);
224 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
225 m_wspSize = 3 * nqtot * m_numElmt;
233 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_Seg"),
236 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_Tri"),
239 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_NodalTri"),
242 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_Quad"),
245 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_Tet"),
248 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_NodalTet"),
251 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_Pyr"),
254 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_Prism"),
257 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_NodalPrism"),
260 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_Hex"),
263 PhysDeriv_StdMat::create,
"PhysDeriv_SumFac_Pyr")};
283 boost::ignore_unused(wsp);
289 (*m_oper)(m_input, m_output);
293 (*m_oper)(input, m_output);
314 NEKERROR(ErrorUtil::efatal,
"Unknown coordinate dimension");
323 boost::ignore_unused(wsp);
328 (*m_oper)(m_input, m_output);
332 (*m_oper)(input, m_output);
338 int coll_phys_offset)
override
340 boost::ignore_unused(factors, coll_phys_offset);
341 ASSERTL0(
false,
"Not valid for this operator.");
345 std::shared_ptr<MatrixFree::PhysDeriv>
m_oper;
350 :
Operator(pCollExp, pGeomData, factors),
352 pCollExp[0]->GetStdExp()->GetTotPoints(),
353 pCollExp[0]->GetStdExp()->GetTotPoints(),
357 bool deformed{pGeomData->IsDeformed(pCollExp)};
358 const auto dim = pCollExp[0]->GetStdExp()->GetShapeDimension();
360 if (m_isPadded ==
false)
362 int nOut = pCollExp[0]->GetStdExp()->GetTotPoints();
369 else if (m_coordim == 3)
377 std::vector<LibUtilities::BasisSharedPtr> basis(dim);
378 for (
unsigned int i = 0; i < dim; ++i)
380 basis[i] = pCollExp[0]->GetBasis(i);
384 auto shapeType = pCollExp[0]->GetStdExp()->DetShapeType();
387 std::string op_string =
"PhysDeriv";
388 op_string += MatrixFree::GetOpstring(shapeType, deformed);
390 op_string, basis, m_nElmtPad);
393 oper->SetDF(pGeomData->GetDerivFactorsInterLeave(pCollExp, m_nElmtPad));
395 m_oper = std::dynamic_pointer_cast<MatrixFree::PhysDeriv>(oper);
396 ASSERTL0(m_oper,
"Failed to cast pointer.");
404 PhysDeriv_MatrixFree::create,
"PhysDeriv_MatrixFree_Seg"),
407 PhysDeriv_MatrixFree::create,
"PhysDeriv_MatrixFree_Tri"),
410 PhysDeriv_MatrixFree::create,
"PhysDeriv_MatrixFree_Quad"),
413 PhysDeriv_MatrixFree::create,
"PhysDeriv_MatrixFree_Hex"),
416 PhysDeriv_MatrixFree::create,
"PhysDeriv_MatrixFree_Prism"),
419 PhysDeriv_MatrixFree::create,
"PhysDeriv_MatrixFree_Pyr"),
422 PhysDeriv_MatrixFree::create,
"PhysDeriv_MatrixFree_Tet")
445 int nPhys = m_stdExp->GetTotPoints();
446 int ntot = m_numElmt * nPhys;
454 for (
int i = 0; i < m_dim; ++i)
456 Diff[i] = wsp + i * ntot;
460 for (
int i = 0; i < m_numElmt; ++i)
462 m_stdExp->PhysDeriv(input + i * nPhys, tmp0 = Diff[0] + i * nPhys,
463 tmp1 = Diff[1] + i * nPhys,
464 tmp2 = Diff[2] + i * nPhys);
470 for (
int i = 0; i < m_coordim; ++i)
472 Vmath::Vmul(ntot, m_derivFac[i * m_dim], 1, Diff[0], 1, out[i],
474 for (
int j = 1; j < m_dim; ++j)
476 Vmath::Vvtvp(ntot, m_derivFac[i * m_dim + j], 1, Diff[j], 1,
477 out[i], 1, out[i], 1);
484 for (
int e = 0; e < m_numElmt; ++e)
486 for (
int i = 0; i < m_coordim; ++i)
489 Diff[0] + e * m_nqe, 1, t = out[i] + e * m_nqe,
491 for (
int j = 1; j < m_dim; ++j)
494 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
495 1, t = out[i] + e * m_nqe, 1);
506 int nPhys = m_stdExp->GetTotPoints();
507 int ntot = m_numElmt * nPhys;
511 for (
int i = 0; i < m_dim; ++i)
513 Diff[i] = wsp + i * ntot;
517 for (
int i = 0; i < m_numElmt; ++i)
519 m_stdExp->PhysDeriv(input + i * nPhys, tmp0 = Diff[0] + i * nPhys,
520 tmp1 = Diff[1] + i * nPhys,
521 tmp2 = Diff[2] + i * nPhys);
527 for (
int j = 0; j < m_dim; ++j)
529 Vmath::Vvtvp(ntot, m_derivFac[dir * m_dim + j], 1, Diff[j], 1,
530 output, 1, output, 1);
536 for (
int e = 0; e < m_numElmt; ++e)
538 for (
int j = 0; j < m_dim; ++j)
541 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
542 t = output + e * m_nqe, 1);
549 int coll_phys_offset)
override
551 boost::ignore_unused(factors, coll_phys_offset);
552 ASSERTL0(
false,
"Not valid for this operator.");
564 :
Operator(pCollExp, pGeomData, factors)
568 m_dim = PtsKey.size();
569 m_coordim = pCollExp[0]->GetCoordim();
571 for (
int i = 0; i < m_dim; ++i)
573 nqtot *= PtsKey[i].GetNumPoints();
575 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
576 m_wspSize = 3 * nqtot * m_numElmt;
584 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_Seg"),
587 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_Tri"),
590 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_NodalTri"),
593 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_Quad"),
596 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_Tet"),
599 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_NodalTet"),
602 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_Pyr"),
605 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_Prism"),
608 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_NodalPrism"),
611 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_Hex")};
631 boost::ignore_unused(wsp);
633 const int nPhys = m_expList[0]->GetTotPoints();
637 switch (m_expList[0]->GetShapeDimension())
641 for (
int i = 0; i < m_numElmt; ++i)
643 m_expList[i]->PhysDeriv(input + i * nPhys,
644 tmp0 = output0 + i * nPhys);
650 for (
int i = 0; i < m_numElmt; ++i)
652 m_expList[i]->PhysDeriv(input + i * nPhys,
653 tmp0 = output0 + i * nPhys,
654 tmp1 = output1 + i * nPhys);
660 for (
int i = 0; i < m_numElmt; ++i)
662 m_expList[i]->PhysDeriv(
663 input + i * nPhys, tmp0 = output0 + i * nPhys,
664 tmp1 = output1 + i * nPhys, tmp2 = output2 + i * nPhys);
669 ASSERTL0(
false,
"Unknown dimension.");
677 boost::ignore_unused(wsp);
679 const int nPhys = m_expList[0]->GetTotPoints();
683 for (
int i = 0; i < m_numElmt; ++i)
685 m_expList[i]->PhysDeriv(dir, input + i * nPhys,
686 tmp = output + i * nPhys);
691 int coll_phys_offset)
override
693 boost::ignore_unused(factors, coll_phys_offset);
694 ASSERTL0(
false,
"Not valid for this operator.");
704 :
Operator(pCollExp, pGeomData, factors)
706 m_expList = pCollExp;
711 OperatorKey PhysDeriv_NoCollection::m_typeArr[] = {
714 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_Seg"),
717 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_Tri"),
720 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_NodalTri"),
723 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_Quad"),
726 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_Tet"),
729 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_NodalTet"),
732 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_Pyr"),
735 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_Prism"),
738 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_NodalPrism"),
741 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_Hex")};
762 const int nqcol = m_nquad0 * m_numElmt;
764 ASSERTL1(wsp.size() == m_wspSize,
"Incorrect workspace size");
765 ASSERTL1(input.size() >= nqcol,
"Incorrect input size");
769 Blas::Dgemm(
'N',
'N', m_nquad0, m_numElmt, m_nquad0, 1.0, m_Deriv0,
770 m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
775 Vmath::Vmul(nqcol, m_derivFac[0], 1, diff0, 1, output0, 1);
779 Vmath::Vmul(nqcol, m_derivFac[1], 1, diff0, 1, output1, 1);
781 else if (m_coordim == 3)
783 Vmath::Vmul(nqcol, m_derivFac[1], 1, diff0, 1, output1, 1);
784 Vmath::Vmul(nqcol, m_derivFac[2], 1, diff0, 1, output2, 1);
790 for (
int e = 0; e < m_numElmt; ++e)
792 Vmath::Smul(m_nqe, m_derivFac[0][e], diff0 + e * m_nqe, 1,
793 t = output0 + e * m_nqe, 1);
798 for (
int e = 0; e < m_numElmt; ++e)
800 Vmath::Smul(m_nqe, m_derivFac[1][e], diff0 + e * m_nqe, 1,
801 t = output1 + e * m_nqe, 1);
804 else if (m_coordim == 3)
806 for (
int e = 0; e < m_numElmt; ++e)
808 Vmath::Smul(m_nqe, m_derivFac[1][e], diff0 + e * m_nqe, 1,
809 t = output1 + e * m_nqe, 1);
810 Vmath::Smul(m_nqe, m_derivFac[2][e], diff0 + e * m_nqe, 1,
811 t = output2 + e * m_nqe, 1);
821 const int nqcol = m_nquad0 * m_numElmt;
823 ASSERTL1(wsp.size() == m_wspSize,
"Incorrect workspace size");
824 ASSERTL1(input.size() >= nqcol,
"Incorrect input size");
828 Blas::Dgemm(
'N',
'N', m_nquad0, m_numElmt, m_nquad0, 1.0, m_Deriv0,
829 m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
834 Vmath::Vmul(nqcol, m_derivFac[dir], 1, diff0, 1, output, 1);
839 for (
int e = 0; e < m_numElmt; ++e)
841 Vmath::Smul(m_nqe, m_derivFac[0][e], diff0 + e * m_nqe, 1,
842 t = output + e * m_nqe, 1);
848 int coll_phys_offset)
override
850 boost::ignore_unused(factors, coll_phys_offset);
851 ASSERTL0(
false,
"Not valid for this operator.");
864 :
Operator(pCollExp, pGeomData, factors),
865 m_nquad0(m_stdExp->GetNumPoints(0))
868 m_coordim = pCollExp[0]->GetCoordim();
870 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
872 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
873 m_wspSize = m_nquad0 * m_numElmt;
881 PhysDeriv_SumFac_Seg::create,
"PhysDeriv_SumFac_Seg");
902 const int nqtot = m_nquad0 * m_nquad1;
903 const int nqcol = nqtot * m_numElmt;
905 ASSERTL1(wsp.size() == m_wspSize,
"Incorrect workspace size");
906 ASSERTL1(input.size() >= nqcol,
"Incorrect input size");
911 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1 * m_numElmt, m_nquad0, 1.0,
912 m_Deriv0, m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
916 for (
int i = 0; i < m_numElmt; ++i, cnt += nqtot)
918 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
919 input.get() + cnt, m_nquad0, m_Deriv1, m_nquad1, 0.0,
920 diff1.get() + cnt, m_nquad0);
925 Vmath::Vmul(nqcol, m_derivFac[0], 1, diff0, 1, output0, 1);
926 Vmath::Vvtvp(nqcol, m_derivFac[1], 1, diff1, 1, output0, 1, output0,
928 Vmath::Vmul(nqcol, m_derivFac[2], 1, diff0, 1, output1, 1);
929 Vmath::Vvtvp(nqcol, m_derivFac[3], 1, diff1, 1, output1, 1, output1,
934 Vmath::Vmul(nqcol, m_derivFac[4], 1, diff0, 1, output2, 1);
935 Vmath::Vvtvp(nqcol, m_derivFac[5], 1, diff1, 1, output2, 1,
942 for (
int e = 0; e < m_numElmt; ++e)
944 Vmath::Smul(m_nqe, m_derivFac[0][e], diff0 + e * m_nqe, 1,
945 t = output0 + e * m_nqe, 1);
946 Vmath::Svtvp(m_nqe, m_derivFac[1][e], diff1 + e * m_nqe, 1,
947 output0 + e * m_nqe, 1, t = output0 + e * m_nqe,
950 Vmath::Smul(m_nqe, m_derivFac[2][e], diff0 + e * m_nqe, 1,
951 t = output1 + e * m_nqe, 1);
952 Vmath::Svtvp(m_nqe, m_derivFac[3][e], diff1 + e * m_nqe, 1,
953 output1 + e * m_nqe, 1, t = output1 + e * m_nqe,
959 for (
int e = 0; e < m_numElmt; ++e)
961 Vmath::Smul(m_nqe, m_derivFac[4][e], diff0 + e * m_nqe, 1,
962 t = output2 + e * m_nqe, 1);
963 Vmath::Svtvp(m_nqe, m_derivFac[5][e], diff1 + e * m_nqe, 1,
964 output2 + e * m_nqe, 1,
965 t = output2 + e * m_nqe, 1);
975 const int nqtot = m_nquad0 * m_nquad1;
976 const int nqcol = nqtot * m_numElmt;
978 ASSERTL1(wsp.size() == m_wspSize,
"Incorrect workspace size");
979 ASSERTL1(input.size() >= nqcol,
"Incorrect input size");
984 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1 * m_numElmt, m_nquad0, 1.0,
985 m_Deriv0, m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
989 for (
int i = 0; i < m_numElmt; ++i, cnt += nqtot)
991 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
992 input.get() + cnt, m_nquad0, m_Deriv1, m_nquad1, 0.0,
993 diff1.get() + cnt, m_nquad0);
998 Vmath::Vmul(nqcol, m_derivFac[2 * dir], 1, diff0, 1, output, 1);
999 Vmath::Vvtvp(nqcol, m_derivFac[2 * dir + 1], 1, diff1, 1, output, 1,
1005 for (
int e = 0; e < m_numElmt; ++e)
1007 Vmath::Smul(m_nqe, m_derivFac[2 * dir][e], diff0 + e * m_nqe, 1,
1008 t = output + e * m_nqe, 1);
1010 diff1 + e * m_nqe, 1, output + e * m_nqe, 1,
1011 t = output + e * m_nqe, 1);
1017 int coll_phys_offset)
override
1019 boost::ignore_unused(factors, coll_phys_offset);
1020 ASSERTL0(
false,
"Not valid for this operator.");
1035 :
Operator(pCollExp, pGeomData, factors),
1036 m_nquad0(m_stdExp->GetNumPoints(0)),
1037 m_nquad1(m_stdExp->GetNumPoints(1))
1040 m_coordim = pCollExp[0]->GetCoordim();
1042 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1044 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1045 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1046 m_wspSize = 2 * m_nquad0 * m_nquad1 * m_numElmt;
1054 PhysDeriv_SumFac_Quad::create,
"PhysDeriv_SumFac_Quad");
1075 const int nqtot = m_nquad0 * m_nquad1;
1076 const int nqcol = nqtot * m_numElmt;
1078 ASSERTL1(wsp.size() == m_wspSize,
"Incorrect workspace size");
1079 ASSERTL1(input.size() >= nqcol,
"Incorrect input size");
1085 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1 * m_numElmt, m_nquad0, 1.0,
1086 m_Deriv0, m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
1090 for (
int i = 0; i < m_numElmt; ++i, cnt += nqtot)
1093 Vmath::Vmul(nqtot, &m_fac1[0], 1, diff0.get() + cnt, 1,
1094 diff0.get() + cnt, 1);
1096 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1097 input.get() + cnt, m_nquad0, m_Deriv1, m_nquad1, 0.0,
1098 diff1.get() + cnt, m_nquad0);
1101 Vmath::Vvtvp(nqtot, m_fac0.get(), 1, diff0.get() + cnt, 1,
1102 diff1.get() + cnt, 1, diff1.get() + cnt, 1);
1107 Vmath::Vmul(nqcol, m_derivFac[0], 1, diff0, 1, output0, 1);
1108 Vmath::Vvtvp(nqcol, m_derivFac[1], 1, diff1, 1, output0, 1, output0,
1110 Vmath::Vmul(nqcol, m_derivFac[2], 1, diff0, 1, output1, 1);
1111 Vmath::Vvtvp(nqcol, m_derivFac[3], 1, diff1, 1, output1, 1, output1,
1116 Vmath::Vmul(nqcol, m_derivFac[4], 1, diff0, 1, output2, 1);
1117 Vmath::Vvtvp(nqcol, m_derivFac[5], 1, diff1, 1, output2, 1,
1124 for (
int e = 0; e < m_numElmt; ++e)
1126 Vmath::Smul(m_nqe, m_derivFac[0][e], diff0 + e * m_nqe, 1,
1127 t = output0 + e * m_nqe, 1);
1128 Vmath::Svtvp(m_nqe, m_derivFac[1][e], diff1 + e * m_nqe, 1,
1129 output0 + e * m_nqe, 1, t = output0 + e * m_nqe,
1132 Vmath::Smul(m_nqe, m_derivFac[2][e], diff0 + e * m_nqe, 1,
1133 t = output1 + e * m_nqe, 1);
1134 Vmath::Svtvp(m_nqe, m_derivFac[3][e], diff1 + e * m_nqe, 1,
1135 output1 + e * m_nqe, 1, t = output1 + e * m_nqe,
1141 for (
int e = 0; e < m_numElmt; ++e)
1143 Vmath::Smul(m_nqe, m_derivFac[4][e], diff0 + e * m_nqe, 1,
1144 t = output2 + e * m_nqe, 1);
1145 Vmath::Svtvp(m_nqe, m_derivFac[5][e], diff1 + e * m_nqe, 1,
1146 output2 + e * m_nqe, 1,
1147 t = output2 + e * m_nqe, 1);
1157 const int nqtot = m_nquad0 * m_nquad1;
1158 const int nqcol = nqtot * m_numElmt;
1160 ASSERTL1(wsp.size() == m_wspSize,
"Incorrect workspace size");
1161 ASSERTL1(input.size() >= nqcol,
"Incorrect input size");
1167 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1 * m_numElmt, m_nquad0, 1.0,
1168 m_Deriv0, m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
1172 for (
int i = 0; i < m_numElmt; ++i, cnt += nqtot)
1175 Vmath::Vmul(nqtot, &m_fac1[0], 1, diff0.get() + cnt, 1,
1176 diff0.get() + cnt, 1);
1178 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1179 input.get() + cnt, m_nquad0, m_Deriv1, m_nquad1, 0.0,
1180 diff1.get() + cnt, m_nquad0);
1183 Vmath::Vvtvp(nqtot, m_fac0.get(), 1, diff0.get() + cnt, 1,
1184 diff1.get() + cnt, 1, diff1.get() + cnt, 1);
1189 Vmath::Vmul(nqcol, m_derivFac[2 * dir], 1, diff0, 1, output, 1);
1190 Vmath::Vvtvp(nqcol, m_derivFac[2 * dir + 1], 1, diff1, 1, output, 1,
1196 for (
int e = 0; e < m_numElmt; ++e)
1198 Vmath::Smul(m_nqe, m_derivFac[2 * dir][e], diff0 + e * m_nqe, 1,
1199 t = output + e * m_nqe, 1);
1201 diff1 + e * m_nqe, 1, output + e * m_nqe, 1,
1202 t = output + e * m_nqe, 1);
1208 int coll_phys_offset)
override
1210 boost::ignore_unused(factors, coll_phys_offset);
1211 ASSERTL0(
false,
"Not valid for this operator.");
1228 :
Operator(pCollExp, pGeomData, factors),
1229 m_nquad0(m_stdExp->GetNumPoints(0)),
1230 m_nquad1(m_stdExp->GetNumPoints(1))
1233 m_coordim = pCollExp[0]->GetCoordim();
1235 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1241 for (
int i = 0; i < m_nquad0; ++i)
1243 for (
int j = 0; j < m_nquad1; ++j)
1245 m_fac0[i + j * m_nquad0] = 0.5 * (1 + z0[i]);
1251 for (
int i = 0; i < m_nquad0; ++i)
1253 for (
int j = 0; j < m_nquad1; ++j)
1255 m_fac1[i + j * m_nquad0] = 2.0 / (1 - z1[j]);
1259 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1260 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1261 m_wspSize = 2 * m_nquad0 * m_nquad1 * m_numElmt;
1269 PhysDeriv_SumFac_Tri::create,
"PhysDeriv_SumFac_Tri"),
1272 PhysDeriv_SumFac_Tri::create,
"PhysDeriv_SumFac_NodalTri")};
1293 int nPhys = m_stdExp->GetTotPoints();
1294 int ntot = m_numElmt * nPhys;
1302 for (
int i = 0; i < 3; ++i)
1304 Diff[i] = wsp + i * ntot;
1307 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1 * m_nquad2 * m_numElmt,
1308 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1309 &Diff[0][0], m_nquad0);
1311 for (
int i = 0; i < m_numElmt; ++i)
1313 for (
int j = 0; j < m_nquad2; ++j)
1315 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1316 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1317 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1318 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1322 Blas::Dgemm(
'N',
'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1323 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1324 m_nquad2, 0.0, &Diff[2][i * nPhys],
1325 m_nquad0 * m_nquad1);
1331 for (
int i = 0; i < m_coordim; ++i)
1333 Vmath::Vmul(ntot, m_derivFac[i * 3], 1, Diff[0], 1, out[i], 1);
1334 for (
int j = 1; j < 3; ++j)
1336 Vmath::Vvtvp(ntot, m_derivFac[i * 3 + j], 1, Diff[j], 1,
1337 out[i], 1, out[i], 1);
1344 for (
int e = 0; e < m_numElmt; ++e)
1346 for (
int i = 0; i < m_coordim; ++i)
1350 Diff[0] + e * m_nqe, 1, t = out[i] + e * m_nqe,
1353 for (
int j = 1; j < 3; ++j)
1356 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
1357 1, t = out[i] + e * m_nqe, 1);
1368 int nPhys = m_stdExp->GetTotPoints();
1369 int ntot = m_numElmt * nPhys;
1373 for (
int i = 0; i < 3; ++i)
1375 Diff[i] = wsp + i * ntot;
1378 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1 * m_nquad2 * m_numElmt,
1379 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1380 &Diff[0][0], m_nquad0);
1382 for (
int i = 0; i < m_numElmt; ++i)
1384 for (
int j = 0; j < m_nquad2; ++j)
1386 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1387 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1388 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1389 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1393 Blas::Dgemm(
'N',
'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1394 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1395 m_nquad2, 0.0, &Diff[2][i * nPhys],
1396 m_nquad0 * m_nquad1);
1403 Vmath::Vmul(ntot, m_derivFac[dir * 3], 1, Diff[0], 1, output, 1);
1404 for (
int j = 1; j < 3; ++j)
1406 Vmath::Vvtvp(ntot, m_derivFac[dir * 3 + j], 1, Diff[j], 1,
1407 output, 1, output, 1);
1413 for (
int e = 0; e < m_numElmt; ++e)
1415 Vmath::Smul(m_nqe, m_derivFac[dir * 3][e], Diff[0] + e * m_nqe,
1416 1, t = output + e * m_nqe, 1);
1418 for (
int j = 1; j < 3; ++j)
1421 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
1422 t = output + e * m_nqe, 1);
1429 int coll_phys_offset)
override
1431 boost::ignore_unused(factors, coll_phys_offset);
1432 ASSERTL0(
false,
"Not valid for this operator.");
1449 :
Operator(pCollExp, pGeomData, factors),
1450 m_nquad0(m_stdExp->GetNumPoints(0)),
1451 m_nquad1(m_stdExp->GetNumPoints(1)),
1452 m_nquad2(m_stdExp->GetNumPoints(2))
1456 m_coordim = pCollExp[0]->GetCoordim();
1458 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1460 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1461 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1462 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
1464 m_wspSize = 3 * m_nquad0 * m_nquad1 * m_nquad2 * m_numElmt;
1472 PhysDeriv_SumFac_Hex::create,
"PhysDeriv_SumFac_Hex")};
1493 int nPhys = m_stdExp->GetTotPoints();
1494 int ntot = m_numElmt * nPhys;
1502 for (
int i = 0; i < 3; ++i)
1504 Diff[i] = wsp + i * ntot;
1508 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1 * m_nquad2 * m_numElmt,
1509 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1510 &Diff[0][0], m_nquad0);
1513 for (
int i = 0; i < m_numElmt; ++i)
1515 Blas::Dgemm(
'N',
'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1516 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1517 m_nquad2, 0.0, &Diff[2][i * nPhys],
1518 m_nquad0 * m_nquad1);
1521 for (
int i = 0; i < m_numElmt; ++i)
1525 for (
int j = 0; j < m_nquad2; ++j)
1527 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1528 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1529 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1530 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1535 Vmath::Vvtvp(nPhys, m_fac3.get(), 1, Diff[1].get() + i * nPhys, 1,
1536 Diff[2].get() + i * nPhys, 1,
1537 Diff[2].get() + i * nPhys, 1);
1540 Vmath::Vmul(nPhys, m_fac2.get(), 1, Diff[1].get() + i * nPhys, 1,
1541 Diff[1].get() + i * nPhys, 1);
1544 Vmath::Vvtvp(nPhys, m_fac1.get(), 1, Diff[0].get() + i * nPhys, 1,
1545 Diff[1].get() + i * nPhys, 1,
1546 Diff[1].get() + i * nPhys, 1);
1549 Vmath::Vvtvp(nPhys, m_fac1.get(), 1, Diff[0].get() + i * nPhys, 1,
1550 Diff[2].get() + i * nPhys, 1,
1551 Diff[2].get() + i * nPhys, 1);
1554 Vmath::Vmul(nPhys, m_fac0.get(), 1, Diff[0].get() + i * nPhys, 1,
1555 Diff[0].get() + i * nPhys, 1);
1561 for (
int i = 0; i < m_coordim; ++i)
1563 Vmath::Vmul(ntot, m_derivFac[i * 3], 1, Diff[0], 1, out[i], 1);
1564 for (
int j = 1; j < 3; ++j)
1566 Vmath::Vvtvp(ntot, m_derivFac[i * 3 + j], 1, Diff[j], 1,
1567 out[i], 1, out[i], 1);
1574 for (
int e = 0; e < m_numElmt; ++e)
1576 for (
int i = 0; i < m_coordim; ++i)
1579 Diff[0] + e * m_nqe, 1, t = out[i] + e * m_nqe,
1582 for (
int j = 1; j < 3; ++j)
1585 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
1586 1, t = out[i] + e * m_nqe, 1);
1597 int nPhys = m_stdExp->GetTotPoints();
1598 int ntot = m_numElmt * nPhys;
1602 for (
int i = 0; i < 3; ++i)
1604 Diff[i] = wsp + i * ntot;
1608 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1 * m_nquad2 * m_numElmt,
1609 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1610 &Diff[0][0], m_nquad0);
1613 for (
int i = 0; i < m_numElmt; ++i)
1615 Blas::Dgemm(
'N',
'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1616 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1617 m_nquad2, 0.0, &Diff[2][i * nPhys],
1618 m_nquad0 * m_nquad1);
1621 for (
int i = 0; i < m_numElmt; ++i)
1625 for (
int j = 0; j < m_nquad2; ++j)
1627 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1628 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1629 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1630 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1635 Vmath::Vvtvp(nPhys, m_fac3.get(), 1, Diff[1].get() + i * nPhys, 1,
1636 Diff[2].get() + i * nPhys, 1,
1637 Diff[2].get() + i * nPhys, 1);
1640 Vmath::Vmul(nPhys, m_fac2.get(), 1, Diff[1].get() + i * nPhys, 1,
1641 Diff[1].get() + i * nPhys, 1);
1644 Vmath::Vvtvp(nPhys, m_fac1.get(), 1, Diff[0].get() + i * nPhys, 1,
1645 Diff[1].get() + i * nPhys, 1,
1646 Diff[1].get() + i * nPhys, 1);
1649 Vmath::Vvtvp(nPhys, m_fac1.get(), 1, Diff[0].get() + i * nPhys, 1,
1650 Diff[2].get() + i * nPhys, 1,
1651 Diff[2].get() + i * nPhys, 1);
1654 Vmath::Vmul(nPhys, m_fac0.get(), 1, Diff[0].get() + i * nPhys, 1,
1655 Diff[0].get() + i * nPhys, 1);
1662 Vmath::Vmul(ntot, m_derivFac[dir * 3], 1, Diff[0], 1, output, 1);
1663 for (
int j = 1; j < 3; ++j)
1665 Vmath::Vvtvp(ntot, m_derivFac[dir * 3 + j], 1, Diff[j], 1,
1666 output, 1, output, 1);
1672 for (
int e = 0; e < m_numElmt; ++e)
1674 Vmath::Smul(m_nqe, m_derivFac[dir * 3][e], Diff[0] + e * m_nqe,
1675 1, t = output + e * m_nqe, 1);
1677 for (
int j = 1; j < 3; ++j)
1680 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
1681 t = output + e * m_nqe, 1);
1688 int coll_phys_offset)
override
1690 boost::ignore_unused(factors, coll_phys_offset);
1691 ASSERTL0(
false,
"Not valid for this operator.");
1712 :
Operator(pCollExp, pGeomData, factors),
1713 m_nquad0(m_stdExp->GetNumPoints(0)),
1714 m_nquad1(m_stdExp->GetNumPoints(1)),
1715 m_nquad2(m_stdExp->GetNumPoints(2))
1719 m_coordim = pCollExp[0]->GetCoordim();
1721 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1723 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1724 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1725 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
1727 m_wspSize = 3 * m_nquad0 * m_nquad1 * m_nquad2 * m_numElmt;
1738 for (
int i = 0; i < m_nquad0; ++i)
1740 for (
int j = 0; j < m_nquad1; ++j)
1742 for (
int k = 0; k < m_nquad2; ++k)
1745 m_fac0[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1746 4.0 / ((1 - z1[j]) * (1 - z2[k]));
1747 m_fac1[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1748 2.0 * (1 + z0[i]) / ((1 - z1[j]) * (1 - z2[k]));
1749 m_fac2[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1751 m_fac3[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1752 (1 + z1[j]) / (1 - z2[k]);
1763 PhysDeriv_SumFac_Tet::create,
"PhysDeriv_SumFac_Tet")};
1784 int nPhys = m_stdExp->GetTotPoints();
1785 int ntot = m_numElmt * nPhys;
1793 for (
int i = 0; i < 3; ++i)
1795 Diff[i] = wsp + i * ntot;
1799 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1 * m_nquad2 * m_numElmt,
1800 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1801 &Diff[0][0], m_nquad0);
1804 for (
int i = 0; i < m_numElmt; ++i)
1808 for (
int j = 0; j < m_nquad2; ++j)
1810 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1811 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1812 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1813 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1818 Blas::Dgemm(
'N',
'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1819 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1820 m_nquad2, 0.0, &Diff[2][i * nPhys],
1821 m_nquad0 * m_nquad1);
1824 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[0].get() + cnt, 1,
1825 Diff[0].get() + cnt, 1);
1828 Vmath::Vvtvp(nPhys, &m_fac1[0], 1, Diff[0].get() + cnt, 1,
1829 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
1836 for (
int i = 0; i < m_coordim; ++i)
1838 Vmath::Vmul(ntot, m_derivFac[i * 3], 1, Diff[0], 1, out[i], 1);
1839 for (
int j = 1; j < 3; ++j)
1841 Vmath::Vvtvp(ntot, m_derivFac[i * 3 + j], 1, Diff[j], 1,
1842 out[i], 1, out[i], 1);
1849 for (
int e = 0; e < m_numElmt; ++e)
1851 for (
int i = 0; i < m_coordim; ++i)
1854 Diff[0] + e * m_nqe, 1, t = out[i] + e * m_nqe,
1857 for (
int j = 1; j < 3; ++j)
1860 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
1861 1, t = out[i] + e * m_nqe, 1);
1872 int nPhys = m_stdExp->GetTotPoints();
1873 int ntot = m_numElmt * nPhys;
1877 for (
int i = 0; i < 3; ++i)
1879 Diff[i] = wsp + i * ntot;
1883 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1 * m_nquad2 * m_numElmt,
1884 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1885 &Diff[0][0], m_nquad0);
1888 for (
int i = 0; i < m_numElmt; ++i)
1892 for (
int j = 0; j < m_nquad2; ++j)
1894 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1895 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1896 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1897 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1902 Blas::Dgemm(
'N',
'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1903 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1904 m_nquad2, 0.0, &Diff[2][i * nPhys],
1905 m_nquad0 * m_nquad1);
1908 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[0].get() + cnt, 1,
1909 Diff[0].get() + cnt, 1);
1912 Vmath::Vvtvp(nPhys, &m_fac1[0], 1, Diff[0].get() + cnt, 1,
1913 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
1921 Vmath::Vmul(ntot, m_derivFac[dir * 3], 1, Diff[0], 1, output, 1);
1922 for (
int j = 1; j < 3; ++j)
1924 Vmath::Vvtvp(ntot, m_derivFac[dir * 3 + j], 1, Diff[j], 1,
1925 output, 1, output, 1);
1931 for (
int e = 0; e < m_numElmt; ++e)
1933 Vmath::Smul(m_nqe, m_derivFac[dir * 3][e], Diff[0] + e * m_nqe,
1934 1, t = output + e * m_nqe, 1);
1936 for (
int j = 1; j < 3; ++j)
1939 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
1940 t = output + e * m_nqe, 1);
1947 int coll_phys_offset)
override
1949 boost::ignore_unused(factors, coll_phys_offset);
1950 ASSERTL0(
false,
"Not valid for this operator.");
1969 :
Operator(pCollExp, pGeomData, factors),
1970 m_nquad0(m_stdExp->GetNumPoints(0)),
1971 m_nquad1(m_stdExp->GetNumPoints(1)),
1972 m_nquad2(m_stdExp->GetNumPoints(2))
1976 m_coordim = pCollExp[0]->GetCoordim();
1978 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1984 for (
int i = 0; i < m_nquad0; ++i)
1986 for (
int j = 0; j < m_nquad1; ++j)
1988 for (
int k = 0; k < m_nquad2; ++k)
1990 m_fac0[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1992 m_fac1[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1998 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1999 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
2000 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
2002 m_wspSize = 3 * m_nquad0 * m_nquad1 * m_nquad2 * m_numElmt;
2007 OperatorKey PhysDeriv_SumFac_Prism::m_typeArr[] = {
2010 PhysDeriv_SumFac_Prism::create,
"PhysDeriv_SumFac_Prism")};
2031 int nPhys = m_stdExp->GetTotPoints();
2032 int ntot = m_numElmt * nPhys;
2040 for (
int i = 0; i < 3; ++i)
2042 Diff[i] = wsp + i * ntot;
2046 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1 * m_nquad2 * m_numElmt,
2047 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
2048 &Diff[0][0], m_nquad0);
2051 for (
int i = 0; i < m_numElmt; ++i)
2055 for (
int j = 0; j < m_nquad2; ++j)
2057 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
2058 &input[i * nPhys + j * m_nquad0 * m_nquad1],
2059 m_nquad0, m_Deriv1, m_nquad1, 0.0,
2060 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
2065 Blas::Dgemm(
'N',
'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
2066 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
2067 m_nquad2, 0.0, &Diff[2][i * nPhys],
2068 m_nquad0 * m_nquad1);
2071 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[0].get() + cnt, 1,
2072 Diff[0].get() + cnt, 1);
2075 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[1].get() + cnt, 1,
2076 Diff[1].get() + cnt, 1);
2079 Vmath::Vvtvp(nPhys, &m_fac1[0], 1, Diff[0].get() + cnt, 1,
2080 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
2083 Vmath::Vvtvp(nPhys, &m_fac2[0], 1, Diff[1].get() + cnt, 1,
2084 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
2091 for (
int i = 0; i < m_coordim; ++i)
2093 Vmath::Vmul(ntot, m_derivFac[i * 3], 1, Diff[0], 1, out[i], 1);
2094 for (
int j = 1; j < 3; ++j)
2096 Vmath::Vvtvp(ntot, m_derivFac[i * 3 + j], 1, Diff[j], 1,
2097 out[i], 1, out[i], 1);
2104 for (
int e = 0; e < m_numElmt; ++e)
2106 for (
int i = 0; i < m_coordim; ++i)
2109 Diff[0] + e * m_nqe, 1, t = out[i] + e * m_nqe,
2112 for (
int j = 1; j < 3; ++j)
2115 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
2116 1, t = out[i] + e * m_nqe, 1);
2127 int nPhys = m_stdExp->GetTotPoints();
2128 int ntot = m_numElmt * nPhys;
2132 for (
int i = 0; i < 3; ++i)
2134 Diff[i] = wsp + i * ntot;
2138 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1 * m_nquad2 * m_numElmt,
2139 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
2140 &Diff[0][0], m_nquad0);
2143 for (
int i = 0; i < m_numElmt; ++i)
2146 for (
int j = 0; j < m_nquad2; ++j)
2148 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
2149 &input[i * nPhys + j * m_nquad0 * m_nquad1],
2150 m_nquad0, m_Deriv1, m_nquad1, 0.0,
2151 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
2156 Blas::Dgemm(
'N',
'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
2157 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
2158 m_nquad2, 0.0, &Diff[2][i * nPhys],
2159 m_nquad0 * m_nquad1);
2162 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[0].get() + cnt, 1,
2163 Diff[0].get() + cnt, 1);
2166 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[1].get() + cnt, 1,
2167 Diff[1].get() + cnt, 1);
2170 Vmath::Vvtvp(nPhys, &m_fac1[0], 1, Diff[0].get() + cnt, 1,
2171 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
2173 Vmath::Vvtvp(nPhys, &m_fac2[0], 1, Diff[1].get() + cnt, 1,
2174 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
2182 Vmath::Vmul(ntot, m_derivFac[dir * 3], 1, Diff[0], 1, output, 1);
2183 for (
int j = 1; j < 3; ++j)
2185 Vmath::Vvtvp(ntot, m_derivFac[dir * 3 + j], 1, Diff[j], 1,
2186 output, 1, output, 1);
2192 for (
int e = 0; e < m_numElmt; ++e)
2194 Vmath::Smul(m_nqe, m_derivFac[dir * 3][e], Diff[0] + e * m_nqe,
2195 1, t = output + e * m_nqe, 1);
2197 for (
int j = 1; j < 3; ++j)
2200 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
2201 t = output + e * m_nqe, 1);
2208 int coll_phys_offset)
override
2210 boost::ignore_unused(factors, coll_phys_offset);
2211 ASSERTL0(
false,
"Not valid for this operator.");
2231 :
Operator(pCollExp, pGeomData, factors),
2232 m_nquad0(m_stdExp->GetNumPoints(0)),
2233 m_nquad1(m_stdExp->GetNumPoints(1)),
2234 m_nquad2(m_stdExp->GetNumPoints(2))
2238 m_coordim = pCollExp[0]->GetCoordim();
2240 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
2249 int nq0_nq1 = m_nquad0 * m_nquad1;
2250 for (
int i = 0; i < m_nquad0; ++i)
2252 for (
int j = 0; j < m_nquad1; ++j)
2254 int ifac = i + j * m_nquad0;
2255 for (
int k = 0; k < m_nquad2; ++k)
2257 m_fac0[ifac + k * nq0_nq1] = 2.0 / (1 - z2[k]);
2258 m_fac1[ifac + k * nq0_nq1] = 0.5 * (1 + z0[i]);
2259 m_fac2[ifac + k * nq0_nq1] = 0.5 * (1 + z1[j]);
2264 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
2265 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
2266 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
2268 m_wspSize = 3 * m_nquad0 * m_nquad1 * m_nquad2 * m_numElmt;
2276 PhysDeriv_SumFac_Pyr::create,
"PhysDeriv_SumFac_Pyr")};
#define ASSERTL0(condition, msg)
#define NEKERROR(type, msg)
Assert Level 0 – Fundamental assert which is used whether in FULLDEBUG, DEBUG or OPT compilation mode...
#define ASSERTL1(condition, msg)
Assert Level 1 – Debugging which is used whether in FULLDEBUG or DEBUG compilation mode....
#define OPERATOR_CREATE(cname)
Base class for operators on a collection of elements.
Phys deriv operator using element-wise operation.
PhysDeriv_IterPerExp(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Array< TwoD, const NekDouble > m_derivFac
virtual void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
~PhysDeriv_IterPerExp() final
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) override final
void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp) override final
Perform operation.
Phys deriv operator using matrix free operators.
~PhysDeriv_MatrixFree() final
virtual void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp) override final
Perform operation.
PhysDeriv_MatrixFree(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
std::shared_ptr< MatrixFree::PhysDeriv > m_oper
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) override final
Phys deriv operator using original LocalRegions implementation.
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) override final
void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp) override final
Perform operation.
virtual void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
PhysDeriv_NoCollection(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
vector< StdRegions::StdExpansionSharedPtr > m_expList
~PhysDeriv_NoCollection() final
Phys deriv operator using standard matrix approach.
Array< TwoD, const NekDouble > m_derivFac
void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp) override final
Perform operation.
Array< OneD, DNekMatSharedPtr > m_derivMat
PhysDeriv_StdMat(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
~PhysDeriv_StdMat() final
virtual void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) override final
Phys deriv operator using sum-factorisation (Hex)
PhysDeriv_SumFac_Hex(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
~PhysDeriv_SumFac_Hex() final
Array< TwoD, const NekDouble > m_derivFac
void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp) override final
Perform operation.
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) override final
virtual void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
Phys deriv operator using sum-factorisation (Prism)
Array< OneD, NekDouble > m_fac1
Array< TwoD, const NekDouble > m_derivFac
PhysDeriv_SumFac_Prism(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
virtual void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
~PhysDeriv_SumFac_Prism() final
Array< OneD, NekDouble > m_fac0
void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp) override final
Perform operation.
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) override final
Phys deriv operator using sum-factorisation (Pyramid)
virtual void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
PhysDeriv_SumFac_Pyr(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
~PhysDeriv_SumFac_Pyr() final
Array< OneD, NekDouble > m_fac1
Array< OneD, NekDouble > m_fac2
Array< TwoD, const NekDouble > m_derivFac
Array< OneD, NekDouble > m_fac0
void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp) override final
Perform operation.
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) override final
Phys deriv operator using sum-factorisation (Quad)
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) override final
virtual void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
PhysDeriv_SumFac_Quad(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
~PhysDeriv_SumFac_Quad() final
void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp) override final
Perform operation.
Array< TwoD, const NekDouble > m_derivFac
Phys deriv operator using sum-factorisation (Segment)
PhysDeriv_SumFac_Seg(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Array< TwoD, const NekDouble > m_derivFac
~PhysDeriv_SumFac_Seg() final
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) override final
void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp) override final
Perform operation.
virtual void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
Phys deriv operator using sum-factorisation (Tet)
void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp) override final
Perform operation.
~PhysDeriv_SumFac_Tet() final
Array< OneD, NekDouble > m_fac2
Array< OneD, NekDouble > m_fac1
PhysDeriv_SumFac_Tet(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) override final
Array< OneD, NekDouble > m_fac0
Array< TwoD, const NekDouble > m_derivFac
Array< OneD, NekDouble > m_fac3
virtual void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
Phys deriv operator using sum-factorisation (Tri)
virtual void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp) override final
Perform operation.
PhysDeriv_SumFac_Tri(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
~PhysDeriv_SumFac_Tri() final
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) override final
Array< OneD, NekDouble > m_fac1
Array< TwoD, const NekDouble > m_derivFac
Array< OneD, NekDouble > m_fac0
tKey RegisterCreatorFunction(tKey idKey, CreatorFunction classCreator, std::string pDesc="")
Register a class with the factory.
tBaseSharedPtr CreateInstance(tKey idKey, tParam... args)
Create an instance of the class referred to by idKey.
General purpose memory allocation routines with the ability to allocate from thread specific memory p...
static void Dgemm(const char &transa, const char &transb, const int &m, const int &n, const int &k, const double &alpha, const double *a, const int &lda, const double *b, const int &ldb, const double &beta, double *c, const int &ldc)
BLAS level 3: Matrix-matrix multiply C = A x B where op(A)[m x k], op(B)[k x n], C[m x n] DGEMM perfo...
std::tuple< LibUtilities::ShapeType, OperatorType, ImplementationType, ExpansionIsNodal > OperatorKey
Key for describing an Operator.
std::shared_ptr< CoalescedGeomData > CoalescedGeomDataSharedPtr
OperatorFactory & GetOperatorFactory()
Returns the singleton Operator factory object.
std::vector< PointsKey > PointsKeyVector
The above copyright notice and this permission notice shall be included.
void Vmul(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Multiply vector z = x*y.
void Svtvp(int n, const T alpha, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
svtvp (scalar times vector plus vector): z = alpha*x + y
void Vvtvp(int n, const T *w, const int incw, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
vvtvp (vector times vector plus vector): z = w*x + y
void Smul(int n, const T alpha, const T *x, const int incx, T *y, const int incy)
Scalar multiply y = alpha*x.
void Zero(int n, T *x, const int incx)
Zero vector.
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)