35 #include <boost/core/ignore_unused.hpp> 43 namespace Collections {
72 int nPhys = m_stdExp->GetTotPoints();
73 int ntot = m_numElmt*nPhys;
77 out[0] = output0; out[1] = output1; out[2] = output2;
79 for(
int i = 0; i < m_dim; ++i)
81 Diff[i] = wsp + i*ntot;
85 for(
int i = 0; i < m_dim; ++i)
87 Blas::Dgemm(
'N',
'N', m_derivMat[i]->GetRows(), m_numElmt,
88 m_derivMat[i]->GetColumns(), 1.0,
89 m_derivMat[i]->GetRawPtr(),
90 m_derivMat[i]->GetRows(), input.get(), nPhys,
91 0.0, &Diff[i][0],nPhys);
95 for(
int i = 0; i < m_coordim; ++i)
98 for(
int j = 0; j < m_dim; ++j)
114 int nPhys = m_stdExp->GetTotPoints();
115 int ntot = m_numElmt*nPhys;
119 for(
int i = 0; i < m_dim; ++i)
121 Diff[i] = wsp + i*ntot;
125 for(
int i = 0; i < m_dim; ++i)
127 Blas::Dgemm(
'N',
'N', m_derivMat[i]->GetRows(), m_numElmt,
128 m_derivMat[i]->GetColumns(), 1.0,
129 m_derivMat[i]->GetRawPtr(),
130 m_derivMat[i]->GetRows(), input.get(), nPhys,
131 0.0, &Diff[i][0],nPhys);
136 for(
int j = 0; j < m_dim; ++j)
153 vector<StdRegions::StdExpansionSharedPtr> pCollExp,
159 m_dim = PtsKey.size();
160 m_coordim = pCollExp[0]->GetCoordim();
162 for(
int i = 0; i < m_dim; ++i)
164 nqtot *= PtsKey[i].GetNumPoints();
168 for(
int i = 0; i < m_dim; ++i)
173 for(
int j = 0; j < nqtot; ++j)
177 m_stdExp->PhysDeriv(i,tmp,tmp1);
179 &(m_derivMat[i]->GetPtr())[0] + j*nqtot, 1);
182 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
183 m_wspSize = 3*nqtot*m_numElmt;
192 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_Seg"),
195 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_Tri"),
198 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_NodalTri"),
201 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_Quad"),
204 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_Tet"),
207 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_NodalTet"),
210 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_Pyr"),
213 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_Prism"),
216 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_NodalPrism"),
219 PhysDeriv_StdMat::create,
"PhysDeriv_StdMat_Hex"),
222 PhysDeriv_StdMat::create,
"PhysDeriv_SumFac_Pyr")
245 int nPhys = m_stdExp->GetTotPoints();
246 int ntot = m_numElmt*nPhys;
250 out[0] = output0; out[1] = output1; out[2] = output2;
252 for(
int i = 0; i < m_dim; ++i)
254 Diff[i] = wsp + i*ntot;
258 for (
int i = 0; i < m_numElmt; ++i)
260 m_stdExp->PhysDeriv(input + i*nPhys,
261 tmp0 = Diff[0] + i*nPhys,
262 tmp1 = Diff[1] + i*nPhys,
263 tmp2 = Diff[2] + i*nPhys);
267 for(
int i = 0; i < m_coordim; ++i)
269 Vmath::Vmul(ntot,m_derivFac[i*m_dim],1,Diff[0],1,out[i],1);
270 for(
int j = 1; j < m_dim; ++j)
286 int nPhys = m_stdExp->GetTotPoints();
287 int ntot = m_numElmt*nPhys;
291 for(
int i = 0; i < m_dim; ++i)
293 Diff[i] = wsp + i*ntot;
297 for (
int i = 0; i < m_numElmt; ++i)
299 m_stdExp->PhysDeriv(input + i*nPhys,
300 tmp0 = Diff[0] + i*nPhys,
301 tmp1 = Diff[1] + i*nPhys,
302 tmp2 = Diff[2] + i*nPhys);
306 Vmath::Vmul(ntot,m_derivFac[dir*m_dim],1,Diff[0],1,output,1);
307 for(
int j = 1; j < m_dim; ++j)
323 vector<StdRegions::StdExpansionSharedPtr> pCollExp,
329 m_dim = PtsKey.size();
330 m_coordim = pCollExp[0]->GetCoordim();
332 for(
int i = 0; i < m_dim; ++i)
334 nqtot *= PtsKey[i].GetNumPoints();
336 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
337 m_wspSize = 3*nqtot*m_numElmt;
346 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_Seg"),
349 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_Tri"),
352 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_NodalTri"),
355 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_Quad"),
358 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_Tet"),
361 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_NodalTet"),
364 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_Pyr"),
367 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_Prism"),
370 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_NodalPrism"),
373 PhysDeriv_IterPerExp::create,
"PhysDeriv_IterPerExp_Hex")
396 boost::ignore_unused(wsp);
398 const int nPhys = m_expList[0]->GetTotPoints();
402 switch (m_expList[0]->GetShapeDimension())
406 for (
int i = 0; i < m_numElmt; ++i)
408 m_expList[i]->PhysDeriv(input + i*nPhys,
409 tmp0 = output0 + i*nPhys);
415 for (
int i = 0; i < m_numElmt; ++i)
417 m_expList[i]->PhysDeriv(input + i*nPhys,
418 tmp0 = output0 + i*nPhys,
419 tmp1 = output1 + i*nPhys);
425 for (
int i = 0; i < m_numElmt; ++i)
427 m_expList[i]->PhysDeriv(input + i*nPhys,
428 tmp0 = output0 + i*nPhys,
429 tmp1 = output1 + i*nPhys,
430 tmp2 = output2 + i*nPhys);
435 ASSERTL0(
false,
"Unknown dimension.");
445 boost::ignore_unused(wsp);
447 const int nPhys = m_expList[0]->GetTotPoints();
451 for (
int i = 0; i < m_numElmt; ++i)
453 m_expList[i]->PhysDeriv(dir, input + i*nPhys,
454 tmp = output + i*nPhys);
463 vector<StdRegions::StdExpansionSharedPtr> pCollExp,
467 m_expList = pCollExp;
476 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_Seg"),
479 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_Tri"),
482 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_NodalTri"),
485 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_Quad"),
488 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_Tet"),
491 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_NodalTet"),
494 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_Pyr"),
497 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_Prism"),
500 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_NodalPrism"),
503 PhysDeriv_NoCollection::create,
"PhysDeriv_NoCollection_Hex")
526 const int nqcol = m_nquad0*m_numElmt;
528 ASSERTL1(wsp.num_elements() == m_wspSize,
529 "Incorrect workspace size");
530 ASSERTL1(input.num_elements() >= nqcol,
531 "Incorrect input size");
536 m_nquad0, 1.0, m_Deriv0, m_nquad0,
537 input.get(), m_nquad0, 0.0,
538 diff0.get(), m_nquad0);
540 Vmath::Vmul (nqcol, m_derivFac[0], 1, diff0, 1, output0, 1);
544 Vmath::Vmul (nqcol, m_derivFac[1], 1, diff0, 1, output1, 1);
546 else if (m_coordim == 3)
548 Vmath::Vmul (nqcol, m_derivFac[1], 1, diff0, 1, output1, 1);
549 Vmath::Vmul (nqcol, m_derivFac[2], 1, diff0, 1, output2, 1);
559 const int nqcol = m_nquad0*m_numElmt;
561 ASSERTL1(wsp.num_elements() == m_wspSize,
562 "Incorrect workspace size");
563 ASSERTL1(input.num_elements() >= nqcol,
564 "Incorrect input size");
569 m_nquad0, 1.0, m_Deriv0, m_nquad0,
570 input.get(), m_nquad0, 0.0,
571 diff0.get(), m_nquad0);
573 Vmath::Vmul(nqcol, m_derivFac[dir], 1, diff0, 1, output, 1);
584 vector<StdRegions::StdExpansionSharedPtr> pCollExp,
587 m_nquad0 (m_stdExp->GetNumPoints(0))
590 m_coordim = pCollExp[0]->GetCoordim();
592 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
594 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
595 m_wspSize = m_nquad0*m_numElmt;
602 RegisterCreatorFunction(
604 PhysDeriv_SumFac_Seg::create,
"PhysDeriv_SumFac_Seg");
627 const int nqtot = m_nquad0 * m_nquad1;
628 const int nqcol = nqtot*m_numElmt;
630 ASSERTL1(wsp.num_elements() == m_wspSize,
631 "Incorrect workspace size");
632 ASSERTL1(input.num_elements() >= nqcol,
633 "Incorrect input size");
638 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1*m_numElmt,
639 m_nquad0, 1.0, m_Deriv0, m_nquad0,
640 input.get(), m_nquad0, 0.0,
641 diff0.get(), m_nquad0);
644 for (
int i = 0; i < m_numElmt; ++i, cnt += nqtot)
646 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
647 input.get() + cnt, m_nquad0,
648 m_Deriv1, m_nquad1, 0.0,
649 diff1.get() + cnt, m_nquad0);
652 Vmath::Vmul (nqcol, m_derivFac[0], 1, diff0, 1, output0, 1);
653 Vmath::Vvtvp (nqcol, m_derivFac[1], 1, diff1, 1, output0, 1,
655 Vmath::Vmul (nqcol, m_derivFac[2], 1, diff0, 1, output1, 1);
656 Vmath::Vvtvp (nqcol, m_derivFac[3], 1, diff1, 1, output1, 1,
661 Vmath::Vmul (nqcol, m_derivFac[4], 1, diff0, 1, output2, 1);
662 Vmath::Vvtvp (nqcol, m_derivFac[5], 1, diff1, 1, output2, 1,
673 const int nqtot = m_nquad0 * m_nquad1;
674 const int nqcol = nqtot*m_numElmt;
676 ASSERTL1(wsp.num_elements() == m_wspSize,
677 "Incorrect workspace size");
678 ASSERTL1(input.num_elements() >= nqcol,
679 "Incorrect input size");
684 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1*m_numElmt,
685 m_nquad0, 1.0, m_Deriv0, m_nquad0,
686 input.get(), m_nquad0, 0.0,
687 diff0.get(), m_nquad0);
690 for (
int i = 0; i < m_numElmt; ++i, cnt += nqtot)
692 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
693 input.get() + cnt, m_nquad0,
694 m_Deriv1, m_nquad1, 0.0,
695 diff1.get() + cnt, m_nquad0);
698 Vmath::Vmul (nqcol, m_derivFac[2*dir] , 1, diff0, 1, output, 1);
699 Vmath::Vvtvp (nqcol, m_derivFac[2*dir+1], 1, diff1, 1, output, 1,
713 vector<StdRegions::StdExpansionSharedPtr> pCollExp,
716 m_nquad0 (m_stdExp->GetNumPoints(0)),
717 m_nquad1 (m_stdExp->GetNumPoints(1))
720 m_coordim = pCollExp[0]->GetCoordim();
722 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
724 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
725 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
726 m_wspSize = 2 * m_nquad0*m_nquad1*m_numElmt;
732 RegisterCreatorFunction(
734 PhysDeriv_SumFac_Quad::create,
"PhysDeriv_SumFac_Quad");
756 const int nqtot = m_nquad0 * m_nquad1;
757 const int nqcol = nqtot*m_numElmt;
759 ASSERTL1(wsp.num_elements() == m_wspSize,
760 "Incorrect workspace size");
761 ASSERTL1(input.num_elements() >= nqcol,
762 "Incorrect input size");
768 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1*m_numElmt,
769 m_nquad0, 1.0, m_Deriv0, m_nquad0,
770 input.get(), m_nquad0, 0.0,
771 diff0.get(), m_nquad0);
774 for (
int i = 0; i < m_numElmt; ++i, cnt += nqtot)
780 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
781 input.get() + cnt, m_nquad0,
782 m_Deriv1, m_nquad1, 0.0,
783 diff1.get() + cnt, m_nquad0);
787 diff1.get()+cnt,1,diff1.get()+cnt,1);
791 Vmath::Vmul (nqcol, m_derivFac[0], 1, diff0, 1, output0, 1);
793 output0, 1, output0, 1);
794 Vmath::Vmul (nqcol, m_derivFac[2], 1, diff0, 1, output1, 1);
796 output1, 1, output1, 1);
803 output2, 1, output2, 1);
813 const int nqtot = m_nquad0 * m_nquad1;
814 const int nqcol = nqtot*m_numElmt;
816 ASSERTL1(wsp.num_elements() == m_wspSize,
817 "Incorrect workspace size");
818 ASSERTL1(input.num_elements() >= nqcol,
819 "Incorrect input size");
825 Blas::Dgemm(
'N',
'N', m_nquad0, m_nquad1*m_numElmt,
826 m_nquad0, 1.0, m_Deriv0, m_nquad0,
827 input.get(), m_nquad0, 0.0,
828 diff0.get(), m_nquad0);
831 for (
int i = 0; i < m_numElmt; ++i, cnt += nqtot)
837 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
838 input.get() + cnt, m_nquad0,
839 m_Deriv1, m_nquad1, 0.0,
840 diff1.get() + cnt, m_nquad0);
844 diff1.get()+cnt,1,diff1.get()+cnt,1);
848 Vmath::Vmul (nqcol, m_derivFac[2*dir] , 1, diff0, 1, output, 1);
849 Vmath::Vvtvp (nqcol, m_derivFac[2*dir+1], 1, diff1, 1, output, 1,
865 vector<StdRegions::StdExpansionSharedPtr> pCollExp,
868 m_nquad0 (m_stdExp->GetNumPoints(0)),
869 m_nquad1 (m_stdExp->GetNumPoints(1))
872 m_coordim = pCollExp[0]->GetCoordim();
874 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
877 = m_stdExp->GetBasis(0)->GetZ();
879 = m_stdExp->GetBasis(1)->GetZ();
882 for (
int i = 0; i < m_nquad0; ++i)
884 for(
int j = 0; j < m_nquad1; ++j)
886 m_fac0[i+j*m_nquad0] = 0.5*(1+z0[i]);
892 for (
int i = 0; i < m_nquad0; ++i)
894 for(
int j = 0; j < m_nquad1; ++j)
896 m_fac1[i+j*m_nquad0] = 2.0/(1-z1[j]);
901 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
902 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
903 m_wspSize = 2 * m_nquad0*m_nquad1*m_numElmt;
912 PhysDeriv_SumFac_Tri::create,
"PhysDeriv_SumFac_Tri"),
915 PhysDeriv_SumFac_Tri::create,
"PhysDeriv_SumFac_NodalTri")
938 int nPhys = m_stdExp->GetTotPoints();
939 int ntot = m_numElmt*nPhys;
943 out[0] = output0; out[1] = output1; out[2] = output2;
945 for(
int i = 0; i < 3; ++i)
947 Diff[i] = wsp + i*ntot;
950 Blas::Dgemm(
'N',
'N', m_nquad0,m_nquad1*m_nquad2*m_numElmt,
951 m_nquad0,1.0, m_Deriv0,m_nquad0,&input[0],
952 m_nquad0,0.0,&Diff[0][0],m_nquad0);
954 for(
int i = 0; i < m_numElmt; ++i)
956 for (
int j = 0; j < m_nquad2; ++j)
958 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1,
959 1.0, &input[i*nPhys+j*m_nquad0*m_nquad1],
960 m_nquad0, m_Deriv1, m_nquad1, 0.0,
961 &Diff[1][i*nPhys+j*m_nquad0*m_nquad1],
965 Blas::Dgemm(
'N',
'T',m_nquad0*m_nquad1,m_nquad2,m_nquad2,
966 1.0, &input[i*nPhys],m_nquad0*m_nquad1,
967 m_Deriv2,m_nquad2, 0.0,&Diff[2][i*nPhys],
972 for(
int i = 0; i < m_coordim; ++i)
974 Vmath::Vmul(ntot,m_derivFac[i*3],1,Diff[0],1,out[i],1);
975 for(
int j = 1; j < 3; ++j)
991 int nPhys = m_stdExp->GetTotPoints();
992 int ntot = m_numElmt*nPhys;
996 for(
int i = 0; i < 3; ++i)
998 Diff[i] = wsp + i*ntot;
1001 Blas::Dgemm(
'N',
'N', m_nquad0,m_nquad1*m_nquad2*m_numElmt,
1002 m_nquad0,1.0, m_Deriv0,m_nquad0,&input[0],
1003 m_nquad0,0.0,&Diff[0][0],m_nquad0);
1005 for(
int i = 0; i < m_numElmt; ++i)
1007 for (
int j = 0; j < m_nquad2; ++j)
1009 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1,
1010 1.0, &input[i*nPhys+j*m_nquad0*m_nquad1],
1011 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1012 &Diff[1][i*nPhys+j*m_nquad0*m_nquad1],
1016 Blas::Dgemm(
'N',
'T',m_nquad0*m_nquad1,m_nquad2,m_nquad2,
1017 1.0, &input[i*nPhys],m_nquad0*m_nquad1,
1018 m_Deriv2,m_nquad2, 0.0,&Diff[2][i*nPhys],
1023 Vmath::Vmul(ntot,m_derivFac[dir*3],1,Diff[0],1,output,1);
1024 for(
int j = 1; j < 3; ++j)
1045 vector<StdRegions::StdExpansionSharedPtr> pCollExp,
1048 m_nquad0 (m_stdExp->GetNumPoints(0)),
1049 m_nquad1 (m_stdExp->GetNumPoints(1)),
1050 m_nquad2 (m_stdExp->GetNumPoints(2))
1054 m_coordim = pCollExp[0]->GetCoordim();
1056 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1058 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1059 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1060 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
1062 m_wspSize = 3*m_nquad0*m_nquad1*m_nquad2*m_numElmt;
1071 PhysDeriv_SumFac_Hex::create,
"PhysDeriv_SumFac_Hex")
1094 int nPhys = m_stdExp->GetTotPoints();
1095 int ntot = m_numElmt*nPhys;
1099 out[0] = output0; out[1] = output1; out[2] = output2;
1101 for(
int i = 0; i < 3; ++i)
1103 Diff[i] = wsp + i*ntot;
1107 Blas::Dgemm(
'N',
'N', m_nquad0,m_nquad1*m_nquad2*m_numElmt,
1108 m_nquad0,1.0, m_Deriv0,m_nquad0,&input[0],
1109 m_nquad0,0.0,&Diff[0][0],m_nquad0);
1112 for(
int i = 0; i < m_numElmt; ++i)
1114 Blas::Dgemm(
'N',
'T',m_nquad0*m_nquad1,m_nquad2,m_nquad2,
1115 1.0, &input[i*nPhys],m_nquad0*m_nquad1,
1116 m_Deriv2,m_nquad2, 0.0,&Diff[2][i*nPhys],
1120 for(
int i = 0; i < m_numElmt; ++i)
1124 for (
int j = 0; j < m_nquad2; ++j)
1126 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1,
1127 1.0, &input[i*nPhys+j*m_nquad0*m_nquad1],
1128 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1129 &Diff[1][i*nPhys+j*m_nquad0*m_nquad1],
1135 Diff[1].get() + i*nPhys, 1,
1136 Diff[2].get() + i*nPhys, 1,
1137 Diff[2].get() + i*nPhys, 1);
1141 Diff[1].get() + i*nPhys, 1,
1142 Diff[1].get() + i*nPhys, 1);
1146 Diff[0].get() + i*nPhys, 1,
1147 Diff[1].get() + i*nPhys, 1,
1148 Diff[1].get() + i*nPhys, 1);
1152 Diff[0].get() + i*nPhys, 1,
1153 Diff[2].get() + i*nPhys, 1,
1154 Diff[2].get() + i*nPhys, 1);
1158 Diff[0].get() + i*nPhys, 1,
1159 Diff[0].get() + i*nPhys, 1);
1164 for(
int i = 0; i < m_coordim; ++i)
1166 Vmath::Vmul(ntot,m_derivFac[i*3],1,Diff[0],1,out[i],1);
1167 for(
int j = 1; j < 3; ++j)
1170 Diff[j], 1, out[i], 1, out[i], 1);
1181 int nPhys = m_stdExp->GetTotPoints();
1182 int ntot = m_numElmt*nPhys;
1186 for(
int i = 0; i < 3; ++i)
1188 Diff[i] = wsp + i*ntot;
1192 Blas::Dgemm(
'N',
'N', m_nquad0,m_nquad1*m_nquad2*m_numElmt,
1193 m_nquad0,1.0, m_Deriv0,m_nquad0,&input[0],
1194 m_nquad0,0.0,&Diff[0][0],m_nquad0);
1197 for(
int i = 0; i < m_numElmt; ++i)
1199 Blas::Dgemm(
'N',
'T',m_nquad0*m_nquad1,m_nquad2,m_nquad2,
1200 1.0, &input[i*nPhys],m_nquad0*m_nquad1,
1201 m_Deriv2,m_nquad2, 0.0,&Diff[2][i*nPhys],
1205 for(
int i = 0; i < m_numElmt; ++i)
1209 for (
int j = 0; j < m_nquad2; ++j)
1211 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1,
1212 1.0, &input[i*nPhys+j*m_nquad0*m_nquad1],
1213 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1214 &Diff[1][i*nPhys+j*m_nquad0*m_nquad1],
1220 Diff[1].get() + i*nPhys, 1,
1221 Diff[2].get() + i*nPhys, 1,
1222 Diff[2].get() + i*nPhys, 1);
1226 Diff[1].get() + i*nPhys, 1,
1227 Diff[1].get() + i*nPhys, 1);
1231 Diff[0].get() + i*nPhys, 1,
1232 Diff[1].get() + i*nPhys, 1,
1233 Diff[1].get() + i*nPhys, 1);
1237 Diff[0].get() + i*nPhys, 1,
1238 Diff[2].get() + i*nPhys, 1,
1239 Diff[2].get() + i*nPhys, 1);
1243 Diff[0].get() + i*nPhys, 1,
1244 Diff[0].get() + i*nPhys, 1);
1249 Vmath::Vmul(ntot,m_derivFac[dir*3],1,Diff[0],1,output,1);
1250 for(
int j = 1; j < 3; ++j)
1253 Diff[j], 1, output, 1, output, 1);
1273 vector<StdRegions::StdExpansionSharedPtr> pCollExp,
1276 m_nquad0 (m_stdExp->GetNumPoints(0)),
1277 m_nquad1 (m_stdExp->GetNumPoints(1)),
1278 m_nquad2 (m_stdExp->GetNumPoints(2))
1282 m_coordim = pCollExp[0]->GetCoordim();
1284 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1286 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1287 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1288 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
1290 m_wspSize = 3*m_nquad0*m_nquad1*m_nquad2*m_numElmt;
1293 = m_stdExp->GetBasis(0)->GetZ();
1295 = m_stdExp->GetBasis(1)->GetZ();
1297 = m_stdExp->GetBasis(2)->GetZ();
1304 for (
int i = 0; i < m_nquad0; ++i)
1306 for(
int j = 0; j < m_nquad1; ++j)
1308 for(
int k = 0; k < m_nquad2; ++k)
1311 m_fac0[i + j*m_nquad0 + k*m_nquad0*m_nquad1]
1312 = 4.0/((1-z1[j])*(1-z2[k]));
1313 m_fac1[i + j*m_nquad0 + k*m_nquad0*m_nquad1]
1314 = 2.0*(1+z0[i])/((1-z1[j])*(1-z2[k]));
1315 m_fac2[i + j*m_nquad0 + k*m_nquad0*m_nquad1]
1317 m_fac3[i + j*m_nquad0 + k*m_nquad0*m_nquad1]
1318 = (1+z1[j])/(1-z2[k]);
1331 PhysDeriv_SumFac_Tet::create,
"PhysDeriv_SumFac_Tet")
1354 int nPhys = m_stdExp->GetTotPoints();
1355 int ntot = m_numElmt*nPhys;
1359 out[0] = output0; out[1] = output1; out[2] = output2;
1361 for(
int i = 0; i < 3; ++i)
1363 Diff[i] = wsp + i*ntot;
1367 Blas::Dgemm(
'N',
'N', m_nquad0,m_nquad1*m_nquad2*m_numElmt,
1368 m_nquad0,1.0, m_Deriv0,m_nquad0,&input[0],
1369 m_nquad0,0.0,&Diff[0][0],m_nquad0);
1372 for(
int i = 0; i < m_numElmt; ++i)
1376 for (
int j = 0; j < m_nquad2; ++j)
1378 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1,
1379 1.0, &input[i*nPhys+j*m_nquad0*m_nquad1],
1380 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1381 &Diff[1][i*nPhys+j*m_nquad0*m_nquad1],
1386 Blas::Dgemm(
'N',
'T',m_nquad0*m_nquad1,m_nquad2,m_nquad2,
1387 1.0, &input[i*nPhys],m_nquad0*m_nquad1,
1388 m_Deriv2,m_nquad2, 0.0,&Diff[2][i*nPhys],
1392 Vmath::Vmul(nPhys,&m_fac0[0],1,Diff[0].
get()+cnt,1,
1393 Diff[0].
get()+cnt,1);
1397 Diff[2].
get()+cnt,1,Diff[2].
get()+cnt,1);
1402 for(
int i = 0; i < m_coordim; ++i)
1404 Vmath::Vmul(ntot,m_derivFac[i*3],1,Diff[0],1,out[i],1);
1405 for(
int j = 1; j < 3; ++j)
1408 Diff[j], 1, out[i], 1, out[i], 1);
1419 int nPhys = m_stdExp->GetTotPoints();
1420 int ntot = m_numElmt*nPhys;
1424 for(
int i = 0; i < 3; ++i)
1426 Diff[i] = wsp + i*ntot;
1430 Blas::Dgemm(
'N',
'N', m_nquad0,m_nquad1*m_nquad2*m_numElmt,
1431 m_nquad0,1.0, m_Deriv0,m_nquad0,&input[0],
1432 m_nquad0,0.0,&Diff[0][0],m_nquad0);
1435 for(
int i = 0; i < m_numElmt; ++i)
1439 for (
int j = 0; j < m_nquad2; ++j)
1441 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1,
1442 1.0, &input[i*nPhys+j*m_nquad0*m_nquad1],
1443 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1444 &Diff[1][i*nPhys+j*m_nquad0*m_nquad1],
1449 Blas::Dgemm(
'N',
'T',m_nquad0*m_nquad1,m_nquad2,m_nquad2,
1450 1.0, &input[i*nPhys],m_nquad0*m_nquad1,
1451 m_Deriv2,m_nquad2, 0.0,&Diff[2][i*nPhys],
1455 Vmath::Vmul(nPhys,&m_fac0[0],1,Diff[0].
get()+cnt,1,
1456 Diff[0].
get()+cnt,1);
1460 Diff[2].
get()+cnt,1,Diff[2].
get()+cnt,1);
1465 Vmath::Vmul(ntot,m_derivFac[dir*3],1,Diff[0],1,output,1);
1466 for(
int j = 1; j < 3; ++j)
1469 Diff[j], 1, output, 1, output, 1);
1487 vector<StdRegions::StdExpansionSharedPtr> pCollExp,
1490 m_nquad0 (m_stdExp->GetNumPoints(0)),
1491 m_nquad1 (m_stdExp->GetNumPoints(1)),
1492 m_nquad2 (m_stdExp->GetNumPoints(2))
1496 m_coordim = pCollExp[0]->GetCoordim();
1498 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1501 = m_stdExp->GetBasis(0)->GetZ();
1503 = m_stdExp->GetBasis(2)->GetZ();
1506 for (
int i = 0; i < m_nquad0; ++i)
1508 for(
int j = 0; j < m_nquad1; ++j)
1510 for(
int k = 0; k < m_nquad2; ++k)
1512 m_fac0[i+j*m_nquad0 + k*m_nquad0*m_nquad1] =
1514 m_fac1[i+j*m_nquad0 + k*m_nquad0*m_nquad1] =
1522 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1523 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1524 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
1526 m_wspSize = 3*m_nquad0*m_nquad1*m_nquad2*m_numElmt;
1531 OperatorKey PhysDeriv_SumFac_Prism::m_typeArr[] = {
1534 PhysDeriv_SumFac_Prism::create,
"PhysDeriv_SumFac_Prism")
1557 int nPhys = m_stdExp->GetTotPoints();
1558 int ntot = m_numElmt*nPhys;
1562 out[0] = output0; out[1] = output1; out[2] = output2;
1564 for(
int i = 0; i < 3; ++i)
1566 Diff[i] = wsp + i*ntot;
1570 Blas::Dgemm(
'N',
'N', m_nquad0,m_nquad1*m_nquad2*m_numElmt,
1571 m_nquad0,1.0, m_Deriv0,m_nquad0,&input[0],
1572 m_nquad0,0.0,&Diff[0][0],m_nquad0);
1575 for(
int i = 0; i < m_numElmt; ++i)
1579 for (
int j = 0; j < m_nquad2; ++j)
1581 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1,
1582 1.0, &input[i*nPhys+j*m_nquad0*m_nquad1],
1583 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1584 &Diff[1][i*nPhys+j*m_nquad0*m_nquad1],
1589 Blas::Dgemm(
'N',
'T',m_nquad0*m_nquad1,m_nquad2,m_nquad2,
1590 1.0, &input[i*nPhys],m_nquad0*m_nquad1,
1591 m_Deriv2,m_nquad2, 0.0,&Diff[2][i*nPhys],
1595 Vmath::Vmul(nPhys,&m_fac0[0],1,Diff[0].
get()+cnt,1,
1596 Diff[0].
get()+cnt,1);
1599 Vmath::Vmul(nPhys,&m_fac0[0],1,Diff[1].
get()+cnt,1,
1600 Diff[1].
get()+cnt,1);
1604 Diff[2].
get()+cnt,1,Diff[2].
get()+cnt,1);
1607 Diff[2].
get()+cnt,1,Diff[2].
get()+cnt,1);
1612 for(
int i = 0; i < m_coordim; ++i)
1614 Vmath::Vmul(ntot,m_derivFac[i*3],1,Diff[0],1,out[i],1);
1615 for(
int j = 1; j < 3; ++j)
1618 Diff[j], 1, out[i], 1, out[i], 1);
1629 int nPhys = m_stdExp->GetTotPoints();
1630 int ntot = m_numElmt*nPhys;
1634 for(
int i = 0; i < 3; ++i)
1636 Diff[i] = wsp + i*ntot;
1640 Blas::Dgemm(
'N',
'N', m_nquad0,m_nquad1*m_nquad2*m_numElmt,
1641 m_nquad0,1.0, m_Deriv0,m_nquad0,&input[0],
1642 m_nquad0,0.0,&Diff[0][0],m_nquad0);
1645 for(
int i = 0; i < m_numElmt; ++i)
1648 for (
int j = 0; j < m_nquad2; ++j)
1650 Blas::Dgemm(
'N',
'T', m_nquad0, m_nquad1, m_nquad1,
1651 1.0, &input[i*nPhys+j*m_nquad0*m_nquad1],
1652 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1653 &Diff[1][i*nPhys+j*m_nquad0*m_nquad1],
1658 Blas::Dgemm(
'N',
'T',m_nquad0*m_nquad1,m_nquad2,m_nquad2,
1659 1.0, &input[i*nPhys],m_nquad0*m_nquad1,
1660 m_Deriv2,m_nquad2, 0.0,&Diff[2][i*nPhys],
1664 Vmath::Vmul(nPhys,&m_fac0[0],1,Diff[0].
get()+cnt,1,
1665 Diff[0].
get()+cnt,1);
1668 Vmath::Vmul(nPhys,&m_fac0[0],1,Diff[1].
get()+cnt,1,
1669 Diff[1].
get()+cnt,1);
1673 Diff[2].
get()+cnt,1,Diff[2].
get()+cnt,1);
1676 Diff[2].
get()+cnt,1,Diff[2].
get()+cnt,1);
1681 Vmath::Vmul(ntot,m_derivFac[dir*3],1,Diff[0],1,output,1);
1682 for(
int j = 1; j < 3; ++j)
1685 Diff[j], 1, output, 1, output, 1);
1704 vector<StdRegions::StdExpansionSharedPtr> pCollExp,
1707 m_nquad0 (m_stdExp->GetNumPoints(0)),
1708 m_nquad1 (m_stdExp->GetNumPoints(1)),
1709 m_nquad2 (m_stdExp->GetNumPoints(2))
1713 m_coordim = pCollExp[0]->GetCoordim();
1715 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1718 = m_stdExp->GetBasis(0)->GetZ();
1720 = m_stdExp->GetBasis(1)->GetZ();
1722 = m_stdExp->GetBasis(2)->GetZ();
1727 int nq0_nq1 = m_nquad0*m_nquad1;
1728 for (
int i = 0; i < m_nquad0; ++i)
1730 for(
int j = 0; j < m_nquad1; ++j)
1732 int ifac = i+j*m_nquad0;
1733 for(
int k = 0; k < m_nquad2; ++k)
1735 m_fac0[ifac + k*nq0_nq1] =
1737 m_fac1[ifac + k*nq0_nq1] =
1739 m_fac2[ifac + k*nq0_nq1] =
1745 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1746 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1747 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
1749 m_wspSize = 3*m_nquad0*m_nquad1*m_nquad2*m_numElmt;
1757 PhysDeriv_SumFac_Pyr::create,
"PhysDeriv_SumFac_Pyr")
Phys deriv operator using sum-factorisation (Prism)
std::shared_ptr< CoalescedGeomData > CoalescedGeomDataSharedPtr
#define ASSERTL0(condition, msg)
std::vector< PointsKey > PointsKeyVector
PhysDeriv_SumFac_Quad(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData)
PhysDeriv_SumFac_Hex(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData)
PhysDeriv_SumFac_Tet(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData)
virtual void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp)
Perform operation.
Array< TwoD, const NekDouble > m_derivFac
Array< TwoD, const NekDouble > m_derivFac
Array< OneD, NekDouble > m_fac1
PhysDeriv_SumFac_Seg(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData)
virtual void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
General purpose memory allocation routines with the ability to allocate from thread specific memory p...
virtual void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp)
Perform operation.
std::tuple< LibUtilities::ShapeType, OperatorType, ImplementationType, ExpansionIsNodal > OperatorKey
Key for describing an Operator.
virtual void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
Phys deriv operator using sum-factorisation (Segment)
vector< StdRegions::StdExpansionSharedPtr > m_expList
Array< OneD, NekDouble > m_fac2
void Vvtvp(int n, const T *w, const int incw, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
vvtvp (vector times vector plus vector): z = w*x + y
Array< TwoD, const NekDouble > m_derivFac
Array< TwoD, const NekDouble > m_derivFac
Phys deriv operator using element-wise operation.
virtual void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
PhysDeriv_SumFac_Prism(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData)
Array< OneD, NekDouble > m_fac0
static void Dgemm(const char &transa, const char &transb, const int &m, const int &n, const int &k, const double &alpha, const double *a, const int &lda, const double *b, const int &ldb, const double &beta, double *c, const int &ldc)
BLAS level 3: Matrix-matrix multiply C = A x B where A[m x n], B[n x k], C[m x k].
Phys deriv operator using sum-factorisation (Tri)
Base class for operators on a collection of elements.
virtual void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp)
Perform operation.
Array< OneD, NekDouble > m_fac0
Array< TwoD, const NekDouble > m_derivFac
Phys deriv operator using standard matrix approach.
virtual void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp)
Perform operation.
virtual void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp)
Perform operation.
virtual void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
Array< TwoD, const NekDouble > m_derivFac
virtual void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
virtual void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
Array< OneD, NekDouble > m_fac1
virtual void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
Array< TwoD, const NekDouble > m_derivFac
virtual void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
OperatorFactory & GetOperatorFactory()
Returns the singleton Operator factory object.
Phys deriv operator using sum-factorisation (Pyramid)
Phys deriv operator using sum-factorisation (Tet)
virtual void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp)
Perform operation.
Array< OneD, NekDouble > m_fac1
Array< OneD, NekDouble > m_fac0
virtual void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
#define OPERATOR_CREATE(cname)
Array< TwoD, const NekDouble > m_derivFac
virtual void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp)
Perform operation.
Array< OneD, NekDouble > m_fac3
Array< OneD, DNekMatSharedPtr > m_derivMat
Phys deriv operator using original LocalRegions implementation.
Array< OneD, NekDouble > m_fac1
Array< OneD, NekDouble > m_fac2
PhysDeriv_SumFac_Pyr(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData)
PhysDeriv_SumFac_Tri(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData)
Phys deriv operator using sum-factorisation (Hex)
PhysDeriv_StdMat(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData)
tKey RegisterCreatorFunction(tKey idKey, CreatorFunction classCreator, std::string pDesc="")
Register a class with the factory.
virtual void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp)
Perform operation.
Array< OneD, NekDouble > m_fac0
void Zero(int n, T *x, const int incx)
Zero vector.
#define ASSERTL1(condition, msg)
Assert Level 1 – Debugging which is used whether in FULLDEBUG or DEBUG compilation mode...
Phys deriv operator using sum-factorisation (Quad)
Array< TwoD, const NekDouble > m_derivFac
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
virtual void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp)
Perform operation.
virtual void operator()(const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output0, Array< OneD, NekDouble > &output1, Array< OneD, NekDouble > &output2, Array< OneD, NekDouble > &wsp)
Perform operation.
void Vmul(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Multiply vector z = x*y.
virtual void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
PhysDeriv_NoCollection(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData)
PhysDeriv_IterPerExp(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData)