Nektar++
PhysDeriv.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: PhysDeriv.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// Permission is hereby granted, free of charge, to any person obtaining a
14// copy of this software and associated documentation files (the "Software"),
15// to deal in the Software without restriction, including without limitation
16// the rights to use, copy, modify, merge, publish, distribute, sublicense,
17// and/or sell copies of the Software, and to permit persons to whom the
18// Software is furnished to do so, subject to the following conditions:
19//
20// The above copyright notice and this permission notice shall be included
21// in all copies or substantial portions of the Software.
22//
23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29// DEALINGS IN THE SOFTWARE.
30//
31// Description: PhysDeriv operator implementations
32//
33///////////////////////////////////////////////////////////////////////////////
34
38#include <MatrixFreeOps/Operator.hpp>
39
40using namespace std;
41
42namespace Nektar::Collections
43{
44
52
53/**
54 * @brief Physical Derivative help class to calculate the size of the collection
55 * that is given as an input and as an output to the PhysDeriv Operator. The
56 * Operator evaluation is happenning in the physical space and the output is
57 * expected to be part of the physical space too.
58 */
59class PhysDeriv_Helper : virtual public Operator
60{
61protected:
63 {
64 // expect input to be number of elements by the number of quadrature
65 // points
66 m_inputSize = m_numElmt * m_stdExp->GetTotPoints();
67 // the derivate is using data from the physical space to evaluate the
68 // derivative in the physical space
70 }
71};
72
73/**
74 * @brief Phys deriv operator using standard matrix approach
75 */
76class PhysDeriv_StdMat final : virtual public Operator,
77 virtual public PhysDeriv_Helper
78{
79public:
81
82 ~PhysDeriv_StdMat() final = default;
83
84 void operator()(const Array<OneD, const NekDouble> &input,
85 Array<OneD, NekDouble> &output0,
86 Array<OneD, NekDouble> &output1,
87 Array<OneD, NekDouble> &output2,
88 Array<OneD, NekDouble> &wsp) final
89 {
90 int nPhys = m_stdExp->GetTotPoints();
91 int ntot = m_numElmt * nPhys;
92 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
95 out[0] = output0;
96 out[1] = output1;
97 out[2] = output2;
98
99 for (int i = 0; i < m_dim; ++i)
100 {
101 Diff[i] = wsp + i * ntot;
102 }
103
104 // calculate local derivatives
105 for (int i = 0; i < m_dim; ++i)
106 {
107 Blas::Dgemm('N', 'N', m_derivMat[i]->GetRows(), m_numElmt,
108 m_derivMat[i]->GetColumns(), 1.0,
109 m_derivMat[i]->GetRawPtr(), m_derivMat[i]->GetRows(),
110 input.get(), nPhys, 0.0, &Diff[i][0], nPhys);
111 }
112
113 // calculate full derivative
114 if (m_isDeformed)
115 {
116 for (int i = 0; i < m_coordim; ++i)
117 {
118 Vmath::Zero(ntot, out[i], 1);
119 for (int j = 0; j < m_dim; ++j)
120 {
121 Vmath::Vvtvp(ntot, m_derivFac[i * m_dim + j], 1, Diff[j], 1,
122 out[i], 1, out[i], 1);
123 }
124 }
125 }
126 else
127 {
129 for (int i = 0; i < m_coordim; ++i)
130 {
131 Vmath::Zero(ntot, out[i], 1);
132 for (int e = 0; e < m_numElmt; ++e)
133 {
134 for (int j = 0; j < m_dim; ++j)
135 {
136 Vmath::Svtvp(m_nqe, m_derivFac[i * m_dim + j][e],
137 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
138 1, t = out[i] + e * m_nqe, 1);
139 }
140 }
141 }
142 }
143 }
144
145 void operator()(int dir, const Array<OneD, const NekDouble> &input,
147 Array<OneD, NekDouble> &wsp) final
148 {
149 int nPhys = m_stdExp->GetTotPoints();
150 int ntot = m_numElmt * nPhys;
151 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
153
154 for (int i = 0; i < m_dim; ++i)
155 {
156 Diff[i] = wsp + i * ntot;
157 }
158
159 // calculate local derivatives
160 for (int i = 0; i < m_dim; ++i)
161 {
162 Blas::Dgemm('N', 'N', m_derivMat[i]->GetRows(), m_numElmt,
163 m_derivMat[i]->GetColumns(), 1.0,
164 m_derivMat[i]->GetRawPtr(), m_derivMat[i]->GetRows(),
165 input.get(), nPhys, 0.0, &Diff[i][0], nPhys);
166 }
167
168 // calculate full derivative
169 Vmath::Zero(ntot, output, 1);
170 if (m_isDeformed)
171 {
172 for (int j = 0; j < m_dim; ++j)
173 {
174 Vmath::Vvtvp(ntot, m_derivFac[dir * m_dim + j], 1, Diff[j], 1,
175 output, 1, output, 1);
176 }
177 }
178 else
179 {
181 for (int e = 0; e < m_numElmt; ++e)
182 {
183 for (int j = 0; j < m_dim; ++j)
184 {
185 Vmath::Svtvp(m_nqe, m_derivFac[dir * m_dim + j][e],
186 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
187 t = output + e * m_nqe, 1);
188 }
189 }
190 }
191 }
192
193protected:
196 int m_dim;
198
199private:
200 PhysDeriv_StdMat(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
203 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper()
204 {
205 int nqtot = pCollExp[0]->GetTotPoints();
206 m_dim = pCollExp[0]->GetShapeDimension();
207 m_coordim = pCollExp[0]->GetCoordim();
208
209 // set up a PhysDeriv StdMat.
211 for (int i = 0; i < m_dim; ++i)
212 {
213 Array<OneD, NekDouble> tmp(nqtot), tmp1(nqtot);
214 m_derivMat[i] =
216 for (int j = 0; j < nqtot; ++j)
217 {
218 Vmath::Zero(nqtot, tmp, 1);
219 tmp[j] = 1.0;
220 m_stdExp->PhysDeriv(i, tmp, tmp1);
221 Vmath::Vcopy(nqtot, &tmp1[0], 1,
222 &(m_derivMat[i]->GetPtr())[0] + j * nqtot, 1);
223 }
224 }
225 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
226 m_wspSize = 3 * nqtot * m_numElmt;
227 }
228};
229
230/// Factory initialisation for the PhysDeriv_StdMat operators
231OperatorKey PhysDeriv_StdMat::m_typeArr[] = {
234 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_Seg"),
237 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_Tri"),
240 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_NodalTri"),
243 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_Quad"),
246 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_Tet"),
249 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_NodalTet"),
252 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_Pyr"),
255 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_Prism"),
258 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_NodalPrism"),
261 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_Hex"),
264 PhysDeriv_StdMat::create, "PhysDeriv_SumFac_Pyr")};
265
266/**
267 * @brief Phys deriv operator using matrix free operators.
268 */
269class PhysDeriv_MatrixFree final : virtual public Operator,
271 virtual public PhysDeriv_Helper
272{
273public:
275
276 ~PhysDeriv_MatrixFree() final = default;
277
278 void operator()(const Array<OneD, const NekDouble> &input,
279 Array<OneD, NekDouble> &output0,
280 Array<OneD, NekDouble> &output1,
281 Array<OneD, NekDouble> &output2,
282 [[maybe_unused]] Array<OneD, NekDouble> &wsp) final
283 {
284 if (m_isPadded)
285 {
286 // copy into padded vector
287 Vmath::Vcopy(m_nIn, input, 1, m_input, 1);
288 (*m_oper)(m_input, m_output);
289 }
290 else
291 {
292 (*m_oper)(input, m_output);
293 }
294
295 // currently using temporary local temporary space for output
296 // to allow for other operator call below which is
297 // directionally dependent
298 switch (m_coordim)
299 {
300 case 1:
301 Vmath::Vcopy(m_nOut, m_output[0], 1, output0, 1);
302 break;
303 case 2:
304 Vmath::Vcopy(m_nOut, m_output[0], 1, output0, 1);
305 Vmath::Vcopy(m_nOut, m_output[1], 1, output1, 1);
306 break;
307 case 3:
308 Vmath::Vcopy(m_nOut, m_output[0], 1, output0, 1);
309 Vmath::Vcopy(m_nOut, m_output[1], 1, output1, 1);
310 Vmath::Vcopy(m_nOut, m_output[2], 1, output2, 1);
311 break;
312 default:
313 NEKERROR(ErrorUtil::efatal, "Unknown coordinate dimension");
314 break;
315 }
316 }
317
318 void operator()(int dir, const Array<OneD, const NekDouble> &input,
320 [[maybe_unused]] Array<OneD, NekDouble> &wsp) final
321 {
322 if (m_isPadded)
323 {
324 // copy into padded vector
325 Vmath::Vcopy(m_nIn, input, 1, m_input, 1);
326 (*m_oper)(m_input, m_output);
327 }
328 else
329 {
330 (*m_oper)(input, m_output);
331 }
332 Vmath::Vcopy(m_nOut, m_output[dir], 1, output, 1);
333 }
334
335private:
336 std::shared_ptr<MatrixFree::PhysDeriv> m_oper;
337
338 PhysDeriv_MatrixFree(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
341 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
342 MatrixFreeOneInMultiOut(pCollExp[0]->GetCoordim(),
343 pCollExp[0]->GetStdExp()->GetTotPoints(),
344 pCollExp[0]->GetStdExp()->GetTotPoints(),
345 pCollExp.size())
346 {
347 // Check if deformed
348 bool deformed{pGeomData->IsDeformed(pCollExp)};
349 const auto dim = pCollExp[0]->GetStdExp()->GetShapeDimension();
350
351 if (m_isPadded == false) // declare local space non-padded case
352 {
353 int nOut = pCollExp[0]->GetStdExp()->GetTotPoints();
356 if (m_coordim == 2)
357 {
359 }
360 else if (m_coordim == 3)
361 {
364 }
365 }
366
367 // Basis vector.
368 std::vector<LibUtilities::BasisSharedPtr> basis(dim);
369 for (unsigned int i = 0; i < dim; ++i)
370 {
371 basis[i] = pCollExp[0]->GetBasis(i);
372 }
373
374 // Get shape type
375 auto shapeType = pCollExp[0]->GetStdExp()->DetShapeType();
376
377 // Generate operator string and create operator.
378 std::string op_string = "PhysDeriv";
379 op_string += MatrixFree::GetOpstring(shapeType, deformed);
381 op_string, basis, m_nElmtPad);
382
383 // Set derivative factors
384 oper->SetDF(pGeomData->GetDerivFactorsInterLeave(pCollExp, m_nElmtPad));
385
386 m_oper = std::dynamic_pointer_cast<MatrixFree::PhysDeriv>(oper);
387 ASSERTL0(m_oper, "Failed to cast pointer.");
388 }
389};
390
391/// Factory initialisation for the PhysDeriv_MatrixFree operators
392OperatorKey PhysDeriv_MatrixFree::m_typeArr[] = {
395 PhysDeriv_MatrixFree::create, "PhysDeriv_MatrixFree_Seg"),
398 PhysDeriv_MatrixFree::create, "PhysDeriv_MatrixFree_Tri"),
401 PhysDeriv_MatrixFree::create, "PhysDeriv_MatrixFree_Quad"),
404 PhysDeriv_MatrixFree::create, "PhysDeriv_MatrixFree_Hex"),
407 PhysDeriv_MatrixFree::create, "PhysDeriv_MatrixFree_Prism"),
410 PhysDeriv_MatrixFree::create, "PhysDeriv_MatrixFree_Pyr"),
413 PhysDeriv_MatrixFree::create, "PhysDeriv_MatrixFree_Tet")
414
415};
416
417/**
418 * @brief Phys deriv operator using element-wise operation
419 */
420class PhysDeriv_IterPerExp final : virtual public Operator,
421 virtual public PhysDeriv_Helper
422{
423public:
425
426 ~PhysDeriv_IterPerExp() final = default;
427
428 void operator()(const Array<OneD, const NekDouble> &input,
429 Array<OneD, NekDouble> &output0,
430 Array<OneD, NekDouble> &output1,
431 Array<OneD, NekDouble> &output2,
432 Array<OneD, NekDouble> &wsp) final
433 {
434 int nPhys = m_stdExp->GetTotPoints();
435 int ntot = m_numElmt * nPhys;
436 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
439 out[0] = output0;
440 out[1] = output1;
441 out[2] = output2;
442
443 for (int i = 0; i < m_dim; ++i)
444 {
445 Diff[i] = wsp + i * ntot;
446 }
447
448 // calculate local derivatives
449 for (int i = 0; i < m_numElmt; ++i)
450 {
451 m_stdExp->PhysDeriv(input + i * nPhys, tmp0 = Diff[0] + i * nPhys,
452 tmp1 = Diff[1] + i * nPhys,
453 tmp2 = Diff[2] + i * nPhys);
454 }
455
456 // calculate full derivative
457 if (m_isDeformed)
458 {
459 for (int i = 0; i < m_coordim; ++i)
460 {
461 Vmath::Vmul(ntot, m_derivFac[i * m_dim], 1, Diff[0], 1, out[i],
462 1);
463 for (int j = 1; j < m_dim; ++j)
464 {
465 Vmath::Vvtvp(ntot, m_derivFac[i * m_dim + j], 1, Diff[j], 1,
466 out[i], 1, out[i], 1);
467 }
468 }
469 }
470 else
471 {
473 for (int e = 0; e < m_numElmt; ++e)
474 {
475 for (int i = 0; i < m_coordim; ++i)
476 {
478 Diff[0] + e * m_nqe, 1, t = out[i] + e * m_nqe,
479 1);
480 for (int j = 1; j < m_dim; ++j)
481 {
482 Vmath::Svtvp(m_nqe, m_derivFac[i * m_dim + j][e],
483 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
484 1, t = out[i] + e * m_nqe, 1);
485 }
486 }
487 }
488 }
489 }
490
491 void operator()(int dir, const Array<OneD, const NekDouble> &input,
493 Array<OneD, NekDouble> &wsp) final
494 {
495 int nPhys = m_stdExp->GetTotPoints();
496 int ntot = m_numElmt * nPhys;
497 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
499
500 for (int i = 0; i < m_dim; ++i)
501 {
502 Diff[i] = wsp + i * ntot;
503 }
504
505 // calculate local derivatives
506 for (int i = 0; i < m_numElmt; ++i)
507 {
508 m_stdExp->PhysDeriv(input + i * nPhys, tmp0 = Diff[0] + i * nPhys,
509 tmp1 = Diff[1] + i * nPhys,
510 tmp2 = Diff[2] + i * nPhys);
511 }
512
513 Vmath::Zero(ntot, output, 1);
514 if (m_isDeformed)
515 {
516 for (int j = 0; j < m_dim; ++j)
517 {
518 Vmath::Vvtvp(ntot, m_derivFac[dir * m_dim + j], 1, Diff[j], 1,
519 output, 1, output, 1);
520 }
521 }
522 else
523 {
525 for (int e = 0; e < m_numElmt; ++e)
526 {
527 for (int j = 0; j < m_dim; ++j)
528 {
529 Vmath::Svtvp(m_nqe, m_derivFac[dir * m_dim + j][e],
530 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
531 t = output + e * m_nqe, 1);
532 }
533 }
534 }
535 }
536
537protected:
539 int m_dim;
541
542private:
543 PhysDeriv_IterPerExp(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
546 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper()
547 {
548 int nqtot = pCollExp[0]->GetTotPoints();
549 m_dim = pCollExp[0]->GetShapeDimension();
550 m_coordim = pCollExp[0]->GetCoordim();
551
552 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
553 m_wspSize = 3 * nqtot * m_numElmt;
554 }
555};
556
557/// Factory initialisation for the PhysDeriv_IterPerExp operators
558OperatorKey PhysDeriv_IterPerExp::m_typeArr[] = {
561 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_Seg"),
564 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_Tri"),
567 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_NodalTri"),
570 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_Quad"),
573 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_Tet"),
576 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_NodalTet"),
579 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_Pyr"),
582 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_Prism"),
585 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_NodalPrism"),
588 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_Hex")};
589
590/**
591 * @brief Phys deriv operator using original LocalRegions implementation.
592 */
593class PhysDeriv_NoCollection final : virtual public Operator,
594 virtual public PhysDeriv_Helper
595{
596public:
598
599 ~PhysDeriv_NoCollection() final = default;
600
601 void operator()(const Array<OneD, const NekDouble> &input,
602 Array<OneD, NekDouble> &output0,
603 Array<OneD, NekDouble> &output1,
604 Array<OneD, NekDouble> &output2,
605 [[maybe_unused]] Array<OneD, NekDouble> &wsp) final
606 {
607 const int nPhys = m_expList[0]->GetTotPoints();
608 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
609
610 // calculate local derivatives
611 switch (m_expList[0]->GetShapeDimension())
612 {
613 case 1:
614 {
615 for (int i = 0; i < m_numElmt; ++i)
616 {
617 m_expList[i]->PhysDeriv(input + i * nPhys,
618 tmp0 = output0 + i * nPhys);
619 }
620 break;
621 }
622 case 2:
623 {
624 for (int i = 0; i < m_numElmt; ++i)
625 {
626 m_expList[i]->PhysDeriv(input + i * nPhys,
627 tmp0 = output0 + i * nPhys,
628 tmp1 = output1 + i * nPhys);
629 }
630 break;
631 }
632 case 3:
633 {
634 for (int i = 0; i < m_numElmt; ++i)
635 {
636 m_expList[i]->PhysDeriv(
637 input + i * nPhys, tmp0 = output0 + i * nPhys,
638 tmp1 = output1 + i * nPhys, tmp2 = output2 + i * nPhys);
639 }
640 break;
641 }
642 default:
643 ASSERTL0(false, "Unknown dimension.");
644 }
645 }
646
647 void operator()(int dir, const Array<OneD, const NekDouble> &input,
649 [[maybe_unused]] Array<OneD, NekDouble> &wsp) final
650 {
651 const int nPhys = m_expList[0]->GetTotPoints();
653
654 // calculate local derivatives
655 for (int i = 0; i < m_numElmt; ++i)
656 {
657 m_expList[i]->PhysDeriv(dir, input + i * nPhys,
658 tmp = output + i * nPhys);
659 }
660 }
661
662protected:
663 vector<StdRegions::StdExpansionSharedPtr> m_expList;
664
665private:
666 PhysDeriv_NoCollection(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
669 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper()
670 {
671 m_expList = pCollExp;
672 }
673};
674
675/// Factory initialisation for the PhysDeriv_NoCollection operators
676OperatorKey PhysDeriv_NoCollection::m_typeArr[] = {
679 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_Seg"),
682 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_Tri"),
685 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_NodalTri"),
688 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_Quad"),
691 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_Tet"),
694 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_NodalTet"),
697 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_Pyr"),
700 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_Prism"),
703 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_NodalPrism"),
706 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_Hex")};
707
708/**
709 * @brief Phys deriv operator using sum-factorisation (Segment)
710 */
711class PhysDeriv_SumFac_Seg final : virtual public Operator,
712 virtual public PhysDeriv_Helper
713{
714public:
716
717 ~PhysDeriv_SumFac_Seg() final = default;
718
719 void operator()(const Array<OneD, const NekDouble> &input,
720 Array<OneD, NekDouble> &output0,
721 Array<OneD, NekDouble> &output1,
722 Array<OneD, NekDouble> &output2,
723 Array<OneD, NekDouble> &wsp) final
724 {
725 const int nqcol = m_nquad0 * m_numElmt;
726
727 ASSERTL1(wsp.size() == m_wspSize, "Incorrect workspace size");
728 ASSERTL1(input.size() >= nqcol, "Incorrect input size");
729
730 Array<OneD, NekDouble> diff0(nqcol, wsp);
731
733 m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
734 m_nquad0);
735
736 if (m_isDeformed)
737 {
738 Vmath::Vmul(nqcol, m_derivFac[0], 1, diff0, 1, output0, 1);
739
740 if (m_coordim == 2)
741 {
742 Vmath::Vmul(nqcol, m_derivFac[1], 1, diff0, 1, output1, 1);
743 }
744 else if (m_coordim == 3)
745 {
746 Vmath::Vmul(nqcol, m_derivFac[1], 1, diff0, 1, output1, 1);
747 Vmath::Vmul(nqcol, m_derivFac[2], 1, diff0, 1, output2, 1);
748 }
749 }
750 else
751 {
753 for (int e = 0; e < m_numElmt; ++e)
754 {
755 Vmath::Smul(m_nqe, m_derivFac[0][e], diff0 + e * m_nqe, 1,
756 t = output0 + e * m_nqe, 1);
757 }
758
759 if (m_coordim == 2)
760 {
761 for (int e = 0; e < m_numElmt; ++e)
762 {
763 Vmath::Smul(m_nqe, m_derivFac[1][e], diff0 + e * m_nqe, 1,
764 t = output1 + e * m_nqe, 1);
765 }
766 }
767 else if (m_coordim == 3)
768 {
769 for (int e = 0; e < m_numElmt; ++e)
770 {
771 Vmath::Smul(m_nqe, m_derivFac[1][e], diff0 + e * m_nqe, 1,
772 t = output1 + e * m_nqe, 1);
773 Vmath::Smul(m_nqe, m_derivFac[2][e], diff0 + e * m_nqe, 1,
774 t = output2 + e * m_nqe, 1);
775 }
776 }
777 }
778 }
779
780 void operator()(int dir, const Array<OneD, const NekDouble> &input,
782 Array<OneD, NekDouble> &wsp) final
783 {
784 const int nqcol = m_nquad0 * m_numElmt;
785
786 ASSERTL1(wsp.size() == m_wspSize, "Incorrect workspace size");
787 ASSERTL1(input.size() >= nqcol, "Incorrect input size");
788
789 Array<OneD, NekDouble> diff0(nqcol, wsp);
790
792 m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
793 m_nquad0);
794
795 if (m_isDeformed)
796 {
797 Vmath::Vmul(nqcol, m_derivFac[dir], 1, diff0, 1, output, 1);
798 }
799 else
800 {
802 for (int e = 0; e < m_numElmt; ++e)
803 {
804 Vmath::Smul(m_nqe, m_derivFac[0][e], diff0 + e * m_nqe, 1,
805 t = output + e * m_nqe, 1);
806 }
807 }
808 }
809
810protected:
812 const int m_nquad0;
815
816private:
817 PhysDeriv_SumFac_Seg(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
820 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
821 m_nquad0(m_stdExp->GetNumPoints(0))
822 {
823 m_coordim = pCollExp[0]->GetCoordim();
824
825 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
826
827 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
829 }
830};
831
832/// Factory initialisation for the PhysDeriv_SumFac_Seg operators
833OperatorKey PhysDeriv_SumFac_Seg::m_type =
836 PhysDeriv_SumFac_Seg::create, "PhysDeriv_SumFac_Seg");
837
838/**
839 * @brief Phys deriv operator using sum-factorisation (Quad)
840 */
841class PhysDeriv_SumFac_Quad final : virtual public Operator,
842 virtual public PhysDeriv_Helper
843{
844public:
846
847 ~PhysDeriv_SumFac_Quad() final = default;
848
849 void operator()(const Array<OneD, const NekDouble> &input,
850 Array<OneD, NekDouble> &output0,
851 Array<OneD, NekDouble> &output1,
852 Array<OneD, NekDouble> &output2,
853 Array<OneD, NekDouble> &wsp) final
854 {
855 const int nqtot = m_nquad0 * m_nquad1;
856 const int nqcol = nqtot * m_numElmt;
857
858 ASSERTL1(wsp.size() == m_wspSize, "Incorrect workspace size");
859 ASSERTL1(input.size() >= nqcol, "Incorrect input size");
860
861 Array<OneD, NekDouble> diff0(nqcol, wsp);
862 Array<OneD, NekDouble> diff1(nqcol, wsp + nqcol);
863
865 m_Deriv0, m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
866 m_nquad0);
867
868 int cnt = 0;
869 for (int i = 0; i < m_numElmt; ++i, cnt += nqtot)
870 {
871 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
872 input.get() + cnt, m_nquad0, m_Deriv1, m_nquad1, 0.0,
873 diff1.get() + cnt, m_nquad0);
874 }
875
876 if (m_isDeformed)
877 {
878 Vmath::Vmul(nqcol, m_derivFac[0], 1, diff0, 1, output0, 1);
879 Vmath::Vvtvp(nqcol, m_derivFac[1], 1, diff1, 1, output0, 1, output0,
880 1);
881 Vmath::Vmul(nqcol, m_derivFac[2], 1, diff0, 1, output1, 1);
882 Vmath::Vvtvp(nqcol, m_derivFac[3], 1, diff1, 1, output1, 1, output1,
883 1);
884
885 if (m_coordim == 3)
886 {
887 Vmath::Vmul(nqcol, m_derivFac[4], 1, diff0, 1, output2, 1);
888 Vmath::Vvtvp(nqcol, m_derivFac[5], 1, diff1, 1, output2, 1,
889 output2, 1);
890 }
891 }
892 else
893 {
895 for (int e = 0; e < m_numElmt; ++e)
896 {
897 Vmath::Smul(m_nqe, m_derivFac[0][e], diff0 + e * m_nqe, 1,
898 t = output0 + e * m_nqe, 1);
899 Vmath::Svtvp(m_nqe, m_derivFac[1][e], diff1 + e * m_nqe, 1,
900 output0 + e * m_nqe, 1, t = output0 + e * m_nqe,
901 1);
902
903 Vmath::Smul(m_nqe, m_derivFac[2][e], diff0 + e * m_nqe, 1,
904 t = output1 + e * m_nqe, 1);
905 Vmath::Svtvp(m_nqe, m_derivFac[3][e], diff1 + e * m_nqe, 1,
906 output1 + e * m_nqe, 1, t = output1 + e * m_nqe,
907 1);
908 }
909
910 if (m_coordim == 3)
911 {
912 for (int e = 0; e < m_numElmt; ++e)
913 {
914 Vmath::Smul(m_nqe, m_derivFac[4][e], diff0 + e * m_nqe, 1,
915 t = output2 + e * m_nqe, 1);
916 Vmath::Svtvp(m_nqe, m_derivFac[5][e], diff1 + e * m_nqe, 1,
917 output2 + e * m_nqe, 1,
918 t = output2 + e * m_nqe, 1);
919 }
920 }
921 }
922 }
923
924 void operator()(int dir, const Array<OneD, const NekDouble> &input,
926 Array<OneD, NekDouble> &wsp) final
927 {
928 const int nqtot = m_nquad0 * m_nquad1;
929 const int nqcol = nqtot * m_numElmt;
930
931 ASSERTL1(wsp.size() == m_wspSize, "Incorrect workspace size");
932 ASSERTL1(input.size() >= nqcol, "Incorrect input size");
933
934 Array<OneD, NekDouble> diff0(nqcol, wsp);
935 Array<OneD, NekDouble> diff1(nqcol, wsp + nqcol);
936
938 m_Deriv0, m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
939 m_nquad0);
940
941 int cnt = 0;
942 for (int i = 0; i < m_numElmt; ++i, cnt += nqtot)
943 {
944 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
945 input.get() + cnt, m_nquad0, m_Deriv1, m_nquad1, 0.0,
946 diff1.get() + cnt, m_nquad0);
947 }
948
949 if (m_isDeformed)
950 {
951 Vmath::Vmul(nqcol, m_derivFac[2 * dir], 1, diff0, 1, output, 1);
952 Vmath::Vvtvp(nqcol, m_derivFac[2 * dir + 1], 1, diff1, 1, output, 1,
953 output, 1);
954 }
955 else
956 {
958 for (int e = 0; e < m_numElmt; ++e)
959 {
960 Vmath::Smul(m_nqe, m_derivFac[2 * dir][e], diff0 + e * m_nqe, 1,
961 t = output + e * m_nqe, 1);
962 Vmath::Svtvp(m_nqe, m_derivFac[2 * dir + 1][e],
963 diff1 + e * m_nqe, 1, output + e * m_nqe, 1,
964 t = output + e * m_nqe, 1);
965 }
966 }
967 }
968
969protected:
971 const int m_nquad0;
972 const int m_nquad1;
976
977private:
978 PhysDeriv_SumFac_Quad(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
981 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
982 m_nquad0(m_stdExp->GetNumPoints(0)),
983 m_nquad1(m_stdExp->GetNumPoints(1))
984 {
985 m_coordim = pCollExp[0]->GetCoordim();
986
987 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
988
989 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
990 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
992 }
993};
994
995/// Factory initialisation for the PhysDeriv_SumFac_Quad operators
996OperatorKey PhysDeriv_SumFac_Quad::m_type =
999 PhysDeriv_SumFac_Quad::create, "PhysDeriv_SumFac_Quad");
1000
1001/**
1002 * @brief Phys deriv operator using sum-factorisation (Tri)
1003 */
1004class PhysDeriv_SumFac_Tri final : virtual public Operator,
1005 virtual public PhysDeriv_Helper
1006{
1007public:
1009
1010 ~PhysDeriv_SumFac_Tri() final = default;
1011
1012 void operator()(const Array<OneD, const NekDouble> &input,
1013 Array<OneD, NekDouble> &output0,
1014 Array<OneD, NekDouble> &output1,
1015 Array<OneD, NekDouble> &output2,
1016 Array<OneD, NekDouble> &wsp) final
1017 {
1018 const int nqtot = m_nquad0 * m_nquad1;
1019 const int nqcol = nqtot * m_numElmt;
1020
1021 ASSERTL1(wsp.size() == m_wspSize, "Incorrect workspace size");
1022 ASSERTL1(input.size() >= nqcol, "Incorrect input size");
1023
1024 Array<OneD, NekDouble> diff0(nqcol, wsp);
1025 Array<OneD, NekDouble> diff1(nqcol, wsp + nqcol);
1026
1027 // Tensor Product Derivative
1028 Blas::Dgemm('N', 'N', m_nquad0, m_nquad1 * m_numElmt, m_nquad0, 1.0,
1029 m_Deriv0, m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
1030 m_nquad0);
1031
1032 int cnt = 0;
1033 for (int i = 0; i < m_numElmt; ++i, cnt += nqtot)
1034 {
1035 // scale diff0 by geometric factor: 2/(1-z1)
1036 Vmath::Vmul(nqtot, &m_fac1[0], 1, diff0.get() + cnt, 1,
1037 diff0.get() + cnt, 1);
1038
1039 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1040 input.get() + cnt, m_nquad0, m_Deriv1, m_nquad1, 0.0,
1041 diff1.get() + cnt, m_nquad0);
1042
1043 // add to diff1 by diff0 scaled by: (1_z0)/(1-z1)
1044 Vmath::Vvtvp(nqtot, m_fac0.get(), 1, diff0.get() + cnt, 1,
1045 diff1.get() + cnt, 1, diff1.get() + cnt, 1);
1046 }
1047
1048 if (m_isDeformed)
1049 {
1050 Vmath::Vmul(nqcol, m_derivFac[0], 1, diff0, 1, output0, 1);
1051 Vmath::Vvtvp(nqcol, m_derivFac[1], 1, diff1, 1, output0, 1, output0,
1052 1);
1053 Vmath::Vmul(nqcol, m_derivFac[2], 1, diff0, 1, output1, 1);
1054 Vmath::Vvtvp(nqcol, m_derivFac[3], 1, diff1, 1, output1, 1, output1,
1055 1);
1056
1057 if (m_coordim == 3)
1058 {
1059 Vmath::Vmul(nqcol, m_derivFac[4], 1, diff0, 1, output2, 1);
1060 Vmath::Vvtvp(nqcol, m_derivFac[5], 1, diff1, 1, output2, 1,
1061 output2, 1);
1062 }
1063 }
1064 else
1065 {
1067 for (int e = 0; e < m_numElmt; ++e)
1068 {
1069 Vmath::Smul(m_nqe, m_derivFac[0][e], diff0 + e * m_nqe, 1,
1070 t = output0 + e * m_nqe, 1);
1071 Vmath::Svtvp(m_nqe, m_derivFac[1][e], diff1 + e * m_nqe, 1,
1072 output0 + e * m_nqe, 1, t = output0 + e * m_nqe,
1073 1);
1074
1075 Vmath::Smul(m_nqe, m_derivFac[2][e], diff0 + e * m_nqe, 1,
1076 t = output1 + e * m_nqe, 1);
1077 Vmath::Svtvp(m_nqe, m_derivFac[3][e], diff1 + e * m_nqe, 1,
1078 output1 + e * m_nqe, 1, t = output1 + e * m_nqe,
1079 1);
1080 }
1081
1082 if (m_coordim == 3)
1083 {
1084 for (int e = 0; e < m_numElmt; ++e)
1085 {
1086 Vmath::Smul(m_nqe, m_derivFac[4][e], diff0 + e * m_nqe, 1,
1087 t = output2 + e * m_nqe, 1);
1088 Vmath::Svtvp(m_nqe, m_derivFac[5][e], diff1 + e * m_nqe, 1,
1089 output2 + e * m_nqe, 1,
1090 t = output2 + e * m_nqe, 1);
1091 }
1092 }
1093 }
1094 }
1095
1096 void operator()(int dir, const Array<OneD, const NekDouble> &input,
1097 Array<OneD, NekDouble> &output,
1098 Array<OneD, NekDouble> &wsp) final
1099 {
1100 const int nqtot = m_nquad0 * m_nquad1;
1101 const int nqcol = nqtot * m_numElmt;
1102
1103 ASSERTL1(wsp.size() == m_wspSize, "Incorrect workspace size");
1104 ASSERTL1(input.size() >= nqcol, "Incorrect input size");
1105
1106 Array<OneD, NekDouble> diff0(nqcol, wsp);
1107 Array<OneD, NekDouble> diff1(nqcol, wsp + nqcol);
1108
1109 // Tensor Product Derivative
1110 Blas::Dgemm('N', 'N', m_nquad0, m_nquad1 * m_numElmt, m_nquad0, 1.0,
1111 m_Deriv0, m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
1112 m_nquad0);
1113
1114 int cnt = 0;
1115 for (int i = 0; i < m_numElmt; ++i, cnt += nqtot)
1116 {
1117 // scale diff0 by geometric factor: 2/(1-z1)
1118 Vmath::Vmul(nqtot, &m_fac1[0], 1, diff0.get() + cnt, 1,
1119 diff0.get() + cnt, 1);
1120
1121 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1122 input.get() + cnt, m_nquad0, m_Deriv1, m_nquad1, 0.0,
1123 diff1.get() + cnt, m_nquad0);
1124
1125 // add to diff1 by diff0 scaled by: (1_z0)/(1-z1)
1126 Vmath::Vvtvp(nqtot, m_fac0.get(), 1, diff0.get() + cnt, 1,
1127 diff1.get() + cnt, 1, diff1.get() + cnt, 1);
1128 }
1129
1130 if (m_isDeformed)
1131 {
1132 Vmath::Vmul(nqcol, m_derivFac[2 * dir], 1, diff0, 1, output, 1);
1133 Vmath::Vvtvp(nqcol, m_derivFac[2 * dir + 1], 1, diff1, 1, output, 1,
1134 output, 1);
1135 }
1136 else
1137 {
1139 for (int e = 0; e < m_numElmt; ++e)
1140 {
1141 Vmath::Smul(m_nqe, m_derivFac[2 * dir][e], diff0 + e * m_nqe, 1,
1142 t = output + e * m_nqe, 1);
1143 Vmath::Svtvp(m_nqe, m_derivFac[2 * dir + 1][e],
1144 diff1 + e * m_nqe, 1, output + e * m_nqe, 1,
1145 t = output + e * m_nqe, 1);
1146 }
1147 }
1148 }
1149
1150protected:
1152 const int m_nquad0;
1153 const int m_nquad1;
1159
1160private:
1161 PhysDeriv_SumFac_Tri(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
1164 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
1165 m_nquad0(m_stdExp->GetNumPoints(0)),
1166 m_nquad1(m_stdExp->GetNumPoints(1))
1167 {
1168 m_coordim = pCollExp[0]->GetCoordim();
1169
1170 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1171
1172 const Array<OneD, const NekDouble> &z0 = m_stdExp->GetBasis(0)->GetZ();
1173 const Array<OneD, const NekDouble> &z1 = m_stdExp->GetBasis(1)->GetZ();
1175 // set up geometric factor: 0.5*(1+z0)
1176 for (int i = 0; i < m_nquad0; ++i)
1177 {
1178 for (int j = 0; j < m_nquad1; ++j)
1179 {
1180 m_fac0[i + j * m_nquad0] = 0.5 * (1 + z0[i]);
1181 }
1182 }
1183
1185 // set up geometric factor: 2/(1-z1)
1186 for (int i = 0; i < m_nquad0; ++i)
1187 {
1188 for (int j = 0; j < m_nquad1; ++j)
1189 {
1190 m_fac1[i + j * m_nquad0] = 2.0 / (1 - z1[j]);
1191 }
1192 }
1193
1194 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1195 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1197 }
1198};
1199
1200/// Factory initialisation for the PhysDeriv_SumFac_Tri operators
1201OperatorKey PhysDeriv_SumFac_Tri::m_typeArr[] = {
1204 PhysDeriv_SumFac_Tri::create, "PhysDeriv_SumFac_Tri"),
1207 PhysDeriv_SumFac_Tri::create, "PhysDeriv_SumFac_NodalTri")};
1208
1209/**
1210 * @brief Phys deriv operator using sum-factorisation (Hex)
1211 */
1212class PhysDeriv_SumFac_Hex final : virtual public Operator,
1213 virtual public PhysDeriv_Helper
1214{
1215public:
1217
1218 ~PhysDeriv_SumFac_Hex() final = default;
1219
1220 void operator()(const Array<OneD, const NekDouble> &input,
1221 Array<OneD, NekDouble> &output0,
1222 Array<OneD, NekDouble> &output1,
1223 Array<OneD, NekDouble> &output2,
1224 Array<OneD, NekDouble> &wsp) final
1225 {
1226 int nPhys = m_stdExp->GetTotPoints();
1227 int ntot = m_numElmt * nPhys;
1228 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
1231 out[0] = output0;
1232 out[1] = output1;
1233 out[2] = output2;
1234
1235 for (int i = 0; i < 3; ++i)
1236 {
1237 Diff[i] = wsp + i * ntot;
1238 }
1239
1241 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1242 &Diff[0][0], m_nquad0);
1243
1244 for (int i = 0; i < m_numElmt; ++i)
1245 {
1246 for (int j = 0; j < m_nquad2; ++j)
1247 {
1248 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1249 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1250 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1251 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1252 m_nquad0);
1253 }
1254
1255 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1256 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1257 m_nquad2, 0.0, &Diff[2][i * nPhys],
1258 m_nquad0 * m_nquad1);
1259 }
1260
1261 // calculate full derivative
1262 if (m_isDeformed)
1263 {
1264 for (int i = 0; i < m_coordim; ++i)
1265 {
1266 Vmath::Vmul(ntot, m_derivFac[i * 3], 1, Diff[0], 1, out[i], 1);
1267 for (int j = 1; j < 3; ++j)
1268 {
1269 Vmath::Vvtvp(ntot, m_derivFac[i * 3 + j], 1, Diff[j], 1,
1270 out[i], 1, out[i], 1);
1271 }
1272 }
1273 }
1274 else
1275 {
1277 for (int e = 0; e < m_numElmt; ++e)
1278 {
1279 for (int i = 0; i < m_coordim; ++i)
1280 {
1281 Vmath::Smul(m_nqe, m_derivFac[i * 3][e],
1282 Diff[0] + e * m_nqe, 1, t = out[i] + e * m_nqe,
1283 1);
1284
1285 for (int j = 1; j < 3; ++j)
1286 {
1287 Vmath::Svtvp(m_nqe, m_derivFac[i * 3 + j][e],
1288 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
1289 1, t = out[i] + e * m_nqe, 1);
1290 }
1291 }
1292 }
1293 }
1294 }
1295
1296 void operator()(int dir, const Array<OneD, const NekDouble> &input,
1297 Array<OneD, NekDouble> &output,
1298 Array<OneD, NekDouble> &wsp) final
1299 {
1300 int nPhys = m_stdExp->GetTotPoints();
1301 int ntot = m_numElmt * nPhys;
1302 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
1304
1305 for (int i = 0; i < 3; ++i)
1306 {
1307 Diff[i] = wsp + i * ntot;
1308 }
1309
1311 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1312 &Diff[0][0], m_nquad0);
1313
1314 for (int i = 0; i < m_numElmt; ++i)
1315 {
1316 for (int j = 0; j < m_nquad2; ++j)
1317 {
1318 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1319 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1320 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1321 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1322 m_nquad0);
1323 }
1324
1325 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1326 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1327 m_nquad2, 0.0, &Diff[2][i * nPhys],
1328 m_nquad0 * m_nquad1);
1329 }
1330
1331 // calculate full derivative
1332 if (m_isDeformed)
1333 {
1334 // calculate full derivative
1335 Vmath::Vmul(ntot, m_derivFac[dir * 3], 1, Diff[0], 1, output, 1);
1336 for (int j = 1; j < 3; ++j)
1337 {
1338 Vmath::Vvtvp(ntot, m_derivFac[dir * 3 + j], 1, Diff[j], 1,
1339 output, 1, output, 1);
1340 }
1341 }
1342 else
1343 {
1345 for (int e = 0; e < m_numElmt; ++e)
1346 {
1347 Vmath::Smul(m_nqe, m_derivFac[dir * 3][e], Diff[0] + e * m_nqe,
1348 1, t = output + e * m_nqe, 1);
1349
1350 for (int j = 1; j < 3; ++j)
1351 {
1352 Vmath::Svtvp(m_nqe, m_derivFac[dir * 3 + j][e],
1353 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
1354 t = output + e * m_nqe, 1);
1355 }
1356 }
1357 }
1358 }
1359
1360protected:
1363 const int m_nquad0;
1364 const int m_nquad1;
1365 const int m_nquad2;
1369
1370private:
1371 PhysDeriv_SumFac_Hex(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
1374 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
1375 m_nquad0(m_stdExp->GetNumPoints(0)),
1376 m_nquad1(m_stdExp->GetNumPoints(1)),
1377 m_nquad2(m_stdExp->GetNumPoints(2))
1378 {
1379 m_coordim = pCollExp[0]->GetCoordim();
1380
1381 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1382
1383 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1384 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1385 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
1386
1388 }
1389};
1390
1391/// Factory initialisation for the PhysDeriv_SumFac_Hex operators
1392OperatorKey PhysDeriv_SumFac_Hex::m_typeArr[] = {
1395 PhysDeriv_SumFac_Hex::create, "PhysDeriv_SumFac_Hex")};
1396
1397/**
1398 * @brief Phys deriv operator using sum-factorisation (Tet)
1399 */
1400class PhysDeriv_SumFac_Tet final : virtual public Operator,
1401 virtual public PhysDeriv_Helper
1402{
1403public:
1405
1406 ~PhysDeriv_SumFac_Tet() final = default;
1407
1408 void operator()(const Array<OneD, const NekDouble> &input,
1409 Array<OneD, NekDouble> &output0,
1410 Array<OneD, NekDouble> &output1,
1411 Array<OneD, NekDouble> &output2,
1412 Array<OneD, NekDouble> &wsp) final
1413 {
1414 int nPhys = m_stdExp->GetTotPoints();
1415 int ntot = m_numElmt * nPhys;
1416 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
1419 out[0] = output0;
1420 out[1] = output1;
1421 out[2] = output2;
1422
1423 for (int i = 0; i < 3; ++i)
1424 {
1425 Diff[i] = wsp + i * ntot;
1426 }
1427
1428 // dEta0
1430 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1431 &Diff[0][0], m_nquad0);
1432
1433 // dEta2
1434 for (int i = 0; i < m_numElmt; ++i)
1435 {
1436 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1437 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1438 m_nquad2, 0.0, &Diff[2][i * nPhys],
1439 m_nquad0 * m_nquad1);
1440 }
1441
1442 for (int i = 0; i < m_numElmt; ++i)
1443 {
1444 // dEta1
1445 for (int j = 0; j < m_nquad2; ++j)
1446 {
1447 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1448 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1449 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1450 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1451 m_nquad0);
1452 }
1453
1454 // dxi2 = (1 + eta_1)/(1 -eta_2)*dEta1 + dEta2
1455 Vmath::Vvtvp(nPhys, m_fac3.get(), 1, Diff[1].get() + i * nPhys, 1,
1456 Diff[2].get() + i * nPhys, 1,
1457 Diff[2].get() + i * nPhys, 1);
1458
1459 // dxi1 = 2/(1 - eta_2) dEta1
1460 Vmath::Vmul(nPhys, m_fac2.get(), 1, Diff[1].get() + i * nPhys, 1,
1461 Diff[1].get() + i * nPhys, 1);
1462
1463 // dxi1 = 2.0(1+eta_0)/((1-eta_1)(1-eta_2)) dEta0 + dxi1
1464 Vmath::Vvtvp(nPhys, m_fac1.get(), 1, Diff[0].get() + i * nPhys, 1,
1465 Diff[1].get() + i * nPhys, 1,
1466 Diff[1].get() + i * nPhys, 1);
1467
1468 // dxi2 = 2.0(1+eta_0)/((1-eta_1)(1-eta_2)) dEta0 + dxi2
1469 Vmath::Vvtvp(nPhys, m_fac1.get(), 1, Diff[0].get() + i * nPhys, 1,
1470 Diff[2].get() + i * nPhys, 1,
1471 Diff[2].get() + i * nPhys, 1);
1472
1473 // dxi0 = 4.0/((1-eta_1)(1-eta_2)) dEta0
1474 Vmath::Vmul(nPhys, m_fac0.get(), 1, Diff[0].get() + i * nPhys, 1,
1475 Diff[0].get() + i * nPhys, 1);
1476 }
1477
1478 // calculate full derivative
1479 if (m_isDeformed)
1480 {
1481 for (int i = 0; i < m_coordim; ++i)
1482 {
1483 Vmath::Vmul(ntot, m_derivFac[i * 3], 1, Diff[0], 1, out[i], 1);
1484 for (int j = 1; j < 3; ++j)
1485 {
1486 Vmath::Vvtvp(ntot, m_derivFac[i * 3 + j], 1, Diff[j], 1,
1487 out[i], 1, out[i], 1);
1488 }
1489 }
1490 }
1491 else
1492 {
1494 for (int e = 0; e < m_numElmt; ++e)
1495 {
1496 for (int i = 0; i < m_coordim; ++i)
1497 {
1498 Vmath::Smul(m_nqe, m_derivFac[i * 3][e],
1499 Diff[0] + e * m_nqe, 1, t = out[i] + e * m_nqe,
1500 1);
1501 for (int j = 1; j < 3; ++j)
1502 {
1503 Vmath::Svtvp(m_nqe, m_derivFac[i * 3 + j][e],
1504 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
1505 1, t = out[i] + e * m_nqe, 1);
1506 }
1507 }
1508 }
1509 }
1510 }
1511
1512 void operator()(int dir, const Array<OneD, const NekDouble> &input,
1513 Array<OneD, NekDouble> &output,
1514 Array<OneD, NekDouble> &wsp) final
1515 {
1516 int nPhys = m_stdExp->GetTotPoints();
1517 int ntot = m_numElmt * nPhys;
1518 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
1520
1521 for (int i = 0; i < 3; ++i)
1522 {
1523 Diff[i] = wsp + i * ntot;
1524 }
1525
1526 // dEta0
1528 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1529 &Diff[0][0], m_nquad0);
1530
1531 // dEta2
1532 for (int i = 0; i < m_numElmt; ++i)
1533 {
1534 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1535 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1536 m_nquad2, 0.0, &Diff[2][i * nPhys],
1537 m_nquad0 * m_nquad1);
1538 }
1539
1540 for (int i = 0; i < m_numElmt; ++i)
1541 {
1542 // dEta1
1543 for (int j = 0; j < m_nquad2; ++j)
1544 {
1545 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1546 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1547 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1548 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1549 m_nquad0);
1550 }
1551
1552 // dxi2 = (1 + eta_1)/(1 -eta_2)*dEta1 + dEta2
1553 Vmath::Vvtvp(nPhys, m_fac3.get(), 1, Diff[1].get() + i * nPhys, 1,
1554 Diff[2].get() + i * nPhys, 1,
1555 Diff[2].get() + i * nPhys, 1);
1556
1557 // dxi1 = 2/(1 - eta_2) dEta1
1558 Vmath::Vmul(nPhys, m_fac2.get(), 1, Diff[1].get() + i * nPhys, 1,
1559 Diff[1].get() + i * nPhys, 1);
1560
1561 // dxi1 = 2.0(1+eta_0)/((1-eta_1)(1-eta_2)) dEta0 + dxi1
1562 Vmath::Vvtvp(nPhys, m_fac1.get(), 1, Diff[0].get() + i * nPhys, 1,
1563 Diff[1].get() + i * nPhys, 1,
1564 Diff[1].get() + i * nPhys, 1);
1565
1566 // dxi2 = 2.0(1+eta_0)/((1-eta_1)(1-eta_2)) dEta0 + dxi2
1567 Vmath::Vvtvp(nPhys, m_fac1.get(), 1, Diff[0].get() + i * nPhys, 1,
1568 Diff[2].get() + i * nPhys, 1,
1569 Diff[2].get() + i * nPhys, 1);
1570
1571 // dxi0 = 4.0/((1-eta_1)(1-eta_2)) dEta0
1572 Vmath::Vmul(nPhys, m_fac0.get(), 1, Diff[0].get() + i * nPhys, 1,
1573 Diff[0].get() + i * nPhys, 1);
1574 }
1575
1576 // calculate full derivative
1577 if (m_isDeformed)
1578 {
1579 // calculate full derivative
1580 Vmath::Vmul(ntot, m_derivFac[dir * 3], 1, Diff[0], 1, output, 1);
1581 for (int j = 1; j < 3; ++j)
1582 {
1583 Vmath::Vvtvp(ntot, m_derivFac[dir * 3 + j], 1, Diff[j], 1,
1584 output, 1, output, 1);
1585 }
1586 }
1587 else
1588 {
1590 for (int e = 0; e < m_numElmt; ++e)
1591 {
1592 Vmath::Smul(m_nqe, m_derivFac[dir * 3][e], Diff[0] + e * m_nqe,
1593 1, t = output + e * m_nqe, 1);
1594 for (int j = 1; j < 3; ++j)
1595 {
1596 Vmath::Svtvp(m_nqe, m_derivFac[dir * 3 + j][e],
1597 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
1598 t = output + e * m_nqe, 1);
1599 }
1600 }
1601 }
1602 }
1603
1604protected:
1607 const int m_nquad0;
1608 const int m_nquad1;
1609 const int m_nquad2;
1617
1618private:
1619 PhysDeriv_SumFac_Tet(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
1622 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
1623 m_nquad0(m_stdExp->GetNumPoints(0)),
1624 m_nquad1(m_stdExp->GetNumPoints(1)),
1625 m_nquad2(m_stdExp->GetNumPoints(2))
1626 {
1627 m_coordim = pCollExp[0]->GetCoordim();
1628
1629 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1630
1631 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1632 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1633 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
1634
1636
1637 const Array<OneD, const NekDouble> &z0 = m_stdExp->GetBasis(0)->GetZ();
1638 const Array<OneD, const NekDouble> &z1 = m_stdExp->GetBasis(1)->GetZ();
1639 const Array<OneD, const NekDouble> &z2 = m_stdExp->GetBasis(2)->GetZ();
1640
1645
1646 // calculate 2.0/((1-eta_1)(1-eta_2))
1647 for (int i = 0; i < m_nquad0; ++i)
1648 {
1649 for (int j = 0; j < m_nquad1; ++j)
1650 {
1651 for (int k = 0; k < m_nquad2; ++k)
1652 {
1653 m_fac0[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1654 4.0 / ((1 - z1[j]) * (1 - z2[k]));
1655 m_fac1[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1656 2.0 * (1 + z0[i]) / ((1 - z1[j]) * (1 - z2[k]));
1657 m_fac2[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1658 2.0 / (1 - z2[k]);
1659 m_fac3[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1660 (1 + z1[j]) / (1 - z2[k]);
1661 }
1662 }
1663 }
1664 }
1665};
1666
1667/// Factory initialisation for the PhysDeriv_SumFac_Tet operators
1668OperatorKey PhysDeriv_SumFac_Tet::m_typeArr[] = {
1671 PhysDeriv_SumFac_Tet::create, "PhysDeriv_SumFac_Tet")};
1672
1673/**
1674 * @brief Phys deriv operator using sum-factorisation (Prism)
1675 */
1676class PhysDeriv_SumFac_Prism final : virtual public Operator,
1677 virtual public PhysDeriv_Helper
1678{
1679public:
1681
1682 ~PhysDeriv_SumFac_Prism() final = default;
1683
1684 void operator()(const Array<OneD, const NekDouble> &input,
1685 Array<OneD, NekDouble> &output0,
1686 Array<OneD, NekDouble> &output1,
1687 Array<OneD, NekDouble> &output2,
1688 Array<OneD, NekDouble> &wsp) final
1689 {
1690 int nPhys = m_stdExp->GetTotPoints();
1691 int ntot = m_numElmt * nPhys;
1692 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
1695 out[0] = output0;
1696 out[1] = output1;
1697 out[2] = output2;
1698
1699 for (int i = 0; i < 3; ++i)
1700 {
1701 Diff[i] = wsp + i * ntot;
1702 }
1703
1704 // dEta0
1706 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1707 &Diff[0][0], m_nquad0);
1708
1709 int cnt = 0;
1710 for (int i = 0; i < m_numElmt; ++i)
1711 {
1712 // dEta 1
1713 for (int j = 0; j < m_nquad2; ++j)
1714 {
1715 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1716 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1717 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1718 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1719 m_nquad0);
1720 }
1721
1722 // dEta 2
1723 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1724 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1725 m_nquad2, 0.0, &Diff[2][i * nPhys],
1726 m_nquad0 * m_nquad1);
1727
1728 // dxi0 = 2/(1-eta_2) d Eta_0
1729 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[0].get() + cnt, 1,
1730 Diff[0].get() + cnt, 1);
1731
1732 // dxi2 = (1+eta0)/(1-eta_2) d Eta_0 + d/dEta2;
1733 Vmath::Vvtvp(nPhys, &m_fac1[0], 1, Diff[0].get() + cnt, 1,
1734 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
1735 cnt += nPhys;
1736 }
1737
1738 // calculate full derivative
1739 if (m_isDeformed)
1740 {
1741 for (int i = 0; i < m_coordim; ++i)
1742 {
1743 Vmath::Vmul(ntot, m_derivFac[i * 3], 1, Diff[0], 1, out[i], 1);
1744 for (int j = 1; j < 3; ++j)
1745 {
1746 Vmath::Vvtvp(ntot, m_derivFac[i * 3 + j], 1, Diff[j], 1,
1747 out[i], 1, out[i], 1);
1748 }
1749 }
1750 }
1751 else
1752 {
1754 for (int e = 0; e < m_numElmt; ++e)
1755 {
1756 for (int i = 0; i < m_coordim; ++i)
1757 {
1758 Vmath::Smul(m_nqe, m_derivFac[i * 3][e],
1759 Diff[0] + e * m_nqe, 1, t = out[i] + e * m_nqe,
1760 1);
1761
1762 for (int j = 1; j < 3; ++j)
1763 {
1764 Vmath::Svtvp(m_nqe, m_derivFac[i * 3 + j][e],
1765 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
1766 1, t = out[i] + e * m_nqe, 1);
1767 }
1768 }
1769 }
1770 }
1771 }
1772
1773 void operator()(int dir, const Array<OneD, const NekDouble> &input,
1774 Array<OneD, NekDouble> &output,
1775 Array<OneD, NekDouble> &wsp) final
1776 {
1777 int nPhys = m_stdExp->GetTotPoints();
1778 int ntot = m_numElmt * nPhys;
1779 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
1781
1782 for (int i = 0; i < 3; ++i)
1783 {
1784 Diff[i] = wsp + i * ntot;
1785 }
1786
1787 // dEta0
1789 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1790 &Diff[0][0], m_nquad0);
1791
1792 int cnt = 0;
1793 for (int i = 0; i < m_numElmt; ++i)
1794 {
1795 // dEta 1
1796 for (int j = 0; j < m_nquad2; ++j)
1797 {
1798 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1799 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1800 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1801 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1802 m_nquad0);
1803 }
1804
1805 // dEta 2
1806 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1807 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1808 m_nquad2, 0.0, &Diff[2][i * nPhys],
1809 m_nquad0 * m_nquad1);
1810
1811 // dxi0 = 2/(1-eta_2) d Eta_0
1812 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[0].get() + cnt, 1,
1813 Diff[0].get() + cnt, 1);
1814
1815 // dxi2 = (1+eta0)/(1-eta_2) d Eta_0 + d/dEta2;
1816 Vmath::Vvtvp(nPhys, &m_fac1[0], 1, Diff[0].get() + cnt, 1,
1817 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
1818 cnt += nPhys;
1819 }
1820
1821 // calculate full derivative
1822 if (m_isDeformed)
1823 {
1824 // calculate full derivative
1825 Vmath::Vmul(ntot, m_derivFac[dir * 3], 1, Diff[0], 1, output, 1);
1826 for (int j = 1; j < 3; ++j)
1827 {
1828 Vmath::Vvtvp(ntot, m_derivFac[dir * 3 + j], 1, Diff[j], 1,
1829 output, 1, output, 1);
1830 }
1831 }
1832 else
1833 {
1835 for (int e = 0; e < m_numElmt; ++e)
1836 {
1837 Vmath::Smul(m_nqe, m_derivFac[dir * 3][e], Diff[0] + e * m_nqe,
1838 1, t = output + e * m_nqe, 1);
1839
1840 for (int j = 1; j < 3; ++j)
1841 {
1842 Vmath::Svtvp(m_nqe, m_derivFac[dir * 3 + j][e],
1843 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
1844 t = output + e * m_nqe, 1);
1845 }
1846 }
1847 }
1848 }
1849
1850protected:
1853 const int m_nquad0;
1854 const int m_nquad1;
1855 const int m_nquad2;
1861
1862private:
1863 PhysDeriv_SumFac_Prism(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
1866 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
1867 m_nquad0(m_stdExp->GetNumPoints(0)),
1868 m_nquad1(m_stdExp->GetNumPoints(1)),
1869 m_nquad2(m_stdExp->GetNumPoints(2))
1870 {
1871 m_coordim = pCollExp[0]->GetCoordim();
1872
1873 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1874
1875 const Array<OneD, const NekDouble> &z0 = m_stdExp->GetBasis(0)->GetZ();
1876 const Array<OneD, const NekDouble> &z2 = m_stdExp->GetBasis(2)->GetZ();
1879 for (int i = 0; i < m_nquad0; ++i)
1880 {
1881 for (int j = 0; j < m_nquad1; ++j)
1882 {
1883 for (int k = 0; k < m_nquad2; ++k)
1884 {
1885 m_fac0[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1886 2.0 / (1 - z2[k]);
1887 m_fac1[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1888 0.5 * (1 + z0[i]);
1889 }
1890 }
1891 }
1892
1893 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1894 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1895 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
1896
1898 }
1899};
1900
1901/// Factory initialisation for the PhysDeriv_SumFac_Prism operators
1902OperatorKey PhysDeriv_SumFac_Prism::m_typeArr[] = {
1905 PhysDeriv_SumFac_Prism::create, "PhysDeriv_SumFac_Prism")};
1906
1907/**
1908 * @brief Phys deriv operator using sum-factorisation (Pyramid)
1909 */
1910class PhysDeriv_SumFac_Pyr final : virtual public Operator,
1911 virtual public PhysDeriv_Helper
1912{
1913public:
1915
1916 ~PhysDeriv_SumFac_Pyr() final = default;
1917
1918 void operator()(const Array<OneD, const NekDouble> &input,
1919 Array<OneD, NekDouble> &output0,
1920 Array<OneD, NekDouble> &output1,
1921 Array<OneD, NekDouble> &output2,
1922 Array<OneD, NekDouble> &wsp) final
1923 {
1924 int nPhys = m_stdExp->GetTotPoints();
1925 int ntot = m_numElmt * nPhys;
1926 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
1929 out[0] = output0;
1930 out[1] = output1;
1931 out[2] = output2;
1932
1933 for (int i = 0; i < 3; ++i)
1934 {
1935 Diff[i] = wsp + i * ntot;
1936 }
1937
1938 // dEta0
1940 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1941 &Diff[0][0], m_nquad0);
1942
1943 int cnt = 0;
1944 for (int i = 0; i < m_numElmt; ++i)
1945 {
1946 // dEta 1
1947 for (int j = 0; j < m_nquad2; ++j)
1948 {
1949 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1950 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1951 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1952 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1953 m_nquad0);
1954 }
1955
1956 // dEta 2
1957 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1958 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1959 m_nquad2, 0.0, &Diff[2][i * nPhys],
1960 m_nquad0 * m_nquad1);
1961
1962 // dxi0 = 2/(1-eta_2) d Eta_0
1963 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[0].get() + cnt, 1,
1964 Diff[0].get() + cnt, 1);
1965
1966 // dxi1 = 2/(1-eta_2) d Eta_1
1967 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[1].get() + cnt, 1,
1968 Diff[1].get() + cnt, 1);
1969
1970 // dxi2 = (1+eta0)/(1-eta_2) d Eta_0 + d/dEta2;
1971 Vmath::Vvtvp(nPhys, &m_fac1[0], 1, Diff[0].get() + cnt, 1,
1972 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
1973
1974 // dxi2 += (1+eta1)/(1-eta_2) d Eta_1
1975 Vmath::Vvtvp(nPhys, &m_fac2[0], 1, Diff[1].get() + cnt, 1,
1976 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
1977 cnt += nPhys;
1978 }
1979
1980 // calculate full derivative
1981 if (m_isDeformed)
1982 {
1983 for (int i = 0; i < m_coordim; ++i)
1984 {
1985 Vmath::Vmul(ntot, m_derivFac[i * 3], 1, Diff[0], 1, out[i], 1);
1986 for (int j = 1; j < 3; ++j)
1987 {
1988 Vmath::Vvtvp(ntot, m_derivFac[i * 3 + j], 1, Diff[j], 1,
1989 out[i], 1, out[i], 1);
1990 }
1991 }
1992 }
1993 else
1994 {
1996 for (int e = 0; e < m_numElmt; ++e)
1997 {
1998 for (int i = 0; i < m_coordim; ++i)
1999 {
2000 Vmath::Smul(m_nqe, m_derivFac[i * 3][e],
2001 Diff[0] + e * m_nqe, 1, t = out[i] + e * m_nqe,
2002 1);
2003
2004 for (int j = 1; j < 3; ++j)
2005 {
2006 Vmath::Svtvp(m_nqe, m_derivFac[i * 3 + j][e],
2007 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
2008 1, t = out[i] + e * m_nqe, 1);
2009 }
2010 }
2011 }
2012 }
2013 }
2014
2015 void operator()(int dir, const Array<OneD, const NekDouble> &input,
2016 Array<OneD, NekDouble> &output,
2017 Array<OneD, NekDouble> &wsp) final
2018 {
2019 int nPhys = m_stdExp->GetTotPoints();
2020 int ntot = m_numElmt * nPhys;
2021 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
2023
2024 for (int i = 0; i < 3; ++i)
2025 {
2026 Diff[i] = wsp + i * ntot;
2027 }
2028
2029 // dEta0
2031 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
2032 &Diff[0][0], m_nquad0);
2033
2034 int cnt = 0;
2035 for (int i = 0; i < m_numElmt; ++i)
2036 {
2037 // dEta 1
2038 for (int j = 0; j < m_nquad2; ++j)
2039 {
2040 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
2041 &input[i * nPhys + j * m_nquad0 * m_nquad1],
2042 m_nquad0, m_Deriv1, m_nquad1, 0.0,
2043 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
2044 m_nquad0);
2045 }
2046
2047 // dEta 2
2048 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
2049 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
2050 m_nquad2, 0.0, &Diff[2][i * nPhys],
2051 m_nquad0 * m_nquad1);
2052
2053 // dxi0 = 2/(1-eta_2) d Eta_0
2054 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[0].get() + cnt, 1,
2055 Diff[0].get() + cnt, 1);
2056
2057 // dxi1 = 2/(1-eta_2) d Eta_1
2058 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[1].get() + cnt, 1,
2059 Diff[1].get() + cnt, 1);
2060
2061 // dxi2 = (1+eta0)/(1-eta_2) d Eta_0 + d/dEta2;
2062 Vmath::Vvtvp(nPhys, &m_fac1[0], 1, Diff[0].get() + cnt, 1,
2063 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
2064
2065 // dxi2 = (1+eta1)/(1-eta_2) d Eta_1 + d/dEta2;
2066 Vmath::Vvtvp(nPhys, &m_fac2[0], 1, Diff[1].get() + cnt, 1,
2067 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
2068 cnt += nPhys;
2069 }
2070
2071 // calculate full derivative
2072 if (m_isDeformed)
2073 {
2074 // calculate full derivative
2075 Vmath::Vmul(ntot, m_derivFac[dir * 3], 1, Diff[0], 1, output, 1);
2076 for (int j = 1; j < 3; ++j)
2077 {
2078 Vmath::Vvtvp(ntot, m_derivFac[dir * 3 + j], 1, Diff[j], 1,
2079 output, 1, output, 1);
2080 }
2081 }
2082 else
2083 {
2085 for (int e = 0; e < m_numElmt; ++e)
2086 {
2087 Vmath::Smul(m_nqe, m_derivFac[dir * 3][e], Diff[0] + e * m_nqe,
2088 1, t = output + e * m_nqe, 1);
2089
2090 for (int j = 1; j < 3; ++j)
2091 {
2092 Vmath::Svtvp(m_nqe, m_derivFac[dir * 3 + j][e],
2093 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
2094 t = output + e * m_nqe, 1);
2095 }
2096 }
2097 }
2098 }
2099
2100protected:
2103 const int m_nquad0;
2104 const int m_nquad1;
2105 const int m_nquad2;
2112
2113private:
2114 PhysDeriv_SumFac_Pyr(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
2117 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
2118 m_nquad0(m_stdExp->GetNumPoints(0)),
2119 m_nquad1(m_stdExp->GetNumPoints(1)),
2120 m_nquad2(m_stdExp->GetNumPoints(2))
2121 {
2122 m_coordim = pCollExp[0]->GetCoordim();
2123
2124 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
2125
2126 const Array<OneD, const NekDouble> &z0 = m_stdExp->GetBasis(0)->GetZ();
2127 const Array<OneD, const NekDouble> &z1 = m_stdExp->GetBasis(1)->GetZ();
2128 const Array<OneD, const NekDouble> &z2 = m_stdExp->GetBasis(2)->GetZ();
2132
2133 int nq0_nq1 = m_nquad0 * m_nquad1;
2134 for (int i = 0; i < m_nquad0; ++i)
2135 {
2136 for (int j = 0; j < m_nquad1; ++j)
2137 {
2138 int ifac = i + j * m_nquad0;
2139 for (int k = 0; k < m_nquad2; ++k)
2140 {
2141 m_fac0[ifac + k * nq0_nq1] = 2.0 / (1 - z2[k]);
2142 m_fac1[ifac + k * nq0_nq1] = 0.5 * (1 + z0[i]);
2143 m_fac2[ifac + k * nq0_nq1] = 0.5 * (1 + z1[j]);
2144 }
2145 }
2146 }
2147
2148 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
2149 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
2150 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
2151
2153 }
2154};
2155
2156/// Factory initialisation for the PhysDeriv_SumFac_Pyr operators
2157OperatorKey PhysDeriv_SumFac_Pyr::m_typeArr[] = {
2160 PhysDeriv_SumFac_Pyr::create, "PhysDeriv_SumFac_Pyr")};
2161
2162} // namespace Nektar::Collections
#define ASSERTL0(condition, msg)
Definition: ErrorUtil.hpp:208
#define NEKERROR(type, msg)
Assert Level 0 – Fundamental assert which is used whether in FULLDEBUG, DEBUG or OPT compilation mode...
Definition: ErrorUtil.hpp:202
#define ASSERTL1(condition, msg)
Assert Level 1 – Debugging which is used whether in FULLDEBUG or DEBUG compilation mode....
Definition: ErrorUtil.hpp:242
#define OPERATOR_CREATE(cname)
Definition: Operator.h:43
unsigned int m_nElmtPad
size after padding
Array< OneD, NekDouble > m_input
padded input/output vectors
unsigned short m_coordim
coordinates dimension
Array< OneD, Array< OneD, NekDouble > > m_output
Base class for operators on a collection of elements.
Definition: Operator.h:138
StdRegions::StdExpansionSharedPtr m_stdExp
Definition: Operator.h:217
unsigned int m_numElmt
number of elements that the operator is applied on
Definition: Operator.h:219
unsigned int m_outputSize
number of modes or quadrature points that are taken as output from an operator
Definition: Operator.h:227
unsigned int m_inputSize
number of modes or quadrature points that are passed as input to an operator
Definition: Operator.h:224
Physical Derivative help class to calculate the size of the collection that is given as an input and ...
Definition: PhysDeriv.cpp:60
Phys deriv operator using element-wise operation.
Definition: PhysDeriv.cpp:422
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:491
PhysDeriv_IterPerExp(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:543
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:538
Phys deriv operator using matrix free operators.
Definition: PhysDeriv.cpp:272
PhysDeriv_MatrixFree(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:338
std::shared_ptr< MatrixFree::PhysDeriv > m_oper
Definition: PhysDeriv.cpp:336
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:318
Phys deriv operator using original LocalRegions implementation.
Definition: PhysDeriv.cpp:595
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:647
PhysDeriv_NoCollection(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:666
vector< StdRegions::StdExpansionSharedPtr > m_expList
Definition: PhysDeriv.cpp:663
Phys deriv operator using standard matrix approach.
Definition: PhysDeriv.cpp:78
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:195
Array< OneD, DNekMatSharedPtr > m_derivMat
Definition: PhysDeriv.cpp:194
PhysDeriv_StdMat(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:200
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:145
Phys deriv operator using sum-factorisation (Hex)
Definition: PhysDeriv.cpp:1214
PhysDeriv_SumFac_Hex(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:1371
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:1361
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:1296
Phys deriv operator using sum-factorisation (Prism)
Definition: PhysDeriv.cpp:1678
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:1773
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:1851
PhysDeriv_SumFac_Prism(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:1863
Phys deriv operator using sum-factorisation (Pyramid)
Definition: PhysDeriv.cpp:1912
PhysDeriv_SumFac_Pyr(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:2114
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:2101
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:2015
Phys deriv operator using sum-factorisation (Quad)
Definition: PhysDeriv.cpp:843
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:924
PhysDeriv_SumFac_Quad(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:978
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:973
Phys deriv operator using sum-factorisation (Segment)
Definition: PhysDeriv.cpp:713
PhysDeriv_SumFac_Seg(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:817
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:813
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:780
Phys deriv operator using sum-factorisation (Tet)
Definition: PhysDeriv.cpp:1402
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:1512
PhysDeriv_SumFac_Tet(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:1619
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:1605
Phys deriv operator using sum-factorisation (Tri)
Definition: PhysDeriv.cpp:1006
PhysDeriv_SumFac_Tri(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:1161
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:1096
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:1154
tKey RegisterCreatorFunction(tKey idKey, CreatorFunction classCreator, std::string pDesc="")
Register a class with the factory.
tBaseSharedPtr CreateInstance(tKey idKey, tParam... args)
Create an instance of the class referred to by idKey.
static std::shared_ptr< DataType > AllocateSharedPtr(const Args &...args)
Allocate a shared pointer from the memory pool.
static void Dgemm(const char &transa, const char &transb, const int &m, const int &n, const int &k, const double &alpha, const double *a, const int &lda, const double *b, const int &ldb, const double &beta, double *c, const int &ldc)
BLAS level 3: Matrix-matrix multiply C = A x B where op(A)[m x k], op(B)[k x n], C[m x n] DGEMM perfo...
Definition: Blas.hpp:383
std::tuple< LibUtilities::ShapeType, OperatorType, ImplementationType, ExpansionIsNodal > OperatorKey
Key for describing an Operator.
Definition: Operator.h:120
std::shared_ptr< CoalescedGeomData > CoalescedGeomDataSharedPtr
OperatorFactory & GetOperatorFactory()
Returns the singleton Operator factory object.
Definition: Operator.cpp:44
ConstFactorMap FactorMap
Definition: StdRegions.hpp:434
StdRegions::ConstFactorMap factors
double NekDouble
void Vmul(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Multiply vector z = x*y.
Definition: Vmath.hpp:72
void Svtvp(int n, const T alpha, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Svtvp (scalar times vector plus vector): z = alpha*x + y.
Definition: Vmath.hpp:396
void Vvtvp(int n, const T *w, const int incw, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
vvtvp (vector times vector plus vector): z = w*x + y
Definition: Vmath.hpp:366
void Smul(int n, const T alpha, const T *x, const int incx, T *y, const int incy)
Scalar multiply y = alpha*x.
Definition: Vmath.hpp:100
void Zero(int n, T *x, const int incx)
Zero vector.
Definition: Vmath.hpp:273
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
Definition: Vmath.hpp:825
STL namespace.