Nektar++
PhysDeriv.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: PhysDeriv.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// Permission is hereby granted, free of charge, to any person obtaining a
14// copy of this software and associated documentation files (the "Software"),
15// to deal in the Software without restriction, including without limitation
16// the rights to use, copy, modify, merge, publish, distribute, sublicense,
17// and/or sell copies of the Software, and to permit persons to whom the
18// Software is furnished to do so, subject to the following conditions:
19//
20// The above copyright notice and this permission notice shall be included
21// in all copies or substantial portions of the Software.
22//
23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29// DEALINGS IN THE SOFTWARE.
30//
31// Description: PhysDeriv operator implementations
32//
33///////////////////////////////////////////////////////////////////////////////
34
38#include <MatrixFreeOps/Operator.hpp>
39
40using namespace std;
41
42namespace Nektar::Collections
43{
44
52
53/**
54 * @brief Physical Derivative help class to calculate the size of the collection
55 * that is given as an input and as an output to the PhysDeriv Operator. The
56 * Operator evaluation is happenning in the physical space and the output is
57 * expected to be part of the physical space too.
58 */
59class PhysDeriv_Helper : virtual public Operator
60{
61protected:
63 {
64 // expect input to be number of elements by the number of quadrature
65 // points
66 m_inputSize = m_numElmt * m_stdExp->GetTotPoints();
67 // the derivate is using data from the physical space to evaluate the
68 // derivative in the physical space
70 }
71};
72
73/**
74 * @brief Phys deriv operator using standard matrix approach
75 */
76class PhysDeriv_StdMat final : virtual public Operator,
77 virtual public PhysDeriv_Helper
78{
79public:
81
82 ~PhysDeriv_StdMat() final = default;
83
84 void operator()(const Array<OneD, const NekDouble> &input,
85 Array<OneD, NekDouble> &output0,
86 Array<OneD, NekDouble> &output1,
87 Array<OneD, NekDouble> &output2,
88 Array<OneD, NekDouble> &wsp) final
89 {
90 int nPhys = m_stdExp->GetTotPoints();
91 int ntot = m_numElmt * nPhys;
92 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
95 out[0] = output0;
96 out[1] = output1;
97 out[2] = output2;
98
99 for (int i = 0; i < m_dim; ++i)
100 {
101 Diff[i] = wsp + i * ntot;
102 }
103
104 // calculate local derivatives
105 for (int i = 0; i < m_dim; ++i)
106 {
107 Blas::Dgemm('N', 'N', m_derivMat[i]->GetRows(), m_numElmt,
108 m_derivMat[i]->GetColumns(), 1.0,
109 m_derivMat[i]->GetRawPtr(), m_derivMat[i]->GetRows(),
110 input.get(), nPhys, 0.0, &Diff[i][0], nPhys);
111 }
112
113 // calculate full derivative
114 if (m_isDeformed)
115 {
116 for (int i = 0; i < m_coordim; ++i)
117 {
118 Vmath::Zero(ntot, out[i], 1);
119 for (int j = 0; j < m_dim; ++j)
120 {
121 Vmath::Vvtvp(ntot, m_derivFac[i * m_dim + j], 1, Diff[j], 1,
122 out[i], 1, out[i], 1);
123 }
124 }
125 }
126 else
127 {
129 for (int i = 0; i < m_coordim; ++i)
130 {
131 Vmath::Zero(ntot, out[i], 1);
132 for (int e = 0; e < m_numElmt; ++e)
133 {
134 for (int j = 0; j < m_dim; ++j)
135 {
136 Vmath::Svtvp(m_nqe, m_derivFac[i * m_dim + j][e],
137 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
138 1, t = out[i] + e * m_nqe, 1);
139 }
140 }
141 }
142 }
143 }
144
145 void operator()(int dir, const Array<OneD, const NekDouble> &input,
147 Array<OneD, NekDouble> &wsp) final
148 {
149 int nPhys = m_stdExp->GetTotPoints();
150 int ntot = m_numElmt * nPhys;
151 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
153
154 for (int i = 0; i < m_dim; ++i)
155 {
156 Diff[i] = wsp + i * ntot;
157 }
158
159 // calculate local derivatives
160 for (int i = 0; i < m_dim; ++i)
161 {
162 Blas::Dgemm('N', 'N', m_derivMat[i]->GetRows(), m_numElmt,
163 m_derivMat[i]->GetColumns(), 1.0,
164 m_derivMat[i]->GetRawPtr(), m_derivMat[i]->GetRows(),
165 input.get(), nPhys, 0.0, &Diff[i][0], nPhys);
166 }
167
168 // calculate full derivative
169 Vmath::Zero(ntot, output, 1);
170 if (m_isDeformed)
171 {
172 for (int j = 0; j < m_dim; ++j)
173 {
174 Vmath::Vvtvp(ntot, m_derivFac[dir * m_dim + j], 1, Diff[j], 1,
175 output, 1, output, 1);
176 }
177 }
178 else
179 {
181 for (int e = 0; e < m_numElmt; ++e)
182 {
183 for (int j = 0; j < m_dim; ++j)
184 {
185 Vmath::Svtvp(m_nqe, m_derivFac[dir * m_dim + j][e],
186 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
187 t = output + e * m_nqe, 1);
188 }
189 }
190 }
191 }
192
194 [[maybe_unused]] int coll_phys_offset) override
195 {
196 ASSERTL0(false, "Not valid for this operator.");
197 }
198
199protected:
202 int m_dim;
204
205private:
206 PhysDeriv_StdMat(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
209 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper()
210 {
211 int nqtot = pCollExp[0]->GetTotPoints();
212 m_dim = pCollExp[0]->GetShapeDimension();
213 m_coordim = pCollExp[0]->GetCoordim();
214
215 // set up a PhysDeriv StdMat.
217 for (int i = 0; i < m_dim; ++i)
218 {
219 Array<OneD, NekDouble> tmp(nqtot), tmp1(nqtot);
220 m_derivMat[i] =
222 for (int j = 0; j < nqtot; ++j)
223 {
224 Vmath::Zero(nqtot, tmp, 1);
225 tmp[j] = 1.0;
226 m_stdExp->PhysDeriv(i, tmp, tmp1);
227 Vmath::Vcopy(nqtot, &tmp1[0], 1,
228 &(m_derivMat[i]->GetPtr())[0] + j * nqtot, 1);
229 }
230 }
231 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
232 m_wspSize = 3 * nqtot * m_numElmt;
233 }
234};
235
236/// Factory initialisation for the PhysDeriv_StdMat operators
237OperatorKey PhysDeriv_StdMat::m_typeArr[] = {
240 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_Seg"),
243 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_Tri"),
246 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_NodalTri"),
249 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_Quad"),
252 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_Tet"),
255 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_NodalTet"),
258 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_Pyr"),
261 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_Prism"),
264 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_NodalPrism"),
267 PhysDeriv_StdMat::create, "PhysDeriv_StdMat_Hex"),
270 PhysDeriv_StdMat::create, "PhysDeriv_SumFac_Pyr")};
271
272/**
273 * @brief Phys deriv operator using matrix free operators.
274 */
275class PhysDeriv_MatrixFree final : virtual public Operator,
277 virtual public PhysDeriv_Helper
278{
279public:
281
282 ~PhysDeriv_MatrixFree() final = default;
283
284 void operator()(const Array<OneD, const NekDouble> &input,
285 Array<OneD, NekDouble> &output0,
286 Array<OneD, NekDouble> &output1,
287 Array<OneD, NekDouble> &output2,
288 [[maybe_unused]] Array<OneD, NekDouble> &wsp) final
289 {
290 if (m_isPadded)
291 {
292 // copy into padded vector
293 Vmath::Vcopy(m_nIn, input, 1, m_input, 1);
294 (*m_oper)(m_input, m_output);
295 }
296 else
297 {
298 (*m_oper)(input, m_output);
299 }
300
301 // currently using temporary local temporary space for output
302 // to allow for other operator call below which is
303 // directionally dependent
304 switch (m_coordim)
305 {
306 case 1:
307 Vmath::Vcopy(m_nOut, m_output[0], 1, output0, 1);
308 break;
309 case 2:
310 Vmath::Vcopy(m_nOut, m_output[0], 1, output0, 1);
311 Vmath::Vcopy(m_nOut, m_output[1], 1, output1, 1);
312 break;
313 case 3:
314 Vmath::Vcopy(m_nOut, m_output[0], 1, output0, 1);
315 Vmath::Vcopy(m_nOut, m_output[1], 1, output1, 1);
316 Vmath::Vcopy(m_nOut, m_output[2], 1, output2, 1);
317 break;
318 default:
319 NEKERROR(ErrorUtil::efatal, "Unknown coordinate dimension");
320 break;
321 }
322 }
323
324 void operator()(int dir, const Array<OneD, const NekDouble> &input,
326 [[maybe_unused]] Array<OneD, NekDouble> &wsp) final
327 {
328 if (m_isPadded)
329 {
330 // copy into padded vector
331 Vmath::Vcopy(m_nIn, input, 1, m_input, 1);
332 (*m_oper)(m_input, m_output);
333 }
334 else
335 {
336 (*m_oper)(input, m_output);
337 }
338 Vmath::Vcopy(m_nOut, m_output[dir], 1, output, 1);
339 }
340
342 [[maybe_unused]] int coll_phys_offset) override
343 {
344 ASSERTL0(false, "Not valid for this operator.");
345 }
346
347private:
348 std::shared_ptr<MatrixFree::PhysDeriv> m_oper;
349
350 PhysDeriv_MatrixFree(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
353 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
354 MatrixFreeOneInMultiOut(pCollExp[0]->GetCoordim(),
355 pCollExp[0]->GetStdExp()->GetTotPoints(),
356 pCollExp[0]->GetStdExp()->GetTotPoints(),
357 pCollExp.size())
358 {
359 // Check if deformed
360 bool deformed{pGeomData->IsDeformed(pCollExp)};
361 const auto dim = pCollExp[0]->GetStdExp()->GetShapeDimension();
362
363 if (m_isPadded == false) // declare local space non-padded case
364 {
365 int nOut = pCollExp[0]->GetStdExp()->GetTotPoints();
368 if (m_coordim == 2)
369 {
371 }
372 else if (m_coordim == 3)
373 {
376 }
377 }
378
379 // Basis vector.
380 std::vector<LibUtilities::BasisSharedPtr> basis(dim);
381 for (unsigned int i = 0; i < dim; ++i)
382 {
383 basis[i] = pCollExp[0]->GetBasis(i);
384 }
385
386 // Get shape type
387 auto shapeType = pCollExp[0]->GetStdExp()->DetShapeType();
388
389 // Generate operator string and create operator.
390 std::string op_string = "PhysDeriv";
391 op_string += MatrixFree::GetOpstring(shapeType, deformed);
393 op_string, basis, m_nElmtPad);
394
395 // Set derivative factors
396 oper->SetDF(pGeomData->GetDerivFactorsInterLeave(pCollExp, m_nElmtPad));
397
398 m_oper = std::dynamic_pointer_cast<MatrixFree::PhysDeriv>(oper);
399 ASSERTL0(m_oper, "Failed to cast pointer.");
400 }
401};
402
403/// Factory initialisation for the PhysDeriv_MatrixFree operators
404OperatorKey PhysDeriv_MatrixFree::m_typeArr[] = {
407 PhysDeriv_MatrixFree::create, "PhysDeriv_MatrixFree_Seg"),
410 PhysDeriv_MatrixFree::create, "PhysDeriv_MatrixFree_Tri"),
413 PhysDeriv_MatrixFree::create, "PhysDeriv_MatrixFree_Quad"),
416 PhysDeriv_MatrixFree::create, "PhysDeriv_MatrixFree_Hex"),
419 PhysDeriv_MatrixFree::create, "PhysDeriv_MatrixFree_Prism"),
422 PhysDeriv_MatrixFree::create, "PhysDeriv_MatrixFree_Pyr"),
425 PhysDeriv_MatrixFree::create, "PhysDeriv_MatrixFree_Tet")
426
427};
428
429/**
430 * @brief Phys deriv operator using element-wise operation
431 */
432class PhysDeriv_IterPerExp final : virtual public Operator,
433 virtual public PhysDeriv_Helper
434{
435public:
437
438 ~PhysDeriv_IterPerExp() final = default;
439
440 void operator()(const Array<OneD, const NekDouble> &input,
441 Array<OneD, NekDouble> &output0,
442 Array<OneD, NekDouble> &output1,
443 Array<OneD, NekDouble> &output2,
444 Array<OneD, NekDouble> &wsp) final
445 {
446 int nPhys = m_stdExp->GetTotPoints();
447 int ntot = m_numElmt * nPhys;
448 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
451 out[0] = output0;
452 out[1] = output1;
453 out[2] = output2;
454
455 for (int i = 0; i < m_dim; ++i)
456 {
457 Diff[i] = wsp + i * ntot;
458 }
459
460 // calculate local derivatives
461 for (int i = 0; i < m_numElmt; ++i)
462 {
463 m_stdExp->PhysDeriv(input + i * nPhys, tmp0 = Diff[0] + i * nPhys,
464 tmp1 = Diff[1] + i * nPhys,
465 tmp2 = Diff[2] + i * nPhys);
466 }
467
468 // calculate full derivative
469 if (m_isDeformed)
470 {
471 for (int i = 0; i < m_coordim; ++i)
472 {
473 Vmath::Vmul(ntot, m_derivFac[i * m_dim], 1, Diff[0], 1, out[i],
474 1);
475 for (int j = 1; j < m_dim; ++j)
476 {
477 Vmath::Vvtvp(ntot, m_derivFac[i * m_dim + j], 1, Diff[j], 1,
478 out[i], 1, out[i], 1);
479 }
480 }
481 }
482 else
483 {
485 for (int e = 0; e < m_numElmt; ++e)
486 {
487 for (int i = 0; i < m_coordim; ++i)
488 {
490 Diff[0] + e * m_nqe, 1, t = out[i] + e * m_nqe,
491 1);
492 for (int j = 1; j < m_dim; ++j)
493 {
494 Vmath::Svtvp(m_nqe, m_derivFac[i * m_dim + j][e],
495 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
496 1, t = out[i] + e * m_nqe, 1);
497 }
498 }
499 }
500 }
501 }
502
503 void operator()(int dir, const Array<OneD, const NekDouble> &input,
505 Array<OneD, NekDouble> &wsp) final
506 {
507 int nPhys = m_stdExp->GetTotPoints();
508 int ntot = m_numElmt * nPhys;
509 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
511
512 for (int i = 0; i < m_dim; ++i)
513 {
514 Diff[i] = wsp + i * ntot;
515 }
516
517 // calculate local derivatives
518 for (int i = 0; i < m_numElmt; ++i)
519 {
520 m_stdExp->PhysDeriv(input + i * nPhys, tmp0 = Diff[0] + i * nPhys,
521 tmp1 = Diff[1] + i * nPhys,
522 tmp2 = Diff[2] + i * nPhys);
523 }
524
525 Vmath::Zero(ntot, output, 1);
526 if (m_isDeformed)
527 {
528 for (int j = 0; j < m_dim; ++j)
529 {
530 Vmath::Vvtvp(ntot, m_derivFac[dir * m_dim + j], 1, Diff[j], 1,
531 output, 1, output, 1);
532 }
533 }
534 else
535 {
537 for (int e = 0; e < m_numElmt; ++e)
538 {
539 for (int j = 0; j < m_dim; ++j)
540 {
541 Vmath::Svtvp(m_nqe, m_derivFac[dir * m_dim + j][e],
542 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
543 t = output + e * m_nqe, 1);
544 }
545 }
546 }
547 }
548
550 [[maybe_unused]] int coll_phys_offset) override
551 {
552 ASSERTL0(false, "Not valid for this operator.");
553 }
554
555protected:
557 int m_dim;
559
560private:
561 PhysDeriv_IterPerExp(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
564 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper()
565 {
566 int nqtot = pCollExp[0]->GetTotPoints();
567 m_dim = pCollExp[0]->GetShapeDimension();
568 m_coordim = pCollExp[0]->GetCoordim();
569
570 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
571 m_wspSize = 3 * nqtot * m_numElmt;
572 }
573};
574
575/// Factory initialisation for the PhysDeriv_IterPerExp operators
576OperatorKey PhysDeriv_IterPerExp::m_typeArr[] = {
579 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_Seg"),
582 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_Tri"),
585 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_NodalTri"),
588 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_Quad"),
591 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_Tet"),
594 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_NodalTet"),
597 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_Pyr"),
600 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_Prism"),
603 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_NodalPrism"),
606 PhysDeriv_IterPerExp::create, "PhysDeriv_IterPerExp_Hex")};
607
608/**
609 * @brief Phys deriv operator using original LocalRegions implementation.
610 */
611class PhysDeriv_NoCollection final : virtual public Operator,
612 virtual public PhysDeriv_Helper
613{
614public:
616
617 ~PhysDeriv_NoCollection() final = default;
618
619 void operator()(const Array<OneD, const NekDouble> &input,
620 Array<OneD, NekDouble> &output0,
621 Array<OneD, NekDouble> &output1,
622 Array<OneD, NekDouble> &output2,
623 [[maybe_unused]] Array<OneD, NekDouble> &wsp) final
624 {
625 const int nPhys = m_expList[0]->GetTotPoints();
626 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
627
628 // calculate local derivatives
629 switch (m_expList[0]->GetShapeDimension())
630 {
631 case 1:
632 {
633 for (int i = 0; i < m_numElmt; ++i)
634 {
635 m_expList[i]->PhysDeriv(input + i * nPhys,
636 tmp0 = output0 + i * nPhys);
637 }
638 break;
639 }
640 case 2:
641 {
642 for (int i = 0; i < m_numElmt; ++i)
643 {
644 m_expList[i]->PhysDeriv(input + i * nPhys,
645 tmp0 = output0 + i * nPhys,
646 tmp1 = output1 + i * nPhys);
647 }
648 break;
649 }
650 case 3:
651 {
652 for (int i = 0; i < m_numElmt; ++i)
653 {
654 m_expList[i]->PhysDeriv(
655 input + i * nPhys, tmp0 = output0 + i * nPhys,
656 tmp1 = output1 + i * nPhys, tmp2 = output2 + i * nPhys);
657 }
658 break;
659 }
660 default:
661 ASSERTL0(false, "Unknown dimension.");
662 }
663 }
664
665 void operator()(int dir, const Array<OneD, const NekDouble> &input,
667 [[maybe_unused]] Array<OneD, NekDouble> &wsp) final
668 {
669 const int nPhys = m_expList[0]->GetTotPoints();
671
672 // calculate local derivatives
673 for (int i = 0; i < m_numElmt; ++i)
674 {
675 m_expList[i]->PhysDeriv(dir, input + i * nPhys,
676 tmp = output + i * nPhys);
677 }
678 }
679
681 [[maybe_unused]] int coll_phys_offset) override
682 {
683 ASSERTL0(false, "Not valid for this operator.");
684 }
685
686protected:
687 vector<StdRegions::StdExpansionSharedPtr> m_expList;
688
689private:
690 PhysDeriv_NoCollection(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
693 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper()
694 {
695 m_expList = pCollExp;
696 }
697};
698
699/// Factory initialisation for the PhysDeriv_NoCollection operators
700OperatorKey PhysDeriv_NoCollection::m_typeArr[] = {
703 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_Seg"),
706 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_Tri"),
709 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_NodalTri"),
712 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_Quad"),
715 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_Tet"),
718 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_NodalTet"),
721 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_Pyr"),
724 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_Prism"),
727 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_NodalPrism"),
730 PhysDeriv_NoCollection::create, "PhysDeriv_NoCollection_Hex")};
731
732/**
733 * @brief Phys deriv operator using sum-factorisation (Segment)
734 */
735class PhysDeriv_SumFac_Seg final : virtual public Operator,
736 virtual public PhysDeriv_Helper
737{
738public:
740
741 ~PhysDeriv_SumFac_Seg() final = default;
742
743 void operator()(const Array<OneD, const NekDouble> &input,
744 Array<OneD, NekDouble> &output0,
745 Array<OneD, NekDouble> &output1,
746 Array<OneD, NekDouble> &output2,
747 Array<OneD, NekDouble> &wsp) final
748 {
749 const int nqcol = m_nquad0 * m_numElmt;
750
751 ASSERTL1(wsp.size() == m_wspSize, "Incorrect workspace size");
752 ASSERTL1(input.size() >= nqcol, "Incorrect input size");
753
754 Array<OneD, NekDouble> diff0(nqcol, wsp);
755
757 m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
758 m_nquad0);
759
760 if (m_isDeformed)
761 {
762 Vmath::Vmul(nqcol, m_derivFac[0], 1, diff0, 1, output0, 1);
763
764 if (m_coordim == 2)
765 {
766 Vmath::Vmul(nqcol, m_derivFac[1], 1, diff0, 1, output1, 1);
767 }
768 else if (m_coordim == 3)
769 {
770 Vmath::Vmul(nqcol, m_derivFac[1], 1, diff0, 1, output1, 1);
771 Vmath::Vmul(nqcol, m_derivFac[2], 1, diff0, 1, output2, 1);
772 }
773 }
774 else
775 {
777 for (int e = 0; e < m_numElmt; ++e)
778 {
779 Vmath::Smul(m_nqe, m_derivFac[0][e], diff0 + e * m_nqe, 1,
780 t = output0 + e * m_nqe, 1);
781 }
782
783 if (m_coordim == 2)
784 {
785 for (int e = 0; e < m_numElmt; ++e)
786 {
787 Vmath::Smul(m_nqe, m_derivFac[1][e], diff0 + e * m_nqe, 1,
788 t = output1 + e * m_nqe, 1);
789 }
790 }
791 else if (m_coordim == 3)
792 {
793 for (int e = 0; e < m_numElmt; ++e)
794 {
795 Vmath::Smul(m_nqe, m_derivFac[1][e], diff0 + e * m_nqe, 1,
796 t = output1 + e * m_nqe, 1);
797 Vmath::Smul(m_nqe, m_derivFac[2][e], diff0 + e * m_nqe, 1,
798 t = output2 + e * m_nqe, 1);
799 }
800 }
801 }
802 }
803
804 void operator()(int dir, const Array<OneD, const NekDouble> &input,
806 Array<OneD, NekDouble> &wsp) final
807 {
808 const int nqcol = m_nquad0 * m_numElmt;
809
810 ASSERTL1(wsp.size() == m_wspSize, "Incorrect workspace size");
811 ASSERTL1(input.size() >= nqcol, "Incorrect input size");
812
813 Array<OneD, NekDouble> diff0(nqcol, wsp);
814
816 m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
817 m_nquad0);
818
819 if (m_isDeformed)
820 {
821 Vmath::Vmul(nqcol, m_derivFac[dir], 1, diff0, 1, output, 1);
822 }
823 else
824 {
826 for (int e = 0; e < m_numElmt; ++e)
827 {
828 Vmath::Smul(m_nqe, m_derivFac[0][e], diff0 + e * m_nqe, 1,
829 t = output + e * m_nqe, 1);
830 }
831 }
832 }
833
835 [[maybe_unused]] int coll_phys_offset) override
836 {
837 ASSERTL0(false, "Not valid for this operator.");
838 }
839
840protected:
842 const int m_nquad0;
845
846private:
847 PhysDeriv_SumFac_Seg(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
850 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
851 m_nquad0(m_stdExp->GetNumPoints(0))
852 {
853 m_coordim = pCollExp[0]->GetCoordim();
854
855 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
856
857 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
859 }
860};
861
862/// Factory initialisation for the PhysDeriv_SumFac_Seg operators
863OperatorKey PhysDeriv_SumFac_Seg::m_type =
866 PhysDeriv_SumFac_Seg::create, "PhysDeriv_SumFac_Seg");
867
868/**
869 * @brief Phys deriv operator using sum-factorisation (Quad)
870 */
871class PhysDeriv_SumFac_Quad final : virtual public Operator,
872 virtual public PhysDeriv_Helper
873{
874public:
876
877 ~PhysDeriv_SumFac_Quad() final = default;
878
879 void operator()(const Array<OneD, const NekDouble> &input,
880 Array<OneD, NekDouble> &output0,
881 Array<OneD, NekDouble> &output1,
882 Array<OneD, NekDouble> &output2,
883 Array<OneD, NekDouble> &wsp) final
884 {
885 const int nqtot = m_nquad0 * m_nquad1;
886 const int nqcol = nqtot * m_numElmt;
887
888 ASSERTL1(wsp.size() == m_wspSize, "Incorrect workspace size");
889 ASSERTL1(input.size() >= nqcol, "Incorrect input size");
890
891 Array<OneD, NekDouble> diff0(nqcol, wsp);
892 Array<OneD, NekDouble> diff1(nqcol, wsp + nqcol);
893
895 m_Deriv0, m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
896 m_nquad0);
897
898 int cnt = 0;
899 for (int i = 0; i < m_numElmt; ++i, cnt += nqtot)
900 {
901 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
902 input.get() + cnt, m_nquad0, m_Deriv1, m_nquad1, 0.0,
903 diff1.get() + cnt, m_nquad0);
904 }
905
906 if (m_isDeformed)
907 {
908 Vmath::Vmul(nqcol, m_derivFac[0], 1, diff0, 1, output0, 1);
909 Vmath::Vvtvp(nqcol, m_derivFac[1], 1, diff1, 1, output0, 1, output0,
910 1);
911 Vmath::Vmul(nqcol, m_derivFac[2], 1, diff0, 1, output1, 1);
912 Vmath::Vvtvp(nqcol, m_derivFac[3], 1, diff1, 1, output1, 1, output1,
913 1);
914
915 if (m_coordim == 3)
916 {
917 Vmath::Vmul(nqcol, m_derivFac[4], 1, diff0, 1, output2, 1);
918 Vmath::Vvtvp(nqcol, m_derivFac[5], 1, diff1, 1, output2, 1,
919 output2, 1);
920 }
921 }
922 else
923 {
925 for (int e = 0; e < m_numElmt; ++e)
926 {
927 Vmath::Smul(m_nqe, m_derivFac[0][e], diff0 + e * m_nqe, 1,
928 t = output0 + e * m_nqe, 1);
929 Vmath::Svtvp(m_nqe, m_derivFac[1][e], diff1 + e * m_nqe, 1,
930 output0 + e * m_nqe, 1, t = output0 + e * m_nqe,
931 1);
932
933 Vmath::Smul(m_nqe, m_derivFac[2][e], diff0 + e * m_nqe, 1,
934 t = output1 + e * m_nqe, 1);
935 Vmath::Svtvp(m_nqe, m_derivFac[3][e], diff1 + e * m_nqe, 1,
936 output1 + e * m_nqe, 1, t = output1 + e * m_nqe,
937 1);
938 }
939
940 if (m_coordim == 3)
941 {
942 for (int e = 0; e < m_numElmt; ++e)
943 {
944 Vmath::Smul(m_nqe, m_derivFac[4][e], diff0 + e * m_nqe, 1,
945 t = output2 + e * m_nqe, 1);
946 Vmath::Svtvp(m_nqe, m_derivFac[5][e], diff1 + e * m_nqe, 1,
947 output2 + e * m_nqe, 1,
948 t = output2 + e * m_nqe, 1);
949 }
950 }
951 }
952 }
953
954 void operator()(int dir, const Array<OneD, const NekDouble> &input,
956 Array<OneD, NekDouble> &wsp) final
957 {
958 const int nqtot = m_nquad0 * m_nquad1;
959 const int nqcol = nqtot * m_numElmt;
960
961 ASSERTL1(wsp.size() == m_wspSize, "Incorrect workspace size");
962 ASSERTL1(input.size() >= nqcol, "Incorrect input size");
963
964 Array<OneD, NekDouble> diff0(nqcol, wsp);
965 Array<OneD, NekDouble> diff1(nqcol, wsp + nqcol);
966
968 m_Deriv0, m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
969 m_nquad0);
970
971 int cnt = 0;
972 for (int i = 0; i < m_numElmt; ++i, cnt += nqtot)
973 {
974 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
975 input.get() + cnt, m_nquad0, m_Deriv1, m_nquad1, 0.0,
976 diff1.get() + cnt, m_nquad0);
977 }
978
979 if (m_isDeformed)
980 {
981 Vmath::Vmul(nqcol, m_derivFac[2 * dir], 1, diff0, 1, output, 1);
982 Vmath::Vvtvp(nqcol, m_derivFac[2 * dir + 1], 1, diff1, 1, output, 1,
983 output, 1);
984 }
985 else
986 {
988 for (int e = 0; e < m_numElmt; ++e)
989 {
990 Vmath::Smul(m_nqe, m_derivFac[2 * dir][e], diff0 + e * m_nqe, 1,
991 t = output + e * m_nqe, 1);
992 Vmath::Svtvp(m_nqe, m_derivFac[2 * dir + 1][e],
993 diff1 + e * m_nqe, 1, output + e * m_nqe, 1,
994 t = output + e * m_nqe, 1);
995 }
996 }
997 }
998
1000 [[maybe_unused]] int coll_phys_offset) override
1001 {
1002 ASSERTL0(false, "Not valid for this operator.");
1003 }
1004
1005protected:
1007 const int m_nquad0;
1008 const int m_nquad1;
1012
1013private:
1014 PhysDeriv_SumFac_Quad(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
1017 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
1018 m_nquad0(m_stdExp->GetNumPoints(0)),
1019 m_nquad1(m_stdExp->GetNumPoints(1))
1020 {
1021 m_coordim = pCollExp[0]->GetCoordim();
1022
1023 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1024
1025 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1026 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1028 }
1029};
1030
1031/// Factory initialisation for the PhysDeriv_SumFac_Quad operators
1032OperatorKey PhysDeriv_SumFac_Quad::m_type =
1035 PhysDeriv_SumFac_Quad::create, "PhysDeriv_SumFac_Quad");
1036
1037/**
1038 * @brief Phys deriv operator using sum-factorisation (Tri)
1039 */
1040class PhysDeriv_SumFac_Tri final : virtual public Operator,
1041 virtual public PhysDeriv_Helper
1042{
1043public:
1045
1046 ~PhysDeriv_SumFac_Tri() final = default;
1047
1048 void operator()(const Array<OneD, const NekDouble> &input,
1049 Array<OneD, NekDouble> &output0,
1050 Array<OneD, NekDouble> &output1,
1051 Array<OneD, NekDouble> &output2,
1052 Array<OneD, NekDouble> &wsp) final
1053 {
1054 const int nqtot = m_nquad0 * m_nquad1;
1055 const int nqcol = nqtot * m_numElmt;
1056
1057 ASSERTL1(wsp.size() == m_wspSize, "Incorrect workspace size");
1058 ASSERTL1(input.size() >= nqcol, "Incorrect input size");
1059
1060 Array<OneD, NekDouble> diff0(nqcol, wsp);
1061 Array<OneD, NekDouble> diff1(nqcol, wsp + nqcol);
1062
1063 // Tensor Product Derivative
1064 Blas::Dgemm('N', 'N', m_nquad0, m_nquad1 * m_numElmt, m_nquad0, 1.0,
1065 m_Deriv0, m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
1066 m_nquad0);
1067
1068 int cnt = 0;
1069 for (int i = 0; i < m_numElmt; ++i, cnt += nqtot)
1070 {
1071 // scale diff0 by geometric factor: 2/(1-z1)
1072 Vmath::Vmul(nqtot, &m_fac1[0], 1, diff0.get() + cnt, 1,
1073 diff0.get() + cnt, 1);
1074
1075 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1076 input.get() + cnt, m_nquad0, m_Deriv1, m_nquad1, 0.0,
1077 diff1.get() + cnt, m_nquad0);
1078
1079 // add to diff1 by diff0 scaled by: (1_z0)/(1-z1)
1080 Vmath::Vvtvp(nqtot, m_fac0.get(), 1, diff0.get() + cnt, 1,
1081 diff1.get() + cnt, 1, diff1.get() + cnt, 1);
1082 }
1083
1084 if (m_isDeformed)
1085 {
1086 Vmath::Vmul(nqcol, m_derivFac[0], 1, diff0, 1, output0, 1);
1087 Vmath::Vvtvp(nqcol, m_derivFac[1], 1, diff1, 1, output0, 1, output0,
1088 1);
1089 Vmath::Vmul(nqcol, m_derivFac[2], 1, diff0, 1, output1, 1);
1090 Vmath::Vvtvp(nqcol, m_derivFac[3], 1, diff1, 1, output1, 1, output1,
1091 1);
1092
1093 if (m_coordim == 3)
1094 {
1095 Vmath::Vmul(nqcol, m_derivFac[4], 1, diff0, 1, output2, 1);
1096 Vmath::Vvtvp(nqcol, m_derivFac[5], 1, diff1, 1, output2, 1,
1097 output2, 1);
1098 }
1099 }
1100 else
1101 {
1103 for (int e = 0; e < m_numElmt; ++e)
1104 {
1105 Vmath::Smul(m_nqe, m_derivFac[0][e], diff0 + e * m_nqe, 1,
1106 t = output0 + e * m_nqe, 1);
1107 Vmath::Svtvp(m_nqe, m_derivFac[1][e], diff1 + e * m_nqe, 1,
1108 output0 + e * m_nqe, 1, t = output0 + e * m_nqe,
1109 1);
1110
1111 Vmath::Smul(m_nqe, m_derivFac[2][e], diff0 + e * m_nqe, 1,
1112 t = output1 + e * m_nqe, 1);
1113 Vmath::Svtvp(m_nqe, m_derivFac[3][e], diff1 + e * m_nqe, 1,
1114 output1 + e * m_nqe, 1, t = output1 + e * m_nqe,
1115 1);
1116 }
1117
1118 if (m_coordim == 3)
1119 {
1120 for (int e = 0; e < m_numElmt; ++e)
1121 {
1122 Vmath::Smul(m_nqe, m_derivFac[4][e], diff0 + e * m_nqe, 1,
1123 t = output2 + e * m_nqe, 1);
1124 Vmath::Svtvp(m_nqe, m_derivFac[5][e], diff1 + e * m_nqe, 1,
1125 output2 + e * m_nqe, 1,
1126 t = output2 + e * m_nqe, 1);
1127 }
1128 }
1129 }
1130 }
1131
1132 void operator()(int dir, const Array<OneD, const NekDouble> &input,
1133 Array<OneD, NekDouble> &output,
1134 Array<OneD, NekDouble> &wsp) final
1135 {
1136 const int nqtot = m_nquad0 * m_nquad1;
1137 const int nqcol = nqtot * m_numElmt;
1138
1139 ASSERTL1(wsp.size() == m_wspSize, "Incorrect workspace size");
1140 ASSERTL1(input.size() >= nqcol, "Incorrect input size");
1141
1142 Array<OneD, NekDouble> diff0(nqcol, wsp);
1143 Array<OneD, NekDouble> diff1(nqcol, wsp + nqcol);
1144
1145 // Tensor Product Derivative
1146 Blas::Dgemm('N', 'N', m_nquad0, m_nquad1 * m_numElmt, m_nquad0, 1.0,
1147 m_Deriv0, m_nquad0, input.get(), m_nquad0, 0.0, diff0.get(),
1148 m_nquad0);
1149
1150 int cnt = 0;
1151 for (int i = 0; i < m_numElmt; ++i, cnt += nqtot)
1152 {
1153 // scale diff0 by geometric factor: 2/(1-z1)
1154 Vmath::Vmul(nqtot, &m_fac1[0], 1, diff0.get() + cnt, 1,
1155 diff0.get() + cnt, 1);
1156
1157 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1158 input.get() + cnt, m_nquad0, m_Deriv1, m_nquad1, 0.0,
1159 diff1.get() + cnt, m_nquad0);
1160
1161 // add to diff1 by diff0 scaled by: (1_z0)/(1-z1)
1162 Vmath::Vvtvp(nqtot, m_fac0.get(), 1, diff0.get() + cnt, 1,
1163 diff1.get() + cnt, 1, diff1.get() + cnt, 1);
1164 }
1165
1166 if (m_isDeformed)
1167 {
1168 Vmath::Vmul(nqcol, m_derivFac[2 * dir], 1, diff0, 1, output, 1);
1169 Vmath::Vvtvp(nqcol, m_derivFac[2 * dir + 1], 1, diff1, 1, output, 1,
1170 output, 1);
1171 }
1172 else
1173 {
1175 for (int e = 0; e < m_numElmt; ++e)
1176 {
1177 Vmath::Smul(m_nqe, m_derivFac[2 * dir][e], diff0 + e * m_nqe, 1,
1178 t = output + e * m_nqe, 1);
1179 Vmath::Svtvp(m_nqe, m_derivFac[2 * dir + 1][e],
1180 diff1 + e * m_nqe, 1, output + e * m_nqe, 1,
1181 t = output + e * m_nqe, 1);
1182 }
1183 }
1184 }
1185
1187 [[maybe_unused]] int coll_phys_offset) override
1188 {
1189 ASSERTL0(false, "Not valid for this operator.");
1190 }
1191
1192protected:
1194 const int m_nquad0;
1195 const int m_nquad1;
1201
1202private:
1203 PhysDeriv_SumFac_Tri(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
1206 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
1207 m_nquad0(m_stdExp->GetNumPoints(0)),
1208 m_nquad1(m_stdExp->GetNumPoints(1))
1209 {
1210 m_coordim = pCollExp[0]->GetCoordim();
1211
1212 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1213
1214 const Array<OneD, const NekDouble> &z0 = m_stdExp->GetBasis(0)->GetZ();
1215 const Array<OneD, const NekDouble> &z1 = m_stdExp->GetBasis(1)->GetZ();
1217 // set up geometric factor: 0.5*(1+z0)
1218 for (int i = 0; i < m_nquad0; ++i)
1219 {
1220 for (int j = 0; j < m_nquad1; ++j)
1221 {
1222 m_fac0[i + j * m_nquad0] = 0.5 * (1 + z0[i]);
1223 }
1224 }
1225
1227 // set up geometric factor: 2/(1-z1)
1228 for (int i = 0; i < m_nquad0; ++i)
1229 {
1230 for (int j = 0; j < m_nquad1; ++j)
1231 {
1232 m_fac1[i + j * m_nquad0] = 2.0 / (1 - z1[j]);
1233 }
1234 }
1235
1236 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1237 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1239 }
1240};
1241
1242/// Factory initialisation for the PhysDeriv_SumFac_Tri operators
1243OperatorKey PhysDeriv_SumFac_Tri::m_typeArr[] = {
1246 PhysDeriv_SumFac_Tri::create, "PhysDeriv_SumFac_Tri"),
1249 PhysDeriv_SumFac_Tri::create, "PhysDeriv_SumFac_NodalTri")};
1250
1251/**
1252 * @brief Phys deriv operator using sum-factorisation (Hex)
1253 */
1254class PhysDeriv_SumFac_Hex final : virtual public Operator,
1255 virtual public PhysDeriv_Helper
1256{
1257public:
1259
1260 ~PhysDeriv_SumFac_Hex() final = default;
1261
1262 void operator()(const Array<OneD, const NekDouble> &input,
1263 Array<OneD, NekDouble> &output0,
1264 Array<OneD, NekDouble> &output1,
1265 Array<OneD, NekDouble> &output2,
1266 Array<OneD, NekDouble> &wsp) final
1267 {
1268 int nPhys = m_stdExp->GetTotPoints();
1269 int ntot = m_numElmt * nPhys;
1270 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
1273 out[0] = output0;
1274 out[1] = output1;
1275 out[2] = output2;
1276
1277 for (int i = 0; i < 3; ++i)
1278 {
1279 Diff[i] = wsp + i * ntot;
1280 }
1281
1283 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1284 &Diff[0][0], m_nquad0);
1285
1286 for (int i = 0; i < m_numElmt; ++i)
1287 {
1288 for (int j = 0; j < m_nquad2; ++j)
1289 {
1290 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1291 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1292 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1293 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1294 m_nquad0);
1295 }
1296
1297 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1298 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1299 m_nquad2, 0.0, &Diff[2][i * nPhys],
1300 m_nquad0 * m_nquad1);
1301 }
1302
1303 // calculate full derivative
1304 if (m_isDeformed)
1305 {
1306 for (int i = 0; i < m_coordim; ++i)
1307 {
1308 Vmath::Vmul(ntot, m_derivFac[i * 3], 1, Diff[0], 1, out[i], 1);
1309 for (int j = 1; j < 3; ++j)
1310 {
1311 Vmath::Vvtvp(ntot, m_derivFac[i * 3 + j], 1, Diff[j], 1,
1312 out[i], 1, out[i], 1);
1313 }
1314 }
1315 }
1316 else
1317 {
1319 for (int e = 0; e < m_numElmt; ++e)
1320 {
1321 for (int i = 0; i < m_coordim; ++i)
1322 {
1323 Vmath::Smul(m_nqe, m_derivFac[i * 3][e],
1324 Diff[0] + e * m_nqe, 1, t = out[i] + e * m_nqe,
1325 1);
1326
1327 for (int j = 1; j < 3; ++j)
1328 {
1329 Vmath::Svtvp(m_nqe, m_derivFac[i * 3 + j][e],
1330 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
1331 1, t = out[i] + e * m_nqe, 1);
1332 }
1333 }
1334 }
1335 }
1336 }
1337
1338 void operator()(int dir, const Array<OneD, const NekDouble> &input,
1339 Array<OneD, NekDouble> &output,
1340 Array<OneD, NekDouble> &wsp) final
1341 {
1342 int nPhys = m_stdExp->GetTotPoints();
1343 int ntot = m_numElmt * nPhys;
1344 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
1346
1347 for (int i = 0; i < 3; ++i)
1348 {
1349 Diff[i] = wsp + i * ntot;
1350 }
1351
1353 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1354 &Diff[0][0], m_nquad0);
1355
1356 for (int i = 0; i < m_numElmt; ++i)
1357 {
1358 for (int j = 0; j < m_nquad2; ++j)
1359 {
1360 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1361 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1362 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1363 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1364 m_nquad0);
1365 }
1366
1367 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1368 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1369 m_nquad2, 0.0, &Diff[2][i * nPhys],
1370 m_nquad0 * m_nquad1);
1371 }
1372
1373 // calculate full derivative
1374 if (m_isDeformed)
1375 {
1376 // calculate full derivative
1377 Vmath::Vmul(ntot, m_derivFac[dir * 3], 1, Diff[0], 1, output, 1);
1378 for (int j = 1; j < 3; ++j)
1379 {
1380 Vmath::Vvtvp(ntot, m_derivFac[dir * 3 + j], 1, Diff[j], 1,
1381 output, 1, output, 1);
1382 }
1383 }
1384 else
1385 {
1387 for (int e = 0; e < m_numElmt; ++e)
1388 {
1389 Vmath::Smul(m_nqe, m_derivFac[dir * 3][e], Diff[0] + e * m_nqe,
1390 1, t = output + e * m_nqe, 1);
1391
1392 for (int j = 1; j < 3; ++j)
1393 {
1394 Vmath::Svtvp(m_nqe, m_derivFac[dir * 3 + j][e],
1395 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
1396 t = output + e * m_nqe, 1);
1397 }
1398 }
1399 }
1400 }
1401
1403 [[maybe_unused]] int coll_phys_offset) override
1404 {
1405 ASSERTL0(false, "Not valid for this operator.");
1406 }
1407
1408protected:
1411 const int m_nquad0;
1412 const int m_nquad1;
1413 const int m_nquad2;
1417
1418private:
1419 PhysDeriv_SumFac_Hex(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
1422 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
1423 m_nquad0(m_stdExp->GetNumPoints(0)),
1424 m_nquad1(m_stdExp->GetNumPoints(1)),
1425 m_nquad2(m_stdExp->GetNumPoints(2))
1426 {
1427 m_coordim = pCollExp[0]->GetCoordim();
1428
1429 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1430
1431 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1432 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1433 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
1434
1436 }
1437};
1438
1439/// Factory initialisation for the PhysDeriv_SumFac_Hex operators
1440OperatorKey PhysDeriv_SumFac_Hex::m_typeArr[] = {
1443 PhysDeriv_SumFac_Hex::create, "PhysDeriv_SumFac_Hex")};
1444
1445/**
1446 * @brief Phys deriv operator using sum-factorisation (Tet)
1447 */
1448class PhysDeriv_SumFac_Tet final : virtual public Operator,
1449 virtual public PhysDeriv_Helper
1450{
1451public:
1453
1454 ~PhysDeriv_SumFac_Tet() final = default;
1455
1456 void operator()(const Array<OneD, const NekDouble> &input,
1457 Array<OneD, NekDouble> &output0,
1458 Array<OneD, NekDouble> &output1,
1459 Array<OneD, NekDouble> &output2,
1460 Array<OneD, NekDouble> &wsp) final
1461 {
1462 int nPhys = m_stdExp->GetTotPoints();
1463 int ntot = m_numElmt * nPhys;
1464 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
1467 out[0] = output0;
1468 out[1] = output1;
1469 out[2] = output2;
1470
1471 for (int i = 0; i < 3; ++i)
1472 {
1473 Diff[i] = wsp + i * ntot;
1474 }
1475
1476 // dEta0
1478 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1479 &Diff[0][0], m_nquad0);
1480
1481 // dEta2
1482 for (int i = 0; i < m_numElmt; ++i)
1483 {
1484 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1485 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1486 m_nquad2, 0.0, &Diff[2][i * nPhys],
1487 m_nquad0 * m_nquad1);
1488 }
1489
1490 for (int i = 0; i < m_numElmt; ++i)
1491 {
1492 // dEta1
1493 for (int j = 0; j < m_nquad2; ++j)
1494 {
1495 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1496 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1497 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1498 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1499 m_nquad0);
1500 }
1501
1502 // dxi2 = (1 + eta_1)/(1 -eta_2)*dEta1 + dEta2
1503 Vmath::Vvtvp(nPhys, m_fac3.get(), 1, Diff[1].get() + i * nPhys, 1,
1504 Diff[2].get() + i * nPhys, 1,
1505 Diff[2].get() + i * nPhys, 1);
1506
1507 // dxi1 = 2/(1 - eta_2) dEta1
1508 Vmath::Vmul(nPhys, m_fac2.get(), 1, Diff[1].get() + i * nPhys, 1,
1509 Diff[1].get() + i * nPhys, 1);
1510
1511 // dxi1 = 2.0(1+eta_0)/((1-eta_1)(1-eta_2)) dEta0 + dxi1
1512 Vmath::Vvtvp(nPhys, m_fac1.get(), 1, Diff[0].get() + i * nPhys, 1,
1513 Diff[1].get() + i * nPhys, 1,
1514 Diff[1].get() + i * nPhys, 1);
1515
1516 // dxi2 = 2.0(1+eta_0)/((1-eta_1)(1-eta_2)) dEta0 + dxi2
1517 Vmath::Vvtvp(nPhys, m_fac1.get(), 1, Diff[0].get() + i * nPhys, 1,
1518 Diff[2].get() + i * nPhys, 1,
1519 Diff[2].get() + i * nPhys, 1);
1520
1521 // dxi0 = 4.0/((1-eta_1)(1-eta_2)) dEta0
1522 Vmath::Vmul(nPhys, m_fac0.get(), 1, Diff[0].get() + i * nPhys, 1,
1523 Diff[0].get() + i * nPhys, 1);
1524 }
1525
1526 // calculate full derivative
1527 if (m_isDeformed)
1528 {
1529 for (int i = 0; i < m_coordim; ++i)
1530 {
1531 Vmath::Vmul(ntot, m_derivFac[i * 3], 1, Diff[0], 1, out[i], 1);
1532 for (int j = 1; j < 3; ++j)
1533 {
1534 Vmath::Vvtvp(ntot, m_derivFac[i * 3 + j], 1, Diff[j], 1,
1535 out[i], 1, out[i], 1);
1536 }
1537 }
1538 }
1539 else
1540 {
1542 for (int e = 0; e < m_numElmt; ++e)
1543 {
1544 for (int i = 0; i < m_coordim; ++i)
1545 {
1546 Vmath::Smul(m_nqe, m_derivFac[i * 3][e],
1547 Diff[0] + e * m_nqe, 1, t = out[i] + e * m_nqe,
1548 1);
1549 for (int j = 1; j < 3; ++j)
1550 {
1551 Vmath::Svtvp(m_nqe, m_derivFac[i * 3 + j][e],
1552 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
1553 1, t = out[i] + e * m_nqe, 1);
1554 }
1555 }
1556 }
1557 }
1558 }
1559
1560 void operator()(int dir, const Array<OneD, const NekDouble> &input,
1561 Array<OneD, NekDouble> &output,
1562 Array<OneD, NekDouble> &wsp) final
1563 {
1564 int nPhys = m_stdExp->GetTotPoints();
1565 int ntot = m_numElmt * nPhys;
1566 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
1568
1569 for (int i = 0; i < 3; ++i)
1570 {
1571 Diff[i] = wsp + i * ntot;
1572 }
1573
1574 // dEta0
1576 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1577 &Diff[0][0], m_nquad0);
1578
1579 // dEta2
1580 for (int i = 0; i < m_numElmt; ++i)
1581 {
1582 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1583 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1584 m_nquad2, 0.0, &Diff[2][i * nPhys],
1585 m_nquad0 * m_nquad1);
1586 }
1587
1588 for (int i = 0; i < m_numElmt; ++i)
1589 {
1590 // dEta1
1591 for (int j = 0; j < m_nquad2; ++j)
1592 {
1593 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1594 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1595 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1596 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1597 m_nquad0);
1598 }
1599
1600 // dxi2 = (1 + eta_1)/(1 -eta_2)*dEta1 + dEta2
1601 Vmath::Vvtvp(nPhys, m_fac3.get(), 1, Diff[1].get() + i * nPhys, 1,
1602 Diff[2].get() + i * nPhys, 1,
1603 Diff[2].get() + i * nPhys, 1);
1604
1605 // dxi1 = 2/(1 - eta_2) dEta1
1606 Vmath::Vmul(nPhys, m_fac2.get(), 1, Diff[1].get() + i * nPhys, 1,
1607 Diff[1].get() + i * nPhys, 1);
1608
1609 // dxi1 = 2.0(1+eta_0)/((1-eta_1)(1-eta_2)) dEta0 + dxi1
1610 Vmath::Vvtvp(nPhys, m_fac1.get(), 1, Diff[0].get() + i * nPhys, 1,
1611 Diff[1].get() + i * nPhys, 1,
1612 Diff[1].get() + i * nPhys, 1);
1613
1614 // dxi2 = 2.0(1+eta_0)/((1-eta_1)(1-eta_2)) dEta0 + dxi2
1615 Vmath::Vvtvp(nPhys, m_fac1.get(), 1, Diff[0].get() + i * nPhys, 1,
1616 Diff[2].get() + i * nPhys, 1,
1617 Diff[2].get() + i * nPhys, 1);
1618
1619 // dxi0 = 4.0/((1-eta_1)(1-eta_2)) dEta0
1620 Vmath::Vmul(nPhys, m_fac0.get(), 1, Diff[0].get() + i * nPhys, 1,
1621 Diff[0].get() + i * nPhys, 1);
1622 }
1623
1624 // calculate full derivative
1625 if (m_isDeformed)
1626 {
1627 // calculate full derivative
1628 Vmath::Vmul(ntot, m_derivFac[dir * 3], 1, Diff[0], 1, output, 1);
1629 for (int j = 1; j < 3; ++j)
1630 {
1631 Vmath::Vvtvp(ntot, m_derivFac[dir * 3 + j], 1, Diff[j], 1,
1632 output, 1, output, 1);
1633 }
1634 }
1635 else
1636 {
1638 for (int e = 0; e < m_numElmt; ++e)
1639 {
1640 Vmath::Smul(m_nqe, m_derivFac[dir * 3][e], Diff[0] + e * m_nqe,
1641 1, t = output + e * m_nqe, 1);
1642 for (int j = 1; j < 3; ++j)
1643 {
1644 Vmath::Svtvp(m_nqe, m_derivFac[dir * 3 + j][e],
1645 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
1646 t = output + e * m_nqe, 1);
1647 }
1648 }
1649 }
1650 }
1651
1653 [[maybe_unused]] int coll_phys_offset) override
1654 {
1655 ASSERTL0(false, "Not valid for this operator.");
1656 }
1657
1658protected:
1661 const int m_nquad0;
1662 const int m_nquad1;
1663 const int m_nquad2;
1671
1672private:
1673 PhysDeriv_SumFac_Tet(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
1676 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
1677 m_nquad0(m_stdExp->GetNumPoints(0)),
1678 m_nquad1(m_stdExp->GetNumPoints(1)),
1679 m_nquad2(m_stdExp->GetNumPoints(2))
1680 {
1681 m_coordim = pCollExp[0]->GetCoordim();
1682
1683 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1684
1685 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1686 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1687 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
1688
1690
1691 const Array<OneD, const NekDouble> &z0 = m_stdExp->GetBasis(0)->GetZ();
1692 const Array<OneD, const NekDouble> &z1 = m_stdExp->GetBasis(1)->GetZ();
1693 const Array<OneD, const NekDouble> &z2 = m_stdExp->GetBasis(2)->GetZ();
1694
1699
1700 // calculate 2.0/((1-eta_1)(1-eta_2))
1701 for (int i = 0; i < m_nquad0; ++i)
1702 {
1703 for (int j = 0; j < m_nquad1; ++j)
1704 {
1705 for (int k = 0; k < m_nquad2; ++k)
1706 {
1707 m_fac0[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1708 4.0 / ((1 - z1[j]) * (1 - z2[k]));
1709 m_fac1[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1710 2.0 * (1 + z0[i]) / ((1 - z1[j]) * (1 - z2[k]));
1711 m_fac2[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1712 2.0 / (1 - z2[k]);
1713 m_fac3[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1714 (1 + z1[j]) / (1 - z2[k]);
1715 }
1716 }
1717 }
1718 }
1719};
1720
1721/// Factory initialisation for the PhysDeriv_SumFac_Tet operators
1722OperatorKey PhysDeriv_SumFac_Tet::m_typeArr[] = {
1725 PhysDeriv_SumFac_Tet::create, "PhysDeriv_SumFac_Tet")};
1726
1727/**
1728 * @brief Phys deriv operator using sum-factorisation (Prism)
1729 */
1730class PhysDeriv_SumFac_Prism final : virtual public Operator,
1731 virtual public PhysDeriv_Helper
1732{
1733public:
1735
1736 ~PhysDeriv_SumFac_Prism() final = default;
1737
1738 void operator()(const Array<OneD, const NekDouble> &input,
1739 Array<OneD, NekDouble> &output0,
1740 Array<OneD, NekDouble> &output1,
1741 Array<OneD, NekDouble> &output2,
1742 Array<OneD, NekDouble> &wsp) final
1743 {
1744 int nPhys = m_stdExp->GetTotPoints();
1745 int ntot = m_numElmt * nPhys;
1746 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
1749 out[0] = output0;
1750 out[1] = output1;
1751 out[2] = output2;
1752
1753 for (int i = 0; i < 3; ++i)
1754 {
1755 Diff[i] = wsp + i * ntot;
1756 }
1757
1758 // dEta0
1760 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1761 &Diff[0][0], m_nquad0);
1762
1763 int cnt = 0;
1764 for (int i = 0; i < m_numElmt; ++i)
1765 {
1766 // dEta 1
1767 for (int j = 0; j < m_nquad2; ++j)
1768 {
1769 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1770 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1771 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1772 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1773 m_nquad0);
1774 }
1775
1776 // dEta 2
1777 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1778 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1779 m_nquad2, 0.0, &Diff[2][i * nPhys],
1780 m_nquad0 * m_nquad1);
1781
1782 // dxi0 = 2/(1-eta_2) d Eta_0
1783 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[0].get() + cnt, 1,
1784 Diff[0].get() + cnt, 1);
1785
1786 // dxi2 = (1+eta0)/(1-eta_2) d Eta_0 + d/dEta2;
1787 Vmath::Vvtvp(nPhys, &m_fac1[0], 1, Diff[0].get() + cnt, 1,
1788 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
1789 cnt += nPhys;
1790 }
1791
1792 // calculate full derivative
1793 if (m_isDeformed)
1794 {
1795 for (int i = 0; i < m_coordim; ++i)
1796 {
1797 Vmath::Vmul(ntot, m_derivFac[i * 3], 1, Diff[0], 1, out[i], 1);
1798 for (int j = 1; j < 3; ++j)
1799 {
1800 Vmath::Vvtvp(ntot, m_derivFac[i * 3 + j], 1, Diff[j], 1,
1801 out[i], 1, out[i], 1);
1802 }
1803 }
1804 }
1805 else
1806 {
1808 for (int e = 0; e < m_numElmt; ++e)
1809 {
1810 for (int i = 0; i < m_coordim; ++i)
1811 {
1812 Vmath::Smul(m_nqe, m_derivFac[i * 3][e],
1813 Diff[0] + e * m_nqe, 1, t = out[i] + e * m_nqe,
1814 1);
1815
1816 for (int j = 1; j < 3; ++j)
1817 {
1818 Vmath::Svtvp(m_nqe, m_derivFac[i * 3 + j][e],
1819 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
1820 1, t = out[i] + e * m_nqe, 1);
1821 }
1822 }
1823 }
1824 }
1825 }
1826
1827 void operator()(int dir, const Array<OneD, const NekDouble> &input,
1828 Array<OneD, NekDouble> &output,
1829 Array<OneD, NekDouble> &wsp) final
1830 {
1831 int nPhys = m_stdExp->GetTotPoints();
1832 int ntot = m_numElmt * nPhys;
1833 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
1835
1836 for (int i = 0; i < 3; ++i)
1837 {
1838 Diff[i] = wsp + i * ntot;
1839 }
1840
1841 // dEta0
1843 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
1844 &Diff[0][0], m_nquad0);
1845
1846 int cnt = 0;
1847 for (int i = 0; i < m_numElmt; ++i)
1848 {
1849 // dEta 1
1850 for (int j = 0; j < m_nquad2; ++j)
1851 {
1852 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
1853 &input[i * nPhys + j * m_nquad0 * m_nquad1],
1854 m_nquad0, m_Deriv1, m_nquad1, 0.0,
1855 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
1856 m_nquad0);
1857 }
1858
1859 // dEta 2
1860 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
1861 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
1862 m_nquad2, 0.0, &Diff[2][i * nPhys],
1863 m_nquad0 * m_nquad1);
1864
1865 // dxi0 = 2/(1-eta_2) d Eta_0
1866 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[0].get() + cnt, 1,
1867 Diff[0].get() + cnt, 1);
1868
1869 // dxi2 = (1+eta0)/(1-eta_2) d Eta_0 + d/dEta2;
1870 Vmath::Vvtvp(nPhys, &m_fac1[0], 1, Diff[0].get() + cnt, 1,
1871 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
1872 cnt += nPhys;
1873 }
1874
1875 // calculate full derivative
1876 if (m_isDeformed)
1877 {
1878 // calculate full derivative
1879 Vmath::Vmul(ntot, m_derivFac[dir * 3], 1, Diff[0], 1, output, 1);
1880 for (int j = 1; j < 3; ++j)
1881 {
1882 Vmath::Vvtvp(ntot, m_derivFac[dir * 3 + j], 1, Diff[j], 1,
1883 output, 1, output, 1);
1884 }
1885 }
1886 else
1887 {
1889 for (int e = 0; e < m_numElmt; ++e)
1890 {
1891 Vmath::Smul(m_nqe, m_derivFac[dir * 3][e], Diff[0] + e * m_nqe,
1892 1, t = output + e * m_nqe, 1);
1893
1894 for (int j = 1; j < 3; ++j)
1895 {
1896 Vmath::Svtvp(m_nqe, m_derivFac[dir * 3 + j][e],
1897 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
1898 t = output + e * m_nqe, 1);
1899 }
1900 }
1901 }
1902 }
1903
1905 [[maybe_unused]] int coll_phys_offset) override
1906 {
1907 ASSERTL0(false, "Not valid for this operator.");
1908 }
1909
1910protected:
1913 const int m_nquad0;
1914 const int m_nquad1;
1915 const int m_nquad2;
1921
1922private:
1923 PhysDeriv_SumFac_Prism(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
1926 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
1927 m_nquad0(m_stdExp->GetNumPoints(0)),
1928 m_nquad1(m_stdExp->GetNumPoints(1)),
1929 m_nquad2(m_stdExp->GetNumPoints(2))
1930 {
1931 m_coordim = pCollExp[0]->GetCoordim();
1932
1933 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
1934
1935 const Array<OneD, const NekDouble> &z0 = m_stdExp->GetBasis(0)->GetZ();
1936 const Array<OneD, const NekDouble> &z2 = m_stdExp->GetBasis(2)->GetZ();
1939 for (int i = 0; i < m_nquad0; ++i)
1940 {
1941 for (int j = 0; j < m_nquad1; ++j)
1942 {
1943 for (int k = 0; k < m_nquad2; ++k)
1944 {
1945 m_fac0[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1946 2.0 / (1 - z2[k]);
1947 m_fac1[i + j * m_nquad0 + k * m_nquad0 * m_nquad1] =
1948 0.5 * (1 + z0[i]);
1949 }
1950 }
1951 }
1952
1953 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
1954 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
1955 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
1956
1958 }
1959};
1960
1961/// Factory initialisation for the PhysDeriv_SumFac_Prism operators
1962OperatorKey PhysDeriv_SumFac_Prism::m_typeArr[] = {
1965 PhysDeriv_SumFac_Prism::create, "PhysDeriv_SumFac_Prism")};
1966
1967/**
1968 * @brief Phys deriv operator using sum-factorisation (Pyramid)
1969 */
1970class PhysDeriv_SumFac_Pyr final : virtual public Operator,
1971 virtual public PhysDeriv_Helper
1972{
1973public:
1975
1976 ~PhysDeriv_SumFac_Pyr() final = default;
1977
1978 void operator()(const Array<OneD, const NekDouble> &input,
1979 Array<OneD, NekDouble> &output0,
1980 Array<OneD, NekDouble> &output1,
1981 Array<OneD, NekDouble> &output2,
1982 Array<OneD, NekDouble> &wsp) final
1983 {
1984 int nPhys = m_stdExp->GetTotPoints();
1985 int ntot = m_numElmt * nPhys;
1986 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
1989 out[0] = output0;
1990 out[1] = output1;
1991 out[2] = output2;
1992
1993 for (int i = 0; i < 3; ++i)
1994 {
1995 Diff[i] = wsp + i * ntot;
1996 }
1997
1998 // dEta0
2000 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
2001 &Diff[0][0], m_nquad0);
2002
2003 int cnt = 0;
2004 for (int i = 0; i < m_numElmt; ++i)
2005 {
2006 // dEta 1
2007 for (int j = 0; j < m_nquad2; ++j)
2008 {
2009 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
2010 &input[i * nPhys + j * m_nquad0 * m_nquad1],
2011 m_nquad0, m_Deriv1, m_nquad1, 0.0,
2012 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
2013 m_nquad0);
2014 }
2015
2016 // dEta 2
2017 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
2018 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
2019 m_nquad2, 0.0, &Diff[2][i * nPhys],
2020 m_nquad0 * m_nquad1);
2021
2022 // dxi0 = 2/(1-eta_2) d Eta_0
2023 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[0].get() + cnt, 1,
2024 Diff[0].get() + cnt, 1);
2025
2026 // dxi1 = 2/(1-eta_2) d Eta_1
2027 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[1].get() + cnt, 1,
2028 Diff[1].get() + cnt, 1);
2029
2030 // dxi2 = (1+eta0)/(1-eta_2) d Eta_0 + d/dEta2;
2031 Vmath::Vvtvp(nPhys, &m_fac1[0], 1, Diff[0].get() + cnt, 1,
2032 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
2033
2034 // dxi2 += (1+eta1)/(1-eta_2) d Eta_1
2035 Vmath::Vvtvp(nPhys, &m_fac2[0], 1, Diff[1].get() + cnt, 1,
2036 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
2037 cnt += nPhys;
2038 }
2039
2040 // calculate full derivative
2041 if (m_isDeformed)
2042 {
2043 for (int i = 0; i < m_coordim; ++i)
2044 {
2045 Vmath::Vmul(ntot, m_derivFac[i * 3], 1, Diff[0], 1, out[i], 1);
2046 for (int j = 1; j < 3; ++j)
2047 {
2048 Vmath::Vvtvp(ntot, m_derivFac[i * 3 + j], 1, Diff[j], 1,
2049 out[i], 1, out[i], 1);
2050 }
2051 }
2052 }
2053 else
2054 {
2056 for (int e = 0; e < m_numElmt; ++e)
2057 {
2058 for (int i = 0; i < m_coordim; ++i)
2059 {
2060 Vmath::Smul(m_nqe, m_derivFac[i * 3][e],
2061 Diff[0] + e * m_nqe, 1, t = out[i] + e * m_nqe,
2062 1);
2063
2064 for (int j = 1; j < 3; ++j)
2065 {
2066 Vmath::Svtvp(m_nqe, m_derivFac[i * 3 + j][e],
2067 Diff[j] + e * m_nqe, 1, out[i] + e * m_nqe,
2068 1, t = out[i] + e * m_nqe, 1);
2069 }
2070 }
2071 }
2072 }
2073 }
2074
2075 void operator()(int dir, const Array<OneD, const NekDouble> &input,
2076 Array<OneD, NekDouble> &output,
2077 Array<OneD, NekDouble> &wsp) final
2078 {
2079 int nPhys = m_stdExp->GetTotPoints();
2080 int ntot = m_numElmt * nPhys;
2081 Array<OneD, NekDouble> tmp0, tmp1, tmp2;
2083
2084 for (int i = 0; i < 3; ++i)
2085 {
2086 Diff[i] = wsp + i * ntot;
2087 }
2088
2089 // dEta0
2091 m_nquad0, 1.0, m_Deriv0, m_nquad0, &input[0], m_nquad0, 0.0,
2092 &Diff[0][0], m_nquad0);
2093
2094 int cnt = 0;
2095 for (int i = 0; i < m_numElmt; ++i)
2096 {
2097 // dEta 1
2098 for (int j = 0; j < m_nquad2; ++j)
2099 {
2100 Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0,
2101 &input[i * nPhys + j * m_nquad0 * m_nquad1],
2102 m_nquad0, m_Deriv1, m_nquad1, 0.0,
2103 &Diff[1][i * nPhys + j * m_nquad0 * m_nquad1],
2104 m_nquad0);
2105 }
2106
2107 // dEta 2
2108 Blas::Dgemm('N', 'T', m_nquad0 * m_nquad1, m_nquad2, m_nquad2, 1.0,
2109 &input[i * nPhys], m_nquad0 * m_nquad1, m_Deriv2,
2110 m_nquad2, 0.0, &Diff[2][i * nPhys],
2111 m_nquad0 * m_nquad1);
2112
2113 // dxi0 = 2/(1-eta_2) d Eta_0
2114 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[0].get() + cnt, 1,
2115 Diff[0].get() + cnt, 1);
2116
2117 // dxi1 = 2/(1-eta_2) d Eta_1
2118 Vmath::Vmul(nPhys, &m_fac0[0], 1, Diff[1].get() + cnt, 1,
2119 Diff[1].get() + cnt, 1);
2120
2121 // dxi2 = (1+eta0)/(1-eta_2) d Eta_0 + d/dEta2;
2122 Vmath::Vvtvp(nPhys, &m_fac1[0], 1, Diff[0].get() + cnt, 1,
2123 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
2124
2125 // dxi2 = (1+eta1)/(1-eta_2) d Eta_1 + d/dEta2;
2126 Vmath::Vvtvp(nPhys, &m_fac2[0], 1, Diff[1].get() + cnt, 1,
2127 Diff[2].get() + cnt, 1, Diff[2].get() + cnt, 1);
2128 cnt += nPhys;
2129 }
2130
2131 // calculate full derivative
2132 if (m_isDeformed)
2133 {
2134 // calculate full derivative
2135 Vmath::Vmul(ntot, m_derivFac[dir * 3], 1, Diff[0], 1, output, 1);
2136 for (int j = 1; j < 3; ++j)
2137 {
2138 Vmath::Vvtvp(ntot, m_derivFac[dir * 3 + j], 1, Diff[j], 1,
2139 output, 1, output, 1);
2140 }
2141 }
2142 else
2143 {
2145 for (int e = 0; e < m_numElmt; ++e)
2146 {
2147 Vmath::Smul(m_nqe, m_derivFac[dir * 3][e], Diff[0] + e * m_nqe,
2148 1, t = output + e * m_nqe, 1);
2149
2150 for (int j = 1; j < 3; ++j)
2151 {
2152 Vmath::Svtvp(m_nqe, m_derivFac[dir * 3 + j][e],
2153 Diff[j] + e * m_nqe, 1, output + e * m_nqe, 1,
2154 t = output + e * m_nqe, 1);
2155 }
2156 }
2157 }
2158 }
2159
2161 [[maybe_unused]] int coll_phys_offset) override
2162 {
2163 ASSERTL0(false, "Not valid for this operator.");
2164 }
2165
2166protected:
2169 const int m_nquad0;
2170 const int m_nquad1;
2171 const int m_nquad2;
2178
2179private:
2180 PhysDeriv_SumFac_Pyr(vector<StdRegions::StdExpansionSharedPtr> pCollExp,
2183 : Operator(pCollExp, pGeomData, factors), PhysDeriv_Helper(),
2184 m_nquad0(m_stdExp->GetNumPoints(0)),
2185 m_nquad1(m_stdExp->GetNumPoints(1)),
2186 m_nquad2(m_stdExp->GetNumPoints(2))
2187 {
2188 m_coordim = pCollExp[0]->GetCoordim();
2189
2190 m_derivFac = pGeomData->GetDerivFactors(pCollExp);
2191
2192 const Array<OneD, const NekDouble> &z0 = m_stdExp->GetBasis(0)->GetZ();
2193 const Array<OneD, const NekDouble> &z1 = m_stdExp->GetBasis(1)->GetZ();
2194 const Array<OneD, const NekDouble> &z2 = m_stdExp->GetBasis(2)->GetZ();
2198
2199 int nq0_nq1 = m_nquad0 * m_nquad1;
2200 for (int i = 0; i < m_nquad0; ++i)
2201 {
2202 for (int j = 0; j < m_nquad1; ++j)
2203 {
2204 int ifac = i + j * m_nquad0;
2205 for (int k = 0; k < m_nquad2; ++k)
2206 {
2207 m_fac0[ifac + k * nq0_nq1] = 2.0 / (1 - z2[k]);
2208 m_fac1[ifac + k * nq0_nq1] = 0.5 * (1 + z0[i]);
2209 m_fac2[ifac + k * nq0_nq1] = 0.5 * (1 + z1[j]);
2210 }
2211 }
2212 }
2213
2214 m_Deriv0 = &((m_stdExp->GetBasis(0)->GetD())->GetPtr())[0];
2215 m_Deriv1 = &((m_stdExp->GetBasis(1)->GetD())->GetPtr())[0];
2216 m_Deriv2 = &((m_stdExp->GetBasis(2)->GetD())->GetPtr())[0];
2217
2219 }
2220};
2221
2222/// Factory initialisation for the PhysDeriv_SumFac_Pyr operators
2223OperatorKey PhysDeriv_SumFac_Pyr::m_typeArr[] = {
2226 PhysDeriv_SumFac_Pyr::create, "PhysDeriv_SumFac_Pyr")};
2227
2228} // namespace Nektar::Collections
#define ASSERTL0(condition, msg)
Definition: ErrorUtil.hpp:208
#define NEKERROR(type, msg)
Assert Level 0 – Fundamental assert which is used whether in FULLDEBUG, DEBUG or OPT compilation mode...
Definition: ErrorUtil.hpp:202
#define ASSERTL1(condition, msg)
Assert Level 1 – Debugging which is used whether in FULLDEBUG or DEBUG compilation mode....
Definition: ErrorUtil.hpp:242
#define OPERATOR_CREATE(cname)
Definition: Operator.h:43
unsigned int m_nElmtPad
size after padding
Array< OneD, NekDouble > m_input
padded input/output vectors
unsigned short m_coordim
coordinates dimension
Array< OneD, Array< OneD, NekDouble > > m_output
Base class for operators on a collection of elements.
Definition: Operator.h:133
StdRegions::StdExpansionSharedPtr m_stdExp
Definition: Operator.h:188
unsigned int m_numElmt
number of elements that the operator is applied on
Definition: Operator.h:190
unsigned int m_outputSize
number of modes or quadrature points that are taken as output from an operator
Definition: Operator.h:198
unsigned int m_inputSize
number of modes or quadrature points that are passed as input to an operator
Definition: Operator.h:195
Physical Derivative help class to calculate the size of the collection that is given as an input and ...
Definition: PhysDeriv.cpp:60
Phys deriv operator using element-wise operation.
Definition: PhysDeriv.cpp:434
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:503
PhysDeriv_IterPerExp(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:561
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:556
void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
Definition: PhysDeriv.cpp:549
Phys deriv operator using matrix free operators.
Definition: PhysDeriv.cpp:278
PhysDeriv_MatrixFree(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:350
std::shared_ptr< MatrixFree::PhysDeriv > m_oper
Definition: PhysDeriv.cpp:348
void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
Definition: PhysDeriv.cpp:341
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:324
Phys deriv operator using original LocalRegions implementation.
Definition: PhysDeriv.cpp:613
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:665
PhysDeriv_NoCollection(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:690
vector< StdRegions::StdExpansionSharedPtr > m_expList
Definition: PhysDeriv.cpp:687
void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
Definition: PhysDeriv.cpp:680
Phys deriv operator using standard matrix approach.
Definition: PhysDeriv.cpp:78
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:201
Array< OneD, DNekMatSharedPtr > m_derivMat
Definition: PhysDeriv.cpp:200
PhysDeriv_StdMat(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:206
void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
Definition: PhysDeriv.cpp:193
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:145
Phys deriv operator using sum-factorisation (Hex)
Definition: PhysDeriv.cpp:1256
PhysDeriv_SumFac_Hex(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:1419
void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
Definition: PhysDeriv.cpp:1402
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:1409
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:1338
Phys deriv operator using sum-factorisation (Prism)
Definition: PhysDeriv.cpp:1732
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:1827
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:1911
void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
Definition: PhysDeriv.cpp:1904
PhysDeriv_SumFac_Prism(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:1923
Phys deriv operator using sum-factorisation (Pyramid)
Definition: PhysDeriv.cpp:1972
PhysDeriv_SumFac_Pyr(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:2180
void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
Definition: PhysDeriv.cpp:2160
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:2167
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:2075
Phys deriv operator using sum-factorisation (Quad)
Definition: PhysDeriv.cpp:873
void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
Definition: PhysDeriv.cpp:999
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:954
PhysDeriv_SumFac_Quad(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:1014
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:1009
Phys deriv operator using sum-factorisation (Segment)
Definition: PhysDeriv.cpp:737
PhysDeriv_SumFac_Seg(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:847
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:843
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:804
void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
Definition: PhysDeriv.cpp:834
Phys deriv operator using sum-factorisation (Tet)
Definition: PhysDeriv.cpp:1450
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:1560
PhysDeriv_SumFac_Tet(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:1673
void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
Definition: PhysDeriv.cpp:1652
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:1659
Phys deriv operator using sum-factorisation (Tri)
Definition: PhysDeriv.cpp:1042
PhysDeriv_SumFac_Tri(vector< StdRegions::StdExpansionSharedPtr > pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors)
Definition: PhysDeriv.cpp:1203
void operator()(int dir, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp) final
Definition: PhysDeriv.cpp:1132
Array< TwoD, const NekDouble > m_derivFac
Definition: PhysDeriv.cpp:1196
void CheckFactors(StdRegions::FactorMap factors, int coll_phys_offset) override
Check the validity of the supplied factor map.
Definition: PhysDeriv.cpp:1186
tKey RegisterCreatorFunction(tKey idKey, CreatorFunction classCreator, std::string pDesc="")
Register a class with the factory.
Definition: NekFactory.hpp:197
tBaseSharedPtr CreateInstance(tKey idKey, tParam... args)
Create an instance of the class referred to by idKey.
Definition: NekFactory.hpp:143
static std::shared_ptr< DataType > AllocateSharedPtr(const Args &...args)
Allocate a shared pointer from the memory pool.
static void Dgemm(const char &transa, const char &transb, const int &m, const int &n, const int &k, const double &alpha, const double *a, const int &lda, const double *b, const int &ldb, const double &beta, double *c, const int &ldc)
BLAS level 3: Matrix-matrix multiply C = A x B where op(A)[m x k], op(B)[k x n], C[m x n] DGEMM perfo...
Definition: Blas.hpp:383
std::tuple< LibUtilities::ShapeType, OperatorType, ImplementationType, ExpansionIsNodal > OperatorKey
Key for describing an Operator.
Definition: Operator.h:115
std::shared_ptr< CoalescedGeomData > CoalescedGeomDataSharedPtr
OperatorFactory & GetOperatorFactory()
Returns the singleton Operator factory object.
Definition: Operator.cpp:44
ConstFactorMap FactorMap
Definition: StdRegions.hpp:406
StdRegions::ConstFactorMap factors
double NekDouble
void Vmul(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Multiply vector z = x*y.
Definition: Vmath.hpp:72
void Svtvp(int n, const T alpha, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Svtvp (scalar times vector plus vector): z = alpha*x + y.
Definition: Vmath.hpp:396
void Vvtvp(int n, const T *w, const int incw, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
vvtvp (vector times vector plus vector): z = w*x + y
Definition: Vmath.hpp:366
void Smul(int n, const T alpha, const T *x, const int incx, T *y, const int incy)
Scalar multiply y = alpha*x.
Definition: Vmath.hpp:100
void Zero(int n, T *x, const int incx)
Zero vector.
Definition: Vmath.hpp:273
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
Definition: Vmath.hpp:825