46void QuadIProduct(
bool colldir0,
bool colldir1,
int numElmt,
int nquad0,
47 int nquad1,
int nmodes0,
int nmodes1,
54 int totpoints = nquad0 * nquad1;
55 int totmodes = nmodes0 * nmodes1;
57 Vmath::Vmul(numElmt * totpoints, jac, 1, input, 1, wsp, 1);
59 if (colldir0 && colldir1)
61 Vmath::Vcopy(numElmt * totmodes, wsp.data(), 1, output.data(), 1);
68 for (
int i = 0; i < nquad0; ++i)
71 &wsp1[i * nquad1 * numElmt], 1);
76 Blas::Dgemm(
'T',
'N', nquad1 * numElmt, nmodes0, nquad0, 1.0,
77 &wsp[0], nquad0, base0.data(), nquad0, 0.0, &wsp1[0],
85 for (
int i = 0; i < nquad1; ++i)
88 &wsp[i * numElmt * nmodes0], 1);
94 Blas::Dgemm(
'T',
'N', numElmt * nmodes0, nmodes1, nquad1, 1.0,
95 &wsp1[0], nquad1, base1.data(), nquad1, 0.0,
96 &wsp[0], numElmt * nmodes0);
99 for (
int i = 0; i < totmodes; ++i)
109 for (
int i = 0; i < nquad1; ++i)
112 &output[i * numElmt * nmodes0], 1);
117 Blas::Dgemm(
'T',
'N', nmodes0, nmodes1, nquad1, 1.0, &wsp1[0],
118 nquad1, base1.data(), nquad1, 0.0, &output[0],
128void TriIProduct(
bool sortTopVertex,
int numElmt,
int nquad0,
int nquad1,
129 int nmodes0,
int nmodes1,
138 int totpoints = nquad0 * nquad1;
140 Vmath::Vmul(numElmt * totpoints, jac, 1, input, 1, wsp, 1);
144 Blas::Dgemm(
'T',
'N', nquad1 * numElmt, nmodes0, nquad0, 1.0, &wsp[0],
145 nquad0, base0.data(), nquad0, 0.0, &wsp1[0], nquad1 * numElmt);
149 for (mode = i = 0; i < nmodes0; ++i)
151 Blas::Dgemm(
'T',
'N', nmodes1 - i, numElmt, nquad1, 1.0,
152 base1.data() + mode * nquad1, nquad1,
153 wsp1.data() + i * nquad1 * numElmt, nquad1, 0.0,
154 &output[mode], totmodes);
162 Blas::Dgemv(
'T', nquad1, numElmt, 1.0, wsp1.data() + nquad1 * numElmt,
163 nquad1, base1.data() + nquad1, 1, 1.0, &output[1],
171void HexIProduct(
bool colldir0,
bool colldir1,
bool colldir2,
int numElmt,
172 int nquad0,
int nquad1,
int nquad2,
int nmodes0,
int nmodes1,
180 int totmodes = nmodes0 * nmodes1 * nmodes2;
181 int totpoints = nquad0 * nquad1 * nquad2;
183 if (colldir0 && colldir1 && colldir2)
186 Vmath::Vmul(numElmt * totpoints, jac, 1, input, 1, output, 1);
190 Vmath::Vmul(numElmt * totpoints, jac, 1, input, 1, wsp, 1);
197 if (numElmt < nmodes0 || 1)
202 for (
int n = 0; n < numElmt; ++n)
206 for (
int i = 0; i < nmodes0; ++i)
209 nquad0, wsp1.data() + nquad1 * nquad2 * i,
215 Blas::Dgemm(
'T',
'N', nquad1 * nquad2, nmodes0, nquad0, 1.0,
216 &wsp[n * totpoints], nquad0, base0.data(),
217 nquad0, 0.0, wsp1.data(), nquad1 * nquad2);
223 for (
int i = 0; i < nmodes1; ++i)
226 wsp2.data() + nquad2 * nmodes0 * i, 1);
231 Blas::Dgemm(
'T',
'N', nquad2 * nmodes0, nmodes1, nquad1,
232 1.0, wsp1.data(), nquad1, base1.data(), nquad1,
233 0.0, wsp2.data(), nquad2 * nmodes0);
239 for (
int i = 0; i < nmodes2; ++i)
242 nmodes0 * nmodes1, wsp2.data() + i, nquad2,
243 &output[n * totmodes] + nmodes0 * nmodes1 * i, 1);
248 Blas::Dgemm(
'T',
'N', nmodes0 * nmodes1, nmodes2, nquad2,
249 1.0, wsp2.data(), nquad2, base2.data(), nquad2,
250 0.0, &output[n * totmodes], nmodes0 * nmodes1);
257 wsp1 + numElmt * (max(totpoints, totmodes));
261 for (
int i = 0; i < nquad0; ++i)
263 Vmath::Vcopy(nquad1 * nquad2 * numElmt, &wsp[i], nquad0,
264 &wsp1[i * nquad1 * nquad2 * numElmt], 1);
270 Blas::Dgemm(
'T',
'N', nquad1 * nquad2 * numElmt, nmodes0,
271 nquad0, 1.0, &wsp[0], nquad0, base0.data(), nquad0,
272 0.0, &wsp1[0], nquad1 * nquad2 * numElmt);
277 for (
int i = 0; i < nquad1; ++i)
279 Vmath::Vcopy(nquad2 * numElmt * nmodes0, &wsp1[i], nquad1,
280 &wsp2[i * nquad2 * numElmt * nmodes0], 1);
285 Blas::Dgemm(
'T',
'N', nquad2 * numElmt * nmodes0, nmodes1,
286 nquad1, 1.0, &wsp1[0], nquad1, base1.data(), nquad1,
287 0.0, &wsp2[0], nquad2 * numElmt * nmodes0);
294 for (
int i = 0; i < nquad2; ++i)
297 &output[i * nmodes0 * nmodes1], 1);
302 Blas::Dgemm(
'T',
'N', numElmt * nmodes0 * nmodes1, nmodes2,
303 nquad2, 1.0, &wsp2[0], nquad2, base2.data(),
304 nquad2, 0.0, &wsp1[0],
305 numElmt * nmodes0 * nmodes1);
308 for (
int i = 0; i < totmodes; ++i)
310 Vmath::Vcopy(numElmt, &wsp1[i * numElmt], 1, &output[i],
318 for (
int i = 0; i < nquad2; ++i)
321 &output[i * nmodes0 * nmodes1], 1);
326 Blas::Dgemm(
'T',
'N', numElmt * nmodes0 * nmodes1, nmodes2,
327 nquad2, 1.0, &wsp2[0], nquad2, base2.data(),
328 nquad2, 0.0, &output[0],
329 numElmt * nmodes0 * nmodes1);
340 int nquad2,
int nmodes0,
int nmodes1,
int nmodes2,
349 nmodes0, nmodes1, nmodes2);
350 int totpoints = nquad0 * nquad1 * nquad2;
354 Vmath::Vmul(numElmt * totpoints, jac, 1, input, 1, wsp, 1);
357 wsp + numElmt * nquad2 * (max(nquad0 * nquad1, nmodes0 * nmodes1));
360 Blas::Dgemm(
'T',
'N', nquad1 * nquad2 * numElmt, nmodes0, nquad0, 1.0,
361 wsp.data(), nquad0, base0.data(), nquad0, 0.0, wsp1.data(),
362 nquad1 * nquad2 * numElmt);
365 Blas::Dgemm(
'T',
'N', nquad2 * numElmt * nmodes0, nmodes1, nquad1, 1.0,
366 wsp1.data(), nquad1, base1.data(), nquad1, 0.0, wsp.data(),
367 nquad2 * numElmt * nmodes0);
371 mode = mode1 = cnt = 0;
372 for (
int i = 0; i < nmodes0; ++i)
374 cnt = i * nquad2 * numElmt;
375 for (
int j = 0; j < nmodes1; ++j)
377 Blas::Dgemm(
'T',
'N', nmodes2 - i, numElmt, nquad2, 1.0,
378 base2.data() + mode * nquad2, nquad2,
379 wsp.data() + j * nquad2 * numElmt * nmodes0 + cnt,
380 nquad2, 0.0, output.data() + mode1, totmodes);
381 mode1 += nmodes2 - i;
392 for (
int j = 0; j < nmodes1; ++j)
395 wsp.data() + j * nquad2 * numElmt * nmodes0 +
397 nquad2, base2.data() + nquad2, 1, 1.0,
398 &output[j * nmodes2 + 1], totmodes);
406void PyrIProduct(
bool sortTopVertex,
int numElmt,
int nquad0,
int nquad1,
407 int nquad2,
int nmodes0,
int nmodes1,
int nmodes2,
416 nmodes0, nmodes1, nmodes2);
417 int totpoints = nquad0 * nquad1 * nquad2;
422 numElmt * (nquad1 * nquad2 * nmodes0 +
423 nquad2 * max(nquad0 * nquad1, nmodes0 * nmodes1)),
424 "Insufficient workspace size");
426 Vmath::Vmul(numElmt * totpoints, jac, 1, input, 1, wsp, 1);
429 wsp + numElmt * nquad2 * (max(nquad0 * nquad1, nmodes0 * nmodes1));
432 Blas::Dgemm(
'T',
'N', nquad1 * nquad2 * numElmt, nmodes0, nquad0, 1.0,
433 wsp.data(), nquad0, base0.data(), nquad0, 0.0, wsp1.data(),
434 nquad1 * nquad2 * numElmt);
438 for (
int i = 0; i < nmodes0; ++i)
440 Blas::Dgemm(
'T',
'N', nquad2 * numElmt, nmodes1, nquad1, 1.0,
441 wsp1.data() + i * nquad1 * nquad2 * numElmt, nquad1,
442 base1.data(), nquad1, 0.0,
443 wsp.data() + mode * nquad2 * numElmt, nquad2 * numElmt);
448 mode = mode1 = cnt = 0;
449 for (
int i = 0; i < nmodes0; ++i)
451 for (
int j = 0; j < nmodes1; ++j, ++cnt)
453 int ijmax = max(i, j);
454 Blas::Dgemm(
'T',
'N', nmodes2 - ijmax, numElmt, nquad2, 1.0,
455 base2.data() + mode * nquad2, nquad2,
456 wsp.data() + cnt * nquad2 * numElmt, nquad2, 0.0,
457 output.data() + mode1, totmodes);
458 mode += nmodes2 - ijmax;
459 mode1 += nmodes2 - ijmax;
463 for (
int j = nmodes1; j < nmodes2; ++j)
465 int ijmax = max(i, j);
466 mode += nmodes2 - ijmax;
474 for (
int n = 0; n < numElmt; ++n)
477 output[1 + n * totmodes] +=
479 &wsp[nquad2 * numElmt + n * nquad2], 1);
482 output[1 + n * totmodes] +=
484 &wsp[nquad2 * nmodes1 * numElmt + n * nquad2], 1);
488 nquad2, base2.data() + nquad2, 1,
489 &wsp[nquad2 * (nmodes1 + 1) * numElmt + n * nquad2], 1);
497void TetIProduct(
bool sortTopEdge,
int numElmt,
int nquad0,
int nquad1,
498 int nquad2,
int nmodes0,
int nmodes1,
int nmodes2,
507 nmodes0, nmodes1, nmodes2);
508 int totpoints = nquad0 * nquad1 * nquad2;
512 Vmath::Vmul(numElmt * totpoints, jac, 1, input, 1, wsp, 1);
517 (max(nquad0 * nquad1, nmodes0 * (2 * nmodes1 - nmodes0 + 1) / 2));
520 Blas::Dgemm(
'T',
'N', nquad1 * nquad2 * numElmt, nmodes0, nquad0, 1.0,
521 wsp.data(), nquad0, base0.data(), nquad0, 0.0, wsp1.data(),
522 nquad1 * nquad2 * numElmt);
526 for (
int i = 0; i < nmodes0; ++i)
528 Blas::Dgemm(
'T',
'N', nquad2 * numElmt, nmodes1 - i, nquad1, 1.0,
529 wsp1.data() + i * nquad1 * nquad2 * numElmt, nquad1,
530 base1.data() + mode * nquad1, nquad1, 0.0,
531 wsp.data() + mode * nquad2 * numElmt, nquad2 * numElmt);
541 for (
int n = 0; n < numElmt; ++n)
544 wsp1.data() + numElmt * nquad1 * nquad2 +
546 nquad1, base1.data() + nquad1, 1, 1.0,
547 wsp.data() + nquad2 * numElmt + n * nquad2, 1);
552 mode = mode1 = cnt = 0;
553 for (
int i = 0; i < nmodes0; ++i)
555 for (
int j = 0; j < nmodes1 - i; ++j, ++cnt)
557 Blas::Dgemm(
'T',
'N', nmodes2 - i - j, numElmt, nquad2, 1.0,
558 base2.data() + mode * nquad2, nquad2,
559 wsp.data() + cnt * nquad2 * numElmt, nquad2, 0.0,
560 output.data() + mode1, totmodes);
561 mode += nmodes2 - i - j;
562 mode1 += nmodes2 - i - j;
566 mode += (nmodes2 - nmodes1) * (nmodes2 - nmodes1 + 1) / 2;
573 for (
int n = 0; n < numElmt; ++n)
576 output[1 + n * totmodes] +=
578 &wsp[nquad2 * numElmt + n * nquad2], 1);
581 output[1 + n * totmodes] +=
583 &wsp[nquad2 * nmodes1 * numElmt + n * nquad2], 1);
#define ASSERTL1(condition, msg)
Assert Level 1 – Debugging which is used whether in FULLDEBUG or DEBUG compilation mode....
static void Dgemv(const char &trans, const int &m, const int &n, const double &alpha, const double *a, const int &lda, const double *x, const int &incx, const double &beta, double *y, const int &incy)
BLAS level 2: Matrix vector multiply y = alpha A x plus beta y where A[m x n].
static double Ddot(const int &n, const double *x, const int &incx, const double *y, const int &incy)
BLAS level 1: output = .
static void Dgemm(const char &transa, const char &transb, const int &m, const int &n, const int &k, const double &alpha, const double *a, const int &lda, const double *b, const int &ldb, const double &beta, double *c, const int &ldc)
BLAS level 3: Matrix-matrix multiply C = A x B where op(A)[m x k], op(B)[k x n], C[m x n] DGEMM perfo...
void TetIProduct(bool sortTopEdge, int numElmt, int nquad0, int nquad1, int nquad2, int nmodes0, int nmodes1, int nmodes2, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &base2, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
void PyrIProduct(bool sortTopVert, int numElmt, int nquad0, int nquad1, int nquad2, int nmodes0, int nmodes1, int nmodes2, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &base2, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
void TriIProduct(bool sortTopVertex, int numElmt, int nquad0, int nquad1, int nmodes0, int nmodes1, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
void QuadIProduct(bool colldir0, bool colldir1, int numElmt, int nquad0, int nquad1, int nmodes0, int nmodes1, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
void PrismIProduct(bool sortTopVert, int numElmt, int nquad0, int nquad1, int nquad2, int nmodes0, int nmodes1, int nmodes2, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &base2, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
void HexIProduct(bool colldir0, bool colldir1, bool colldir2, int numElmt, int nquad0, int nquad1, int nquad2, int nmodes0, int nmodes1, int nmodes2, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &base2, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
int getNumberOfCoefficients(int Na, int Nb, int Nc)
int getNumberOfCoefficients(int Na, int Nb, int Nc)
int getNumberOfCoefficients(int Na, int Nb, int Nc)
int getNumberOfCoefficients(int Na, int Nb)
void Vmul(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Multiply vector z = x*y.
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)