50 int nquad0,
int nquad1,
51 int nmodes0,
int nmodes1,
59 int totpoints = nquad0*nquad1;
60 int totmodes = nmodes0*nmodes1;
64 if(colldir0 && colldir1)
66 Vmath::Vcopy(numElmt*totmodes,wsp.get(),1,output.get(),1);
73 for(
int i = 0; i < nquad0; ++i)
76 &wsp1[i*nquad1*numElmt],1);
81 Blas::Dgemm(
'T',
'N', nquad1*numElmt,nmodes0,nquad0,1.0,
82 &wsp[0],nquad0, base0.get(), nquad0,
83 0.0,&wsp1[0], nquad1*numElmt);
92 for(
int i = 0; i < nquad1; ++i)
95 &wsp[i*numElmt*nmodes0],1);
101 Blas::Dgemm(
'T',
'N', numElmt*nmodes0, nmodes1, nquad1,
102 1.0, &wsp1[0], nquad1, base1.get(), nquad1,
103 0.0, &wsp[0], numElmt*nmodes0);
106 for(
int i = 0; i < totmodes; ++i)
108 Vmath::Vcopy(numElmt,&wsp[i*numElmt],1,&output[i],totmodes);
115 for(
int i = 0; i < nquad1; ++i)
118 &output[i*numElmt*nmodes0],1);
123 Blas::Dgemm(
'T',
'N', nmodes0, nmodes1, nquad1,
124 1.0, &wsp1[0], nquad1, base1.get(), nquad1,
125 0.0, &output[0], nmodes0);
136 int nquad1,
int nmodes0,
int nmodes1,
146 int totpoints = nquad0*nquad1;
148 Vmath::Vmul(numElmt*totpoints,jac,1,input,1,wsp,1);
152 Blas::Dgemm(
'T',
'N', nquad1*numElmt,nmodes0,nquad0,1.0,&wsp[0],nquad0,
153 base0.get(), nquad0, 0.0, &wsp1[0], nquad1*numElmt);
157 for (mode=i=0; i < nmodes0; ++i)
159 Blas::Dgemm(
'T',
'N', nmodes1-i, numElmt, nquad1,
160 1.0, base1.get()+mode*nquad1, nquad1,
161 wsp1.get() + i*nquad1*numElmt, nquad1, 0.0,
162 &output[mode], totmodes);
170 Blas::Dgemv(
'T', nquad1,numElmt,1.0,wsp1.get()+nquad1*numElmt,nquad1,
171 base1.get()+nquad1,1,1.0, &output[1],totmodes);
179 void HexIProduct(
bool colldir0,
bool colldir1,
bool colldir2,
int numElmt,
180 int nquad0,
int nquad1,
int nquad2,
181 int nmodes0,
int nmodes1,
int nmodes2,
190 int totmodes = nmodes0*nmodes1*nmodes2;
191 int totpoints = nquad0 *nquad1 *nquad2;
194 if(colldir0 && colldir1 && colldir2)
197 Vmath::Vmul(numElmt*totpoints,jac,1,input,1,output,1);
201 Vmath::Vmul(numElmt*totpoints,jac,1,input,1,wsp,1);
208 if(numElmt < nmodes0 || 1)
213 for(
int n = 0; n < numElmt; ++n)
218 for(
int i = 0; i < nmodes0; ++i)
220 Vmath::Vcopy(nquad1*nquad2,&wsp[n*totpoints] + i,nquad0,
221 wsp1.get()+nquad1*nquad2*i,1);
226 Blas::Dgemm(
'T',
'N', nquad1*nquad2, nmodes0, nquad0,
227 1.0, &wsp[n*totpoints], nquad0,
229 0.0, wsp1.get(), nquad1*nquad2);
236 for(
int i = 0; i < nmodes1; ++i)
239 wsp2.get()+nquad2*nmodes0*i,1);
244 Blas::Dgemm(
'T',
'N', nquad2*nmodes0, nmodes1, nquad1,
245 1.0, wsp1.get(), nquad1,
247 0.0, wsp2.get(), nquad2*nmodes0);
253 for(
int i = 0; i < nmodes2; ++i)
256 &output[n*totmodes]+nmodes0*nmodes1*i,1);
261 Blas::Dgemm(
'T',
'N', nmodes0*nmodes1, nmodes2, nquad2,
262 1.0, wsp2.get(), nquad2,
264 0.0, &output[n*totmodes], nmodes0*nmodes1);
275 for(
int i = 0; i < nquad0; ++i)
278 &wsp1[i*nquad1*nquad2*numElmt],1);
284 Blas::Dgemm(
'T',
'N', nquad1*nquad2*numElmt, nmodes0, nquad0,
285 1.0, &wsp[0], nquad0, base0.get(), nquad0,
286 0.0, &wsp1[0], nquad1*nquad2*numElmt);
291 for(
int i = 0; i < nquad1; ++i)
294 &wsp2[i*nquad2*numElmt*nmodes0],1);
299 Blas::Dgemm(
'T',
'N', nquad2*numElmt*nmodes0, nmodes1, nquad1,
300 1.0, &wsp1[0], nquad1, base1.get(), nquad1,
301 0.0, &wsp2[0], nquad2*numElmt*nmodes0);
309 for(
int i = 0; i < nquad2; ++i)
312 &output[i*nmodes0*nmodes1],1);
317 Blas::Dgemm(
'T',
'N', numElmt*nmodes0*nmodes1, nmodes2,
318 nquad2, 1.0, &wsp2[0], nquad2,
319 base2.get(), nquad2, 0.0,
320 &wsp1[0], numElmt*nmodes0*nmodes1);
323 for(
int i = 0; i < totmodes; ++i)
326 &output[i], totmodes);
334 for(
int i = 0; i < nquad2; ++i)
337 &output[i*nmodes0*nmodes1],1);
342 Blas::Dgemm(
'T',
'N', numElmt*nmodes0*nmodes1, nmodes2,
343 nquad2, 1.0, &wsp2[0], nquad2,
344 base2.get(), nquad2, 0.0,
345 &output[0], numElmt*nmodes0*nmodes1);
357 int nquad0,
int nquad1,
int nquad2,
358 int nmodes0,
int nmodes1,
int nmodes2,
368 nmodes0,nmodes1,nmodes2);
369 int totpoints = nquad0*nquad1*nquad2;
373 Vmath::Vmul(numElmt*totpoints,jac,1,input,1,wsp,1);
376 * (max(nquad0*nquad1,
380 Blas::Dgemm(
'T',
'N', nquad1*nquad2*numElmt, nmodes0, nquad0,
381 1.0, wsp.get(), nquad0, base0.get(),
382 nquad0, 0.0, wsp1.get(), nquad1*nquad2*numElmt);
386 Blas::Dgemm(
'T',
'N', nquad2*numElmt*nmodes0, nmodes1, nquad1,
387 1.0, wsp1.get(), nquad1, base1.get(),
388 nquad1, 0.0, wsp.get(), nquad2*numElmt*nmodes0);
393 mode = mode1 = cnt = 0;
394 for(
int i = 0; i < nmodes0; ++i)
396 cnt = i*nquad2*numElmt;
397 for(
int j = 0; j < nmodes1; ++j)
399 Blas::Dgemm(
'T',
'N', nmodes2-i, numElmt, nquad2,
400 1.0, base2.get()+mode*nquad2, nquad2,
401 wsp.get()+j*nquad2*numElmt*nmodes0 + cnt, nquad2,
402 0.0, output.get()+mode1, totmodes);
414 for(
int j =0; j < nmodes1; ++j)
416 Blas::Dgemv(
'T', nquad2,numElmt,1.0,
417 wsp.get()+j*nquad2*numElmt*nmodes0+nquad2*numElmt,
418 nquad2, base2.get()+nquad2,1,1.0,
419 &output[j*nmodes2+1], totmodes);
429 int nquad0,
int nquad1,
int nquad2,
430 int nmodes0,
int nmodes1,
int nmodes2,
440 nmodes0,nmodes1,nmodes2);
441 int totpoints = nquad0*nquad1*nquad2;
445 Vmath::Vmul(numElmt*totpoints,jac,1,input,1,wsp,1);
448 nquad2*numElmt*(max(nquad0*nquad1,nmodes0*(2*nmodes1-nmodes0+1)/2));
452 Blas::Dgemm(
'T',
'N', nquad1*nquad2*numElmt, nmodes0, nquad0,
453 1.0, wsp.get(), nquad0, base0.get(),
454 nquad0, 0.0, wsp1.get(), nquad1*nquad2*numElmt);
458 for(
int i=0; i < nmodes0; ++i)
460 Blas::Dgemm(
'T',
'N', nquad2*numElmt, nmodes1-i, nquad1,
461 1.0, wsp1.get()+ i*nquad1*nquad2*numElmt, nquad1,
462 base1.get() + mode*nquad1, nquad1,
463 0.0, wsp.get() + mode*nquad2*numElmt,nquad2*numElmt);
474 for(
int n = 0; n < numElmt; ++n)
476 Blas::Dgemv(
'T', nquad1, nquad2,
477 1.0, wsp1.get()+numElmt*nquad1*nquad2 +
478 n*nquad1*nquad2, nquad1,
479 base1.get()+nquad1, 1, 1.0,
480 wsp.get()+nquad2*numElmt + n*nquad2, 1);
485 mode = mode1 = cnt = 0;
486 for(
int i = 0; i < nmodes0; ++i)
488 for(
int j = 0; j < nmodes1-i; ++j, ++cnt)
490 Blas::Dgemm(
'T',
'N', nmodes2-i-j, numElmt, nquad2,
491 1.0, base2.get()+mode*nquad2, nquad2,
492 wsp.get()+cnt*nquad2*numElmt, nquad2,
493 0.0, output.get()+mode1, totmodes);
495 mode1 += nmodes2-i-j;
499 mode += (nmodes2-nmodes1)*(nmodes2-nmodes1+1)/2;
506 for(
int n = 0; n < numElmt; ++n)
510 base2.get()+nquad2,1,
511 &wsp[nquad2*numElmt + n*nquad2],1);
515 base2.get()+nquad2,1,
516 &wsp[nquad2*nmodes1*numElmt+n*nquad2],1);
void HexIProduct(bool colldir0, bool colldir1, bool colldir2, int numElmt, int nquad0, int nquad1, int nquad2, int nmodes0, int nmodes1, int nmodes2, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &base2, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
void QuadIProduct(bool colldir0, bool colldir1, int numElmt, int nquad0, int nquad1, int nmodes0, int nmodes1, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
void TetIProduct(bool sortTopEdge, int numElmt, int nquad0, int nquad1, int nquad2, int nmodes0, int nmodes1, int nmodes2, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &base2, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
void TriIProduct(bool sortTopVertex, int numElmt, int nquad0, int nquad1, int nmodes0, int nmodes1, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
int getNumberOfCoefficients(int Na)
T Ddot(int n, const Array< OneD, const T > &w, const int incw, const Array< OneD, const T > &x, const int incx, const Array< OneD, const int > &y, const int incy)
void PrismIProduct(bool sortTopVertex, int numElmt, int nquad0, int nquad1, int nquad2, int nmodes0, int nmodes1, int nmodes2, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &base2, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
void Vmul(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Multiply vector z = x*y.