Nektar++
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
IProduct.cpp
Go to the documentation of this file.
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 // File: IProduct.cpp
4 //
5 // For more information, please see: http://www.nektar.info
6 //
7 // The MIT License
8 //
9 // Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10 // Department of Aeronautics, Imperial College London (UK), and Scientific
11 // Computing and Imaging Institute, University of Utah (USA).
12 //
13 // License for the specific language governing rights and limitations under
14 // Permission is hereby granted, free of charge, to any person obtaining a
15 // copy of this software and associated documentation files (the "Software"),
16 // to deal in the Software without restriction, including without limitation
17 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
18 
19 // Software is furnished to do so, subject to the following conditions:
20 //
21 // The above copyright notice and this permission notice shall be included
22 // in all copies or substantial portions of the Software.
23 //
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
25 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30 // DEALINGS IN THE SOFTWARE.
31 //
32 // Description: IProduct operators for multiple calls in different operators
33 //
34 ///////////////////////////////////////////////////////////////////////////////
35 
36 #include <Collections/Collection.h>
37 #include <Collections/IProduct.h>
38 
39 using namespace std;
40 
41 namespace Nektar
42 {
43 namespace Collections
44 {
45 
46 /**
47  *
48  */
49 void QuadIProduct(bool colldir0, bool colldir1, int numElmt,
50  int nquad0, int nquad1,
51  int nmodes0, int nmodes1,
52  const Array<OneD, const NekDouble> &base0,
53  const Array<OneD, const NekDouble> &base1,
55  const Array<OneD, const NekDouble> &input,
56  Array<OneD, NekDouble> &output,
58 {
59  int totpoints = nquad0*nquad1;
60  int totmodes = nmodes0*nmodes1;
61 
62  Vmath::Vmul(numElmt*totpoints,jac,1,input,1,wsp,1);
63 
64  if(colldir0 && colldir1)
65  {
66  Vmath::Vcopy(numElmt*totmodes,wsp.get(),1,output.get(),1);
67  }
68  else
69  {
70  Array<OneD, NekDouble> wsp1 = wsp + max(totpoints,totmodes)*numElmt;
71  if(colldir0)
72  {
73  for(int i = 0; i < nquad0; ++i)
74  {
75  Vmath::Vcopy(nquad1*numElmt,&wsp[i],nquad0,
76  &wsp1[i*nquad1*numElmt],1);
77  }
78  }
79  else
80  {
81  Blas::Dgemm('T','N', nquad1*numElmt,nmodes0,nquad0,1.0,
82  &wsp[0],nquad0, base0.get(), nquad0,
83  0.0,&wsp1[0], nquad1*numElmt);
84  }
85 
86 
87  if(numElmt > 1)
88  {
89 
90  if(colldir1)
91  {
92  for(int i = 0; i < nquad1; ++i)
93  {
94  Vmath::Vcopy(numElmt*nmodes0,&wsp1[i],nquad1,
95  &wsp[i*numElmt*nmodes0],1);
96  }
97  }
98  else
99  {
100 
101  Blas::Dgemm('T','N', numElmt*nmodes0, nmodes1, nquad1,
102  1.0, &wsp1[0], nquad1, base1.get(), nquad1,
103  0.0, &wsp[0], numElmt*nmodes0);
104  }
105 
106  for(int i = 0; i < totmodes; ++i)
107  {
108  Vmath::Vcopy(numElmt,&wsp[i*numElmt],1,&output[i],totmodes);
109  }
110  }
111  else
112  {
113  if(colldir1)
114  {
115  for(int i = 0; i < nquad1; ++i)
116  {
117  Vmath::Vcopy(numElmt*nmodes0,&wsp1[i],nquad1,
118  &output[i*numElmt*nmodes0],1);
119  }
120  }
121  else
122  {
123  Blas::Dgemm('T','N', nmodes0, nmodes1, nquad1,
124  1.0, &wsp1[0], nquad1, base1.get(), nquad1,
125  0.0, &output[0], nmodes0);
126  }
127  }
128  }
129 }
130 
131 
132 /**
133  *
134  */
135 void TriIProduct(bool sortTopVertex, int numElmt, int nquad0,
136  int nquad1, int nmodes0, int nmodes1,
137  const Array<OneD, const NekDouble> &base0,
138  const Array<OneD, const NekDouble> &base1,
139  const Array<OneD, const NekDouble> &jac,
140  const Array<OneD, const NekDouble> &input,
141  Array<OneD, NekDouble> &output,
143 {
145  nmodes0, nmodes1);
146  int totpoints = nquad0*nquad1;
147 
148  Vmath::Vmul(numElmt*totpoints,jac,1,input,1,wsp,1);
149 
150  Array<OneD, NekDouble> wsp1 = wsp + max(totpoints,totmodes)*numElmt;
151 
152  Blas::Dgemm('T','N', nquad1*numElmt,nmodes0,nquad0,1.0,&wsp[0],nquad0,
153  base0.get(), nquad0, 0.0, &wsp1[0], nquad1*numElmt);
154 
155  int i, mode;
156  // Inner product with respect to 'b' direction
157  for (mode=i=0; i < nmodes0; ++i)
158  {
159  Blas::Dgemm('T', 'N', nmodes1-i, numElmt, nquad1,
160  1.0, base1.get()+mode*nquad1, nquad1,
161  wsp1.get() + i*nquad1*numElmt, nquad1, 0.0,
162  &output[mode], totmodes);
163 
164  mode += nmodes1 - i;
165  }
166 
167  // fix for modified basis by splitting top vertex mode
168  if (sortTopVertex)
169  {
170  Blas::Dgemv('T', nquad1,numElmt,1.0,wsp1.get()+nquad1*numElmt,nquad1,
171  base1.get()+nquad1,1,1.0, &output[1],totmodes);
172  }
173 }
174 
175 
176 /**
177  *
178  */
179 void HexIProduct(bool colldir0, bool colldir1, bool colldir2, int numElmt,
180  int nquad0, int nquad1, int nquad2,
181  int nmodes0, int nmodes1, int nmodes2,
182  const Array<OneD, const NekDouble> &base0,
183  const Array<OneD, const NekDouble> &base1,
184  const Array<OneD, const NekDouble> &base2,
185  const Array<OneD, const NekDouble> &jac,
186  const Array<OneD, const NekDouble> &input,
187  Array<OneD, NekDouble> &output,
189 {
190  int totmodes = nmodes0*nmodes1*nmodes2;
191  int totpoints = nquad0 *nquad1 *nquad2;
192 
193 
194  if(colldir0 && colldir1 && colldir2)
195  {
196 
197  Vmath::Vmul(numElmt*totpoints,jac,1,input,1,output,1);
198  }
199  else
200  {
201  Vmath::Vmul(numElmt*totpoints,jac,1,input,1,wsp,1);
202 
203  // Assign second half of workspace for 2nd DGEMM operation.
204  Array<OneD, NekDouble> wsp1 = wsp + totpoints*numElmt;
205 
206  // note sure what criterion we should use to swap around these
207  // strategies
208  if(numElmt < nmodes0 || 1)
209  {
210  Array<OneD, NekDouble> wsp2 = wsp1 + nmodes0*nquad1*nquad2;
211 
212  //loop over elements
213  for(int n = 0; n < numElmt; ++n)
214  {
215  if(colldir0)
216  {
217 
218  for(int i = 0; i < nmodes0; ++i)
219  {
220  Vmath::Vcopy(nquad1*nquad2,&wsp[n*totpoints] + i,nquad0,
221  wsp1.get()+nquad1*nquad2*i,1);
222  }
223  }
224  else
225  {
226  Blas::Dgemm('T', 'N', nquad1*nquad2, nmodes0, nquad0,
227  1.0, &wsp[n*totpoints], nquad0,
228  base0.get(), nquad0,
229  0.0, wsp1.get(), nquad1*nquad2);
230  }
231 
232 
233  if(colldir1)
234  {
235  // reshuffle data for next operation.
236  for(int i = 0; i < nmodes1; ++i)
237  {
238  Vmath::Vcopy(nquad2*nmodes0,wsp1.get()+i,nquad1,
239  wsp2.get()+nquad2*nmodes0*i,1);
240  }
241  }
242  else
243  {
244  Blas::Dgemm('T', 'N', nquad2*nmodes0, nmodes1, nquad1,
245  1.0, wsp1.get(), nquad1,
246  base1.get(), nquad1,
247  0.0, wsp2.get(), nquad2*nmodes0);
248  }
249 
250  if(colldir2)
251  {
252  // reshuffle data for next operation.
253  for(int i = 0; i < nmodes2; ++i)
254  {
255  Vmath::Vcopy(nmodes0*nmodes1,wsp2.get()+i,nquad2,
256  &output[n*totmodes]+nmodes0*nmodes1*i,1);
257  }
258  }
259  else
260  {
261  Blas::Dgemm('T', 'N', nmodes0*nmodes1, nmodes2, nquad2,
262  1.0, wsp2.get(), nquad2,
263  base2.get(), nquad2,
264  0.0, &output[n*totmodes], nmodes0*nmodes1);
265  }
266  }
267  }
268  else
269  {
270  Array<OneD, NekDouble> wsp2 = wsp1 + numElmt*(max(totpoints,
271  totmodes));
272 
273  if(colldir0)
274  {
275  for(int i = 0; i < nquad0; ++i)
276  {
277  Vmath::Vcopy(nquad1*nquad2*numElmt,&wsp[i],nquad0,
278  &wsp1[i*nquad1*nquad2*numElmt],1);
279  }
280  }
281  else
282  {
283  // large degmm but copy at end.
284  Blas::Dgemm('T','N', nquad1*nquad2*numElmt, nmodes0, nquad0,
285  1.0, &wsp[0], nquad0, base0.get(), nquad0,
286  0.0, &wsp1[0], nquad1*nquad2*numElmt);
287  }
288 
289  if(colldir1)
290  {
291  for(int i = 0; i < nquad1; ++i)
292  {
293  Vmath::Vcopy(nquad2*numElmt*nmodes0,&wsp1[i],nquad1,
294  &wsp2[i*nquad2*numElmt*nmodes0],1);
295  }
296  }
297  else
298  {
299  Blas::Dgemm('T','N', nquad2*numElmt*nmodes0, nmodes1, nquad1,
300  1.0, &wsp1[0], nquad1, base1.get(), nquad1,
301  0.0, &wsp2[0], nquad2*numElmt*nmodes0);
302  }
303 
304 
305  if(numElmt > 1)
306  {
307  if(colldir2)
308  {
309  for(int i = 0; i < nquad2; ++i)
310  {
311  Vmath::Vcopy(nmodes0*nmodes1,&wsp2[i],nquad2,
312  &output[i*nmodes0*nmodes1],1);
313  }
314  }
315  else
316  {
317  Blas::Dgemm('T', 'N', numElmt*nmodes0*nmodes1, nmodes2,
318  nquad2, 1.0, &wsp2[0], nquad2,
319  base2.get(), nquad2, 0.0,
320  &wsp1[0], numElmt*nmodes0*nmodes1);
321  }
322 
323  for(int i = 0; i < totmodes; ++i)
324  {
325  Vmath::Vcopy(numElmt, &wsp1[i*numElmt], 1,
326  &output[i], totmodes);
327  }
328 
329  }
330  else
331  {
332  if(colldir2)
333  {
334  for(int i = 0; i < nquad2; ++i)
335  {
336  Vmath::Vcopy(nmodes0*nmodes1,&wsp2[i],nquad2,
337  &output[i*nmodes0*nmodes1],1);
338  }
339  }
340  else
341  {
342  Blas::Dgemm('T','N', numElmt*nmodes0*nmodes1, nmodes2,
343  nquad2, 1.0, &wsp2[0], nquad2,
344  base2.get(), nquad2, 0.0,
345  &output[0], numElmt*nmodes0*nmodes1);
346  }
347  }
348  }
349  }
350 }
351 
352 
353 /**
354  *
355  */
356 void PrismIProduct(bool sortTopVertex, int numElmt,
357  int nquad0, int nquad1, int nquad2,
358  int nmodes0, int nmodes1, int nmodes2,
359  const Array<OneD, const NekDouble> &base0,
360  const Array<OneD, const NekDouble> &base1,
361  const Array<OneD, const NekDouble> &base2,
362  const Array<OneD, const NekDouble> &jac,
363  const Array<OneD, const NekDouble> &input,
364  Array<OneD, NekDouble> &output,
366 {
368  nmodes0,nmodes1,nmodes2);
369  int totpoints = nquad0*nquad1*nquad2;
370  int cnt;
371  int mode, mode1;
372 
373  Vmath::Vmul(numElmt*totpoints,jac,1,input,1,wsp,1);
374 
375  Array<OneD, NekDouble> wsp1 = wsp + numElmt * nquad2
376  * (max(nquad0*nquad1,
377  nmodes0*nmodes1));
378 
379  // Perform iproduct with respect to the '0' direction
380  Blas::Dgemm('T', 'N', nquad1*nquad2*numElmt, nmodes0, nquad0,
381  1.0, wsp.get(), nquad0, base0.get(),
382  nquad0, 0.0, wsp1.get(), nquad1*nquad2*numElmt);
383 
384 
385  // Perform iproduct with respect to the '1' direction
386  Blas::Dgemm('T', 'N', nquad2*numElmt*nmodes0, nmodes1, nquad1,
387  1.0, wsp1.get(), nquad1, base1.get(),
388  nquad1, 0.0, wsp.get(), nquad2*numElmt*nmodes0);
389 
390 
391  // Inner product with respect to the '2' direction (not sure if it would
392  // be better to swap loops?)
393  mode = mode1 = cnt = 0;
394  for(int i = 0; i < nmodes0; ++i)
395  {
396  cnt = i*nquad2*numElmt;
397  for(int j = 0; j < nmodes1; ++j)
398  {
399  Blas::Dgemm('T', 'N', nmodes2-i, numElmt, nquad2,
400  1.0, base2.get()+mode*nquad2, nquad2,
401  wsp.get()+j*nquad2*numElmt*nmodes0 + cnt, nquad2,
402  0.0, output.get()+mode1, totmodes);
403  mode1 += nmodes2-i;
404  }
405  mode += nmodes2-i;
406  }
407 
408  // fix for modified basis by splitting top vertex mode
409  if (sortTopVertex)
410  {
411  // top singular vertex
412  // ((1+a)/2 components entry into (1+c)/2)
413  // Could be made into an mxv if we have specialised base1[1]
414  for(int j =0; j < nmodes1; ++j)
415  {
416  Blas::Dgemv('T', nquad2,numElmt,1.0,
417  wsp.get()+j*nquad2*numElmt*nmodes0+nquad2*numElmt,
418  nquad2, base2.get()+nquad2,1,1.0,
419  &output[j*nmodes2+1], totmodes);
420  }
421  }
422 }
423 
424 
425 /**
426  *
427  */
428 void TetIProduct(bool sortTopEdge, int numElmt,
429  int nquad0, int nquad1, int nquad2,
430  int nmodes0, int nmodes1, int nmodes2,
431  const Array<OneD, const NekDouble> &base0,
432  const Array<OneD, const NekDouble> &base1,
433  const Array<OneD, const NekDouble> &base2,
434  const Array<OneD, const NekDouble> &jac,
435  const Array<OneD, const NekDouble> &input,
436  Array<OneD, NekDouble> &output,
438 {
440  nmodes0,nmodes1,nmodes2);
441  int totpoints = nquad0*nquad1*nquad2;
442  int cnt;
443  int mode, mode1;
444 
445  Vmath::Vmul(numElmt*totpoints,jac,1,input,1,wsp,1);
446 
447  Array<OneD, NekDouble> wsp1 = wsp +
448  nquad2*numElmt*(max(nquad0*nquad1,nmodes0*(2*nmodes1-nmodes0+1)/2));
449 
450 
451  // Perform iproduct with respect to the '0' direction
452  Blas::Dgemm('T', 'N', nquad1*nquad2*numElmt, nmodes0, nquad0,
453  1.0, wsp.get(), nquad0, base0.get(),
454  nquad0, 0.0, wsp1.get(), nquad1*nquad2*numElmt);
455 
456  // Inner product with respect to the '1' direction
457  mode = 0;
458  for(int i=0; i < nmodes0; ++i)
459  {
460  Blas::Dgemm('T', 'N', nquad2*numElmt, nmodes1-i, nquad1,
461  1.0, wsp1.get()+ i*nquad1*nquad2*numElmt, nquad1,
462  base1.get() + mode*nquad1, nquad1,
463  0.0, wsp.get() + mode*nquad2*numElmt,nquad2*numElmt);
464  mode += nmodes1-i;
465  }
466 
467 
468  // fix for modified basis by splitting top vertex mode
469  if (sortTopEdge)
470  {
471  // base singular vertex and singular edge (1+b)/2
472  // ((1+a)/2 components entry into (1+b)/2)
473  // Could be made into an mxm if we have specialised base1[1]
474  for(int n = 0; n < numElmt; ++n)
475  {
476  Blas::Dgemv('T', nquad1, nquad2,
477  1.0, wsp1.get()+numElmt*nquad1*nquad2 +
478  n*nquad1*nquad2, nquad1,
479  base1.get()+nquad1, 1, 1.0,
480  wsp.get()+nquad2*numElmt + n*nquad2, 1);
481  }
482  }
483 
484  // Inner product with respect to the '2' direction
485  mode = mode1 = cnt = 0;
486  for(int i = 0; i < nmodes0; ++i)
487  {
488  for(int j = 0; j < nmodes1-i; ++j, ++cnt)
489  {
490  Blas::Dgemm('T', 'N', nmodes2-i-j, numElmt, nquad2,
491  1.0, base2.get()+mode*nquad2, nquad2,
492  wsp.get()+cnt*nquad2*numElmt, nquad2,
493  0.0, output.get()+mode1, totmodes);
494  mode += nmodes2-i-j;
495  mode1 += nmodes2-i-j;
496  }
497 
498  //increment mode in case order1!=order2
499  mode += (nmodes2-nmodes1)*(nmodes2-nmodes1+1)/2;
500  }
501 
502  // fix for modified basis for top singular vertex component
503  // Already have evaluated (1+c)/2 (1-b)/2 (1-a)/2
504  if(sortTopEdge)
505  {
506  for(int n = 0; n < numElmt; ++n)
507  {
508  // add in (1+c)/2 (1+b)/2 component
509  output[1+n*totmodes] += Blas::Ddot(nquad2,
510  base2.get()+nquad2,1,
511  &wsp[nquad2*numElmt + n*nquad2],1);
512 
513  // add in (1+c)/2 (1-b)/2 (1+a)/2 component
514  output[1+n*totmodes] += Blas::Ddot(nquad2,
515  base2.get()+nquad2,1,
516  &wsp[nquad2*nmodes1*numElmt+n*nquad2],1);
517  }
518  }
519 
520 }
521 
522 }
523 }
void HexIProduct(bool colldir0, bool colldir1, bool colldir2, int numElmt, int nquad0, int nquad1, int nquad2, int nmodes0, int nmodes1, int nmodes2, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &base2, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
Definition: IProduct.cpp:179
STL namespace.
void QuadIProduct(bool colldir0, bool colldir1, int numElmt, int nquad0, int nquad1, int nmodes0, int nmodes1, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
Definition: IProduct.cpp:49
void TetIProduct(bool sortTopEdge, int numElmt, int nquad0, int nquad1, int nquad2, int nmodes0, int nmodes1, int nmodes2, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &base2, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
Definition: IProduct.cpp:428
void TriIProduct(bool sortTopVertex, int numElmt, int nquad0, int nquad1, int nmodes0, int nmodes1, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
Definition: IProduct.cpp:135
T Ddot(int n, const Array< OneD, const T > &w, const int incw, const Array< OneD, const T > &x, const int incx, const Array< OneD, const int > &y, const int incy)
Definition: VmathArray.hpp:436
void PrismIProduct(bool sortTopVertex, int numElmt, int nquad0, int nquad1, int nquad2, int nmodes0, int nmodes1, int nmodes2, const Array< OneD, const NekDouble > &base0, const Array< OneD, const NekDouble > &base1, const Array< OneD, const NekDouble > &base2, const Array< OneD, const NekDouble > &jac, const Array< OneD, const NekDouble > &input, Array< OneD, NekDouble > &output, Array< OneD, NekDouble > &wsp)
Definition: IProduct.cpp:356
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
Definition: Vmath.cpp:1061
void Vmul(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Multiply vector z = x*y.
Definition: Vmath.cpp:183