Nektar++
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Transposition.cpp
Go to the documentation of this file.
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 // File Transposition.cpp
4 //
5 // For more information, please see: http://www.nektar.info
6 //
7 // The MIT License
8 //
9 // Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10 // Department of Aeronautics, Imperial College London (UK), and Scientific
11 // Computing and Imaging Institute, University of Utah (USA).
12 //
13 // License for the specific language governing rights and limitations under
14 // Permission is hereby granted, free of charge, to any person obtaining a
15 // copy of this software and associated documentation files (the "Software"),
16 // to deal in the Software without restriction, including without limitation
17 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
18 // and/or sell copies of the Software, and to permit persons to whom the
19 // Software is furnished to do so, subject to the following conditions:
20 //
21 // The above copyright notice and this permission notice shall be included
22 // in all copies or substantial portions of the Software.
23 //
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
25 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30 // DEALINGS IN THE SOFTWARE.
31 //
32 // Description: Data manager for homogeneous transpositions
33 //
34 ///////////////////////////////////////////////////////////////////////////////
35 
37 
38 #include <LibUtilities/BasicUtils/ErrorUtil.hpp> // for ASSERTL0, etc
39 #include <LibUtilities/BasicUtils/SharedArray.hpp> // for Array
40 #include <LibUtilities/BasicUtils/Vmath.hpp> // for Vcopy
41 #include <LibUtilities/Foundations/Basis.h> // for BasisKey
43 
44 
45 namespace Nektar
46 {
47  namespace LibUtilities
48  {
49  /**
50  * Constructor for 1D transform.
    *
    * Stores the communicator in m_hcomm, reads the number of points and
    * modes from HomoBasis0, divides the points by the communicator size
    * and fills m_planes_IDs and m_K for the points assigned to this rank.
    *
    * NOTE(review): the signature line and a few statement fragments of
    * this constructor are absent from this listing (the embedded line
    * numbers skip, e.g. 51 to 54); confirm details against the complete
    * Transposition.cpp before relying on them.
51  */
54  {
55  m_hcomm = hcomm;
57 
    // Four int arrays of length m_num_homogeneous_directions are created
    // here. The assignment targets are not visible in this listing;
    // presumably they are the four m_num_* arrays indexed below -- TODO
    // confirm.
59  Array<OneD,int>(m_num_homogeneous_directions);
61  Array<OneD,int>(m_num_homogeneous_directions);
63  Array<OneD,int>(m_num_homogeneous_directions);
65  Array<OneD,int>(m_num_homogeneous_directions);
66 
67  m_num_homogeneous_points[0] = HomoBasis0.GetNumPoints();
68  m_num_homogeneous_coeffs[0] = HomoBasis0.GetNumModes();
69  m_num_processes[0] = m_hcomm->GetSize();
    // Points handled by each process: number of homogeneous points
    // divided by the number of processes.
70  m_num_points_per_proc[0] = m_num_homogeneous_points[0] /
71  m_num_processes[0];
72  m_rank_id = m_hcomm->GetRank();
73 
74  //================================================================
75  // TODO: Need to be generalised for 1D, 2D and 3D
76  m_planes_IDs = Array<OneD, unsigned int>(m_num_points_per_proc[0]);
77  m_K = Array<OneD, unsigned int>(m_num_points_per_proc[0]);
78 
    // The plane IDs of this rank are consecutive, starting at
    // m_rank_id * m_num_points_per_proc[0].
79  for(int i = 0 ; i < m_num_points_per_proc[0] ; i++)
80  {
81  m_planes_IDs[i] = m_rank_id*m_num_points_per_proc[0] + i;
82  }
83 
    // For a Fourier basis every m_K entry is the plane ID halved
    // (division of unsigned integers).
84  if(HomoBasis0.GetBasisType() == LibUtilities::eFourier)
85  {
86  for(int i = 0 ; i < m_num_points_per_proc[0] ; i++)
87  {
88  m_K[i] = m_planes_IDs[i]/2;
89  }
90  }
91 
    // NOTE(review): the condition guarding this block is not visible in
    // this listing. The block sets the first two m_K entries to 1.
93  {
94  m_K[0] = 1;
95  m_K[1] = 1;
96  }
97 
    // NOTE(review): the condition guarding this block is not visible in
    // this listing either. The block sets only the first m_K entry to 1.
100  {
101  m_K[0]=1;
102  }
103  //================================================================
104  }
105 
106 
107  /**
108  * Constructor for 2D transform.
     *
     * Stores the communicator in m_hcomm and records, for each of the two
     * homogeneous directions, the number of points and modes of the
     * corresponding basis key, the number of processes (row communicator
     * size for direction 0, column communicator size for direction 1) and
     * the resulting number of points per process.
     *
     * NOTE(review): the first line of the signature and the assignment
     * targets of the four allocations are absent from this listing;
     * confirm against the complete Transposition.cpp.
109  */
111  const LibUtilities::BasisKey &HomoBasis1,
113  {
114  m_hcomm = hcomm;
116 
     // Four int arrays of length m_num_homogeneous_directions are created
     // here; their assignment targets are not visible in this listing.
118  Array<OneD,int>(m_num_homogeneous_directions);
120  Array<OneD,int>(m_num_homogeneous_directions);
122  Array<OneD,int>(m_num_homogeneous_directions);
124  Array<OneD,int>(m_num_homogeneous_directions);
125 
126  m_num_homogeneous_points[0] = HomoBasis0.GetNumPoints();
127  m_num_homogeneous_coeffs[0] = HomoBasis0.GetNumModes();
128  m_num_homogeneous_points[1] = HomoBasis1.GetNumPoints();
129  m_num_homogeneous_coeffs[1] = HomoBasis1.GetNumModes();
130 
     // Direction 0 uses the row communicator, direction 1 the column
     // communicator.
131  m_num_processes[0] = m_hcomm->GetRowComm()->GetSize();
132  m_num_processes[1] = m_hcomm->GetColumnComm()->GetSize();
133 
     // Points per process in each direction: points divided by processes.
134  m_num_points_per_proc[0] = m_num_homogeneous_points[0] /
135  m_num_processes[0];
136  m_num_points_per_proc[1] = m_num_homogeneous_points[1] /
137  m_num_processes[1];
138 
139  //================================================================
140  // TODO: Need set up for 2D lines IDs and Ks if Fourier
141  //================================================================
142  }
143 
144 
145  /**
146  * Constructor for 3D transform.
     *
     * Stores the communicator in m_hcomm and then always triggers
     * ASSERTL0 with the message below, because the 3D set-up does not
     * exist yet.
     *
     * NOTE(review): the first line of the signature and the assignment
     * targets of the four allocations are absent from this listing;
     * confirm against the complete Transposition.cpp.
147  */
149  const LibUtilities::BasisKey &HomoBasis1,
150  const LibUtilities::BasisKey &HomoBasis2,
152  {
153  m_hcomm = hcomm;
155 
     // Four int arrays of length m_num_homogeneous_directions are created
     // here; their assignment targets are not visible in this listing.
157  Array<OneD,int>(m_num_homogeneous_directions);
159  Array<OneD,int>(m_num_homogeneous_directions);
161  Array<OneD,int>(m_num_homogeneous_directions);
163  Array<OneD,int>(m_num_homogeneous_directions);
164 
165  //================================================================
166  // TODO: Need set up for 3D
     // Unconditional assertion failure: the condition is the literal false.
167  ASSERTL0(false, "Transposition is not set up for 3D.");
168  //================================================================
169  }
170 
171 
172  /**
173  * Destructor
     *
     * The body is empty; no explicit clean-up is performed here.
     *
     * NOTE(review): the signature line is absent from this listing.
174  */
176  {
177 
178  }
179 
180  //====================================================================
181  // TODO: Need to generalise the following methods for 1D, 2D and 3D
182  unsigned int Transposition::GetK(int i)
183  {
184  return m_K[i];
185  }
186 
187  Array<OneD, unsigned int> Transposition::GetKs(void)
188  {
189  return m_K;
190  }
191 
192  unsigned int Transposition::GetPlaneID(int i)
193  {
194  return m_planes_IDs[i];
195  }
196 
197  Array<OneD, unsigned int> Transposition::GetPlanesIDs(void)
198  {
199  return m_planes_IDs;
200  }
201 
202 
203  /**
204  * Main method: General transposition, the dir parameters define if
205  * 1D,2D,3D and which transposition is required at the same time
     *
     * Forwards inarray, outarray and UseNumMode unchanged to the routine
     * selected by dir. The directions eXtoY, eYtoZ and eZtoX have no
     * routine yet and trigger ASSERTL0 instead, as does any value of dir
     * that matches no case label.
     *
     * @param inarray     Input data, passed through to the chosen routine.
     * @param outarray    Output data, passed through to the chosen routine.
     * @param UseNumMode  Flag passed through to the chosen routine.
     * @param dir         Selects which transposition routine is called.
     *
     * NOTE(review): the first line of the signature is absent from this
     * listing.
206  */
208  const Array<OneD,const NekDouble> &inarray,
209  Array<OneD, NekDouble> &outarray,
210  bool UseNumMode,TranspositionDir dir)
211  {
212  switch(dir)
213  {
214  case eXYtoZ:
215  {
216  TransposeXYtoZ(inarray,outarray,UseNumMode);
217  }
218  break;
219  case eZtoXY:
220  {
221  TransposeZtoXY(inarray,outarray,UseNumMode);
222  }
223  break;
224  case eXtoYZ:
225  {
226  TransposeXtoYZ(inarray,outarray,UseNumMode);
227  }
228  break;
229  case eYZtoX:
230  {
231  TransposeYZtoX(inarray,outarray,UseNumMode);
232  }
233  break;
234  case eYZtoZY:
235  {
236  TransposeYZtoZY(inarray,outarray,UseNumMode);
237  }
238  break;
239  case eZYtoYZ:
240  {
241  TransposeZYtoYZ(inarray,outarray,UseNumMode);
242  }
243  break;
     // The next three directions are recognised but not implemented.
244  case eXtoY:
245  {
246  ASSERTL0(false,"Transposition not implemented yet.");
247  }
248  break;
249  case eYtoZ:
250  {
251  ASSERTL0(false,"Transposition not implemented yet.");
252  }
253  break;
254  case eZtoX:
255  {
256  ASSERTL0(false,"Transposition not implemented yet.");
257  }
258  break;
     // Any other value of dir is rejected.
259  default:
260  {
261  ASSERTL0(false,"Transposition type does not exist.");
262  }
263  }
264  }
265 
266 
267  /**
268  * Homogeneous 1D transposition from SEM to Homogeneous ordering.
     *
     * When m_num_processes[0] is greater than 1 the data is packed into
     * outarray, exchanged with AlltoAllv through m_hcomm and then copied
     * back into outarray with a stride. Otherwise a purely local strided
     * copy from inarray to outarray is done.
     *
     * @param inarray     Input data.
     * @param outarray    Output data. In the parallel branch it is also
     *                    used as the send buffer of the exchange.
     * @param UseNumMode  If true the stride (packed_len) is
     *                    m_num_homogeneous_coeffs[0], otherwise
     *                    m_num_homogeneous_points[0].
     *
     * NOTE(review): the first line of the signature and the right operand
     * of the m_SizeMap assignment are absent from this listing. The
     * comments below read Vmath::Vcopy arguments as (count, source,
     * source stride, destination, destination stride) -- confirm against
     * the Vmath documentation.
269  */
271  const Array<OneD,const NekDouble> &inarray,
272  Array<OneD, NekDouble> &outarray,
273  bool UseNumMode)
274  {
275  if(m_num_processes[0] > 1)
276  {
277  // Parameter set-up
278  int i,packed_len;
279  int copy_len = 0;
280  int index = 0;
281  int cnt = 0;
282 
283  int num_dofs = inarray.num_elements();
284  int num_points_per_plane = num_dofs/m_num_points_per_proc[0];
     // Quotient rounded up: the remainder test contributes 1 when
     // the division by the process count is not exact.
285  int num_pencil_per_proc =
286  (num_points_per_plane / m_num_processes[0]) +
287  (num_points_per_plane % m_num_processes[0] > 0);
288 
     // One size and one offset entry per process, initialised to 0.
289  m_SizeMap = Array<OneD,int> (m_num_processes[0],0);
290  m_OffsetMap = Array<OneD,int> (m_num_processes[0],0);
291 
292  for(i = 0; i < m_num_processes[0]; i++)
293  {
     // NOTE(review): the right operand of this product is not
     // visible in this listing.
294  m_SizeMap[i] = num_pencil_per_proc *
296  m_OffsetMap[i] = i * num_pencil_per_proc *
297  m_num_points_per_proc[0];
298  }
299 
     // Receive buffer for the exchange, zero-initialised.
300  Array< OneD, NekDouble> tmp_outarray(
301  num_pencil_per_proc*m_num_homogeneous_points[0],
302  0.0);
303 
304  if(UseNumMode)
305  {
306  packed_len = m_num_homogeneous_coeffs[0];
307  }
308  else
309  {
310  packed_len = m_num_homogeneous_points[0];
311  }
312 
313  // Start Transposition
     // Walk through the points of a plane in chunks of at most
     // num_pencil_per_proc. For every chunk, the matching slice of
     // each local plane is copied into outarray; the write position
     // cnt advances by num_pencil_per_proc per plane even when the
     // last chunk is shorter.
314  while(index < num_points_per_plane)
315  {
316  copy_len =
317  num_pencil_per_proc < (num_points_per_plane - index)
318  ? num_pencil_per_proc
319  : (num_points_per_plane - index);
320 
321  for(i = 0 ; i < m_num_points_per_proc[0]; i++)
322  {
323  Vmath::Vcopy(copy_len,
324  &(inarray[index + (i * num_points_per_plane)]), 1,
325  &(outarray[cnt]), 1);
326 
327  cnt += num_pencil_per_proc;
328  }
329 
330  index += copy_len;
331  }
332 
     // Exchange: outarray is sent, tmp_outarray receives; the same
     // size and offset maps are used on both sides.
333  m_hcomm->AlltoAllv(outarray, m_SizeMap, m_OffsetMap,
334  tmp_outarray, m_SizeMap, m_OffsetMap);
335 
     // Block i of tmp_outarray (num_pencil_per_proc contiguous
     // values) is written to outarray starting at i with stride
     // packed_len.
336  for(i = 0; i < packed_len; ++i)
337  {
338  Vmath::Vcopy(num_pencil_per_proc,
339  &(tmp_outarray[i * num_pencil_per_proc]), 1,
340  &(outarray[i]), packed_len);
341  }
342  // End Transposition
343  }
344 
345  // Serial case implementation (more efficient than the MPI
346  // single-processor implementation)
347  else
348  {
349  int i, pts_per_plane;
350  int n = inarray.num_elements();
351  int packed_len;
352 
353  pts_per_plane = n/m_num_points_per_proc[0];
354 
355  if(UseNumMode)
356  {
357  packed_len = m_num_homogeneous_coeffs[0];
358  }
359  else
360  {
361  packed_len = m_num_homogeneous_points[0];
362  }
363 
     // The copy below is not done in place, so both arrays must
     // start at different addresses.
364  ASSERTL1(&inarray[0] != &outarray[0],
365  "Inarray and outarray cannot be the same");
366 
     // Block i of inarray (pts_per_plane contiguous values) is
     // written to outarray starting at i with stride packed_len.
367  for(i = 0; i < packed_len; ++i)
368  {
369  Vmath::Vcopy(pts_per_plane,
370  &(inarray[i * pts_per_plane]), 1,
371  &(outarray[i]), packed_len);
372  }
373  }
374  }
375 
376 
377  /**
378  * Homogeneous 1D transposition from Homogeneous to SEM ordering.
     *
     * When m_num_processes[0] is greater than 1 the strided input is
     * gathered into a temporary buffer, exchanged with AlltoAllv through
     * m_hcomm and then unpacked plane by plane into outarray. Otherwise
     * a purely local strided copy from inarray to outarray is done.
     *
     * @param inarray     Input data.
     * @param outarray    Output data. In the parallel branch its length
     *                    determines the number of points per plane.
     * @param UseNumMode  If true the stride (packed_len) is
     *                    m_num_homogeneous_coeffs[0], otherwise
     *                    m_num_homogeneous_points[0].
     *
     * NOTE(review): the first line of the signature and the right operand
     * of the m_SizeMap assignment are absent from this listing. The
     * comments below read Vmath::Vcopy arguments as (count, source,
     * source stride, destination, destination stride) -- confirm against
     * the Vmath documentation.
379  */
381  const Array<OneD,const NekDouble> &inarray,
382  Array<OneD, NekDouble> &outarray,
383  bool UseNumMode)
384  {
385  if(m_num_processes[0] > 1)
386  {
387  // Parameter set-up
388  int i,packed_len;
389  int copy_len = 0;
390  int index = 0;
391  int cnt = 0;
392 
     // Unlike the serial branch, the sizes here are derived from
     // the length of outarray.
393  int num_dofs = outarray.num_elements();
394  int num_points_per_plane = num_dofs / m_num_points_per_proc[0];
     // Quotient rounded up: the remainder test contributes 1 when
     // the division by the process count is not exact.
395  int num_pencil_per_proc =
396  (num_points_per_plane / m_num_processes[0]) +
397  (num_points_per_plane % m_num_processes[0] > 0);
398 
     // One size and one offset entry per process, initialised to 0.
399  m_SizeMap = Array<OneD,int> (m_num_processes[0],0);
400  m_OffsetMap = Array<OneD,int> (m_num_processes[0],0);
401 
402  for(i = 0; i < m_num_processes[0]; i++)
403  {
     // NOTE(review): the right operand of this product is not
     // visible in this listing.
404  m_SizeMap[i] = num_pencil_per_proc *
406  m_OffsetMap[i] = i * num_pencil_per_proc *
407  m_num_points_per_proc[0];
408  }
409 
     // Send and receive buffers for the exchange, zero-initialised.
410  Array< OneD, NekDouble> tmp_inarray(
411  num_pencil_per_proc*m_num_homogeneous_points[0],
412  0.0);
413  Array< OneD, NekDouble> tmp_outarray(
414  num_pencil_per_proc*m_num_homogeneous_points[0],
415  0.0);
416 
417  if(UseNumMode)
418  {
419  packed_len = m_num_homogeneous_coeffs[0];
420  }
421  else
422  {
423  packed_len = m_num_homogeneous_points[0];
424  }
425 
426  // Start Transposition
     // Values read from inarray starting at i with stride
     // packed_len become contiguous block i of tmp_inarray.
427  for(i = 0; i < packed_len; ++i)
428  {
429  Vmath::Vcopy(num_pencil_per_proc,&(inarray[i]),packed_len,
430  &(tmp_inarray[i*num_pencil_per_proc]),1);
431  }
432 
     // Exchange: tmp_inarray is sent, tmp_outarray receives; the
     // same size and offset maps are used on both sides.
433  m_hcomm->AlltoAllv(tmp_inarray, m_SizeMap, m_OffsetMap,
434  tmp_outarray, m_SizeMap, m_OffsetMap);
435 
     // Walk through the points of a plane in chunks of at most
     // num_pencil_per_proc. For every chunk, consecutive
     // num_pencil_per_proc-sized slots of tmp_outarray are copied
     // into the matching slice of each local plane of outarray.
436  while(index < num_points_per_plane)
437  {
438  copy_len =
439  num_pencil_per_proc < (num_points_per_plane - index)
440  ? num_pencil_per_proc
441  : (num_points_per_plane - index);
442 
443  for(i = 0 ; i < m_num_points_per_proc[0]; i++)
444  {
445  Vmath::Vcopy(copy_len,
446  &(tmp_outarray[cnt]), 1,
447  &(outarray[index + (i * num_points_per_plane)]), 1);
448 
449  cnt += num_pencil_per_proc;
450  }
451 
452  index += copy_len;
453  }
454  // End Transposition
455  }
456 
457  // Serial case implementation (more efficient than the MPI
458  // single-processor implementation)
459  else
460  {
461  int i,pts_per_plane;
462  int n = inarray.num_elements();
463  int packed_len;
464 
465  // use length of inarray to determine data storage type
466  // (i.e. modal or physical).
467  pts_per_plane = n/m_num_points_per_proc[0];
468 
469  if(UseNumMode)
470  {
471  packed_len = m_num_homogeneous_coeffs[0];
472  }
473  else
474  {
475  packed_len = m_num_homogeneous_points[0];
476  }
477 
     // The copy below is not done in place, so both arrays must
     // start at different addresses.
478  ASSERTL1(&inarray[0] != &outarray[0],
479  "Inarray and outarray cannot be the same");
480 
     // Values read from inarray starting at i with stride
     // packed_len become contiguous block i of outarray.
481  for(i = 0; i < packed_len; ++i)
482  {
483  Vmath::Vcopy(pts_per_plane,
484  &(inarray[i]), packed_len,
485  &(outarray[i * pts_per_plane]), 1);
486  }
487  }
488  }
489 
490 
491  /**
492  * Homogeneous 2D transposition from SEM to Homogeneous(YZ) ordering.
     *
     * Only the serial case is implemented: if either m_num_processes
     * entry is greater than 1, ASSERTL0 is triggered. Otherwise a local
     * strided copy from inarray to outarray is done.
     *
     * @param inarray     Input data.
     * @param outarray    Output data.
     * @param UseNumMode  Selects whether packed_len is built from
     *                    m_num_homogeneous_coeffs or from
     *                    m_num_homogeneous_points.
     *
     * NOTE(review): the first line of the signature and the second
     * operand of three products are absent from this listing. The
     * comments below read Vmath::Vcopy arguments as (count, source,
     * source stride, destination, destination stride) -- confirm against
     * the Vmath documentation.
493  */
495  const Array<OneD,const NekDouble> &inarray,
496  Array<OneD, NekDouble> &outarray,
497  bool UseNumMode)
498  {
499  if(m_num_processes[0] > 1 || m_num_processes[1] > 1)
500  {
501  ASSERTL0(false,
502  "Parallel transposition not implemented yet for "
503  "3D-Homo-2D approach.");
504  }
505  else
506  {
507  int i, pts_per_line;
508  int n = inarray.num_elements();
509  int packed_len;
510 
     // NOTE(review): the second factor of the divisor is not visible
     // in this listing.
511  pts_per_line = n / (m_num_homogeneous_points[0] *
513 
514  if(UseNumMode)
515  {
     // NOTE(review): second factor not visible in this listing.
516  packed_len = (m_num_homogeneous_coeffs[0] *
518  }
519  else
520  {
     // NOTE(review): second factor not visible in this listing.
521  packed_len = (m_num_homogeneous_points[0] *
523  }
524 
     // The copy below is not done in place, so both arrays must
     // start at different addresses.
525  ASSERTL1(&inarray[0] != &outarray[0],
526  "Inarray and outarray cannot be the same");
527 
     // Block i of inarray (pts_per_line contiguous values) is
     // written to outarray starting at i with stride packed_len.
528  for(i = 0; i < packed_len; ++i)
529  {
530  Vmath::Vcopy(pts_per_line,
531  &(inarray[i * pts_per_line]), 1,
532  &(outarray[i]), packed_len);
533  }
534  }
535  }
536 
537 
538  /**
539  * Homogeneous 2D transposition from Homogeneous (YZ) ordering to SEM.
     *
     * Only the serial case is implemented: if either m_num_processes
     * entry is greater than 1, ASSERTL0 is triggered. Otherwise a local
     * strided copy from inarray to outarray is done.
     *
     * @param inarray     Input data.
     * @param outarray    Output data.
     * @param UseNumMode  Selects whether packed_len is built from
     *                    m_num_homogeneous_coeffs or from
     *                    m_num_homogeneous_points.
     *
     * NOTE(review): the first line of the signature and the second
     * operand of three products are absent from this listing. The
     * comments below read Vmath::Vcopy arguments as (count, source,
     * source stride, destination, destination stride) -- confirm against
     * the Vmath documentation.
540  */
542  const Array<OneD,const NekDouble> &inarray,
543  Array<OneD, NekDouble> &outarray,
544  bool UseNumMode)
545  {
546  if(m_num_processes[0] > 1 || m_num_processes[1] > 1)
547  {
548  ASSERTL0(false,
549  "Parallel transposition not implemented yet for "
550  "3D-Homo-2D approach.");
551  }
552  else
553  {
554  int i,pts_per_line;
555  int n = inarray.num_elements();
556  int packed_len;
557 
     // NOTE(review): the second factor of the divisor is not visible
     // in this listing.
558  pts_per_line = n / (m_num_homogeneous_points[0] *
560 
561  if(UseNumMode)
562  {
     // NOTE(review): second factor not visible in this listing.
563  packed_len = (m_num_homogeneous_coeffs[0] *
565  }
566  else
567  {
     // NOTE(review): second factor not visible in this listing.
568  packed_len = (m_num_homogeneous_points[0] *
570  }
571 
     // The copy below is not done in place, so both arrays must
     // start at different addresses.
572  ASSERTL1(&inarray[0] != &outarray[0],
573  "Inarray and outarray cannot be the same");
574 
     // Values read from inarray starting at i with stride packed_len
     // become contiguous block i of outarray.
575  for(i = 0; i < packed_len; ++i)
576  {
577  Vmath::Vcopy(pts_per_line,
578  &(inarray[i]), packed_len,
579  &(outarray[i * pts_per_line]), 1);
580  }
581  }
582  }
583 
584 
585  /**
586  * Homogeneous 2D transposition from Y ordering to Z.
     *
     * Only the serial case is implemented: if either m_num_processes
     * entry is greater than 1, ASSERTL0 is triggered. Otherwise a local
     * strided copy from inarray to outarray is done.
     *
     * @param inarray     Input data.
     * @param outarray    Output data.
     * @param UseNumMode  Not referenced in the visible body.
     *
     * NOTE(review): the first line of the signature and the second
     * factor of n are absent from this listing. The comments below read
     * Vmath::Vcopy arguments as (count, source, source stride,
     * destination, destination stride) -- confirm against the Vmath
     * documentation.
587  */
589  const Array<OneD,const NekDouble> &inarray,
590  Array<OneD, NekDouble> &outarray,
591  bool UseNumMode)
592  {
593  if(m_num_processes[0] > 1 || m_num_processes[1] > 1)
594  {
595  ASSERTL0(false,
596  "Parallel transposition not implemented yet for "
597  "3D-Homo-2D approach.");
598  }
599  else
600  {
     // NOTE(review): the second factor of this product is not
     // visible in this listing.
601  int n = m_num_homogeneous_points[0] *
603  int s = inarray.num_elements();
604 
605  int pts_per_line = s/n;
606 
607  int packed_len = pts_per_line * m_num_homogeneous_points[1];
608 
     // Values read from inarray starting at i with stride
     // m_num_homogeneous_points[0] become contiguous block i
     // (packed_len values) of outarray.
609  for(int i = 0; i < m_num_homogeneous_points[0] ; ++i)
610  {
611  Vmath::Vcopy(packed_len,
612  &(inarray[i]), m_num_homogeneous_points[0],
613  &(outarray[i * packed_len]), 1);
614  }
615  }
616  }
617 
618 
619  /**
620  * Homogeneous 2D transposition from Z ordering to Y.
     *
     * Only the serial case is implemented: if either m_num_processes
     * entry is greater than 1, ASSERTL0 is triggered. Otherwise a local
     * strided copy from inarray to outarray is done.
     *
     * @param inarray     Input data.
     * @param outarray    Output data.
     * @param UseNumMode  Not referenced in the visible body.
     *
     * NOTE(review): the first line of the signature and the second
     * factor of n are absent from this listing. The comments below read
     * Vmath::Vcopy arguments as (count, source, source stride,
     * destination, destination stride) -- confirm against the Vmath
     * documentation.
621  */
623  const Array<OneD,const NekDouble> &inarray,
624  Array<OneD, NekDouble> &outarray,
625  bool UseNumMode)
626  {
627  if(m_num_processes[0] > 1 || m_num_processes[1] > 1)
628  {
629  ASSERTL0(false,
630  "Parallel transposition not implemented yet for "
631  "3D-Homo-2D approach.");
632  }
633  else
634  {
     // NOTE(review): the second factor of this product is not
     // visible in this listing.
635  int n = m_num_homogeneous_points[0] *
637  int s = inarray.num_elements();
638 
639  int pts_per_line = s/n;
640 
641  int packed_len = pts_per_line * m_num_homogeneous_points[1];
642 
     // Values read from inarray starting at i with stride packed_len
     // become contiguous block i (m_num_homogeneous_points[0] values)
     // of outarray.
643  for(int i = 0; i < packed_len ; ++i)
644  {
645  Vmath::Vcopy(m_num_homogeneous_points[0],
646  &(inarray[i]), packed_len,
647  &(outarray[i * m_num_homogeneous_points[0]]), 1);
648  }
649  }
650  }
651 
652 
653  // TODO: Implement 2D and 3D transposition routines
654  }
655 }