Nektar++
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Transposition.cpp
Go to the documentation of this file.
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 // File Transposition.cpp
4 //
5 // For more information, please see: http://www.nektar.info
6 //
7 // The MIT License
8 //
9 // Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10 // Department of Aeronautics, Imperial College London (UK), and Scientific
11 // Computing and Imaging Institute, University of Utah (USA).
12 //
13 // License for the specific language governing rights and limitations under
14 // Permission is hereby granted, free of charge, to any person obtaining a
15 // copy of this software and associated documentation files (the "Software"),
16 // to deal in the Software without restriction, including without limitation
17 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
18 // and/or sell copies of the Software, and to permit persons to whom the
19 // Software is furnished to do so, subject to the following conditions:
20 //
21 // The above copyright notice and this permission notice shall be included
22 // in all copies or substantial portions of the Software.
23 //
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
25 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30 // DEALINGS IN THE SOFTWARE.
31 //
32 // Description: Data manager for homogeneous transpositions
33 //
34 ///////////////////////////////////////////////////////////////////////////////
35 
37 
38 #include <LibUtilities/BasicUtils/ErrorUtil.hpp> // for ASSERTL0, etc
39 #include <LibUtilities/BasicUtils/SharedArray.hpp> // for Array
40 #include <LibUtilities/BasicUtils/Vmath.hpp> // for Vcopy
41 #include <LibUtilities/Foundations/Basis.h> // for BasisKey
43 
44 
45 namespace Nektar
46 {
47  namespace LibUtilities
48  {
49  /**
50  * Constructor for 1D transform.
51  */
55  {
56  m_hcomm = hcomm1;
58 
67 
68  m_num_homogeneous_points[0] = HomoBasis0.GetNumPoints();
69  m_num_homogeneous_coeffs[0] = HomoBasis0.GetNumModes();
70  m_num_processes[0] = m_hcomm->GetSize();
71  m_num_points_per_proc[0] = m_num_homogeneous_points[0] /
72  m_num_processes[0];
73  m_rank_id = m_hcomm->GetRank();
74 
75  //================================================================
76  // TODO: Need to be generalised for 1D, 2D and 3D
79 
80  for(int i = 0 ; i < m_num_points_per_proc[0] ; i++)
81  {
82  m_planes_IDs[i] = m_rank_id*m_num_points_per_proc[0] + i;
83  }
84 
85  int global_rank_id = hcomm0->GetColumnComm()->GetRank();
86  int NumStrips = hcomm0->GetColumnComm()->GetSize() /
87  m_hcomm->GetSize();
88  m_strip_ID = 0;
89 
90  if (NumStrips > 1)
91  {
92  m_strip_ID =
93  (NumStrips > global_rank_id) ? global_rank_id:(global_rank_id - NumStrips);
94  }
95 
96  if(HomoBasis0.GetBasisType() == LibUtilities::eFourier)
97  {
98  for(int i = 0 ; i < m_num_points_per_proc[0] ; i++)
99  {
100  m_K[i] = m_planes_IDs[i]/2;
101  }
102  }
103 
105  {
106  m_K[0] = 1;
107  m_K[1] = 1;
108  }
109 
110  if(HomoBasis0.GetBasisType() == LibUtilities::eFourierHalfModeRe ||
112  {
113  m_K[0]=1;
114  }
115  //================================================================
116  }
117 
118 
119  /**
120  * Constructor for 2D transform.
121  */
123  const LibUtilities::BasisKey &HomoBasis1,
125  {
126  m_hcomm = hcomm;
128 
137 
138  m_num_homogeneous_points[0] = HomoBasis0.GetNumPoints();
139  m_num_homogeneous_coeffs[0] = HomoBasis0.GetNumModes();
140  m_num_homogeneous_points[1] = HomoBasis1.GetNumPoints();
141  m_num_homogeneous_coeffs[1] = HomoBasis1.GetNumModes();
142 
143  m_num_processes[0] = m_hcomm->GetRowComm()->GetSize();
144  m_num_processes[1] = m_hcomm->GetColumnComm()->GetSize();
145 
146  m_num_points_per_proc[0] = m_num_homogeneous_points[0] /
147  m_num_processes[0];
148  m_num_points_per_proc[1] = m_num_homogeneous_points[1] /
149  m_num_processes[1];
150 
151  //================================================================
152  // TODO: Need set up for 2D lines IDs and Ks if Fourier
153  //================================================================
154  }
155 
156 
157  /**
158  * Constructor for 3D transform.
159  */
161  const LibUtilities::BasisKey &HomoBasis1,
162  const LibUtilities::BasisKey &HomoBasis2,
164  {
165  m_hcomm = hcomm;
167 
176 
177  //================================================================
178  // TODO: Need set up for 3D
179  ASSERTL0(false, "Transposition is not set up for 3D.");
180  //================================================================
181  }
182 
183 
184  /**
185  * Destructor
186  */
188  {
189 
190  }
191 
192  //====================================================================
193  // TODO: Need to generalise the following methods for 1D, 2D and 3D
/**
 * Returns the entry of m_K at position i. m_K holds the Fourier wave
 * numbers associated with the planes. The index i is used as given;
 * this function itself does not validate it.
 */
194  unsigned int Transposition::GetK(int i)
195  {
196  return m_K[i];
197  }
198 
200  {
201  return m_K;
202  }
203 
/**
 * Returns the entry of m_planes_IDs at position i. m_planes_IDs holds
 * the IDs of the planes on the processes. The index i is used as
 * given; this function itself does not validate it.
 */
204  unsigned int Transposition::GetPlaneID(int i)
205  {
206  return m_planes_IDs[i];
207  }
208 
210  {
211  return m_planes_IDs;
212  }
213 
/**
 * Returns m_strip_ID, the strip ID stored for this process. The
 * 1D constructor initialises it to 0 and only recomputes it from the
 * column-communicator rank when more than one strip is present.
 */
214  unsigned int Transposition::GetStripID(void)
215  {
216  return m_strip_ID;
217  }
218 
219  /**
220  * Main method: General transposition, the dir parameters define if
221  * 1D,2D,3D and which transposition is required at the same time
222  */
224  const Array<OneD,const NekDouble> &inarray,
225  Array<OneD, NekDouble> &outarray,
226  bool UseNumMode,TranspositionDir dir)
227  {
228  switch(dir)
229  {
230  case eXYtoZ:
231  {
232  TransposeXYtoZ(inarray,outarray,UseNumMode);
233  }
234  break;
235  case eZtoXY:
236  {
237  TransposeZtoXY(inarray,outarray,UseNumMode);
238  }
239  break;
240  case eXtoYZ:
241  {
242  TransposeXtoYZ(inarray,outarray,UseNumMode);
243  }
244  break;
245  case eYZtoX:
246  {
247  TransposeYZtoX(inarray,outarray,UseNumMode);
248  }
249  break;
250  case eYZtoZY:
251  {
252  TransposeYZtoZY(inarray,outarray,UseNumMode);
253  }
254  break;
255  case eZYtoYZ:
256  {
257  TransposeZYtoYZ(inarray,outarray,UseNumMode);
258  }
259  break;
260  case eXtoY:
261  {
262  ASSERTL0(false,"Transposition not implemented yet.");
263  }
264  break;
265  case eYtoZ:
266  {
267  ASSERTL0(false,"Transposition not implemented yet.");
268  }
269  break;
270  case eZtoX:
271  {
272  ASSERTL0(false,"Transposition not implemented yet.");
273  }
274  break;
275  default:
276  {
277  ASSERTL0(false,"Transposition type does not exist.");
278  }
279  }
280  }
281 
282 
283  /**
284  * Homogeneous 1D transposition from SEM to Homogeneous ordering.
285  */
287  const Array<OneD,const NekDouble> &inarray,
288  Array<OneD, NekDouble> &outarray,
289  bool UseNumMode)
290  {
291  if(m_num_processes[0] > 1)
292  {
293  // Parameters set up
294  int i,packed_len;
295  int copy_len = 0;
296  int index = 0;
297  int cnt = 0;
298 
299  int num_dofs = inarray.num_elements();
300  int num_points_per_plane = num_dofs/m_num_points_per_proc[0];
301  int num_pencil_per_proc =
302  (num_points_per_plane / m_num_processes[0]) +
303  (num_points_per_plane % m_num_processes[0] > 0);
304 
306  m_OffsetMap = Array<OneD,int> (m_num_processes[0],0);
307 
308  for(i = 0; i < m_num_processes[0]; i++)
309  {
310  m_SizeMap[i] = num_pencil_per_proc *
312  m_OffsetMap[i] = i * num_pencil_per_proc *
313  m_num_points_per_proc[0];
314  }
315 
316  Array< OneD, NekDouble> tmp_outarray(
317  num_pencil_per_proc*m_num_homogeneous_points[0],
318  0.0);
319 
320  if(UseNumMode)
321  {
322  packed_len = m_num_homogeneous_coeffs[0];
323  }
324  else
325  {
326  packed_len = m_num_homogeneous_points[0];
327  }
328 
329  // Start Transposition
330  while(index < num_points_per_plane)
331  {
332  copy_len =
333  num_pencil_per_proc < (num_points_per_plane - index)
334  ? num_pencil_per_proc
335  : (num_points_per_plane - index);
336 
337  for(i = 0 ; i < m_num_points_per_proc[0]; i++)
338  {
339  Vmath::Vcopy(copy_len,
340  &(inarray[index + (i * num_points_per_plane)]), 1,
341  &(outarray[cnt]), 1);
342 
343  cnt += num_pencil_per_proc;
344  }
345 
346  index += copy_len;
347  }
348 
349  m_hcomm->AlltoAllv(outarray, m_SizeMap, m_OffsetMap,
350  tmp_outarray, m_SizeMap, m_OffsetMap);
351 
352  for(i = 0; i < packed_len; ++i)
353  {
354  Vmath::Vcopy(num_pencil_per_proc,
355  &(tmp_outarray[i * num_pencil_per_proc]), 1,
356  &(outarray[i]), packed_len);
357  }
358  // End Transposition
359  }
360 
361  // Serial case implementation (more efficient than MPI 1 processor
362  // implementation)
363  else
364  {
365  int i, pts_per_plane;
366  int n = inarray.num_elements();
367  int packed_len;
368 
369  pts_per_plane = n/m_num_points_per_proc[0];
370 
371  if(UseNumMode)
372  {
373  packed_len = m_num_homogeneous_coeffs[0];
374  }
375  else
376  {
377  packed_len = m_num_homogeneous_points[0];
378  }
379 
380  ASSERTL1(&inarray[0] != &outarray[0],
381  "Inarray and outarray cannot be the same");
382 
383  for(i = 0; i < packed_len; ++i)
384  {
385  Vmath::Vcopy(pts_per_plane,
386  &(inarray[i * pts_per_plane]), 1,
387  &(outarray[i]), packed_len);
388  }
389  }
390  }
391 
392 
393  /**
394  * Homogeneous 1D transposition from Homogeneous to SEM ordering.
395  */
397  const Array<OneD,const NekDouble> &inarray,
398  Array<OneD, NekDouble> &outarray,
399  bool UseNumMode)
400  {
401  if(m_num_processes[0] > 1)
402  {
403  // Parameters set up
404  int i,packed_len;
405  int copy_len = 0;
406  int index = 0;
407  int cnt = 0;
408 
409  int num_dofs = outarray.num_elements();
410  int num_points_per_plane = num_dofs / m_num_points_per_proc[0];
411  int num_pencil_per_proc =
412  (num_points_per_plane / m_num_processes[0]) +
413  (num_points_per_plane % m_num_processes[0] > 0);
414 
416  m_OffsetMap = Array<OneD,int> (m_num_processes[0],0);
417 
418  for(i = 0; i < m_num_processes[0]; i++)
419  {
420  m_SizeMap[i] = num_pencil_per_proc *
422  m_OffsetMap[i] = i * num_pencil_per_proc *
423  m_num_points_per_proc[0];
424  }
425 
426  Array< OneD, NekDouble> tmp_inarray(
427  num_pencil_per_proc*m_num_homogeneous_points[0],
428  0.0);
429  Array< OneD, NekDouble> tmp_outarray(
430  num_pencil_per_proc*m_num_homogeneous_points[0],
431  0.0);
432 
433  if(UseNumMode)
434  {
435  packed_len = m_num_homogeneous_coeffs[0];
436  }
437  else
438  {
439  packed_len = m_num_homogeneous_points[0];
440  }
441 
442  // Start Transposition
443  for(i = 0; i < packed_len; ++i)
444  {
445  Vmath::Vcopy(num_pencil_per_proc,&(inarray[i]),packed_len,
446  &(tmp_inarray[i*num_pencil_per_proc]),1);
447  }
448 
449  m_hcomm->AlltoAllv(tmp_inarray, m_SizeMap, m_OffsetMap,
450  tmp_outarray, m_SizeMap, m_OffsetMap);
451 
452  while(index < num_points_per_plane)
453  {
454  copy_len =
455  num_pencil_per_proc < (num_points_per_plane - index)
456  ? num_pencil_per_proc
457  : (num_points_per_plane - index);
458 
459  for(i = 0 ; i < m_num_points_per_proc[0]; i++)
460  {
461  Vmath::Vcopy(copy_len,
462  &(tmp_outarray[cnt]), 1,
463  &(outarray[index + (i * num_points_per_plane)]), 1);
464 
465  cnt += num_pencil_per_proc;
466  }
467 
468  index += copy_len;
469  }
470  // End Transposition
471  }
472 
473  // Serial case implementation (more efficient than MPI 1 processor
474  // implementation)
475  else
476  {
477  int i,pts_per_plane;
478  int n = inarray.num_elements();
479  int packed_len;
480 
481  // use length of inarray to determine data storage type
482  // (i.e. modal or physical).
483  pts_per_plane = n/m_num_points_per_proc[0];
484 
485  if(UseNumMode)
486  {
487  packed_len = m_num_homogeneous_coeffs[0];
488  }
489  else
490  {
491  packed_len = m_num_homogeneous_points[0];
492  }
493 
494  ASSERTL1(&inarray[0] != &outarray[0],
495  "Inarray and outarray cannot be the same");
496 
497  for(i = 0; i < packed_len; ++i)
498  {
499  Vmath::Vcopy(pts_per_plane,
500  &(inarray[i]), packed_len,
501  &(outarray[i * pts_per_plane]), 1);
502  }
503  }
504  }
505 
506 
507  /**
508  * Homogeneous 2D transposition from SEM to Homogeneous(YZ) ordering.
509  */
511  const Array<OneD,const NekDouble> &inarray,
512  Array<OneD, NekDouble> &outarray,
513  bool UseNumMode)
514  {
515  if(m_num_processes[0] > 1 || m_num_processes[1] > 1)
516  {
517  ASSERTL0(false,
518  "Parallel transposition not implemented yet for "
519  "3D-Homo-2D approach.");
520  }
521  else
522  {
523  int i, pts_per_line;
524  int n = inarray.num_elements();
525  int packed_len;
526 
527  pts_per_line = n / (m_num_homogeneous_points[0] *
529 
530  if(UseNumMode)
531  {
532  packed_len = (m_num_homogeneous_coeffs[0] *
534  }
535  else
536  {
537  packed_len = (m_num_homogeneous_points[0] *
539  }
540 
541  ASSERTL1(&inarray[0] != &outarray[0],
542  "Inarray and outarray cannot be the same");
543 
544  for(i = 0; i < packed_len; ++i)
545  {
546  Vmath::Vcopy(pts_per_line,
547  &(inarray[i * pts_per_line]), 1,
548  &(outarray[i]), packed_len);
549  }
550  }
551  }
552 
553 
554  /**
555  * Homogeneous 2D transposition from Homogeneous (YZ) ordering to SEM.
556  */
558  const Array<OneD,const NekDouble> &inarray,
559  Array<OneD, NekDouble> &outarray,
560  bool UseNumMode)
561  {
562  if(m_num_processes[0] > 1 || m_num_processes[1] > 1)
563  {
564  ASSERTL0(false,
565  "Parallel transposition not implemented yet for "
566  "3D-Homo-2D approach.");
567  }
568  else
569  {
570  int i,pts_per_line;
571  int n = inarray.num_elements();
572  int packed_len;
573 
574  pts_per_line = n / (m_num_homogeneous_points[0] *
576 
577  if(UseNumMode)
578  {
579  packed_len = (m_num_homogeneous_coeffs[0] *
581  }
582  else
583  {
584  packed_len = (m_num_homogeneous_points[0] *
586  }
587 
588  ASSERTL1(&inarray[0] != &outarray[0],
589  "Inarray and outarray cannot be the same");
590 
591  for(i = 0; i < packed_len; ++i)
592  {
593  Vmath::Vcopy(pts_per_line,
594  &(inarray[i]), packed_len,
595  &(outarray[i * pts_per_line]), 1);
596  }
597  }
598  }
599 
600 
601  /**
602  * Homogeneous 2D transposition from Y ordering to Z.
603  */
605  const Array<OneD,const NekDouble> &inarray,
606  Array<OneD, NekDouble> &outarray,
607  bool UseNumMode)
608  {
609  if(m_num_processes[0] > 1 || m_num_processes[1] > 1)
610  {
611  ASSERTL0(false,
612  "Parallel transposition not implemented yet for "
613  "3D-Homo-2D approach.");
614  }
615  else
616  {
617  int n = m_num_homogeneous_points[0] *
619  int s = inarray.num_elements();
620 
621  int pts_per_line = s/n;
622 
623  int packed_len = pts_per_line * m_num_homogeneous_points[1];
624 
625  for(int i = 0; i < m_num_homogeneous_points[0] ; ++i)
626  {
627  Vmath::Vcopy(packed_len,
628  &(inarray[i]), m_num_homogeneous_points[0],
629  &(outarray[i * packed_len]), 1);
630  }
631  }
632  }
633 
634 
635  /**
636  * Homogeneous 2D transposition from Z ordering to Y.
637  */
639  const Array<OneD,const NekDouble> &inarray,
640  Array<OneD, NekDouble> &outarray,
641  bool UseNumMode)
642  {
643  if(m_num_processes[0] > 1 || m_num_processes[1] > 1)
644  {
645  ASSERTL0(false,
646  "Parallel transposition not implemented yet for "
647  "3D-Homo-2D approach.");
648  }
649  else
650  {
651  int n = m_num_homogeneous_points[0] *
653  int s = inarray.num_elements();
654 
655  int pts_per_line = s/n;
656 
657  int packed_len = pts_per_line * m_num_homogeneous_points[1];
658 
659  for(int i = 0; i < packed_len ; ++i)
660  {
661  Vmath::Vcopy(m_num_homogeneous_points[0],
662  &(inarray[i]), packed_len,
663  &(outarray[i * m_num_homogeneous_points[0]]), 1);
664  }
665  }
666  }
667 
668 
669  // TODO: Implement 2D and 3D transposition routines
670  }
671 }
void TransposeZYtoYZ(const Array< OneD, const NekDouble > &inarray, Array< OneD, NekDouble > &outarray, bool UseNumMode=false)
#define ASSERTL0(condition, msg)
Definition: ErrorUtil.hpp:161
Array< OneD, unsigned int > GetPlanesIDs(void)
void TransposeXYtoZ(const Array< OneD, const NekDouble > &inarray, Array< OneD, NekDouble > &outarray, bool UseNumMode=false)
BasisType GetBasisType() const
Return type of expansion basis.
Definition: Basis.h:139
void TransposeYZtoX(const Array< OneD, const NekDouble > &inarray, Array< OneD, NekDouble > &outarray, bool UseNumMode=false)
Transposition(const LibUtilities::BasisKey &HomoBasis0, LibUtilities::CommSharedPtr hcomm0, LibUtilities::CommSharedPtr hcomm1)
void TransposeZtoXY(const Array< OneD, const NekDouble > &inarray, Array< OneD, NekDouble > &outarray, bool UseNumMode=false)
Array< OneD, int > m_num_homogeneous_points
Total homogeneous points per direction.
Array< OneD, int > m_OffsetMap
MPI_Alltoallv offset map of send/recv buffer in global vector.
void TransposeXtoYZ(const Array< OneD, const NekDouble > &inarray, Array< OneD, NekDouble > &outarray, bool UseNumMode=false)
Fourier Expansion .
Definition: BasisType.h:52
Array< OneD, int > m_SizeMap
MPI_Alltoallv map containing size of send/recv buffer.
Array< OneD, unsigned int > m_planes_IDs
IDs of the planes on the processes.
boost::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
Definition: Comm.h:53
void TransposeYZtoZY(const Array< OneD, const NekDouble > &inarray, Array< OneD, NekDouble > &outarray, bool UseNumMode=false)
Fourier Modified expansions with just the real part of the first mode .
Definition: BasisType.h:59
int GetNumPoints() const
Return points order at which basis is defined.
Definition: Basis.h:128
unsigned int m_strip_ID
IDs of the strips on the processes.
Fourier Modified expansions with just the imaginary part of the first mode .
Definition: BasisType.h:60
Array< OneD, unsigned int > m_K
Fourier wave numbers associated with the planes.
Array< OneD, int > m_num_homogeneous_coeffs
Total number of homogeneous coefficients.
Fourier ModifiedExpansion with just the first mode .
Definition: BasisType.h:58
void Transpose(const Array< OneD, const NekDouble > &inarray, Array< OneD, NekDouble > &outarray, bool UseNumMode=false, TranspositionDir dir=eNoTrans)
Array< OneD, unsigned int > GetKs(void)
int GetNumModes() const
Returns the order of the basis.
Definition: Basis.h:84
#define ASSERTL1(condition, msg)
Assert Level 1 – Debugging which is used whether in FULLDEBUG or DEBUG compilation mode...
Definition: ErrorUtil.hpp:191
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
Definition: Vmath.cpp:1047
Describes the specification for a Basis.
Definition: Basis.h:50
Array< OneD, int > m_num_points_per_proc
Number of homogeneous points on each processor per direction.