Nektar++
Transposition.cpp
Go to the documentation of this file.
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 // File Transposition.cpp
4 //
5 // For more information, please see: http://www.nektar.info
6 //
7 // The MIT License
8 //
9 // Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10 // Department of Aeronautics, Imperial College London (UK), and Scientific
11 // Computing and Imaging Institute, University of Utah (USA).
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining a
14 // copy of this software and associated documentation files (the "Software"),
15 // to deal in the Software without restriction, including without limitation
16 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
17 // and/or sell copies of the Software, and to permit persons to whom the
18 // Software is furnished to do so, subject to the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be included
21 // in all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29 // DEALINGS IN THE SOFTWARE.
30 //
31 // Description: Data manager for homogeneous transpositions
32 //
33 ///////////////////////////////////////////////////////////////////////////////
34 
35 #include <boost/core/ignore_unused.hpp>
36 
38 
39 #include <LibUtilities/BasicUtils/ErrorUtil.hpp> // for ASSERTL0, etc
40 #include <LibUtilities/BasicUtils/SharedArray.hpp> // for Array
41 #include <LibUtilities/BasicUtils/Vmath.hpp> // for Vcopy
42 #include <LibUtilities/Foundations/Basis.h> // for BasisKey
44 
45 namespace Nektar
46 {
47 namespace LibUtilities
48 {
49 /**
50  * Constructor for 1D transform.
51  */
55 {
56  m_hcomm = hcomm1;
58 
63 
64  m_num_homogeneous_points[0] = HomoBasis0.GetNumPoints();
65  m_num_homogeneous_coeffs[0] = HomoBasis0.GetNumModes();
66  m_num_processes[0] = m_hcomm->GetSize();
67  m_num_points_per_proc[0] = m_num_homogeneous_points[0] / m_num_processes[0];
68  m_rank_id = m_hcomm->GetRank();
69 
70  //================================================================
71  // TODO: Need to be generalised for 1D, 2D and 3D
74 
75  for (int i = 0; i < m_num_points_per_proc[0]; i++)
76  {
77  m_planes_IDs[i] = m_rank_id * m_num_points_per_proc[0] + i;
78  }
79 
80  int global_rank_id = hcomm0->GetColumnComm()->GetRank();
81  int NumStrips = hcomm0->GetColumnComm()->GetSize() / m_hcomm->GetSize();
82  m_strip_ID = 0;
83 
84  if (NumStrips > 1)
85  {
86  m_strip_ID = (NumStrips > global_rank_id)
87  ? global_rank_id
88  : (global_rank_id - NumStrips);
89  }
90 
91  if (HomoBasis0.GetBasisType() == LibUtilities::eFourier)
92  {
93  for (int i = 0; i < m_num_points_per_proc[0]; i++)
94  {
95  m_K[i] = m_planes_IDs[i] / 2;
96  }
97  }
98 
100  {
101  m_K[0] = 1;
102  m_K[1] = 1;
103  }
104 
105  if (HomoBasis0.GetBasisType() == LibUtilities::eFourierHalfModeRe ||
107  {
108  m_K[0] = 1;
109  }
110  //================================================================
111 }
112 
113 /**
114  * Constructor for 2D transform.
115  */
117  const LibUtilities::BasisKey &HomoBasis1,
119 {
120  m_hcomm = hcomm;
122 
127 
128  m_num_homogeneous_points[0] = HomoBasis0.GetNumPoints();
129  m_num_homogeneous_coeffs[0] = HomoBasis0.GetNumModes();
130  m_num_homogeneous_points[1] = HomoBasis1.GetNumPoints();
131  m_num_homogeneous_coeffs[1] = HomoBasis1.GetNumModes();
132 
133  m_num_processes[0] = m_hcomm->GetRowComm()->GetSize();
134  m_num_processes[1] = m_hcomm->GetColumnComm()->GetSize();
135 
136  m_num_points_per_proc[0] = m_num_homogeneous_points[0] / m_num_processes[0];
137  m_num_points_per_proc[1] = m_num_homogeneous_points[1] / m_num_processes[1];
138 
139  //================================================================
140  // TODO: Need set up for 2D lines IDs and Ks if Fourier
141  //================================================================
142 }
143 
144 /**
145  * Constructor for 3D transform.
146  */
148  const LibUtilities::BasisKey &HomoBasis1,
149  const LibUtilities::BasisKey &HomoBasis2,
151 {
152  boost::ignore_unused(HomoBasis0, HomoBasis1, HomoBasis2);
153 
154  m_hcomm = hcomm;
156 
161 
162  //================================================================
163  // TODO: Need set up for 3D
164  ASSERTL0(false, "Transposition is not set up for 3D.");
165  //================================================================
166 }
167 
168 /**
169  * Destructor
170  */
172 {
173 }
174 
175 //====================================================================
176 // TODO: Need to generalise the following methods for 1D, 2D and 3D
177 unsigned int Transposition::GetK(int i)
178 {
179  return m_K[i];
180 }
181 
183 {
184  return m_K;
185 }
186 
187 unsigned int Transposition::GetPlaneID(int i)
188 {
189  return m_planes_IDs[i];
190 }
191 
193 {
194  return m_planes_IDs;
195 }
196 
197 unsigned int Transposition::GetStripID(void)
198 {
199  return m_strip_ID;
200 }
201 
202 /**
203  * Main method: General transposition, the dir parameters define if
204  * 1D,2D,3D and which transposition is required at the same time
205  */
207  Array<OneD, NekDouble> &outarray, bool UseNumMode,
208  TranspositionDir dir)
209 {
210  switch (dir)
211  {
212  case eXYtoZ:
213  {
214  TransposeXYtoZ(inarray, outarray, UseNumMode);
215  }
216  break;
217  case eZtoXY:
218  {
219  TransposeZtoXY(inarray, outarray, UseNumMode);
220  }
221  break;
222  case eXtoYZ:
223  {
224  TransposeXtoYZ(inarray, outarray, UseNumMode);
225  }
226  break;
227  case eYZtoX:
228  {
229  TransposeYZtoX(inarray, outarray, UseNumMode);
230  }
231  break;
232  case eYZtoZY:
233  {
234  TransposeYZtoZY(inarray, outarray, UseNumMode);
235  }
236  break;
237  case eZYtoYZ:
238  {
239  TransposeZYtoYZ(inarray, outarray, UseNumMode);
240  }
241  break;
242  case eXtoY:
243  {
244  ASSERTL0(false, "Transposition not implemented yet.");
245  }
246  break;
247  case eYtoZ:
248  {
249  ASSERTL0(false, "Transposition not implemented yet.");
250  }
251  break;
252  case eZtoX:
253  {
254  ASSERTL0(false, "Transposition not implemented yet.");
255  }
256  break;
257  default:
258  {
259  ASSERTL0(false, "Transposition type does not exist.");
260  }
261  }
262 }
263 
264 /**
265  * Homogeneous 1D transposition from SEM to Homogeneous ordering.
266  */
268  Array<OneD, NekDouble> &outarray,
269  bool UseNumMode)
270 {
271  if (m_num_processes[0] > 1)
272  {
273  // Paramerers set up
274  int i, packed_len;
275  int copy_len = 0;
276  int index = 0;
277  int cnt = 0;
278 
279  int num_dofs = inarray.num_elements();
280  int num_points_per_plane = num_dofs / m_num_points_per_proc[0];
281  int num_pencil_per_proc =
282  (num_points_per_plane / m_num_processes[0]) +
283  (num_points_per_plane % m_num_processes[0] > 0);
284 
286  m_OffsetMap = Array<OneD, int>(m_num_processes[0], 0);
287 
288  for (i = 0; i < m_num_processes[0]; i++)
289  {
290  m_SizeMap[i] = num_pencil_per_proc * m_num_points_per_proc[0];
291  m_OffsetMap[i] = i * num_pencil_per_proc * m_num_points_per_proc[0];
292  }
293 
294  Array<OneD, NekDouble> tmp_outarray(
295  num_pencil_per_proc * m_num_homogeneous_points[0], 0.0);
296 
297  if (UseNumMode)
298  {
299  packed_len = m_num_homogeneous_coeffs[0];
300  }
301  else
302  {
303  packed_len = m_num_homogeneous_points[0];
304  }
305 
306  // Start Transposition
307  while (index < num_points_per_plane)
308  {
309  copy_len = num_pencil_per_proc < (num_points_per_plane - index)
310  ? num_pencil_per_proc
311  : (num_points_per_plane - index);
312 
313  for (i = 0; i < m_num_points_per_proc[0]; i++)
314  {
315  Vmath::Vcopy(copy_len,
316  &(inarray[index + (i * num_points_per_plane)]), 1,
317  &(outarray[cnt]), 1);
318 
319  cnt += num_pencil_per_proc;
320  }
321 
322  index += copy_len;
323  }
324 
325  m_hcomm->AlltoAllv(outarray, m_SizeMap, m_OffsetMap, tmp_outarray,
326  m_SizeMap, m_OffsetMap);
327 
328  for (i = 0; i < packed_len; ++i)
329  {
330  Vmath::Vcopy(num_pencil_per_proc,
331  &(tmp_outarray[i * num_pencil_per_proc]), 1,
332  &(outarray[i]), packed_len);
333  }
334  // End Transposition
335  }
336 
337  // Serial case implementation (more efficient then MPI 1 processor
338  // implemenation)
339  else
340  {
341  int i, pts_per_plane;
342  int n = inarray.num_elements();
343  int packed_len;
344 
345  pts_per_plane = n / m_num_points_per_proc[0];
346 
347  if (UseNumMode)
348  {
349  packed_len = m_num_homogeneous_coeffs[0];
350  }
351  else
352  {
353  packed_len = m_num_homogeneous_points[0];
354  }
355 
356  ASSERTL1(&inarray[0] != &outarray[0],
357  "Inarray and outarray cannot be the same");
358 
359  for (i = 0; i < packed_len; ++i)
360  {
361  Vmath::Vcopy(pts_per_plane, &(inarray[i * pts_per_plane]), 1,
362  &(outarray[i]), packed_len);
363  }
364  }
365 }
366 
367 /**
368  * Homogeneous 1D transposition from Homogeneous to SEM ordering.
369  */
371  Array<OneD, NekDouble> &outarray,
372  bool UseNumMode)
373 {
374  if (m_num_processes[0] > 1)
375  {
376  // Paramerers set up
377  int i, packed_len;
378  int copy_len = 0;
379  int index = 0;
380  int cnt = 0;
381 
382  int num_dofs = outarray.num_elements();
383  int num_points_per_plane = num_dofs / m_num_points_per_proc[0];
384  int num_pencil_per_proc =
385  (num_points_per_plane / m_num_processes[0]) +
386  (num_points_per_plane % m_num_processes[0] > 0);
387 
389  m_OffsetMap = Array<OneD, int>(m_num_processes[0], 0);
390 
391  for (i = 0; i < m_num_processes[0]; i++)
392  {
393  m_SizeMap[i] = num_pencil_per_proc * m_num_points_per_proc[0];
394  m_OffsetMap[i] = i * num_pencil_per_proc * m_num_points_per_proc[0];
395  }
396 
397  Array<OneD, NekDouble> tmp_inarray(
398  num_pencil_per_proc * m_num_homogeneous_points[0], 0.0);
399  Array<OneD, NekDouble> tmp_outarray(
400  num_pencil_per_proc * m_num_homogeneous_points[0], 0.0);
401 
402  if (UseNumMode)
403  {
404  packed_len = m_num_homogeneous_coeffs[0];
405  }
406  else
407  {
408  packed_len = m_num_homogeneous_points[0];
409  }
410 
411  // Start Transposition
412  for (i = 0; i < packed_len; ++i)
413  {
414  Vmath::Vcopy(num_pencil_per_proc, &(inarray[i]), packed_len,
415  &(tmp_inarray[i * num_pencil_per_proc]), 1);
416  }
417 
418  m_hcomm->AlltoAllv(tmp_inarray, m_SizeMap, m_OffsetMap, tmp_outarray,
419  m_SizeMap, m_OffsetMap);
420 
421  while (index < num_points_per_plane)
422  {
423  copy_len = num_pencil_per_proc < (num_points_per_plane - index)
424  ? num_pencil_per_proc
425  : (num_points_per_plane - index);
426 
427  for (i = 0; i < m_num_points_per_proc[0]; i++)
428  {
429  Vmath::Vcopy(copy_len, &(tmp_outarray[cnt]), 1,
430  &(outarray[index + (i * num_points_per_plane)]),
431  1);
432 
433  cnt += num_pencil_per_proc;
434  }
435 
436  index += copy_len;
437  }
438  // End Transposition
439  }
440 
441  // Serial case implementation (more efficient then MPI 1 processor
442  // implemenation)
443  else
444  {
445  int i, pts_per_plane;
446  int n = inarray.num_elements();
447  int packed_len;
448 
449  // use length of inarray to determine data storage type
450  // (i.e.modal or physical).
451  pts_per_plane = n / m_num_points_per_proc[0];
452 
453  if (UseNumMode)
454  {
455  packed_len = m_num_homogeneous_coeffs[0];
456  }
457  else
458  {
459  packed_len = m_num_homogeneous_points[0];
460  }
461 
462  ASSERTL1(&inarray[0] != &outarray[0],
463  "Inarray and outarray cannot be the same");
464 
465  for (i = 0; i < packed_len; ++i)
466  {
467  Vmath::Vcopy(pts_per_plane, &(inarray[i]), packed_len,
468  &(outarray[i * pts_per_plane]), 1);
469  }
470  }
471 }
472 
473 /**
474  * Homogeneous 2D transposition from SEM to Homogeneous(YZ) ordering.
475  */
477  Array<OneD, NekDouble> &outarray,
478  bool UseNumMode)
479 {
480  if (m_num_processes[0] > 1 || m_num_processes[1] > 1)
481  {
482  ASSERTL0(false, "Parallel transposition not implemented yet for "
483  "3D-Homo-2D approach.");
484  }
485  else
486  {
487  int i, pts_per_line;
488  int n = inarray.num_elements();
489  int packed_len;
490 
491  pts_per_line =
493 
494  if (UseNumMode)
495  {
496  packed_len =
498  }
499  else
500  {
501  packed_len =
503  }
504 
505  ASSERTL1(&inarray[0] != &outarray[0],
506  "Inarray and outarray cannot be the same");
507 
508  for (i = 0; i < packed_len; ++i)
509  {
510  Vmath::Vcopy(pts_per_line, &(inarray[i * pts_per_line]), 1,
511  &(outarray[i]), packed_len);
512  }
513  }
514 }
515 
516 /**
517  * Homogeneous 2D transposition from Homogeneous (YZ) ordering to SEM.
518  */
520  Array<OneD, NekDouble> &outarray,
521  bool UseNumMode)
522 {
523  if (m_num_processes[0] > 1 || m_num_processes[1] > 1)
524  {
525  ASSERTL0(false, "Parallel transposition not implemented yet for "
526  "3D-Homo-2D approach.");
527  }
528  else
529  {
530  int i, pts_per_line;
531  int n = inarray.num_elements();
532  int packed_len;
533 
534  pts_per_line =
536 
537  if (UseNumMode)
538  {
539  packed_len =
541  }
542  else
543  {
544  packed_len =
546  }
547 
548  ASSERTL1(&inarray[0] != &outarray[0],
549  "Inarray and outarray cannot be the same");
550 
551  for (i = 0; i < packed_len; ++i)
552  {
553  Vmath::Vcopy(pts_per_line, &(inarray[i]), packed_len,
554  &(outarray[i * pts_per_line]), 1);
555  }
556  }
557 }
558 
559 /**
560  * Homogeneous 2D transposition from Y ordering to Z.
561  */
563  Array<OneD, NekDouble> &outarray,
564  bool UseNumMode)
565 {
566  boost::ignore_unused(UseNumMode);
567 
568  if (m_num_processes[0] > 1 || m_num_processes[1] > 1)
569  {
570  ASSERTL0(false, "Parallel transposition not implemented yet for "
571  "3D-Homo-2D approach.");
572  }
573  else
574  {
576  int s = inarray.num_elements();
577 
578  int pts_per_line = s / n;
579 
580  int packed_len = pts_per_line * m_num_homogeneous_points[1];
581 
582  for (int i = 0; i < m_num_homogeneous_points[0]; ++i)
583  {
584  Vmath::Vcopy(packed_len, &(inarray[i]), m_num_homogeneous_points[0],
585  &(outarray[i * packed_len]), 1);
586  }
587  }
588 }
589 
590 /**
591  * Homogeneous 2D transposition from Z ordering to Y.
592  */
594  Array<OneD, NekDouble> &outarray,
595  bool UseNumMode)
596 {
597  boost::ignore_unused(UseNumMode);
598 
599  if (m_num_processes[0] > 1 || m_num_processes[1] > 1)
600  {
601  ASSERTL0(false, "Parallel transposition not implemented yet for "
602  "3D-Homo-2D approach.");
603  }
604  else
605  {
607  int s = inarray.num_elements();
608 
609  int pts_per_line = s / n;
610 
611  int packed_len = pts_per_line * m_num_homogeneous_points[1];
612 
613  for (int i = 0; i < packed_len; ++i)
614  {
615  Vmath::Vcopy(m_num_homogeneous_points[0], &(inarray[i]), packed_len,
616  &(outarray[i * m_num_homogeneous_points[0]]), 1);
617  }
618  }
619 }
620 
621 // TODO: Impelement 2D and 3D transposition routines
622 }
623 }
int GetNumPoints() const
Return points order at which basis is defined.
Definition: Basis.h:133
Array< OneD, int > m_num_homogeneous_points
Total homogeneous points per direction.
void TransposeZYtoYZ(const Array< OneD, const NekDouble > &inarray, Array< OneD, NekDouble > &outarray, bool UseNumMode=false)
#define ASSERTL0(condition, msg)
Definition: ErrorUtil.hpp:216
Array< OneD, unsigned int > GetPlanesIDs(void)
Array< OneD, int > m_num_points_per_proc
Number of homogeneous points on each processor per direction.
void TransposeXYtoZ(const Array< OneD, const NekDouble > &inarray, Array< OneD, NekDouble > &outarray, bool UseNumMode=false)
void TransposeYZtoX(const Array< OneD, const NekDouble > &inarray, Array< OneD, NekDouble > &outarray, bool UseNumMode=false)
Transposition(const LibUtilities::BasisKey &HomoBasis0, LibUtilities::CommSharedPtr hcomm0, LibUtilities::CommSharedPtr hcomm1)
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
Definition: Comm.h:53
BasisType GetBasisType() const
Return type of expansion basis.
Definition: Basis.h:144
void TransposeZtoXY(const Array< OneD, const NekDouble > &inarray, Array< OneD, NekDouble > &outarray, bool UseNumMode=false)
void TransposeXtoYZ(const Array< OneD, const NekDouble > &inarray, Array< OneD, NekDouble > &outarray, bool UseNumMode=false)
Fourier Expansion .
Definition: BasisType.h:53
Array< OneD, unsigned int > m_planes_IDs
IDs of the planes on the processes.
void TransposeYZtoZY(const Array< OneD, const NekDouble > &inarray, Array< OneD, NekDouble > &outarray, bool UseNumMode=false)
int GetNumModes() const
Returns the order of the basis.
Definition: Basis.h:86
Fourier Modified expansions with just the real part of the first mode .
Definition: BasisType.h:60
Array< OneD, int > m_SizeMap
MPI_Alltoallv map containing size of send/recv buffer.
unsigned int m_strip_ID
IDs of the strips on the processes.
Fourier Modified expansions with just the imaginary part of the first mode .
Definition: BasisType.h:61
Array< OneD, unsigned int > m_K
Fourier wave numbers associated with the planes.
Fourier ModifiedExpansion with just the first mode .
Definition: BasisType.h:59
void Transpose(const Array< OneD, const NekDouble > &inarray, Array< OneD, NekDouble > &outarray, bool UseNumMode=false, TranspositionDir dir=eNoTrans)
Array< OneD, int > m_OffsetMap
MPI_Alltoallv offset map of send/recv buffer in global vector.
Array< OneD, unsigned int > GetKs(void)
Array< OneD, int > m_num_homogeneous_coeffs
Total number of homogeneous coefficients.
#define ASSERTL1(condition, msg)
Assert Level 1 – Debugging which is used whether in FULLDEBUG or DEBUG compilation mode...
Definition: ErrorUtil.hpp:250
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
Definition: Vmath.cpp:1064
Describes the specification for a Basis.
Definition: Basis.h:49