Nektar++
AssemblyCommDG.cpp
////////////////////////////////////////////////////////////////////////////////
//
// File: AssemblyCommDG.cpp
//
// For more information, please see: http://www.nektar.info/
//
// The MIT License
//
// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
// Department of Aeronautics, Imperial College London (UK), and Scientific
// Computing and Imaging Institute, University of Utah (USA).
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//
// Description: Parallel communication methods for DG with MPI
//
////////////////////////////////////////////////////////////////////////////////

#include <MultiRegions/AssemblyMap/AssemblyCommDG.h>
#include <MultiRegions/AssemblyMap/AssemblyMapDG.h>
#include <LibUtilities/BasicUtils/Timer.h>

#include <utility>

namespace Nektar
{
namespace MultiRegions
{

AllToAll::AllToAll(const LibUtilities::CommSharedPtr &comm, const int &maxQuad,
                   const int &nRanks,
                   const std::map<int, std::vector<int>> &rankSharedEdges,
                   const std::map<int, std::vector<int>> &edgeToTrace)
    : m_comm(comm), m_maxQuad(maxQuad), m_nRanks(nRanks)
{
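    // Find the largest number of edges shared with any single rank; this
    // determines the (padded) block size each rank contributes to AllToAll.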
    for (size_t i = 0; i < nRanks; ++i)
    {
        if (rankSharedEdges.find(i) != rankSharedEdges.end())
        {
            m_maxCount = (rankSharedEdges.at(i).size() > m_maxCount)
                             ? static_cast<int>(rankSharedEdges.at(i).size())
                             : m_maxCount;
        }
    }

    comm->AllReduce(m_maxCount, LibUtilities::ReduceMax);

    // Create the edge index vector, where a value of -1 indicates padding:
    // a 0 is inserted there instead of a value from the Fwd trace
    for (size_t i = 0; i < nRanks; ++i)
    {
        if (rankSharedEdges.find(i) != rankSharedEdges.end())
        {
            for (size_t j = 0; j < rankSharedEdges.at(i).size(); ++j)
            {
                std::vector<int> edgeIndex =
                    edgeToTrace.at(rankSharedEdges.at(i)[j]);
                if (edgeIndex.size() < maxQuad)
                {
                    std::vector<int> diff(maxQuad - edgeIndex.size(), -1);
                    edgeIndex.insert(edgeIndex.end(), diff.begin(), diff.end());
                }

                m_allEdgeIndex.insert(m_allEdgeIndex.end(), edgeIndex.begin(),
                                      edgeIndex.end());
            }

            if (rankSharedEdges.at(i).size() < m_maxCount)
            {
                std::vector<int> edgeIndex(
                    maxQuad * (m_maxCount - rankSharedEdges.at(i).size()), -1);
                m_allEdgeIndex.insert(m_allEdgeIndex.end(), edgeIndex.begin(),
                                      edgeIndex.end());
            }
        }
        else
        {
            std::vector<int> edgeIndex(maxQuad * m_maxCount, -1);
            m_allEdgeIndex.insert(m_allEdgeIndex.end(), edgeIndex.begin(),
                                  edgeIndex.end());
        }
    }
}

AllToAllV::AllToAllV(const LibUtilities::CommSharedPtr &comm,
                     const std::map<int, std::vector<int>> &rankSharedEdges,
                     const std::map<int, std::vector<int>> &edgeToTrace,
                     const int &nRanks)
    : m_comm(comm)
{
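    // Build, for each rank, the number of trace quadrature points to send;
    // ranks sharing no edges with this one get a send count of zero.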
    m_allVSendCount = Array<OneD, int>(nRanks, 0);
    for (size_t i = 0; i < nRanks; ++i)
    {
        if (rankSharedEdges.find(i) != rankSharedEdges.end())
        {
            for (size_t j = 0; j < rankSharedEdges.at(i).size(); ++j)
            {
                std::vector<int> edgeIndex =
                    edgeToTrace.at(rankSharedEdges.at(i)[j]);
                m_allVEdgeIndex.insert(m_allVEdgeIndex.end(), edgeIndex.begin(),
                                       edgeIndex.end());
                m_allVSendCount[i] += edgeIndex.size();
            }
        }
        else
        {
            m_allVSendCount[i] = 0;
        }
    }

    m_allVSendDisp = Array<OneD, int>(nRanks, 0);
    for (size_t i = 1; i < nRanks; ++i)
    {
        m_allVSendDisp[i] = m_allVSendDisp[i - 1] + m_allVSendCount[i - 1];
    }
}

NeighborAllToAllV::NeighborAllToAllV(
    const LibUtilities::CommSharedPtr &comm,
    const std::map<int, std::vector<int>> &rankSharedEdges,
    const std::map<int, std::vector<int>> &edgeToTrace)
    : m_comm(comm)
{
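    // Construct the distributed graph topology: the neighbours of this rank
    // are exactly the ranks it shares edges with, weighted by edge count.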
    int nNeighbours = rankSharedEdges.size();
    Array<OneD, int> destinations(nNeighbours, 0);
    Array<OneD, int> weights(nNeighbours, 0);
    int cnt = 0;
    for (auto &rankEdgeVec : rankSharedEdges)
    {
        destinations[cnt] = rankEdgeVec.first;
        weights[cnt]      = rankEdgeVec.second.size();
        ++cnt;
    }

    comm->DistGraphCreateAdjacent(destinations, weights, 1);

    // Setting up indices
    m_sendCount = Array<OneD, int>(nNeighbours, 0);
    cnt         = 0;

    for (const auto &rankEdgeSet : rankSharedEdges)
    {
        for (size_t i : rankEdgeSet.second)
        {
            std::vector<int> edgeIndex = edgeToTrace.at(i);
            m_edgeTraceIndex.insert(m_edgeTraceIndex.end(), edgeIndex.begin(),
                                    edgeIndex.end());
            m_sendCount[cnt] += edgeIndex.size();
        }

        ++cnt;
    }

    m_sendDisp = Array<OneD, int>(nNeighbours, 0);
    for (size_t i = 1; i < nNeighbours; ++i)
    {
        m_sendDisp[i] = m_sendDisp[i - 1] + m_sendCount[i - 1];
    }
}

Pairwise::Pairwise(const LibUtilities::CommSharedPtr &comm,
                   const std::map<int, std::vector<int>> &rankSharedEdges,
                   const std::map<int, std::vector<int>> &edgeToTrace)
    : m_comm(comm)
{
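    // One persistent send/recv pair is created per neighbouring rank; the
    // send and receive buffers are laid out contiguously, neighbour by
    // neighbour, using the displacements computed below.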
    int cnt         = 0;
    int nNeighbours = rankSharedEdges.size();
    Array<OneD, int> sendCount(nNeighbours, -1);

    // List of partition to trace map indices of the quad points to exchange
    std::vector<std::pair<int, std::vector<int>>> vecPairPartitionTrace;
    for (const auto &rankEdgeSet : rankSharedEdges)
    {
        std::vector<int> edgeTraceIndex;
        for (size_t i : rankEdgeSet.second)
        {
            std::vector<int> edgeIndex = edgeToTrace.at(i);
            edgeTraceIndex.insert(edgeTraceIndex.end(), edgeIndex.begin(),
                                  edgeIndex.end());
        }

        vecPairPartitionTrace.emplace_back(
            std::make_pair(rankEdgeSet.first, edgeTraceIndex));

        sendCount[cnt++] = edgeTraceIndex.size();
    }

    Array<OneD, int> sendDisp(nNeighbours, 0);
    for (size_t i = 1; i < nNeighbours; ++i)
    {
        sendDisp[i] = sendDisp[i - 1] + sendCount[i - 1];
    }

    size_t totSends = std::accumulate(sendCount.begin(), sendCount.end(), 0);

    m_recvBuff       = Array<OneD, NekDouble>(totSends, -1);
    m_sendBuff       = Array<OneD, NekDouble>(totSends, -1);
    m_edgeTraceIndex = Array<OneD, int>(totSends, -1);

    // Set up m_edgeTraceIndex to reduce complexity when performing exchange
    cnt = 0;
    for (auto &i : vecPairPartitionTrace)
    {
        for (auto j : i.second)
        {
            m_edgeTraceIndex[cnt++] = j;
        }
    }

    m_recvRequest = m_comm->CreateRequest(vecPairPartitionTrace.size());
    m_sendRequest = m_comm->CreateRequest(vecPairPartitionTrace.size());

    // Construct persistent requests
    for (size_t i = 0; i < vecPairPartitionTrace.size(); ++i)
    {
        size_t len = vecPairPartitionTrace[i].second.size();

        // Initialise receive requests
        m_comm->RecvInit(vecPairPartitionTrace[i].first,
                         m_recvBuff[sendDisp[i]], len, m_recvRequest, i);

        // Initialise send requests
        m_comm->SendInit(vecPairPartitionTrace[i].first,
                         m_sendBuff[sendDisp[i]], len, m_sendRequest, i);
    }
}

void AllToAll::PerformExchange(const Array<OneD, NekDouble> &testFwd,
                               Array<OneD, NekDouble> &testBwd)
{
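    // Pack the Fwd trace into a fixed-size, zero-padded send buffer, perform
    // a single AllToAll, then unpack the received values into the Bwd trace,
    // skipping the padding entries (index -1).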
    int size = m_maxQuad * m_maxCount * m_nRanks;
    Array<OneD, NekDouble> sendBuff(size, -1);
    Array<OneD, NekDouble> recvBuff(size, -1);

    for (size_t j = 0; j < size; ++j)
    {
        if (m_allEdgeIndex[j] == -1)
        {
            sendBuff[j] = 0;
        }
        else
        {
            sendBuff[j] = testFwd[m_allEdgeIndex[j]];
        }
    }

    m_comm->AlltoAll(sendBuff, recvBuff);

    for (size_t j = 0; j < size; ++j)
    {
        if (m_allEdgeIndex[j] != -1)
        {
            testBwd[m_allEdgeIndex[j]] = recvBuff[j];
        }
    }
}

void AllToAllV::PerformExchange(const Array<OneD, NekDouble> &testFwd,
                                Array<OneD, NekDouble> &testBwd)
{
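    // No padding is needed here: AlltoAllv sends exactly m_allVSendCount[i]
    // values to rank i, using the displacements computed in the constructor.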
    Array<OneD, NekDouble> sendBuff(m_allVEdgeIndex.size(), -1);
    Array<OneD, NekDouble> recvBuff(m_allVEdgeIndex.size(), -1);

    for (size_t i = 0; i < m_allVEdgeIndex.size(); ++i)
    {
        sendBuff[i] = testFwd[m_allVEdgeIndex[i]];
    }

    m_comm->AlltoAllv(sendBuff, m_allVSendCount, m_allVSendDisp, recvBuff,
                      m_allVSendCount, m_allVSendDisp);

    for (size_t i = 0; i < m_allVEdgeIndex.size(); ++i)
    {
        testBwd[m_allVEdgeIndex[i]] = recvBuff[i];
    }
}

void NeighborAllToAllV::PerformExchange(const Array<OneD, NekDouble> &testFwd,
                                        Array<OneD, NekDouble> &testBwd)
{
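    // Exchange is restricted to the neighbourhood defined by the distributed
    // graph topology, so only ranks sharing a partition edge communicate.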
    Array<OneD, NekDouble> sendBuff(m_edgeTraceIndex.size(), -1);
    Array<OneD, NekDouble> recvBuff(m_edgeTraceIndex.size(), -1);
    for (size_t i = 0; i < m_edgeTraceIndex.size(); ++i)
    {
        sendBuff[i] = testFwd[m_edgeTraceIndex[i]];
    }

    m_comm->NeighborAlltoAllv(sendBuff, m_sendCount, m_sendDisp, recvBuff,
                              m_sendCount, m_sendDisp);

    for (size_t i = 0; i < m_edgeTraceIndex.size(); ++i)
    {
        testBwd[m_edgeTraceIndex[i]] = recvBuff[i];
    }
}

void Pairwise::PerformExchange(const Array<OneD, NekDouble> &testFwd,
                               Array<OneD, NekDouble> &testBwd)
{
    // Perform receive posts
    m_comm->StartAll(m_recvRequest);

    // Fill send buffer from Fwd trace
    for (size_t i = 0; i < m_edgeTraceIndex.size(); ++i)
    {
        m_sendBuff[i] = testFwd[m_edgeTraceIndex[i]];
    }

    // Perform send posts
    m_comm->StartAll(m_sendRequest);

    // Wait for all send/recvs to complete
    m_comm->WaitAll(m_sendRequest);
    m_comm->WaitAll(m_recvRequest);

    // Fill Bwd trace from recv buffer
    for (size_t i = 0; i < m_edgeTraceIndex.size(); ++i)
    {
        testBwd[m_edgeTraceIndex[i]] = m_recvBuff[i];
    }
}

AssemblyCommDG::AssemblyCommDG(
    const ExpList &locExp, const ExpListSharedPtr &trace,
    const Array<OneD, Array<OneD, LocalRegions::ExpansionSharedPtr>>
        &elmtToTrace,
    const Array<OneD, const ExpListSharedPtr> &bndCondExp,
    const Array<OneD, const SpatialDomains::BoundaryConditionShPtr> &bndCond,
    const PeriodicMap &perMap)
{
    auto comm = locExp.GetComm()->GetRowComm();

    // If serial then skip initialising graph structure and the MPI timing
    if (comm->IsSerial())
    {
        m_exchange =
            ExchangeMethodSharedPtr(MemoryManager<Serial>::AllocateSharedPtr());
    }
    else
    {
        // Initialise graph structure and link processes across partition
        // boundaries
        AssemblyCommDG::InitialiseStructure(locExp, trace, elmtToTrace,
                                            bndCondExp, bndCond, perMap, comm);

        // Timing MPI comm methods: warm up with 10 iterations, then time
        // over 50
        std::vector<ExchangeMethodSharedPtr> MPIFuncs;
        std::vector<std::string> MPIFuncsNames;

        // Skip the AllToAll/AllToAllV methods when more than 16 ranks are
        // used, for performance reasons, unless the override solver info
        // parameter is present
        if (locExp.GetSession()->MatchSolverInfo("OverrideMPI", "ON") ||
            m_nRanks <= 16)
        {
            MPIFuncs.emplace_back(ExchangeMethodSharedPtr(
                MemoryManager<AllToAll>::AllocateSharedPtr(comm, m_maxQuad,
                                                           m_nRanks,
                                                           m_rankSharedEdges,
                                                           m_edgeToTrace)));
            MPIFuncsNames.emplace_back("AllToAll");

            MPIFuncs.emplace_back(ExchangeMethodSharedPtr(
                MemoryManager<AllToAllV>::AllocateSharedPtr(
                    comm, m_rankSharedEdges, m_edgeToTrace, m_nRanks)));
            MPIFuncsNames.emplace_back("AllToAllV");
        }

        MPIFuncs.emplace_back(
            ExchangeMethodSharedPtr(MemoryManager<Pairwise>::AllocateSharedPtr(
                comm, m_rankSharedEdges, m_edgeToTrace)));
        MPIFuncsNames.emplace_back("PairwiseSendRecv");

        // Disable neighbor MPI method on unsupported MPI versions (below 3.0)
        if (std::get<0>(comm->GetVersion()) >= 3)
        {
            MPIFuncs.emplace_back(ExchangeMethodSharedPtr(
                MemoryManager<NeighborAllToAllV>::AllocateSharedPtr(
                    comm, m_rankSharedEdges, m_edgeToTrace)));
            MPIFuncsNames.emplace_back("NeighborAllToAllV");
        }

        int numPoints = trace->GetNpoints();
        int warmup = 10, iter = 50;
        NekDouble min, max;
        std::vector<NekDouble> avg(MPIFuncs.size(), -1);
        bool verbose = locExp.GetSession()->DefinesCmdLineArgument("verbose");

        if (verbose && comm->TreatAsRankZero())
        {
            std::cout << "MPI setup for trace exchange: " << std::endl;
        }

        // Padding for output
        int maxStrLen = 0;
        for (size_t i = 0; i < MPIFuncs.size(); ++i)
        {
            maxStrLen = MPIFuncsNames[i].size() > maxStrLen
                            ? MPIFuncsNames[i].size()
                            : maxStrLen;
        }

        for (size_t i = 0; i < MPIFuncs.size(); ++i)
        {
            Timing(comm, warmup, numPoints, MPIFuncs[i]);
            std::tie(avg[i], min, max) =
                Timing(comm, iter, numPoints, MPIFuncs[i]);
            if (verbose && comm->TreatAsRankZero())
            {
                std::cout << " " << MPIFuncsNames[i]
                          << " times (avg, min, max)"
                          << std::string(maxStrLen - MPIFuncsNames[i].size(),
                                         ' ')
                          << ": " << avg[i] << " " << min << " " << max
                          << std::endl;
            }
        }

        // Get the fastest MPI method
        int fastestMPI = std::distance(
            avg.begin(), std::min_element(avg.begin(), avg.end()));

        if (verbose && comm->TreatAsRankZero())
        {
            std::cout << " Chosen fastest method: "
                      << MPIFuncsNames[fastestMPI] << std::endl;
        }

        m_exchange = MPIFuncs[fastestMPI];
    }
}

/**
 * This function sets up the initial structure to allow for the exchange
 * methods to be created. This structure is contained within the member
 * variable #m_rankSharedEdges, which is a map of rank to vector of the edges
 * shared with that rank. It is filled as follows:
 *
 * - Create an edge-to-trace mapping, and realign periodic edges within this
 *   mapping so that they have the same data layout for ranks sharing periodic
 *   boundaries.
 * - Create a list of all local edge IDs and calculate the maximum number of
 *   quadrature points used locally, then perform an AllReduce to find the
 *   maximum number of quadrature points across all ranks (for the AllToAll
 *   method).
 * - Create a list of all boundary edge IDs, except for those which are
 *   periodic.
 * - Using the boundary ID list and the local ID list, construct a unique list
 *   of IDs which lie on a partition boundary (i.e. if an ID occurs only once
 *   in the local list and does not occur in the boundary list, it is on a
 *   partition boundary). For a periodic edge whose other side is not local,
 *   we add the minimum of the two periodic IDs to the unique list, so that
 *   the numbering scheme is consistent across ranks.
 * - Send the unique list to all other ranks/partitions. Each rank's unique
 *   list is then compared with the local unique edge ID list; if a match is
 *   found, the member variable #m_rankSharedEdges is filled with the matching
 *   rank and unique edge ID.
 */
void AssemblyCommDG::InitialiseStructure(
    const ExpList &locExp, const ExpListSharedPtr &trace,
    const Array<OneD, Array<OneD, LocalRegions::ExpansionSharedPtr>>
        &elmtToTrace,
    const Array<OneD, const ExpListSharedPtr> &bndCondExp,
    const Array<OneD, const SpatialDomains::BoundaryConditionShPtr> &bndCond,
    const PeriodicMap &perMap, const LibUtilities::CommSharedPtr &comm)
{
    Array<OneD, int> tmp;
    int quad = 0, nDim = 0, eid = 0, offset = 0;
    const LocalRegions::ExpansionVector &locExpVector = *(locExp.GetExp());

    // Assume that each element of the expansion is of the same
    // dimension.
    nDim = locExpVector[0]->GetShapeDimension();

    // This sets up the edge to trace mapping and realigns periodic edges
    if (nDim == 1)
    {
        for (size_t i = 0; i < trace->GetExpSize(); ++i)
        {
            eid    = trace->GetExp(i)->GetGeom()->GetGlobalID();
            offset = trace->GetPhys_Offset(i);

            // Check to see if this vert is periodic. If it is, then we
            // need to use the unique eid of the two points
            auto it = perMap.find(eid);
            if (perMap.count(eid) > 0)
            {
                PeriodicEntity ent = it->second[0];
                if (!ent.isLocal) // Not sure if true in 1D
                {
                    eid = std::min(eid, ent.id);
                }
            }

            m_edgeToTrace[eid].emplace_back(offset);
        }
    }
    else
    {
        for (size_t i = 0; i < trace->GetExpSize(); ++i)
        {
            eid    = trace->GetExp(i)->GetGeom()->GetGlobalID();
            offset = trace->GetPhys_Offset(i);
            quad   = trace->GetExp(i)->GetTotPoints();

            // Check to see if this edge is periodic. If it is, then we
            // need to reverse the trace order of one edge only in the
            // edge to trace map so that the data are reversed w.r.t each
            // other. We do this by using the minimum of the two IDs.
            auto it      = perMap.find(eid);
            bool realign = false;
            if (perMap.count(eid) > 0)
            {
                PeriodicEntity ent = it->second[0];
                if (!ent.isLocal)
                {
                    realign = eid == std::min(eid, ent.id);
                    eid     = std::min(eid, ent.id);
                }
            }

            for (size_t j = 0; j < quad; ++j)
            {
                m_edgeToTrace[eid].emplace_back(offset + j);
            }

            if (realign)
            {
                // Realign some periodic edges in m_edgeToTrace
                Array<OneD, int> tmpArray(m_edgeToTrace[eid].size());
                for (size_t j = 0; j < m_edgeToTrace[eid].size(); ++j)
                {
                    tmpArray[j] = m_edgeToTrace[eid][j];
                }

                StdRegions::Orientation orient = it->second[0].orient;

                if (nDim == 2)
                {
                    AssemblyMapDG::RealignTraceElement(tmpArray, orient, quad);
                }
                else
                {
                    // Orient is going from face 2 -> face 1, but we want
                    // face 1 -> face 2; in all cases except the two below
                    // these are equivalent, so for those two we use the
                    // reverse of the mapping.
                    if (orient == StdRegions::eDir1FwdDir2_Dir2BwdDir1)
                    {
                        orient = StdRegions::eDir1BwdDir2_Dir2FwdDir1;
                    }
                    else if (orient == StdRegions::eDir1BwdDir2_Dir2FwdDir1)
                    {
                        orient = StdRegions::eDir1FwdDir2_Dir2BwdDir1;
                    }

                    AssemblyMapDG::RealignTraceElement(
                        tmpArray, orient, trace->GetExp(i)->GetNumPoints(0),
                        trace->GetExp(i)->GetNumPoints(1));
                }

                for (size_t j = 0; j < m_edgeToTrace[eid].size(); ++j)
                {
                    m_edgeToTrace[eid][j] = tmpArray[j];
                }
            }
        }
    }

    // This creates a list of all geometry of problem dimension - 1
    // and populates the maxQuad member variable for AllToAll
    std::vector<int> localEdgeIds;
    for (eid = 0; eid < locExpVector.size(); ++eid)
    {
        LocalRegions::ExpansionSharedPtr locExpansion = locExpVector[eid];

        for (size_t j = 0; j < locExpansion->GetNtraces(); ++j)
        {
            int id = elmtToTrace[eid][j]->GetGeom()->GetGlobalID();
            localEdgeIds.emplace_back(id);
        }

        quad = locExpansion->GetTotPoints();
        if (quad > m_maxQuad)
        {
            m_maxQuad = quad;
        }
    }

    // Find max quadrature points across all processes for AllToAll method
    comm->AllReduce(m_maxQuad, LibUtilities::ReduceMax);

    // Create list of boundary edge IDs
    std::set<int> bndIdList;
    for (size_t i = 0; i < bndCond.size(); ++i)
    {
        // Don't add if periodic boundary type
        if ((bndCond[i]->GetBoundaryConditionType() ==
             SpatialDomains::ePeriodic))
        {
            continue;
        }
        else
        {
            for (size_t j = 0; j < bndCondExp[i]->GetExpSize(); ++j)
            {
                eid = bndCondExp[i]->GetExp(j)->GetGeom()->GetGlobalID();
                bndIdList.insert(eid);
            }
        }
    }

    // Get unique edges to send
    std::vector<int> uniqueEdgeIds;
    std::vector<bool> duplicated(localEdgeIds.size(), false);
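    // An edge interior to this partition appears exactly twice in
    // localEdgeIds; an edge that appears only once lies either on a domain
    // boundary or on a partition boundary.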
    for (size_t i = 0; i < localEdgeIds.size(); ++i)
    {
        eid = localEdgeIds[i];
        for (size_t j = i + 1; j < localEdgeIds.size(); ++j)
        {
            if (eid == localEdgeIds[j])
            {
                duplicated[i] = duplicated[j] = true;
            }
        }

        if (!duplicated[i]) // Not duplicated in local partition
        {
            if (bndIdList.find(eid) == bndIdList.end()) // Not a boundary edge
            {
                // Check if periodic and if not local set eid to other side
                auto it = perMap.find(eid);
                if (it != perMap.end())
                {
                    if (!it->second[0].isLocal)
                    {
                        uniqueEdgeIds.emplace_back(
                            std::min(eid, it->second[0].id));
                    }
                }
                else
                {
                    uniqueEdgeIds.emplace_back(eid);
                }
            }
        }
    }

    // Send uniqueEdgeIds size so all partitions can prepare buffers
    m_nRanks = comm->GetSize();
    Array<OneD, int> rankNumEdges(m_nRanks);
    Array<OneD, int> localEdgeSize(1, uniqueEdgeIds.size());
    comm->AllGather(localEdgeSize, rankNumEdges);

    Array<OneD, int> rankLocalEdgeDisp(m_nRanks, 0);
    for (size_t i = 1; i < m_nRanks; ++i)
    {
        rankLocalEdgeDisp[i] = rankLocalEdgeDisp[i - 1] + rankNumEdges[i - 1];
    }

    Array<OneD, int> localEdgeIdsArray(uniqueEdgeIds.size());
    for (size_t i = 0; i < uniqueEdgeIds.size(); ++i)
    {
        localEdgeIdsArray[i] = uniqueEdgeIds[i];
    }

    // Sort localEdgeIdsArray before sending (this is important!)
    std::sort(localEdgeIdsArray.begin(), localEdgeIdsArray.end());

    Array<OneD, int> rankLocalEdgeIds(
        std::accumulate(rankNumEdges.begin(), rankNumEdges.end(), 0), 0);

    // Send all unique edge IDs to all partitions
    comm->AllGatherv(localEdgeIdsArray, rankLocalEdgeIds, rankNumEdges,
                     rankLocalEdgeDisp);

    // Find which edge IDs match those on other ranks
    size_t myRank = comm->GetRank();
    for (size_t i = 0; i < m_nRanks; ++i)
    {
        if (i == myRank)
        {
            continue;
        }

        for (size_t j = 0; j < rankNumEdges[i]; ++j)
        {
            int edgeId = rankLocalEdgeIds[rankLocalEdgeDisp[i] + j];
            if (std::find(uniqueEdgeIds.begin(), uniqueEdgeIds.end(), edgeId) !=
                uniqueEdgeIds.end())
            {
                m_rankSharedEdges[i].emplace_back(edgeId);
            }
        }
    }
}

/**
 * Times the exchange method @p f, performing the exchange @p count times
 * for an array of length @p num.
 *
 * @param comm  Communicator
 * @param count Number of timing iterations to run
 * @param num   Number of quadrature points to communicate
 * @param f     #ExchangeMethod to time
 *
 * @return Tuple of loop times {avg, min, max}
 */
std::tuple<NekDouble, NekDouble, NekDouble> AssemblyCommDG::Timing(
    const LibUtilities::CommSharedPtr &comm, const int &count, const int &num,
    const ExchangeMethodSharedPtr &f)
{
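    // The Fwd/Bwd arrays are dummies: only the communication pattern is
    // timed, so the values exchanged are irrelevant.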
    Array<OneD, NekDouble> testFwd(num, 1);
    Array<OneD, NekDouble> testBwd(num, -2);

    LibUtilities::Timer t;
    t.Start();
    for (size_t i = 0; i < count; ++i)
    {
        f->PerformExchange(testFwd, testBwd);
    }
    t.Stop();

    // These could each just be a 'reduce', but that requires setting up the
    // wrapper in comm.h
    Array<OneD, NekDouble> minTime(1, t.TimePerTest(count));
    comm->AllReduce(minTime, LibUtilities::ReduceMin);

    Array<OneD, NekDouble> maxTime(1, t.TimePerTest(count));
    comm->AllReduce(maxTime, LibUtilities::ReduceMax);

    Array<OneD, NekDouble> sumTime(1, t.TimePerTest(count));
    comm->AllReduce(sumTime, LibUtilities::ReduceSum);

    NekDouble avgTime = sumTime[0] / comm->GetSize();
    return std::make_tuple(avgTime, minTime[0], maxTime[0]);
}

} // namespace MultiRegions
} // namespace Nektar