Nektar++
AssemblyCommDG.cpp
///////////////////////////////////////////////////////////////////////////////
//
// File AssemblyCommDG.cpp
//
// For more information, please see: http://www.nektar.info
//
// The MIT License
//
// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
// Department of Aeronautics, Imperial College London (UK), and Scientific
// Computing and Imaging Institute, University of Utah (USA).
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//
// Description: Parallel communication methods for DG with MPI
//
///////////////////////////////////////////////////////////////////////////////

#include <MultiRegions/AssemblyMap/AssemblyCommDG.h>
#include <MultiRegions/AssemblyMap/AssemblyMapDG.h>

#include <LibUtilities/BasicUtils/Timer.h>

#include <algorithm>
#include <numeric>
#include <utility>

namespace Nektar
{
namespace MultiRegions
{

AllToAll::AllToAll(const LibUtilities::CommSharedPtr &comm, const int &maxQuad,
                   const int &nRanks,
                   const std::map<int, std::vector<int>> &rankSharedEdges,
                   const std::map<int, std::vector<int>> &edgeToTrace)
    : m_comm(comm), m_maxQuad(maxQuad), m_nRanks(nRanks)
{
    for (size_t i = 0; i < nRanks; ++i)
    {
        if (rankSharedEdges.find(i) != rankSharedEdges.end())
        {
            m_maxCount = (rankSharedEdges.at(i).size() > m_maxCount)
                             ? static_cast<int>(rankSharedEdges.at(i).size())
                             : m_maxCount;
        }
    }

    comm->AllReduce(m_maxCount, LibUtilities::ReduceMax);

    // Create the edge index vector, where a value of -1 indicates that a
    // padding value of 0 should be inserted instead of a value from Fwd
    for (size_t i = 0; i < nRanks; ++i)
    {
        if (rankSharedEdges.find(i) != rankSharedEdges.end())
        {
            for (size_t j = 0; j < rankSharedEdges.at(i).size(); ++j)
            {
                std::vector<int> edgeIndex =
                    edgeToTrace.at(rankSharedEdges.at(i)[j]);
                if (edgeIndex.size() < maxQuad)
                {
                    std::vector<int> diff(maxQuad - edgeIndex.size(), -1);
                    edgeIndex.insert(edgeIndex.end(), diff.begin(), diff.end());
                }

                m_allEdgeIndex.insert(m_allEdgeIndex.end(), edgeIndex.begin(),
                                      edgeIndex.end());
            }

            if (rankSharedEdges.at(i).size() < m_maxCount)
            {
                std::vector<int> edgeIndex(
                    maxQuad * (m_maxCount - rankSharedEdges.at(i).size()), -1);
                m_allEdgeIndex.insert(m_allEdgeIndex.end(), edgeIndex.begin(),
                                      edgeIndex.end());
            }
        }
        else
        {
            std::vector<int> edgeIndex(maxQuad * m_maxCount, -1);
            m_allEdgeIndex.insert(m_allEdgeIndex.end(), edgeIndex.begin(),
                                  edgeIndex.end());
        }
    }
}
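
// The layout produced above is fixed-size and rank-major: every rank
// contributes m_maxCount edge slots of m_maxQuad entries each, padded with
// -1, so a plain MPI_Alltoall can be used. A minimal sketch of the padding
// rule in isolation (hypothetical trace offsets, assuming maxQuad = 4):
//
// \code
// std::vector<int> edgeIndex = {10, 11, 12}; // 3 trace offsets for one edge
// edgeIndex.resize(4, -1);                   // -> {10, 11, 12, -1}
// \endcode
//
// Every edge then occupies exactly maxQuad slots, at the cost of sending
// padding to ranks that share fewer (or no) edges.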

AllToAllV::AllToAllV(const LibUtilities::CommSharedPtr &comm,
                     const std::map<int, std::vector<int>> &rankSharedEdges,
                     const std::map<int, std::vector<int>> &edgeToTrace,
                     const int &nRanks)
    : m_comm(comm)
{
    m_allVSendCount = Array<OneD, int>(nRanks, 0);
    for (size_t i = 0; i < nRanks; ++i)
    {
        if (rankSharedEdges.find(i) != rankSharedEdges.end())
        {
            for (size_t j = 0; j < rankSharedEdges.at(i).size(); ++j)
            {
                std::vector<int> edgeIndex =
                    edgeToTrace.at(rankSharedEdges.at(i)[j]);
                m_allVEdgeIndex.insert(m_allVEdgeIndex.end(), edgeIndex.begin(),
                                       edgeIndex.end());
                m_allVSendCount[i] += edgeIndex.size();
            }
        }
        else
        {
            m_allVSendCount[i] = 0;
        }
    }

    m_allVSendDisp = Array<OneD, int>(nRanks, 0);
    for (size_t i = 1; i < nRanks; ++i)
    {
        m_allVSendDisp[i] = m_allVSendDisp[i - 1] + m_allVSendCount[i - 1];
    }
}
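
// The displacement loop above is an exclusive prefix sum of the send counts,
// which is the layout MPI_Alltoallv expects. An equivalent sketch using the
// standard library (C++17; plain std::vector rather than Array<OneD>):
//
// \code
// #include <numeric>
// #include <vector>
//
// std::vector<int> count = {6, 0, 12};
// std::vector<int> disp(count.size(), 0);
// std::exclusive_scan(count.begin(), count.end(), disp.begin(), 0);
// // disp == {0, 6, 6}
// \endcode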

NeighborAllToAllV::NeighborAllToAllV(
    const LibUtilities::CommSharedPtr &comm,
    const std::map<int, std::vector<int>> &rankSharedEdges,
    const std::map<int, std::vector<int>> &edgeToTrace)
    : m_comm(comm)
{
    int nNeighbours = rankSharedEdges.size();
    Array<OneD, int> destinations(nNeighbours, 0);
    Array<OneD, int> weights(nNeighbours, 0);
    int cnt = 0;
    for (auto &rankEdgeVec : rankSharedEdges)
    {
        destinations[cnt] = rankEdgeVec.first;
        weights[cnt] = rankEdgeVec.second.size();
        ++cnt;
    }

    comm->DistGraphCreateAdjacent(destinations, weights, 1);

    // Set up the trace indices to send to each neighbour
    m_sendCount = Array<OneD, int>(nNeighbours, 0);
    cnt = 0;

    for (const auto &rankEdgeSet : rankSharedEdges)
    {
        for (size_t i : rankEdgeSet.second)
        {
            std::vector<int> edgeIndex = edgeToTrace.at(i);
            m_edgeTraceIndex.insert(m_edgeTraceIndex.end(), edgeIndex.begin(),
                                    edgeIndex.end());
            m_sendCount[cnt] += edgeIndex.size();
        }

        ++cnt;
    }

    m_sendDisp = Array<OneD, int>(nNeighbours, 0);
    for (size_t i = 1; i < nNeighbours; ++i)
    {
        m_sendDisp[i] = m_sendDisp[i - 1] + m_sendCount[i - 1];
    }
}
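
// DistGraphCreateAdjacent wraps, approximately, the MPI-3 distributed graph
// topology call sketched below. Because the exchange is symmetric, each
// neighbouring rank appears as both a source and a destination, weighted by
// the number of shared edges (a sketch only; the Comm wrapper manages the
// resulting communicator internally):
//
// \code
// MPI_Comm graphComm;
// MPI_Dist_graph_create_adjacent(comm, nNeighbours, destinations, weights,
//                                nNeighbours, destinations, weights,
//                                MPI_INFO_NULL, 1 /* reorder */, &graphComm);
// \endcode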

Pairwise::Pairwise(const LibUtilities::CommSharedPtr &comm,
                   const std::map<int, std::vector<int>> &rankSharedEdges,
                   const std::map<int, std::vector<int>> &edgeToTrace)
    : m_comm(comm)
{
    int cnt = 0;
    int nNeighbours = rankSharedEdges.size();
    Array<OneD, int> sendCount(nNeighbours, -1);

    for (const auto &rankEdgeSet : rankSharedEdges)
    {
        std::vector<int> edgeTraceIndex;
        for (size_t i : rankEdgeSet.second)
        {
            std::vector<int> edgeIndex = edgeToTrace.at(i);
            edgeTraceIndex.insert(edgeTraceIndex.end(), edgeIndex.begin(),
                                  edgeIndex.end());
        }

        m_vecPairPartitionTrace.emplace_back(
            std::make_pair(rankEdgeSet.first, edgeTraceIndex));

        sendCount[cnt++] = edgeTraceIndex.size();
    }

    m_sendDisp = Array<OneD, int>(nNeighbours, 0);

    for (size_t i = 1; i < nNeighbours; ++i)
    {
        m_sendDisp[i] = m_sendDisp[i - 1] + sendCount[i - 1];
    }

    size_t totSends = std::accumulate(sendCount.begin(), sendCount.end(), 0);

    m_recvBuff = Array<OneD, NekDouble>(totSends, -1);
    m_sendBuff = Array<OneD, NekDouble>(totSends, -1);

    m_recvRequest = m_comm->CreateRequest(m_vecPairPartitionTrace.size());
    m_sendRequest = m_comm->CreateRequest(m_vecPairPartitionTrace.size());

    // Construct persistent requests
    for (size_t i = 0; i < m_vecPairPartitionTrace.size(); ++i)
    {
        size_t len = m_vecPairPartitionTrace[i].second.size();

        // Initialise receive requests
        m_comm->RecvInit(m_vecPairPartitionTrace[i].first,
                         m_recvBuff[m_sendDisp[i]], len, m_recvRequest, i);

        // Initialise send requests
        m_comm->SendInit(m_vecPairPartitionTrace[i].first,
                         m_sendBuff[m_sendDisp[i]], len, m_sendRequest, i);
    }
}
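
// RecvInit/SendInit create persistent MPI requests: the message envelope is
// set up once here, and each later exchange only starts and waits on the
// requests. At the MPI layer this corresponds roughly to the sketch below
// (hypothetical buffers and tag; the Comm wrapper owns the request arrays):
//
// \code
// MPI_Request recvReq, sendReq;
// MPI_Recv_init(recvBuf, len, MPI_DOUBLE, otherRank, tag, MPI_COMM_WORLD,
//               &recvReq);
// MPI_Send_init(sendBuf, len, MPI_DOUBLE, otherRank, tag, MPI_COMM_WORLD,
//               &sendReq);
// // Each exchange: MPI_Startall(...) then MPI_Waitall(...).
// \endcode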

void AllToAll::PerformExchange(const Array<OneD, NekDouble> &testFwd,
                               Array<OneD, NekDouble> &testBwd)
{
    int size = m_maxQuad * m_maxCount * m_nRanks;
    Array<OneD, NekDouble> sendBuff(size, -1);
    Array<OneD, NekDouble> recvBuff(size, -1);

    for (size_t j = 0; j < size; ++j)
    {
        if (m_allEdgeIndex[j] == -1)
        {
            sendBuff[j] = 0;
        }
        else
        {
            sendBuff[j] = testFwd[m_allEdgeIndex[j]];
        }
    }

    m_comm->AlltoAll(sendBuff, recvBuff);

    for (size_t j = 0; j < size; ++j)
    {
        if (m_allEdgeIndex[j] != -1)
        {
            testBwd[m_allEdgeIndex[j]] = recvBuff[j];
        }
    }
}

void AllToAllV::PerformExchange(const Array<OneD, NekDouble> &testFwd,
                                Array<OneD, NekDouble> &testBwd)
{
    Array<OneD, NekDouble> sendBuff(m_allVEdgeIndex.size(), -1);
    Array<OneD, NekDouble> recvBuff(m_allVEdgeIndex.size(), -1);

    for (size_t i = 0; i < m_allVEdgeIndex.size(); ++i)
    {
        sendBuff[i] = testFwd[m_allVEdgeIndex[i]];
    }

    m_comm->AlltoAllv(sendBuff, m_allVSendCount, m_allVSendDisp, recvBuff,
                      m_allVSendCount, m_allVSendDisp);

    for (size_t i = 0; i < m_allVEdgeIndex.size(); ++i)
    {
        testBwd[m_allVEdgeIndex[i]] = recvBuff[i];
    }
}

void NeighborAllToAllV::PerformExchange(const Array<OneD, NekDouble> &testFwd,
                                        Array<OneD, NekDouble> &testBwd)
{
    Array<OneD, NekDouble> sendBuff(m_edgeTraceIndex.size(), -1);
    Array<OneD, NekDouble> recvBuff(m_edgeTraceIndex.size(), -1);
    for (size_t i = 0; i < m_edgeTraceIndex.size(); ++i)
    {
        sendBuff[i] = testFwd[m_edgeTraceIndex[i]];
    }

    m_comm->NeighborAlltoAllv(sendBuff, m_sendCount, m_sendDisp, recvBuff,
                              m_sendCount, m_sendDisp);

    for (size_t i = 0; i < m_edgeTraceIndex.size(); ++i)
    {
        testBwd[m_edgeTraceIndex[i]] = recvBuff[i];
    }
}

void Pairwise::PerformExchange(const Array<OneD, NekDouble> &testFwd,
                               Array<OneD, NekDouble> &testBwd)
{
    // Perform receive posts
    m_comm->StartAll(m_recvRequest);

    // Fill send buffer from Fwd trace
    for (size_t i = 0; i < m_vecPairPartitionTrace.size(); ++i)
    {
        size_t len = m_vecPairPartitionTrace[i].second.size();
        for (size_t j = 0; j < len; ++j)
        {
            m_sendBuff[m_sendDisp[i] + j] =
                testFwd[m_vecPairPartitionTrace[i].second[j]];
        }
    }

    // Perform send posts
    m_comm->StartAll(m_sendRequest);

    // Wait for all send/recvs to complete
    m_comm->WaitAll(m_sendRequest);
    m_comm->WaitAll(m_recvRequest);

    // Fill Bwd trace from recv buffer
    for (size_t i = 0; i < m_vecPairPartitionTrace.size(); ++i)
    {
        size_t len = m_vecPairPartitionTrace[i].second.size();
        for (size_t j = 0; j < len; ++j)
        {
            testBwd[m_vecPairPartitionTrace[i].second[j]] =
                m_recvBuff[m_sendDisp[i] + j];
        }
    }
}
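
// Note that the receives are started before the send buffer is packed: with
// persistent requests this lets the MPI library land incoming data as soon
// as it arrives, overlapping the packing loop with communication.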

AssemblyCommDG::AssemblyCommDG(
    const ExpList &locExp, const ExpListSharedPtr &trace,
    const Array<OneD, Array<OneD, LocalRegions::ExpansionSharedPtr>>
        &elmtToTrace,
    const Array<OneD, const ExpListSharedPtr> &bndCondExp,
    const Array<OneD, const SpatialDomains::BoundaryConditionShPtr> &bndCond,
    const PeriodicMap &perMap)
{
    auto comm = locExp.GetSession()->GetComm()->GetRowComm();

    // If serial then skip initialising graph structure and the MPI timing
    if (comm->IsSerial())
    {
        m_exchange =
            ExchangeMethodSharedPtr(MemoryManager<Serial>::AllocateSharedPtr());
    }
    else
    {
        // Initialise graph structure and link processes across partition
        // boundaries
        AssemblyCommDG::InitialiseStructure(locExp, trace, elmtToTrace,
                                            bndCondExp, bndCond, perMap, comm);

        // Time the MPI comm methods: warm up with 10 iterations, then time
        // over 50
        std::vector<ExchangeMethodSharedPtr> MPIFuncs;
        std::vector<std::string> MPIFuncsNames;

        // Disable the AllToAll/AllToAllV methods on more than 16 ranks for
        // performance reasons, unless the OverrideMPI solver info parameter
        // is present
        if (locExp.GetSession()->MatchSolverInfo("OverrideMPI", "ON") ||
            m_nRanks <= 16)
        {
            MPIFuncs.emplace_back(ExchangeMethodSharedPtr(
                MemoryManager<AllToAll>::AllocateSharedPtr(
                    comm, m_maxQuad, m_nRanks, m_rankSharedEdges,
                    m_edgeToTrace)));
            MPIFuncsNames.emplace_back("AllToAll");

            MPIFuncs.emplace_back(ExchangeMethodSharedPtr(
                MemoryManager<AllToAllV>::AllocateSharedPtr(
                    comm, m_rankSharedEdges, m_edgeToTrace, m_nRanks)));
            MPIFuncsNames.emplace_back("AllToAllV");
        }

        MPIFuncs.emplace_back(
            ExchangeMethodSharedPtr(MemoryManager<Pairwise>::AllocateSharedPtr(
                comm, m_rankSharedEdges, m_edgeToTrace)));
        MPIFuncsNames.emplace_back("PairwiseSendRecv");

        // Disable the neighbourhood MPI method on unsupported MPI versions
        // (below 3.0)
        if (std::get<0>(comm->GetVersion()) >= 3)
        {
            MPIFuncs.emplace_back(ExchangeMethodSharedPtr(
                MemoryManager<NeighborAllToAllV>::AllocateSharedPtr(
                    comm, m_rankSharedEdges, m_edgeToTrace)));
            MPIFuncsNames.emplace_back("NeighborAllToAllV");
        }

        int numPoints = trace->GetNpoints();
        int warmup = 10, iter = 50;
        NekDouble min, max;
        std::vector<NekDouble> avg(MPIFuncs.size(), -1);
        bool verbose = locExp.GetSession()->DefinesCmdLineArgument("verbose");

        if (verbose && comm->GetRank() == 0)
        {
            std::cout << "MPI setup for trace exchange: " << std::endl;
        }

        for (size_t i = 0; i < MPIFuncs.size(); ++i)
        {
            Timing(comm, warmup, numPoints, MPIFuncs[i]);
            std::tie(avg[i], min, max) =
                Timing(comm, iter, numPoints, MPIFuncs[i]);
            if (verbose && comm->GetRank() == 0)
            {
                std::cout << " " << MPIFuncsNames[i]
                          << " times (avg, min, max): " << avg[i] << " " << min
                          << " " << max << std::endl;
            }
        }

        // Choose the MPI method with the lowest average time
        int fastestMPI = std::distance(
            avg.begin(), std::min_element(avg.begin(), avg.end()));

        if (verbose && comm->GetRank() == 0)
        {
            std::cout << " Chosen fastest method: "
                      << MPIFuncsNames[fastestMPI] << std::endl;
        }

        m_exchange = MPIFuncs[fastestMPI];
    }
}

/**
 * This function sets up the initial structure to allow for the exchange
 * methods to be created. This structure is contained within the member
 * variable #m_rankSharedEdges, which is a map from rank to the vector of
 * edges shared with that rank. It is filled by:
 *
 * - Creating an edge-to-trace mapping, and realigning periodic edges within
 *   this mapping so that they have the same data layout for ranks sharing
 *   periodic boundaries.
 * - Creating a list of all local edge IDs and calculating the maximum number
 *   of quadrature points used locally, then performing an AllReduce to find
 *   the maximum number of quadrature points across all ranks (for the
 *   AllToAll method).
 * - Creating a list of all boundary edge IDs, except for those which are
 *   periodic.
 * - Using the boundary ID list and the list of all local IDs to construct a
 *   unique list of IDs which lie on a partition boundary (i.e. if an ID does
 *   not occur twice in the local list and does not occur in the boundary
 *   list, it is on a partition boundary). For a periodic edge we also check
 *   whether the other side is local; if not, we add the minimum of the two
 *   periodic IDs to the unique list, as a consistent numbering scheme is
 *   needed across ranks.
 * - Sending the unique list to all other ranks/partitions. Each rank's
 *   unique list is then compared with the local unique edge ID list; if a
 *   match is found, the member variable #m_rankSharedEdges is filled with
 *   the matching rank and unique edge ID.
 */
void AssemblyCommDG::InitialiseStructure(
    const ExpList &locExp, const ExpListSharedPtr &trace,
    const Array<OneD, Array<OneD, LocalRegions::ExpansionSharedPtr>>
        &elmtToTrace,
    const Array<OneD, const ExpListSharedPtr> &bndCondExp,
    const Array<OneD, const SpatialDomains::BoundaryConditionShPtr> &bndCond,
    const PeriodicMap &perMap, const LibUtilities::CommSharedPtr &comm)
{
    int quad = 0, nDim = 0, eid = 0, offset = 0;
    const LocalRegions::ExpansionVector &locExpVector = *(locExp.GetExp());

    // Assume that each element of the expansion is of the same
    // dimension.
    nDim = locExpVector[0]->GetShapeDimension();

    // This sets up the edge-to-trace mapping and realigns periodic edges
    if (nDim == 1)
    {
        for (size_t i = 0; i < trace->GetExpSize(); ++i)
        {
            eid = trace->GetExp(i)->GetGeom()->GetGlobalID();
            offset = trace->GetPhys_Offset(i);

            // Check to see if this vertex is periodic. If it is, then we
            // need to use the unique eid of the two points
            auto it = perMap.find(eid);
            if (it != perMap.end())
            {
                PeriodicEntity ent = it->second[0];
                if (!ent.isLocal) // Not sure if true in 1D
                {
                    eid = std::min(eid, ent.id);
                }
            }

            m_edgeToTrace[eid].emplace_back(offset);
        }
    }
    else
    {
        for (size_t i = 0; i < trace->GetExpSize(); ++i)
        {
            eid = trace->GetExp(i)->GetGeom()->GetGlobalID();
            offset = trace->GetPhys_Offset(i);
            quad = trace->GetExp(i)->GetTotPoints();

            // Check to see if this edge is periodic. If it is, then we
            // need to reverse the trace order of one edge only in the
            // edge-to-trace map so that the data are reversed w.r.t. each
            // other. We do this by using the minimum of the two IDs.
            auto it = perMap.find(eid);
            bool realign = false;
            if (it != perMap.end())
            {
                PeriodicEntity ent = it->second[0];
                if (!ent.isLocal)
                {
                    realign = eid == std::min(eid, ent.id);
                    eid = std::min(eid, ent.id);
                }
            }

            for (size_t j = 0; j < quad; ++j)
            {
                m_edgeToTrace[eid].emplace_back(offset + j);
            }

            if (realign)
            {
                // Realign some periodic edges in m_edgeToTrace
                Array<OneD, int> tmpArray(m_edgeToTrace[eid].size());
                for (size_t j = 0; j < m_edgeToTrace[eid].size(); ++j)
                {
                    tmpArray[j] = m_edgeToTrace[eid][j];
                }

                StdRegions::Orientation orient = it->second[0].orient;

                if (nDim == 2)
                {
                    AssemblyMapDG::RealignTraceElement(tmpArray, orient, quad);
                }
                else
                {
                    // Orient is going from face 2 -> face 1, but we want
                    // face 1 -> face 2; in all cases except the two below
                    // these are equivalent, so for those two we use the
                    // reverse of the mapping.
                    if (orient == StdRegions::eDir1FwdDir2_Dir2BwdDir1)
                    {
                        orient = StdRegions::eDir1BwdDir2_Dir2FwdDir1;
                    }
                    else if (orient == StdRegions::eDir1BwdDir2_Dir2FwdDir1)
                    {
                        orient = StdRegions::eDir1FwdDir2_Dir2BwdDir1;
                    }

                    AssemblyMapDG::RealignTraceElement(
                        tmpArray, orient, trace->GetExp(i)->GetNumPoints(0),
                        trace->GetExp(i)->GetNumPoints(1));
                }

                for (size_t j = 0; j < m_edgeToTrace[eid].size(); ++j)
                {
                    m_edgeToTrace[eid][j] = tmpArray[j];
                }
            }
        }
    }

    // This creates a list of all geometry of problem dimension - 1
    // and populates the m_maxQuad member variable
    std::vector<int> localEdgeIds;
    for (eid = 0; eid < locExpVector.size(); ++eid)
    {
        LocalRegions::ExpansionSharedPtr locExpansion = locExpVector[eid];

        for (size_t j = 0; j < locExpansion->GetNtraces(); ++j)
        {
            int id = elmtToTrace[eid][j]->GetGeom()->GetGlobalID();
            localEdgeIds.emplace_back(id);
        }

        quad = locExpansion->GetTotPoints();
        if (quad > m_maxQuad)
        {
            m_maxQuad = quad;
        }
    }

    // Find the maximum number of quadrature points across all processes
    comm->AllReduce(m_maxQuad, LibUtilities::ReduceMax);

    // Create list of boundary edge IDs
    std::set<int> bndIdList;
    for (size_t i = 0; i < bndCond.size(); ++i)
    {
        for (size_t j = 0; j < bndCondExp[i]->GetExpSize(); ++j)
        {
            eid = bndCondExp[i]->GetExp(j)->GetGeom()->GetGlobalID();
            if (perMap.find(eid) == perMap.end())
            {
                // Don't add if this is a periodic boundary
                bndIdList.insert(eid);
            }
        }
    }

    // Get unique edges to send
    std::vector<int> uniqueEdgeIds;
    std::vector<bool> duplicated(localEdgeIds.size(), false);
    for (size_t i = 0; i < localEdgeIds.size(); ++i)
    {
        eid = localEdgeIds[i];
        for (size_t j = i + 1; j < localEdgeIds.size(); ++j)
        {
            if (eid == localEdgeIds[j])
            {
                duplicated[i] = duplicated[j] = true;
            }
        }

        if (!duplicated[i]) // Not duplicated in local partition
        {
            if (bndIdList.find(eid) == bndIdList.end()) // Not a boundary edge
            {
                // Check if periodic; if the other side is not local, use the
                // minimum of the two periodic IDs
                auto it = perMap.find(eid);
                if (it != perMap.end())
                {
                    if (!it->second[0].isLocal)
                    {
                        uniqueEdgeIds.emplace_back(
                            std::min(eid, it->second[0].id));
                    }
                }
                else
                {
                    uniqueEdgeIds.emplace_back(eid);
                }
            }
        }
    }

    // Send the size of uniqueEdgeIds so all partitions can prepare buffers
    m_nRanks = comm->GetSize();
    Array<OneD, int> rankNumEdges(m_nRanks);
    Array<OneD, int> localEdgeSize(1, uniqueEdgeIds.size());
    comm->AllGather(localEdgeSize, rankNumEdges);

    Array<OneD, int> rankLocalEdgeDisp(m_nRanks, 0);
    for (size_t i = 1; i < m_nRanks; ++i)
    {
        rankLocalEdgeDisp[i] = rankLocalEdgeDisp[i - 1] + rankNumEdges[i - 1];
    }

    Array<OneD, int> localEdgeIdsArray(uniqueEdgeIds.size());
    for (size_t i = 0; i < uniqueEdgeIds.size(); ++i)
    {
        localEdgeIdsArray[i] = uniqueEdgeIds[i];
    }

    // Sort localEdgeIdsArray before sending (this is important!), so that
    // both sides of a shared partition boundary agree on the ordering of
    // the shared edge IDs
    std::sort(localEdgeIdsArray.begin(), localEdgeIdsArray.end());

    Array<OneD, int> rankLocalEdgeIds(
        std::accumulate(rankNumEdges.begin(), rankNumEdges.end(), 0), 0);

    // Send all unique edge IDs to all partitions
    comm->AllGatherv(localEdgeIdsArray, rankLocalEdgeIds, rankNumEdges,
                     rankLocalEdgeDisp);

    // Find which edge IDs match those on other ranks
    size_t myRank = comm->GetRank();
    for (size_t i = 0; i < m_nRanks; ++i)
    {
        if (i == myRank)
        {
            continue;
        }

        for (size_t j = 0; j < rankNumEdges[i]; ++j)
        {
            int edgeId = rankLocalEdgeIds[rankLocalEdgeDisp[i] + j];
            if (std::find(uniqueEdgeIds.begin(), uniqueEdgeIds.end(), edgeId) !=
                uniqueEdgeIds.end())
            {
                m_rankSharedEdges[i].emplace_back(edgeId);
            }
        }
    }
}
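
// A compact restatement (illustrative, not the code above) of the
// partition-boundary test used when building uniqueEdgeIds: an edge ID that
// appears exactly once in the local element-trace list and is not a physical
// boundary edge must be shared with another partition.
//
// \code
// bool onPartitionBoundary =
//     std::count(localEdgeIds.begin(), localEdgeIds.end(), eid) == 1 &&
//     bndIdList.count(eid) == 0;
// \endcode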

/**
 * Timing of the exchange method @p f, performing the exchange @p count times
 * for an array of length @p num.
 *
 * @param comm  Communicator
 * @param count Number of timing iterations to run
 * @param num   Number of quadrature points to communicate
 * @param f     #ExchangeMethod to time
 *
 * @return tuple of loop times {avg, min, max}
 */
std::tuple<NekDouble, NekDouble, NekDouble> AssemblyCommDG::Timing(
    const LibUtilities::CommSharedPtr &comm, const int &count, const int &num,
    const ExchangeMethodSharedPtr &f)
{
    Array<OneD, NekDouble> testFwd(num, 1);
    Array<OneD, NekDouble> testBwd(num, -2);

    LibUtilities::Timer t;
    t.Start();
    for (size_t i = 0; i < count; ++i)
    {
        f->PerformExchange(testFwd, testBwd);
    }
    t.Stop();

    // These could each be a simple 'reduce', but the wrapper still needs to
    // be set up in comm.h
    Array<OneD, NekDouble> minTime(1, t.TimePerTest(count));
    comm->AllReduce(minTime, LibUtilities::ReduceMin);

    Array<OneD, NekDouble> maxTime(1, t.TimePerTest(count));
    comm->AllReduce(maxTime, LibUtilities::ReduceMax);

    Array<OneD, NekDouble> sumTime(1, t.TimePerTest(count));
    comm->AllReduce(sumTime, LibUtilities::ReduceSum);

    NekDouble avgTime = sumTime[0] / comm->GetSize();
    return std::make_tuple(avgTime, minTime[0], maxTime[0]);
}
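
// A hypothetical call, timing 50 exchanges of a 1024-point trace and
// unpacking the reduced statistics:
//
// \code
// NekDouble avg, min, max;
// std::tie(avg, min, max) = AssemblyCommDG::Timing(comm, 50, 1024, exchange);
// \endcode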
732 
733 } // namespace MultiRegions
734 } // namespace Nektar