Nektar++
AssemblyCommDG.cpp
////////////////////////////////////////////////////////////////////////////////
//
// File: AssemblyCommDG.cpp
//
// For more information, please see: http://www.nektar.info/
//
// The MIT License
//
// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
// Department of Aeronautics, Imperial College London (UK), and Scientific
// Computing and Imaging Institute, University of Utah (USA).
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//
// Description: Parallel communication methods for DG with MPI
//
////////////////////////////////////////////////////////////////////////////////

#include <MultiRegions/AssemblyMap/AssemblyCommDG.h>
#include <MultiRegions/AssemblyMap/AssemblyMapDG.h>

#include <algorithm> // std::find, std::min_element, std::sort
#include <numeric>   // std::accumulate
#include <utility>

namespace Nektar
{
namespace MultiRegions
{

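/**
 * AllToAll exchange method constructor. Since MPI_Alltoall sends the same
 * fixed-size block to every rank, each rank's contribution is padded (using
 * index -1, which maps to a zero value on exchange) up to a uniform size of
 * m_maxQuad * m_maxCount entries.
 */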
AllToAll::AllToAll(const LibUtilities::CommSharedPtr &comm, const int &maxQuad,
                   const int &nRanks,
                   const std::map<int, std::vector<int>> &rankSharedEdges,
                   const std::map<int, std::vector<int>> &edgeToTrace)
    : m_comm(comm), m_maxQuad(maxQuad), m_nRanks(nRanks)
{
    for (size_t i = 0; i < nRanks; ++i)
    {
        if (rankSharedEdges.find(i) != rankSharedEdges.end())
        {
            m_maxCount = (rankSharedEdges.at(i).size() > m_maxCount)
                             ? static_cast<int>(rankSharedEdges.at(i).size())
                             : m_maxCount;
        }
    }

    comm->AllReduce(m_maxCount, LibUtilities::ReduceMax);

    // Create the edge index vector, where a value of -1 indicates that
    // padding of value 0 is to be inserted instead of a value from Fwd
    for (size_t i = 0; i < nRanks; ++i)
    {
        if (rankSharedEdges.find(i) != rankSharedEdges.end())
        {
            for (size_t j = 0; j < rankSharedEdges.at(i).size(); ++j)
            {
                std::vector<int> edgeIndex =
                    edgeToTrace.at(rankSharedEdges.at(i)[j]);
                if (edgeIndex.size() < maxQuad)
                {
                    std::vector<int> diff(maxQuad - edgeIndex.size(), -1);
                    edgeIndex.insert(edgeIndex.end(), diff.begin(), diff.end());
                }

                m_allEdgeIndex.insert(m_allEdgeIndex.end(), edgeIndex.begin(),
                                      edgeIndex.end());
            }

            if (rankSharedEdges.at(i).size() < m_maxCount)
            {
                std::vector<int> edgeIndex(
                    maxQuad * (m_maxCount - rankSharedEdges.at(i).size()), -1);
                m_allEdgeIndex.insert(m_allEdgeIndex.end(), edgeIndex.begin(),
                                      edgeIndex.end());
            }
        }
        else
        {
            std::vector<int> edgeIndex(maxQuad * m_maxCount, -1);
            m_allEdgeIndex.insert(m_allEdgeIndex.end(), edgeIndex.begin(),
                                  edgeIndex.end());
        }
    }
}

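/**
 * AllToAllV exchange method constructor. Builds the per-rank send counts and
 * displacements needed by MPI_Alltoallv, so only the trace points that are
 * actually shared with each rank are exchanged and no padding is required.
 */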
AllToAllV::AllToAllV(const LibUtilities::CommSharedPtr &comm,
                     const std::map<int, std::vector<int>> &rankSharedEdges,
                     const std::map<int, std::vector<int>> &edgeToTrace,
                     const int &nRanks)
    : m_comm(comm)
{
    m_allVSendCount = Array<OneD, int>(nRanks, 0);
    for (size_t i = 0; i < nRanks; ++i)
    {
        if (rankSharedEdges.find(i) != rankSharedEdges.end())
        {
            for (size_t j = 0; j < rankSharedEdges.at(i).size(); ++j)
            {
                std::vector<int> edgeIndex =
                    edgeToTrace.at(rankSharedEdges.at(i)[j]);
                m_allVEdgeIndex.insert(m_allVEdgeIndex.end(), edgeIndex.begin(),
                                       edgeIndex.end());
                m_allVSendCount[i] += edgeIndex.size();
            }
        }
        else
        {
            m_allVSendCount[i] = 0;
        }
    }

    m_allVSendDisp = Array<OneD, int>(nRanks, 0);
    for (size_t i = 1; i < nRanks; ++i)
    {
        m_allVSendDisp[i] = m_allVSendDisp[i - 1] + m_allVSendCount[i - 1];
    }
}

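/**
 * NeighborAllToAllV exchange method constructor. Creates an MPI distributed
 * graph topology linking each rank only to the neighbours it shares edges
 * with, so that MPI_Neighbor_alltoallv communicates solely with those ranks.
 */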
NeighborAllToAllV::NeighborAllToAllV(
    const LibUtilities::CommSharedPtr &comm,
    const std::map<int, std::vector<int>> &rankSharedEdges,
    const std::map<int, std::vector<int>> &edgeToTrace)
    : m_comm(comm)
{
    int nNeighbours = rankSharedEdges.size();
    Array<OneD, int> destinations(nNeighbours, 0);
    Array<OneD, int> weights(nNeighbours, 0);
    int cnt = 0;
    for (auto &rankEdgeVec : rankSharedEdges)
    {
        destinations[cnt] = rankEdgeVec.first;
        weights[cnt] = rankEdgeVec.second.size();
        ++cnt;
    }

    comm->DistGraphCreateAdjacent(destinations, weights, 1);

    // Set up indices
    m_sendCount = Array<OneD, int>(nNeighbours, 0);
    cnt = 0;

    for (const auto &rankEdgeSet : rankSharedEdges)
    {
        for (size_t i : rankEdgeSet.second)
        {
            std::vector<int> edgeIndex = edgeToTrace.at(i);
            m_edgeTraceIndex.insert(m_edgeTraceIndex.end(), edgeIndex.begin(),
                                    edgeIndex.end());
            m_sendCount[cnt] += edgeIndex.size();
        }

        ++cnt;
    }

    m_sendDisp = Array<OneD, int>(nNeighbours, 0);
    for (size_t i = 1; i < nNeighbours; ++i)
    {
        m_sendDisp[i] = m_sendDisp[i - 1] + m_sendCount[i - 1];
    }
}

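/**
 * Pairwise exchange method constructor. Sets up one persistent send request
 * and one persistent receive request per neighbouring rank; these requests
 * are then reused (started and waited on) in every exchange.
 */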
Pairwise::Pairwise(const LibUtilities::CommSharedPtr &comm,
                   const std::map<int, std::vector<int>> &rankSharedEdges,
                   const std::map<int, std::vector<int>> &edgeToTrace)
    : m_comm(comm)
{
    int cnt = 0;
    int nNeighbours = rankSharedEdges.size();
    Array<OneD, int> sendCount(nNeighbours, -1);

    // List of partition-to-trace map indices of the quad points to exchange
    std::vector<std::pair<int, std::vector<int>>> vecPairPartitionTrace;
    for (const auto &rankEdgeSet : rankSharedEdges)
    {
        std::vector<int> edgeTraceIndex;
        for (size_t i : rankEdgeSet.second)
        {
            std::vector<int> edgeIndex = edgeToTrace.at(i);
            edgeTraceIndex.insert(edgeTraceIndex.end(), edgeIndex.begin(),
                                  edgeIndex.end());
        }

        vecPairPartitionTrace.emplace_back(
            std::make_pair(rankEdgeSet.first, edgeTraceIndex));

        sendCount[cnt++] = edgeTraceIndex.size();
    }

    Array<OneD, int> sendDisp(nNeighbours, 0);
    for (size_t i = 1; i < nNeighbours; ++i)
    {
        sendDisp[i] = sendDisp[i - 1] + sendCount[i - 1];
    }

    size_t totSends = std::accumulate(sendCount.begin(), sendCount.end(), 0);

    m_recvBuff = Array<OneD, NekDouble>(totSends, -1);
    m_sendBuff = Array<OneD, NekDouble>(totSends, -1);
    m_edgeTraceIndex = Array<OneD, int>(totSends, -1);

    // Set up m_edgeTraceIndex to reduce complexity when performing exchange
    cnt = 0;
    for (auto &i : vecPairPartitionTrace)
    {
        for (auto j : i.second)
        {
            m_edgeTraceIndex[cnt++] = j;
        }
    }

    m_recvRequest = m_comm->CreateRequest(vecPairPartitionTrace.size());
    m_sendRequest = m_comm->CreateRequest(vecPairPartitionTrace.size());

    // Construct persistent requests
    for (size_t i = 0; i < vecPairPartitionTrace.size(); ++i)
    {
        size_t len = vecPairPartitionTrace[i].second.size();

        // Initialise receive requests
        m_comm->RecvInit(vecPairPartitionTrace[i].first,
                         m_recvBuff[sendDisp[i]], len, m_recvRequest, i);

        // Initialise send requests
        m_comm->SendInit(vecPairPartitionTrace[i].first,
                         m_sendBuff[sendDisp[i]], len, m_sendRequest, i);
    }
}

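/**
 * Performs the trace exchange by scattering the forward trace into the
 * padded send buffer, calling MPI_Alltoall, and gathering the backward trace
 * from the receive buffer; entries with index -1 are padding and are skipped.
 */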
void AllToAll::PerformExchange(const Array<OneD, NekDouble> &testFwd,
                               Array<OneD, NekDouble> &testBwd)
{
    int size = m_maxQuad * m_maxCount * m_nRanks;
    Array<OneD, NekDouble> sendBuff(size, -1);
    Array<OneD, NekDouble> recvBuff(size, -1);

    for (size_t j = 0; j < size; ++j)
    {
        if (m_allEdgeIndex[j] == -1)
        {
            sendBuff[j] = 0;
        }
        else
        {
            sendBuff[j] = testFwd[m_allEdgeIndex[j]];
        }
    }

    m_comm->AlltoAll(sendBuff, recvBuff);

    for (size_t j = 0; j < size; ++j)
    {
        if (m_allEdgeIndex[j] != -1)
        {
            testBwd[m_allEdgeIndex[j]] = recvBuff[j];
        }
    }
}

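/**
 * Performs the trace exchange via MPI_Alltoallv using the precomputed send
 * counts and displacements; the same counts and displacements are used on
 * the receive side since the exchange is symmetric.
 */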
void AllToAllV::PerformExchange(const Array<OneD, NekDouble> &testFwd,
                                Array<OneD, NekDouble> &testBwd)
{
    Array<OneD, NekDouble> sendBuff(m_allVEdgeIndex.size(), -1);
    Array<OneD, NekDouble> recvBuff(m_allVEdgeIndex.size(), -1);

    for (size_t i = 0; i < m_allVEdgeIndex.size(); ++i)
    {
        sendBuff[i] = testFwd[m_allVEdgeIndex[i]];
    }

    m_comm->AlltoAllv(sendBuff, m_allVSendCount, m_allVSendDisp, recvBuff,
                      m_allVSendCount, m_allVSendDisp);

    for (size_t i = 0; i < m_allVEdgeIndex.size(); ++i)
    {
        testBwd[m_allVEdgeIndex[i]] = recvBuff[i];
    }
}

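/**
 * Performs the trace exchange via MPI_Neighbor_alltoallv on the distributed
 * graph communicator, exchanging data only with ranks that share trace edges.
 */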
void NeighborAllToAllV::PerformExchange(const Array<OneD, NekDouble> &testFwd,
                                        Array<OneD, NekDouble> &testBwd)
{
    Array<OneD, NekDouble> sendBuff(m_edgeTraceIndex.size(), -1);
    Array<OneD, NekDouble> recvBuff(m_edgeTraceIndex.size(), -1);
    for (size_t i = 0; i < m_edgeTraceIndex.size(); ++i)
    {
        sendBuff[i] = testFwd[m_edgeTraceIndex[i]];
    }

    m_comm->NeighborAlltoAllv(sendBuff, m_sendCount, m_sendDisp, recvBuff,
                              m_sendCount, m_sendDisp);

    for (size_t i = 0; i < m_edgeTraceIndex.size(); ++i)
    {
        testBwd[m_edgeTraceIndex[i]] = recvBuff[i];
    }
}

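/**
 * Performs the trace exchange using the persistent pairwise requests: post
 * the receives, fill and post the sends, wait for completion, then unpack
 * the receive buffer into the backward trace.
 */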
void Pairwise::PerformExchange(const Array<OneD, NekDouble> &testFwd,
                               Array<OneD, NekDouble> &testBwd)
{
    // Perform receive posts
    m_comm->StartAll(m_recvRequest);

    // Fill send buffer from Fwd trace
    for (size_t i = 0; i < m_edgeTraceIndex.size(); ++i)
    {
        m_sendBuff[i] = testFwd[m_edgeTraceIndex[i]];
    }

    // Perform send posts
    m_comm->StartAll(m_sendRequest);

    // Wait for all send/recvs to complete
    m_comm->WaitAll(m_sendRequest);
    m_comm->WaitAll(m_recvRequest);

    // Fill Bwd trace from recv buffer
    for (size_t i = 0; i < m_edgeTraceIndex.size(); ++i)
    {
        testBwd[m_edgeTraceIndex[i]] = m_recvBuff[i];
    }
}

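/**
 * Constructor: in serial a no-op exchange method is used; otherwise the
 * partition-boundary structure is initialised and each candidate MPI
 * exchange method is timed, with the fastest chosen for the simulation.
 */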
AssemblyCommDG::AssemblyCommDG(
    const ExpList &locExp, const ExpListSharedPtr &trace,
    const Array<OneD, Array<OneD, LocalRegions::ExpansionSharedPtr>>
        &elmtToTrace,
    const Array<OneD, const ExpListSharedPtr> &bndCondExp,
    const Array<OneD, const SpatialDomains::BoundaryConditionShPtr> &bndCond,
    const PeriodicMap &perMap)
{
    auto comm = locExp.GetComm()->GetRowComm();

    // If serial then skip initialising graph structure and the MPI timing
    if (comm->IsSerial())
    {
        m_exchange =
            ExchangeMethodSharedPtr(MemoryManager<Serial>::AllocateSharedPtr());
    }
    else
    {
        // Initialise graph structure and link processes across partition
        // boundaries
        AssemblyCommDG::InitialiseStructure(locExp, trace, elmtToTrace,
                                            bndCondExp, bndCond, perMap, comm);

        // Time the MPI comm methods: warm up with 10 iterations, then time
        // over 50
        std::vector<ExchangeMethodSharedPtr> MPIFuncs;
        std::vector<std::string> MPIFuncsNames;

        // Disable the AllToAll/AllToAllV methods on more than 16 ranks, for
        // performance reasons, unless the override solver-info parameter is
        // present
        if (locExp.GetSession()->MatchSolverInfo("OverrideMPI", "ON") ||
            m_nRanks <= 16)
        {
            MPIFuncs.emplace_back(ExchangeMethodSharedPtr(
                MemoryManager<AllToAll>::AllocateSharedPtr(
                    comm, m_maxQuad, m_nRanks, m_rankSharedEdges,
                    m_edgeToTrace)));
            MPIFuncsNames.emplace_back("AllToAll");

            MPIFuncs.emplace_back(ExchangeMethodSharedPtr(
                MemoryManager<AllToAllV>::AllocateSharedPtr(
                    comm, m_rankSharedEdges, m_edgeToTrace, m_nRanks)));
            MPIFuncsNames.emplace_back("AllToAllV");
        }

        MPIFuncs.emplace_back(
            ExchangeMethodSharedPtr(MemoryManager<Pairwise>::AllocateSharedPtr(
                comm, m_rankSharedEdges, m_edgeToTrace)));
        MPIFuncsNames.emplace_back("PairwiseSendRecv");

        // Disable the neighbour MPI method on unsupported MPI versions
        // (below 3.0)
        if (std::get<0>(comm->GetVersion()) >= 3)
        {
            MPIFuncs.emplace_back(ExchangeMethodSharedPtr(
                MemoryManager<NeighborAllToAllV>::AllocateSharedPtr(
                    comm, m_rankSharedEdges, m_edgeToTrace)));
            MPIFuncsNames.emplace_back("NeighborAllToAllV");
        }

        int numPoints = trace->GetNpoints();
        int warmup = 10, iter = 50;
        NekDouble min, max;
        std::vector<NekDouble> avg(MPIFuncs.size(), -1);
        bool verbose = locExp.GetSession()->DefinesCmdLineArgument("verbose");

        if (verbose && comm->TreatAsRankZero())
        {
            std::cout << "MPI setup for trace exchange: " << std::endl;
        }

        // Padding for output
        int maxStrLen = 0;
        for (size_t i = 0; i < MPIFuncs.size(); ++i)
        {
            maxStrLen = MPIFuncsNames[i].size() > maxStrLen
                            ? MPIFuncsNames[i].size()
                            : maxStrLen;
        }

        for (size_t i = 0; i < MPIFuncs.size(); ++i)
        {
            Timing(comm, warmup, numPoints, MPIFuncs[i]);
            std::tie(avg[i], min, max) =
                Timing(comm, iter, numPoints, MPIFuncs[i]);
            if (verbose && comm->TreatAsRankZero())
            {
                std::cout << "  " << MPIFuncsNames[i]
                          << " times (avg, min, max)"
                          << std::string(maxStrLen - MPIFuncsNames[i].size(),
                                         ' ')
                          << ": " << avg[i] << " " << min << " " << max
                          << std::endl;
            }
        }

        // Get the fastest MPI method
        int fastestMPI = std::distance(
            avg.begin(), std::min_element(avg.begin(), avg.end()));

        if (verbose && comm->TreatAsRankZero())
        {
            std::cout << "  Chosen fastest method: "
                      << MPIFuncsNames[fastestMPI] << std::endl;
        }

        m_exchange = MPIFuncs[fastestMPI];
    }
}

/**
 * This function sets up the initial structure to allow for the exchange
 * methods to be created. This structure is contained within the member
 * variable #m_rankSharedEdges, which is a map from rank to the vector of
 * edges shared with that rank. This is filled by:
 *
 * - Creating an edge-to-trace mapping, and realigning periodic edges within
 *   this mapping so that they have the same data layout for ranks sharing
 *   periodic boundaries.
 * - Creating a list of all local edge IDs and calculating the maximum number
 *   of quadrature points used locally, then performing an AllReduce to find
 *   the maximum number of quadrature points across all ranks (for the
 *   AllToAll method).
 * - Creating a list of all boundary edge IDs, except for those which are
 *   periodic.
 * - Using the boundary ID list and the local ID list to construct a unique
 *   list of IDs which lie on a partition boundary (i.e. if an ID does not
 *   occur twice in the local list and does not occur in the boundary list,
 *   it is on a partition boundary). For periodic edges we also check whether
 *   the other side is local; if not, we add the minimum of the two periodic
 *   IDs to the unique list, since we must have a consistent numbering scheme
 *   across ranks.
 * - Sending the unique list to all other ranks/partitions. Each rank's
 *   unique list is then compared with the local unique edge ID list; if a
 *   match is found, the member variable #m_rankSharedEdges is filled with
 *   the matching rank and unique edge ID.
 */
void AssemblyCommDG::InitialiseStructure(
    const ExpList &locExp, const ExpListSharedPtr &trace,
    const Array<OneD, Array<OneD, LocalRegions::ExpansionSharedPtr>>
        &elmtToTrace,
    const Array<OneD, const ExpListSharedPtr> &bndCondExp,
    const Array<OneD, const SpatialDomains::BoundaryConditionShPtr> &bndCond,
    const PeriodicMap &perMap, const LibUtilities::CommSharedPtr &comm)
{
    int quad = 0, nDim = 0, eid = 0, offset = 0;
    const LocalRegions::ExpansionVector &locExpVector = *(locExp.GetExp());

    // Assume that each element of the expansion is of the same
    // dimension.
    nDim = locExpVector[0]->GetShapeDimension();

    // This sets up the edge-to-trace mapping and realigns periodic edges
    if (nDim == 1)
    {
        for (size_t i = 0; i < trace->GetExpSize(); ++i)
        {
            eid = trace->GetExp(i)->GetGeom()->GetGlobalID();
            offset = trace->GetPhys_Offset(i);

            // Check to see if this vert is periodic. If it is, then we
            // need to use the unique eid of the two points
            auto it = perMap.find(eid);
            if (perMap.count(eid) > 0)
            {
                PeriodicEntity ent = it->second[0];
                if (!ent.isLocal) // Not sure if true in 1D
                {
                    eid = std::min(eid, ent.id);
                }
            }

            m_edgeToTrace[eid].emplace_back(offset);
        }
    }
    else
    {
        for (size_t i = 0; i < trace->GetExpSize(); ++i)
        {
            eid = trace->GetExp(i)->GetGeom()->GetGlobalID();
            offset = trace->GetPhys_Offset(i);
            quad = trace->GetExp(i)->GetTotPoints();

            // Check to see if this edge is periodic. If it is, then we
            // need to reverse the trace order of one edge only in the
            // edge-to-trace map so that the data are reversed w.r.t. each
            // other. We do this by using the minimum of the two IDs.
            auto it = perMap.find(eid);
            bool realign = false;
            if (perMap.count(eid) > 0)
            {
                PeriodicEntity ent = it->second[0];
                if (!ent.isLocal)
                {
                    realign = eid == std::min(eid, ent.id);
                    eid = std::min(eid, ent.id);
                }
            }

            for (size_t j = 0; j < quad; ++j)
            {
                m_edgeToTrace[eid].emplace_back(offset + j);
            }

            if (realign)
            {
                // Realign some periodic edges in m_edgeToTrace
                Array<OneD, int> tmpArray(m_edgeToTrace[eid].size());
                for (size_t j = 0; j < m_edgeToTrace[eid].size(); ++j)
                {
                    tmpArray[j] = m_edgeToTrace[eid][j];
                }

                StdRegions::Orientation orient = it->second[0].orient;

                if (nDim == 2)
                {
                    AssemblyMapDG::RealignTraceElement(tmpArray, orient, quad);
                }
                else
                {
                    // Orient is going from face 2 -> face 1 but we want face 1
                    // -> face 2; in all cases except below these are
                    // equivalent. However below is not equivalent so we use the
                    // reverse of the mapping.
                    if (orient == StdRegions::eDir1FwdDir2_Dir2BwdDir1)
                    {
                        orient = StdRegions::eDir1BwdDir2_Dir2FwdDir1;
                    }
                    else if (orient == StdRegions::eDir1BwdDir2_Dir2FwdDir1)
                    {
                        orient = StdRegions::eDir1FwdDir2_Dir2BwdDir1;
                    }

                    AssemblyMapDG::RealignTraceElement(
                        tmpArray, orient, trace->GetExp(i)->GetNumPoints(0),
                        trace->GetExp(i)->GetNumPoints(1));
                }

                for (size_t j = 0; j < m_edgeToTrace[eid].size(); ++j)
                {
                    m_edgeToTrace[eid][j] = tmpArray[j];
                }
            }
        }
    }

    // This creates a list of all geometry of problem dimension - 1
    // and populates the maxQuad member variable for AllToAll
    std::vector<int> localEdgeIds;
    for (eid = 0; eid < locExpVector.size(); ++eid)
    {
        LocalRegions::ExpansionSharedPtr locExpansion = locExpVector[eid];

        for (size_t j = 0; j < locExpansion->GetNtraces(); ++j)
        {
            int id = elmtToTrace[eid][j]->GetGeom()->GetGlobalID();
            localEdgeIds.emplace_back(id);
        }

        quad = locExpansion->GetTotPoints();
        if (quad > m_maxQuad)
        {
            m_maxQuad = quad;
        }
    }

    // Find max quadrature points across all processes for AllToAll method
    comm->AllReduce(m_maxQuad, LibUtilities::ReduceMax);

    // Create list of boundary edge IDs
    std::set<int> bndIdList;
    for (size_t i = 0; i < bndCond.size(); ++i)
    {
        // Don't add if periodic boundary type
        if ((bndCond[i]->GetBoundaryConditionType() ==
             SpatialDomains::ePeriodic))
        {
            continue;
        }
        else
        {
            for (size_t j = 0; j < bndCondExp[i]->GetExpSize(); ++j)
            {
                eid = bndCondExp[i]->GetExp(j)->GetGeom()->GetGlobalID();
                bndIdList.insert(eid);
            }
        }
    }

    // Get unique edges to send
    std::vector<int> uniqueEdgeIds;
    std::vector<bool> duplicated(localEdgeIds.size(), false);
    for (size_t i = 0; i < localEdgeIds.size(); ++i)
    {
        eid = localEdgeIds[i];
        for (size_t j = i + 1; j < localEdgeIds.size(); ++j)
        {
            if (eid == localEdgeIds[j])
            {
                duplicated[i] = duplicated[j] = true;
            }
        }

        if (!duplicated[i]) // Not duplicated in local partition
        {
            if (bndIdList.find(eid) == bndIdList.end()) // Not a boundary edge
            {
                // Check if periodic, and if not local set eid to other side
                auto it = perMap.find(eid);
                if (it != perMap.end())
                {
                    if (!it->second[0].isLocal)
                    {
                        uniqueEdgeIds.emplace_back(
                            std::min(eid, it->second[0].id));
                    }
                }
                else
                {
                    uniqueEdgeIds.emplace_back(eid);
                }
            }
        }
    }

    // Send uniqueEdgeIds size so all partitions can prepare buffers
    m_nRanks = comm->GetSize();
    Array<OneD, int> rankNumEdges(m_nRanks);
    Array<OneD, int> localEdgeSize(1, uniqueEdgeIds.size());
    comm->AllGather(localEdgeSize, rankNumEdges);

    Array<OneD, int> rankLocalEdgeDisp(m_nRanks, 0);
    for (size_t i = 1; i < m_nRanks; ++i)
    {
        rankLocalEdgeDisp[i] = rankLocalEdgeDisp[i - 1] + rankNumEdges[i - 1];
    }

    Array<OneD, int> localEdgeIdsArray(uniqueEdgeIds.size());
    for (size_t i = 0; i < uniqueEdgeIds.size(); ++i)
    {
        localEdgeIdsArray[i] = uniqueEdgeIds[i];
    }

    // Sort localEdgeIdsArray before sending (this is important!)
    std::sort(localEdgeIdsArray.begin(), localEdgeIdsArray.end());

    Array<OneD, int> rankLocalEdgeIds(
        std::accumulate(rankNumEdges.begin(), rankNumEdges.end(), 0), 0);

    // Send all unique edge IDs to all partitions
    comm->AllGatherv(localEdgeIdsArray, rankLocalEdgeIds, rankNumEdges,
                     rankLocalEdgeDisp);

    // Find which edge IDs match with other ranks
    size_t myRank = comm->GetRank();
    for (size_t i = 0; i < m_nRanks; ++i)
    {
        if (i == myRank)
        {
            continue;
        }

        for (size_t j = 0; j < rankNumEdges[i]; ++j)
        {
            int edgeId = rankLocalEdgeIds[rankLocalEdgeDisp[i] + j];
            if (std::find(uniqueEdgeIds.begin(), uniqueEdgeIds.end(), edgeId) !=
                uniqueEdgeIds.end())
            {
                m_rankSharedEdges[i].emplace_back(edgeId);
            }
        }
    }
}

/**
 * Timing of the exchange method @p f, performing the exchange @p count times
 * for an array of length @p num.
 *
 * @param comm  Communicator
 * @param count Number of timing iterations to run
 * @param num   Number of quadrature points to communicate
 * @param f     #ExchangeMethod to time
 *
 * @return Tuple of loop times {avg, min, max}
 */
std::tuple<NekDouble, NekDouble, NekDouble> AssemblyCommDG::Timing(
    const LibUtilities::CommSharedPtr &comm, const int &count, const int &num,
    const ExchangeMethodSharedPtr &f)
{
    Array<OneD, NekDouble> testFwd(num, 1);
    Array<OneD, NekDouble> testBwd(num, -2);

    LibUtilities::Timer t;
    t.Start();
    for (size_t i = 0; i < count; ++i)
    {
        f->PerformExchange(testFwd, testBwd);
    }
    t.Stop();

    // These could just be 'Reduce', but the wrapper in comm.h would need to
    // be set up for that
    Array<OneD, NekDouble> minTime(1, t.TimePerTest(count));
    comm->AllReduce(minTime, LibUtilities::ReduceMin);

    Array<OneD, NekDouble> maxTime(1, t.TimePerTest(count));
    comm->AllReduce(maxTime, LibUtilities::ReduceMax);

    Array<OneD, NekDouble> sumTime(1, t.TimePerTest(count));
    comm->AllReduce(sumTime, LibUtilities::ReduceSum);

    NekDouble avgTime = sumTime[0] / comm->GetSize();
    return std::make_tuple(avgTime, minTime[0], maxTime[0]);
}

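/*
 * Usage sketch (not part of this file; assumes the declarations in
 * AssemblyCommDG.h, including the AssemblyCommDGSharedPtr typedef and the
 * delegating PerformExchange member): the object is constructed once per
 * trace and then used to exchange forward/backward trace data, e.g.
 *
 *     AssemblyCommDGSharedPtr traceComm =
 *         MemoryManager<AssemblyCommDG>::AllocateSharedPtr(
 *             locExp, trace, elmtToTrace, bndCondExp, bndCond, perMap);
 *     traceComm->PerformExchange(Fwd, Bwd);
 */
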
} // namespace MultiRegions
} // namespace Nektar