Nektar++
AssemblyCommDG.cpp
Go to the documentation of this file.
1////////////////////////////////////////////////////////////////////////////////
2//
3// File: AssemblyCommDG.cpp
4//
5// For more information, please see: http://www.nektar.info/
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// Permission is hereby granted, free of charge, to any person obtaining a
14// copy of this software and associated documentation files (the "Software"),
15// to deal in the Software without restriction, including without limitation
16// the rights to use, copy, modify, merge, publish, distribute, sublicense,
17// and/or sell copies of the Software, and to permit persons to whom the
18// Software is furnished to do so, subject to the following conditions:
19//
20// The above copyright notice and this permission notice shall be included
21// in all copies or substantial portions of the Software.
22//
23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29// DEALINGS IN THE SOFTWARE.
30//
31// Description: Parallel communication methods for DG with MPI
32//
33////////////////////////////////////////////////////////////////////////////////
34
38
39#include <utility>
40
42{
43
44AllToAll::AllToAll(const LibUtilities::CommSharedPtr &comm, const int &maxQuad,
45 const int &nRanks,
46 const std::map<int, std::vector<int>> &rankSharedEdges,
47 const std::map<int, std::vector<int>> &edgeToTrace)
48 : m_comm(comm), m_maxQuad(maxQuad), m_nRanks(nRanks)
49{
50 for (size_t i = 0; i < nRanks; ++i)
51 {
52 if (rankSharedEdges.find(i) != rankSharedEdges.end())
53 {
54 m_maxCount = (rankSharedEdges.at(i).size() > m_maxCount)
55 ? static_cast<int>(rankSharedEdges.at(i).size())
56 : m_maxCount;
57 }
58 }
59
60 comm->AllReduce(m_maxCount, LibUtilities::ReduceMax);
61
62 // Creates the edge index vector where value -1 indicates
63 // padding of value 0 to be inserted instead of value from Fwd
64 for (size_t i = 0; i < nRanks; ++i)
65 {
66 if (rankSharedEdges.find(i) != rankSharedEdges.end())
67 {
68 for (size_t j = 0; j < rankSharedEdges.at(i).size(); ++j)
69 {
70 std::vector<int> edgeIndex =
71 edgeToTrace.at(rankSharedEdges.at(i)[j]);
72 if (edgeIndex.size() < maxQuad)
73 {
74 std::vector<int> diff(maxQuad - edgeIndex.size(), -1);
75 edgeIndex.insert(edgeIndex.end(), diff.begin(), diff.end());
76 }
77
78 m_allEdgeIndex.insert(m_allEdgeIndex.end(), edgeIndex.begin(),
79 edgeIndex.end());
80 }
81
82 if (rankSharedEdges.at(i).size() < m_maxCount)
83 {
84 std::vector<int> edgeIndex(
85 maxQuad * (m_maxCount - rankSharedEdges.at(i).size()), -1);
86 m_allEdgeIndex.insert(m_allEdgeIndex.end(), edgeIndex.begin(),
87 edgeIndex.end());
88 }
89 }
90 else
91 {
92 std::vector<int> edgeIndex(maxQuad * m_maxCount, -1);
93 m_allEdgeIndex.insert(m_allEdgeIndex.end(), edgeIndex.begin(),
94 edgeIndex.end());
95 }
96 }
97}
98
100 const std::map<int, std::vector<int>> &rankSharedEdges,
101 const std::map<int, std::vector<int>> &edgeToTrace,
102 const int &nRanks)
103 : m_comm(comm)
104{
106 for (size_t i = 0; i < nRanks; ++i)
107 {
108 if (rankSharedEdges.find(i) != rankSharedEdges.end())
109 {
110 for (size_t j = 0; j < rankSharedEdges.at(i).size(); ++j)
111 {
112 std::vector<int> edgeIndex =
113 edgeToTrace.at(rankSharedEdges.at(i)[j]);
114 m_allVEdgeIndex.insert(m_allVEdgeIndex.end(), edgeIndex.begin(),
115 edgeIndex.end());
116 m_allVSendCount[i] += edgeIndex.size();
117 }
118 }
119 else
120 {
121 m_allVSendCount[i] = 0;
122 }
123 }
124
125 m_allVSendDisp = Array<OneD, int>(nRanks, 0);
126 for (size_t i = 1; i < nRanks; ++i)
127 {
128 m_allVSendDisp[i] = m_allVSendDisp[i - 1] + m_allVSendCount[i - 1];
129 }
130}
131
133 const LibUtilities::CommSharedPtr &comm,
134 const std::map<int, std::vector<int>> &rankSharedEdges,
135 const std::map<int, std::vector<int>> &edgeToTrace)
136 : m_comm(comm)
137{
138 int nNeighbours = rankSharedEdges.size();
139 Array<OneD, int> destinations(nNeighbours, 0);
140 Array<OneD, int> weights(nNeighbours, 0);
141 int cnt = 0;
142 for (auto &rankEdgeVec : rankSharedEdges)
143 {
144 destinations[cnt] = rankEdgeVec.first;
145 weights[cnt] = rankEdgeVec.second.size();
146 ++cnt;
147 }
148
149 comm->DistGraphCreateAdjacent(destinations, weights, 1);
150
151 // Setting up indices
152 m_sendCount = Array<OneD, int>(nNeighbours, 0);
153 cnt = 0;
154
155 for (const auto &rankEdgeSet : rankSharedEdges)
156 {
157 for (size_t i : rankEdgeSet.second)
158 {
159 std::vector<int> edgeIndex = edgeToTrace.at(i);
160 m_edgeTraceIndex.insert(m_edgeTraceIndex.end(), edgeIndex.begin(),
161 edgeIndex.end());
162 m_sendCount[cnt] += edgeIndex.size();
163 }
164
165 ++cnt;
166 }
167
168 m_sendDisp = Array<OneD, int>(nNeighbours, 0);
169 for (size_t i = 1; i < nNeighbours; ++i)
170 {
171 m_sendDisp[i] = m_sendDisp[i - 1] + m_sendCount[i - 1];
172 }
173}
174
176 const std::map<int, std::vector<int>> &rankSharedEdges,
177 const std::map<int, std::vector<int>> &edgeToTrace)
178 : m_comm(comm)
179{
180 int cnt = 0;
181 int nNeighbours = rankSharedEdges.size();
182 Array<OneD, int> sendCount(nNeighbours, -1);
183
184 // List of partition to trace map indices of the quad points to exchange
185 std::vector<std::pair<int, std::vector<int>>> vecPairPartitionTrace;
186 for (const auto &rankEdgeSet : rankSharedEdges)
187 {
188 std::vector<int> edgeTraceIndex;
189 for (size_t i : rankEdgeSet.second)
190 {
191 std::vector<int> edgeIndex = edgeToTrace.at(i);
192 edgeTraceIndex.insert(edgeTraceIndex.end(), edgeIndex.begin(),
193 edgeIndex.end());
194 }
195
196 vecPairPartitionTrace.emplace_back(
197 std::make_pair(rankEdgeSet.first, edgeTraceIndex));
198
199 sendCount[cnt++] = edgeTraceIndex.size();
200 }
201
202 Array<OneD, int> sendDisp(nNeighbours, 0);
203 for (size_t i = 1; i < nNeighbours; ++i)
204 {
205 sendDisp[i] = sendDisp[i - 1] + sendCount[i - 1];
206 }
207
208 size_t totSends = std::accumulate(sendCount.begin(), sendCount.end(), 0);
209
210 m_recvBuff = Array<OneD, NekDouble>(totSends, -1);
211 m_sendBuff = Array<OneD, NekDouble>(totSends, -1);
212 m_edgeTraceIndex = Array<OneD, int>(totSends, -1);
213
214 // Set up m_edgeTraceIndex to reduce complexity when performing exchange
215 cnt = 0;
216 for (auto &i : vecPairPartitionTrace)
217 {
218 for (auto j : i.second)
219 {
220 m_edgeTraceIndex[cnt++] = j;
221 }
222 }
223
224 m_recvRequest = m_comm->CreateRequest(vecPairPartitionTrace.size());
225 m_sendRequest = m_comm->CreateRequest(vecPairPartitionTrace.size());
226
227 // Construct persistent requests
228 for (size_t i = 0; i < vecPairPartitionTrace.size(); ++i)
229 {
230 size_t len = vecPairPartitionTrace[i].second.size();
231
232 // Initialise receive requests
233 m_comm->RecvInit(vecPairPartitionTrace[i].first,
234 m_recvBuff[sendDisp[i]], len, m_recvRequest, i);
235
236 // Initialise send requests
237 m_comm->SendInit(vecPairPartitionTrace[i].first,
238 m_sendBuff[sendDisp[i]], len, m_sendRequest, i);
239 }
240}
241
243 Array<OneD, NekDouble> &testBwd)
244{
245 int size = m_maxQuad * m_maxCount * m_nRanks;
246 Array<OneD, NekDouble> sendBuff(size, -1);
247 Array<OneD, NekDouble> recvBuff(size, -1);
248
249 for (size_t j = 0; j < size; ++j)
250 {
251 if (m_allEdgeIndex[j] == -1)
252 {
253 sendBuff[j] = 0;
254 }
255 else
256 {
257 sendBuff[j] = testFwd[m_allEdgeIndex[j]];
258 }
259 }
260
261 m_comm->AlltoAll(sendBuff, recvBuff);
262
263 for (size_t j = 0; j < size; ++j)
264 {
265 if (m_allEdgeIndex[j] != -1)
266 {
267 testBwd[m_allEdgeIndex[j]] = recvBuff[j];
268 }
269 }
270}
271
273 Array<OneD, NekDouble> &testBwd)
274{
275 Array<OneD, NekDouble> sendBuff(m_allVEdgeIndex.size(), -1);
276 Array<OneD, NekDouble> recvBuff(m_allVEdgeIndex.size(), -1);
277
278 Vmath::Gathr(int(m_allVEdgeIndex.size()), testFwd.data(),
279 m_allVEdgeIndex.data(), sendBuff.data());
280
281 m_comm->AlltoAllv(sendBuff, m_allVSendCount, m_allVSendDisp, recvBuff,
283
284 Vmath::Scatr(int(m_allVEdgeIndex.size()), recvBuff.data(),
285 m_allVEdgeIndex.data(), testBwd.data());
286}
287
289 Array<OneD, NekDouble> &testBwd)
290{
291 Array<OneD, NekDouble> sendBuff(m_edgeTraceIndex.size(), -1);
292 Array<OneD, NekDouble> recvBuff(m_edgeTraceIndex.size(), -1);
293 Vmath::Gathr(int(m_edgeTraceIndex.size()), testFwd.data(),
294 m_edgeTraceIndex.data(), sendBuff.data());
295
296 m_comm->NeighborAlltoAllv(sendBuff, m_sendCount, m_sendDisp, recvBuff,
298
299 Vmath::Scatr(int(m_edgeTraceIndex.size()), recvBuff.data(),
300 m_edgeTraceIndex.data(), testBwd.data());
301}
302
304 Array<OneD, NekDouble> &testBwd)
305{
306 // Perform receive posts
307 m_comm->StartAll(m_recvRequest);
308
309 // Fill send buffer from Fwd trace
310 Vmath::Gathr(int(m_edgeTraceIndex.size()), testFwd.data(),
311 m_edgeTraceIndex.data(), m_sendBuff.data());
312
313 // Perform send posts
314 m_comm->StartAll(m_sendRequest);
315
316 // Wait for all send/recvs to complete
317 m_comm->WaitAll(m_sendRequest);
318 m_comm->WaitAll(m_recvRequest);
319
320 // Fill Bwd trace from recv buffer
321 Vmath::Scatr(int(m_edgeTraceIndex.size()), m_recvBuff.data(),
322 m_edgeTraceIndex.data(), testBwd.data());
323}
324
326 const ExpList &locExp, const ExpListSharedPtr &trace,
328 &elmtToTrace,
329 const Array<OneD, const ExpListSharedPtr> &bndCondExp,
331 const PeriodicMap &perMap)
332{
333 auto comm = locExp.GetComm()->GetRowComm();
334
335 // If serial then skip initialising graph structure and the MPI timing
336 if (comm->IsSerial())
337 {
338 m_exchange =
340 }
341 else
342 {
343 // Initialise graph structure and link processes across partition
344 // boundaries
345 AssemblyCommDG::InitialiseStructure(locExp, trace, elmtToTrace,
346 bndCondExp, bndCond, perMap, comm);
347
348 // Timing MPI comm methods, warm up with 10 iterations then time over 50
349 std::vector<ExchangeMethodSharedPtr> MPIFuncs;
350 std::vector<std::string> MPIFuncsNames;
351
352 // Toggle off AllToAll/AllToAllV methods if cores greater than 16 for
353 // performance reasons unless override solver info parameter is present
354 if (locExp.GetSession()->MatchSolverInfo("OverrideMPI", "ON") ||
355 m_nRanks <= 16)
356 {
357 MPIFuncs.emplace_back(ExchangeMethodSharedPtr(
360 m_edgeToTrace)));
361 MPIFuncsNames.emplace_back("AllToAll");
362
363 MPIFuncs.emplace_back(ExchangeMethodSharedPtr(
366 MPIFuncsNames.emplace_back("AllToAllV");
367 }
368
369 MPIFuncs.emplace_back(
372 MPIFuncsNames.emplace_back("PairwiseSendRecv");
373
374 // Disable neighbor MPI method on unsupported MPI version (below 3.0)
375 if (std::get<0>(comm->GetVersion()) >= 3)
376 {
377 MPIFuncs.emplace_back(ExchangeMethodSharedPtr(
380 MPIFuncsNames.emplace_back("NeighborAllToAllV");
381 }
382
383 int numPoints = trace->GetNpoints();
384 int warmup = 10, iter = 50;
385 NekDouble min, max;
386 std::vector<NekDouble> avg(MPIFuncs.size(), -1);
387 bool verbose = locExp.GetSession()->DefinesCmdLineArgument("verbose");
388
389 if (verbose && comm->TreatAsRankZero())
390 {
391 std::cout << "MPI setup for trace exchange: " << std::endl;
392 }
393
394 // Padding for output
395 int maxStrLen = 0;
396 for (size_t i = 0; i < MPIFuncs.size(); ++i)
397 {
398 maxStrLen = MPIFuncsNames[i].size() > maxStrLen
399 ? MPIFuncsNames[i].size()
400 : maxStrLen;
401 }
402
403 for (size_t i = 0; i < MPIFuncs.size(); ++i)
404 {
405 Timing(comm, warmup, numPoints, MPIFuncs[i]);
406 std::tie(avg[i], min, max) =
407 Timing(comm, iter, numPoints, MPIFuncs[i]);
408 if (verbose && comm->TreatAsRankZero())
409 {
410 std::cout << " " << MPIFuncsNames[i]
411 << " times (avg, min, max)"
412 << std::string(maxStrLen - MPIFuncsNames[i].size(),
413 ' ')
414 << ": " << avg[i] << " " << min << " " << max
415 << std::endl;
416 }
417 }
418
419 // Gets the fastest MPI method
420 int fastestMPI = std::distance(
421 avg.begin(), std::min_element(avg.begin(), avg.end()));
422
423 if (verbose && comm->TreatAsRankZero())
424 {
425 std::cout << " Chosen fastest method: "
426 << MPIFuncsNames[fastestMPI] << std::endl;
427 }
428
429 m_exchange = MPIFuncs[fastestMPI];
430 }
431}
432
/**
 * This function sets up the initial structure to allow for the exchange methods
 * to be created. This structure is contained within the member variable
 * #m_rankSharedEdges which is a map of rank to vector of the shared edges with
 * that rank. This is filled by:
 *
 * - Create an edge to trace mapping, and realign periodic edges within this
 *   mapping so that they have the same data layout for ranks sharing periodic
 *   boundaries.
 * - Create a list of all local edge IDs and calculate the maximum number of
 *   quadrature points used locally, then perform an AllReduce to find the
 *   maximum number of quadrature points across all ranks (for the AllToAll
 *   method).
 * - Create a list of all boundary edge IDs except for those which are
 *   periodic.
 * - Using the boundary ID list and the list of all local IDs, construct a
 *   unique list of IDs which are on a partition boundary (i.e. an ID that does
 *   not occur twice in the local list and does not occur in the boundary list
 *   is on a partition boundary). We also check, if it is a periodic edge,
 *   whether the other side is local; if not, we add the minimum of the two
 *   periodic IDs to the unique list, as we must have a consistent numbering
 *   scheme across ranks.
 * - We send the unique list to all other ranks/partitions. Each rank's unique
 *   list is then compared with the local unique edge ID list; if a match is
 *   found then the member variable #m_rankSharedEdges is filled with the
 *   matching rank and unique edge ID.
 */
457 const ExpList &locExp, const ExpListSharedPtr &trace,
459 &elmtToTrace,
460 const Array<OneD, const ExpListSharedPtr> &bndCondExp,
462 const PeriodicMap &perMap, const LibUtilities::CommSharedPtr &comm)
463{
465 int quad = 0, nDim = 0, eid = 0, offset = 0;
466 const LocalRegions::ExpansionVector &locExpVector = *(locExp.GetExp());
467
468 // Assume that each element of the expansion is of the same
469 // dimension.
470 nDim = locExpVector[0]->GetShapeDimension();
471
472 // This sets up the edge to trace mapping and realigns periodic edges
473 if (nDim == 1)
474 {
475 for (size_t i = 0; i < trace->GetExpSize(); ++i)
476 {
477 eid = trace->GetExp(i)->GetGeom()->GetGlobalID();
478 offset = trace->GetPhys_Offset(i);
479
480 // Check to see if this vert is periodic. If it is, then we
481 // need use the unique eid of the two points
482 auto it = perMap.find(eid);
483 if (perMap.count(eid) > 0)
484 {
485 PeriodicEntity ent = it->second[0];
486 if (!ent.isLocal) // Not sure if true in 1D
487 {
488 eid = std::min(eid, ent.id);
489 }
490 }
491
492 m_edgeToTrace[eid].emplace_back(offset);
493 }
494 }
495 else
496 {
497 for (size_t i = 0; i < trace->GetExpSize(); ++i)
498 {
499 eid = trace->GetExp(i)->GetGeom()->GetGlobalID();
500 offset = trace->GetPhys_Offset(i);
501 quad = trace->GetExp(i)->GetTotPoints();
502
503 // Check to see if this edge is periodic. If it is, then we
504 // need to reverse the trace order of one edge only in the
505 // edge to trace map so that the data are reversed w.r.t each
506 // other. We do this by using the minimum of the two IDs.
507 auto it = perMap.find(eid);
508 bool realign = false;
509 if (perMap.count(eid) > 0)
510 {
511 PeriodicEntity ent = it->second[0];
512 if (!ent.isLocal)
513 {
514 realign = eid == std::min(eid, ent.id);
515 eid = std::min(eid, ent.id);
516 }
517 }
518
519 for (size_t j = 0; j < quad; ++j)
520 {
521 m_edgeToTrace[eid].emplace_back(offset + j);
522 }
523
524 if (realign)
525 {
526 // Realign some periodic edges in m_edgeToTrace
527 Array<OneD, int> tmpArray(m_edgeToTrace[eid].size());
528 for (size_t j = 0; j < m_edgeToTrace[eid].size(); ++j)
529 {
530 tmpArray[j] = m_edgeToTrace[eid][j];
531 }
532
533 StdRegions::Orientation orient = it->second[0].orient;
534
535 if (nDim == 2)
536 {
537 AssemblyMapDG::RealignTraceElement(tmpArray, orient, quad);
538 }
539 else
540 {
541 // Orient is going from face 2 -> face 1 but we want face 1
542 // -> face 2; in all cases except below these are
543 // equivalent. However below is not equivalent so we use the
544 // reverse of the mapping.
546 {
548 }
549 else if (orient == StdRegions::eDir1BwdDir2_Dir2FwdDir1)
550 {
552 }
553
555 tmpArray, orient, trace->GetExp(i)->GetNumPoints(0),
556 trace->GetExp(i)->GetNumPoints(1));
557 }
558
559 for (size_t j = 0; j < m_edgeToTrace[eid].size(); ++j)
560 {
561 m_edgeToTrace[eid][j] = tmpArray[j];
562 }
563 }
564 }
565 }
566
567 // This creates a list of all geometry of problem dimension - 1
568 // and populates the maxQuad member variable for AllToAll
569 std::vector<int> localEdgeIds;
570 for (eid = 0; eid < locExpVector.size(); ++eid)
571 {
572 LocalRegions::ExpansionSharedPtr locExpansion = locExpVector[eid];
573
574 for (size_t j = 0; j < locExpansion->GetNtraces(); ++j)
575 {
576 int id = elmtToTrace[eid][j]->GetGeom()->GetGlobalID();
577 localEdgeIds.emplace_back(id);
578 }
579
580 quad = locExpansion->GetTotPoints();
581 if (quad > m_maxQuad)
582 {
583 m_maxQuad = quad;
584 }
585 }
586
587 // Find max quadrature points across all processes for AllToAll method
588 comm->AllReduce(m_maxQuad, LibUtilities::ReduceMax);
589
590 // Create list of boundary edge IDs
591 std::set<int> bndIdList;
592 for (size_t i = 0; i < bndCond.size(); ++i)
593 {
594 // Don't add if periodic boundary type
595 if ((bndCond[i]->GetBoundaryConditionType() ==
597 {
598 continue;
599 }
600 else
601 {
602 for (size_t j = 0; j < bndCondExp[i]->GetExpSize(); ++j)
603 {
604 eid = bndCondExp[i]->GetExp(j)->GetGeom()->GetGlobalID();
605 bndIdList.insert(eid);
606 }
607 }
608 }
609
610 // Get unique edges to send
611 std::vector<int> uniqueEdgeIds;
612 std::vector<bool> duplicated(localEdgeIds.size(), false);
613 for (size_t i = 0; i < localEdgeIds.size(); ++i)
614 {
615 eid = localEdgeIds[i];
616 for (size_t j = i + 1; j < localEdgeIds.size(); ++j)
617 {
618 if (eid == localEdgeIds[j])
619 {
620 duplicated[i] = duplicated[j] = true;
621 }
622 }
623
624 if (!duplicated[i]) // Not duplicated in local partition
625 {
626 if (bndIdList.find(eid) == bndIdList.end()) // Not a boundary edge
627 {
628 // Check if periodic and if not local set eid to other side
629 auto it = perMap.find(eid);
630 if (it != perMap.end())
631 {
632 if (!it->second[0].isLocal)
633 {
634 uniqueEdgeIds.emplace_back(
635 std::min(eid, it->second[0].id));
636 }
637 }
638 else
639 {
640 uniqueEdgeIds.emplace_back(eid);
641 }
642 }
643 }
644 }
645
646 // Send uniqueEdgeIds size so all partitions can prepare buffers
647 m_nRanks = comm->GetSize();
648 Array<OneD, int> rankNumEdges(m_nRanks);
649 Array<OneD, int> localEdgeSize(1, uniqueEdgeIds.size());
650 comm->AllGather(localEdgeSize, rankNumEdges);
651
652 Array<OneD, int> rankLocalEdgeDisp(m_nRanks, 0);
653 for (size_t i = 1; i < m_nRanks; ++i)
654 {
655 rankLocalEdgeDisp[i] = rankLocalEdgeDisp[i - 1] + rankNumEdges[i - 1];
656 }
657
658 Array<OneD, int> localEdgeIdsArray(uniqueEdgeIds.size());
659 for (size_t i = 0; i < uniqueEdgeIds.size(); ++i)
660 {
661 localEdgeIdsArray[i] = uniqueEdgeIds[i];
662 }
663
664 // Sort localEdgeIdsArray before sending (this is important!)
665 std::sort(localEdgeIdsArray.begin(), localEdgeIdsArray.end());
666
667 Array<OneD, int> rankLocalEdgeIds(
668 std::accumulate(rankNumEdges.begin(), rankNumEdges.end(), 0), 0);
669
670 // Send all unique edge IDs to all partitions
671 comm->AllGatherv(localEdgeIdsArray, rankLocalEdgeIds, rankNumEdges,
672 rankLocalEdgeDisp);
673
674 // Find what edge Ids match with other ranks
675 size_t myRank = comm->GetRank();
676 for (size_t i = 0; i < m_nRanks; ++i)
677 {
678 if (i == myRank)
679 {
680 continue;
681 }
682
683 for (size_t j = 0; j < rankNumEdges[i]; ++j)
684 {
685 int edgeId = rankLocalEdgeIds[rankLocalEdgeDisp[i] + j];
686 if (std::find(uniqueEdgeIds.begin(), uniqueEdgeIds.end(), edgeId) !=
687 uniqueEdgeIds.end())
688 {
689 m_rankSharedEdges[i].emplace_back(edgeId);
690 }
691 }
692 }
693}
694
695/**
696 * Timing of the exchange method @p f, performing the exchange @p count times
697 * for array of length @p num.
698 *
699 * @param comm Communicator
700 * @param count Number of timing iterations to run
701 * @param num Number of quadrature points to communicate
702 * @param f #ExchangeMethod to time
703 *
704 * @return tuple of loop times {avg, min, max}
705 */
706std::tuple<NekDouble, NekDouble, NekDouble> AssemblyCommDG::Timing(
707 const LibUtilities::CommSharedPtr &comm, const int &count, const int &num,
709{
710 Array<OneD, NekDouble> testFwd(num, 1);
711 Array<OneD, NekDouble> testBwd(num, -2);
712
714 t.Start();
715 for (size_t i = 0; i < count; ++i)
716 {
717 f->PerformExchange(testFwd, testBwd);
718 }
719 t.Stop();
720
721 // These can just be 'reduce' but need to setup the wrapper in comm.h
722 Array<OneD, NekDouble> minTime(1, t.TimePerTest(count));
723 comm->AllReduce(minTime, LibUtilities::ReduceMin);
724
725 Array<OneD, NekDouble> maxTime(1, t.TimePerTest(count));
726 comm->AllReduce(maxTime, LibUtilities::ReduceMax);
727
728 Array<OneD, NekDouble> sumTime(1, t.TimePerTest(count));
729 comm->AllReduce(sumTime, LibUtilities::ReduceSum);
730
731 NekDouble avgTime = sumTime[0] / comm->GetSize();
732 return std::make_tuple(avgTime, minTime[0], maxTime[0]);
733}
734
735} // namespace Nektar::MultiRegions
NekDouble TimePerTest(unsigned int n)
Returns amount of seconds per iteration in a test with n iterations.
Definition: Timer.cpp:65
General purpose memory allocation routines with the ability to allocate from thread specific memory p...
LibUtilities::CommSharedPtr m_comm
Communicator.
std::vector< int > m_allEdgeIndex
List of trace map indices of the quad points to exchange.
AllToAll(const LibUtilities::CommSharedPtr &comm, const int &maxQuad, const int &nRanks, const std::map< int, std::vector< int > > &rankSharedEdges, const std::map< int, std::vector< int > > &edgeToTrace)
Default constructor.
int m_nRanks
Number of ranks/processes/partitions.
int m_maxQuad
Max number of quadrature points in an element.
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
int m_maxCount
Largest shared partition edge.
AllToAllV(const LibUtilities::CommSharedPtr &comm, const std::map< int, std::vector< int > > &rankSharedEdges, const std::map< int, std::vector< int > > &edgeToTrace, const int &nRanks)
Default constructor.
std::vector< int > m_allVEdgeIndex
List of trace map indices of the quad points to exchange.
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
LibUtilities::CommSharedPtr m_comm
Communicator.
Array< OneD, int > m_allVSendCount
List of counts for MPI_alltoallv.
Array< OneD, int > m_allVSendDisp
List of displacements for MPI_alltoallv.
int m_maxQuad
Max number of quadrature points in an element.
std::map< int, std::vector< int > > m_edgeToTrace
Map of edge ID to quad point trace indices.
AssemblyCommDG(const ExpList &locExp, const ExpListSharedPtr &trace, const Array< OneD, Array< OneD, LocalRegions::ExpansionSharedPtr > > &elmtToTrace, const Array< OneD, const ExpListSharedPtr > &bndCondExp, const Array< OneD, const SpatialDomains::BoundaryConditionShPtr > &bndCond, const PeriodicMap &perMap)
int m_nRanks
Number of ranks/processes/partitions.
static std::tuple< NekDouble, NekDouble, NekDouble > Timing(const LibUtilities::CommSharedPtr &comm, const int &count, const int &num, const ExchangeMethodSharedPtr &f)
Timing of the MPI exchange method.
void InitialiseStructure(const ExpList &locExp, const ExpListSharedPtr &trace, const Array< OneD, Array< OneD, LocalRegions::ExpansionSharedPtr > > &elmtToTrace, const Array< OneD, const ExpListSharedPtr > &bndCondExp, const Array< OneD, const SpatialDomains::BoundaryConditionShPtr > &bndCond, const PeriodicMap &perMap, const LibUtilities::CommSharedPtr &comm)
Initialises the structure for the MPI communication.
std::map< int, std::vector< int > > m_rankSharedEdges
Map of process to shared edge IDs.
ExchangeMethodSharedPtr m_exchange
Chosen exchange method (either fastest parallel or serial)
static void RealignTraceElement(Array< OneD, int > &toAlign, StdRegions::Orientation orient, int nquad1, int nquad2=0)
Base class for all multi-elemental spectral/hp expansions.
Definition: ExpList.h:99
std::shared_ptr< LibUtilities::SessionReader > GetSession() const
Returns the session object.
Definition: ExpList.h:961
const std::shared_ptr< LocalRegions::ExpansionVector > GetExp() const
This function returns the vector of elements in the expansion.
Definition: ExpList.h:2070
std::shared_ptr< LibUtilities::Comm > GetComm() const
Returns the comm object.
Definition: ExpList.h:966
NeighborAllToAllV(const LibUtilities::CommSharedPtr &comm, const std::map< int, std::vector< int > > &rankSharedEdges, const std::map< int, std::vector< int > > &edgeToTrace)
Default constructor.
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
std::vector< int > m_edgeTraceIndex
List of trace map indices of the quad points to exchange.
Array< OneD, int > m_sendDisp
List of displacements.
LibUtilities::CommSharedPtr m_comm
Communicator.
Array< OneD, int > m_sendCount
List of counts.
LibUtilities::CommRequestSharedPtr m_sendRequest
List of send requests.
Pairwise(const LibUtilities::CommSharedPtr &comm, const std::map< int, std::vector< int > > &rankSharedEdges, const std::map< int, std::vector< int > > &edgeToTrace)
Array< OneD, NekDouble > m_sendBuff
Send buffer for exchange.
Array< OneD, NekDouble > m_recvBuff
Receive buffer for exchange.
LibUtilities::CommRequestSharedPtr m_recvRequest
List of receive requests.
Array< OneD, int > m_edgeTraceIndex
List of trace index locations in recv/send buff.
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
LibUtilities::CommSharedPtr m_comm
Communicator.
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
Definition: Comm.h:55
std::shared_ptr< Expansion > ExpansionSharedPtr
Definition: Expansion.h:66
std::vector< ExpansionSharedPtr > ExpansionVector
Definition: Expansion.h:68
std::shared_ptr< ExchangeMethod > ExchangeMethodSharedPtr
std::shared_ptr< ExpList > ExpListSharedPtr
Shared pointer to an ExpList object.
std::map< int, std::vector< PeriodicEntity > > PeriodicMap
InputIterator find(InputIterator first, InputIterator last, InputIterator startingpoint, const EqualityComparable &value)
Definition: StdRegions.hpp:475
double NekDouble
void Gathr(I n, const T *x, const I *y, T *z)
Gather vector z[i] = x[y[i]].
Definition: Vmath.hpp:507
void Scatr(int n, const T *x, const int *y, T *z)
Scatter vector z[y[i]] = x[i].
Definition: Vmath.hpp:539
int id
Geometry ID of entity.
bool isLocal
Flag specifying if this entity is local to this partition.