46 const std::map<
int, std::vector<int>> &rankSharedEdges,
47 const std::map<
int, std::vector<int>> &edgeToTrace)
48 : m_comm(comm), m_maxQuad(maxQuad), m_nRanks(nRanks)
50 for (
size_t i = 0; i < nRanks; ++i)
52 if (rankSharedEdges.find(i) != rankSharedEdges.end())
55 ?
static_cast<int>(rankSharedEdges.at(i).size())
64 for (
size_t i = 0; i < nRanks; ++i)
66 if (rankSharedEdges.find(i) != rankSharedEdges.end())
68 for (
size_t j = 0; j < rankSharedEdges.at(i).size(); ++j)
70 std::vector<int> edgeIndex =
71 edgeToTrace.at(rankSharedEdges.at(i)[j]);
72 if (edgeIndex.size() < maxQuad)
74 std::vector<int> diff(maxQuad - edgeIndex.size(), -1);
75 edgeIndex.insert(edgeIndex.end(), diff.begin(), diff.end());
84 std::vector<int> edgeIndex(
85 maxQuad * (
m_maxCount - rankSharedEdges.at(i).size()), -1);
92 std::vector<int> edgeIndex(maxQuad *
m_maxCount, -1);
100 const std::map<
int, std::vector<int>> &rankSharedEdges,
101 const std::map<
int, std::vector<int>> &edgeToTrace,
106 for (
size_t i = 0; i < nRanks; ++i)
108 if (rankSharedEdges.find(i) != rankSharedEdges.end())
110 for (
size_t j = 0; j < rankSharedEdges.at(i).size(); ++j)
112 std::vector<int> edgeIndex =
113 edgeToTrace.at(rankSharedEdges.at(i)[j]);
126 for (
size_t i = 1; i < nRanks; ++i)
134 const std::map<
int, std::vector<int>> &rankSharedEdges,
135 const std::map<
int, std::vector<int>> &edgeToTrace)
138 int nNeighbours = rankSharedEdges.size();
142 for (
auto &rankEdgeVec : rankSharedEdges)
144 destinations[cnt] = rankEdgeVec.first;
145 weights[cnt] = rankEdgeVec.second.size();
149 comm->DistGraphCreateAdjacent(destinations, weights, 1);
155 for (
const auto &rankEdgeSet : rankSharedEdges)
157 for (
size_t i : rankEdgeSet.second)
159 std::vector<int> edgeIndex = edgeToTrace.at(i);
169 for (
size_t i = 1; i < nNeighbours; ++i)
176 const std::map<
int, std::vector<int>> &rankSharedEdges,
177 const std::map<
int, std::vector<int>> &edgeToTrace)
181 int nNeighbours = rankSharedEdges.size();
185 std::vector<std::pair<int, std::vector<int>>> vecPairPartitionTrace;
186 for (
const auto &rankEdgeSet : rankSharedEdges)
188 std::vector<int> edgeTraceIndex;
189 for (
size_t i : rankEdgeSet.second)
191 std::vector<int> edgeIndex = edgeToTrace.at(i);
192 edgeTraceIndex.insert(edgeTraceIndex.end(), edgeIndex.begin(),
196 vecPairPartitionTrace.emplace_back(
197 std::make_pair(rankEdgeSet.first, edgeTraceIndex));
199 sendCount[cnt++] = edgeTraceIndex.size();
203 for (
size_t i = 1; i < nNeighbours; ++i)
205 sendDisp[i] = sendDisp[i - 1] + sendCount[i - 1];
208 size_t totSends = std::accumulate(sendCount.begin(), sendCount.end(), 0);
216 for (
auto &i : vecPairPartitionTrace)
218 for (
auto j : i.second)
228 for (
size_t i = 0; i < vecPairPartitionTrace.size(); ++i)
230 size_t len = vecPairPartitionTrace[i].second.size();
233 m_comm->RecvInit(vecPairPartitionTrace[i].first,
237 m_comm->SendInit(vecPairPartitionTrace[i].first,
249 for (
size_t j = 0; j < size; ++j)
261 m_comm->AlltoAll(sendBuff, recvBuff);
263 for (
size_t j = 0; j < size; ++j)
333 auto comm = locExp.
GetComm()->GetRowComm();
336 if (comm->IsSerial())
346 bndCondExp, bndCond, perMap, comm);
349 std::vector<ExchangeMethodSharedPtr> MPIFuncs;
350 std::vector<std::string> MPIFuncsNames;
354 if (locExp.
GetSession()->MatchSolverInfo(
"OverrideMPI",
"ON") ||
361 MPIFuncsNames.emplace_back(
"AllToAll");
366 MPIFuncsNames.emplace_back(
"AllToAllV");
369 MPIFuncs.emplace_back(
372 MPIFuncsNames.emplace_back(
"PairwiseSendRecv");
375 if (std::get<0>(comm->GetVersion()) >= 3)
380 MPIFuncsNames.emplace_back(
"NeighborAllToAllV");
383 int numPoints = trace->GetNpoints();
384 int warmup = 10, iter = 50;
386 std::vector<NekDouble> avg(MPIFuncs.size(), -1);
387 bool verbose = locExp.
GetSession()->DefinesCmdLineArgument(
"verbose");
389 if (verbose && comm->TreatAsRankZero())
391 std::cout <<
"MPI setup for trace exchange: " << std::endl;
396 for (
size_t i = 0; i < MPIFuncs.size(); ++i)
398 maxStrLen = MPIFuncsNames[i].size() > maxStrLen
399 ? MPIFuncsNames[i].size()
403 for (
size_t i = 0; i < MPIFuncs.size(); ++i)
405 Timing(comm, warmup, numPoints, MPIFuncs[i]);
406 std::tie(avg[i], min, max) =
407 Timing(comm, iter, numPoints, MPIFuncs[i]);
408 if (verbose && comm->TreatAsRankZero())
410 std::cout <<
" " << MPIFuncsNames[i]
411 <<
" times (avg, min, max)"
412 << std::string(maxStrLen - MPIFuncsNames[i].size(),
414 <<
": " << avg[i] <<
" " << min <<
" " << max
420 int fastestMPI = std::distance(
421 avg.begin(), std::min_element(avg.begin(), avg.end()));
423 if (verbose && comm->TreatAsRankZero())
425 std::cout <<
" Chosen fastest method: "
426 << MPIFuncsNames[fastestMPI] << std::endl;
465 int quad = 0, nDim = 0, eid = 0, offset = 0;
470 nDim = locExpVector[0]->GetShapeDimension();
475 for (
size_t i = 0; i < trace->GetExpSize(); ++i)
477 eid = trace->GetExp(i)->GetGeom()->GetGlobalID();
478 offset = trace->GetPhys_Offset(i);
482 auto it = perMap.find(eid);
483 if (perMap.count(eid) > 0)
488 eid = std::min(eid, ent.
id);
497 for (
size_t i = 0; i < trace->GetExpSize(); ++i)
499 eid = trace->GetExp(i)->GetGeom()->GetGlobalID();
500 offset = trace->GetPhys_Offset(i);
501 quad = trace->GetExp(i)->GetTotPoints();
507 auto it = perMap.find(eid);
508 bool realign =
false;
509 if (perMap.count(eid) > 0)
514 realign = eid == std::min(eid, ent.
id);
515 eid = std::min(eid, ent.
id);
519 for (
size_t j = 0; j < quad; ++j)
555 tmpArray, orient, trace->GetExp(i)->GetNumPoints(0),
556 trace->GetExp(i)->GetNumPoints(1));
569 std::vector<int> localEdgeIds;
570 for (eid = 0; eid < locExpVector.size(); ++eid)
574 for (
size_t j = 0; j < locExpansion->GetNtraces(); ++j)
576 int id = elmtToTrace[eid][j]->GetGeom()->GetGlobalID();
577 localEdgeIds.emplace_back(
id);
580 quad = locExpansion->GetTotPoints();
591 std::set<int> bndIdList;
592 for (
size_t i = 0; i < bndCond.size(); ++i)
595 if ((bndCond[i]->GetBoundaryConditionType() ==
602 for (
size_t j = 0; j < bndCondExp[i]->GetExpSize(); ++j)
604 eid = bndCondExp[i]->GetExp(j)->GetGeom()->GetGlobalID();
605 bndIdList.insert(eid);
611 std::vector<int> uniqueEdgeIds;
612 std::vector<bool> duplicated(localEdgeIds.size(),
false);
613 for (
size_t i = 0; i < localEdgeIds.size(); ++i)
615 eid = localEdgeIds[i];
616 for (
size_t j = i + 1; j < localEdgeIds.size(); ++j)
618 if (eid == localEdgeIds[j])
620 duplicated[i] = duplicated[j] =
true;
626 if (bndIdList.find(eid) == bndIdList.end())
629 auto it = perMap.find(eid);
630 if (it != perMap.end())
632 if (!it->second[0].isLocal)
634 uniqueEdgeIds.emplace_back(
635 std::min(eid, it->second[0].id));
640 uniqueEdgeIds.emplace_back(eid);
650 comm->AllGather(localEdgeSize, rankNumEdges);
653 for (
size_t i = 1; i <
m_nRanks; ++i)
655 rankLocalEdgeDisp[i] = rankLocalEdgeDisp[i - 1] + rankNumEdges[i - 1];
659 for (
size_t i = 0; i < uniqueEdgeIds.size(); ++i)
661 localEdgeIdsArray[i] = uniqueEdgeIds[i];
665 std::sort(localEdgeIdsArray.begin(), localEdgeIdsArray.end());
668 std::accumulate(rankNumEdges.begin(), rankNumEdges.end(), 0), 0);
671 comm->AllGatherv(localEdgeIdsArray, rankLocalEdgeIds, rankNumEdges,
675 size_t myRank = comm->GetRank();
676 for (
size_t i = 0; i <
m_nRanks; ++i)
683 for (
size_t j = 0; j < rankNumEdges[i]; ++j)
685 int edgeId = rankLocalEdgeIds[rankLocalEdgeDisp[i] + j];
686 if (
std::find(uniqueEdgeIds.begin(), uniqueEdgeIds.end(), edgeId) !=
715 for (
size_t i = 0; i < count; ++i)
717 f->PerformExchange(testFwd, testBwd);
731 NekDouble avgTime = sumTime[0] / comm->GetSize();
732 return std::make_tuple(avgTime, minTime[0], maxTime[0]);
NekDouble TimePerTest(unsigned int n)
Returns the number of seconds per iteration in a test with n iterations.
General purpose memory allocation routines with the ability to allocate from thread-specific memory pools.
LibUtilities::CommSharedPtr m_comm
Communicator.
std::vector< int > m_allEdgeIndex
List of trace map indices of the quad points to exchange.
AllToAll(const LibUtilities::CommSharedPtr &comm, const int &maxQuad, const int &nRanks, const std::map< int, std::vector< int > > &rankSharedEdges, const std::map< int, std::vector< int > > &edgeToTrace)
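Constructor taking the communicator, the maximum number of quadrature points, the number of ranks, and the rank-to-shared-edge and edge-to-trace maps.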
int m_nRanks
Number of ranks/processes/partitions.
int m_maxQuad
Max number of quadrature points in an element.
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
int m_maxCount
Largest shared partition edge.
AllToAllV(const LibUtilities::CommSharedPtr &comm, const std::map< int, std::vector< int > > &rankSharedEdges, const std::map< int, std::vector< int > > &edgeToTrace, const int &nRanks)
Constructor taking the communicator, the rank-to-shared-edge and edge-to-trace maps, and the number of ranks.
std::vector< int > m_allVEdgeIndex
List of trace map indices of the quad points to exchange.
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
LibUtilities::CommSharedPtr m_comm
Communicator.
Array< OneD, int > m_allVSendCount
List of counts for MPI_alltoallv.
Array< OneD, int > m_allVSendDisp
List of displacements for MPI_alltoallv.
int m_maxQuad
Max number of quadrature points in an element.
std::map< int, std::vector< int > > m_edgeToTrace
Map of edge ID to quad point trace indices.
AssemblyCommDG(const ExpList &locExp, const ExpListSharedPtr &trace, const Array< OneD, Array< OneD, LocalRegions::ExpansionSharedPtr > > &elmtToTrace, const Array< OneD, const ExpListSharedPtr > &bndCondExp, const Array< OneD, const SpatialDomains::BoundaryConditionShPtr > &bndCond, const PeriodicMap &perMap)
int m_nRanks
Number of ranks/processes/partitions.
static std::tuple< NekDouble, NekDouble, NekDouble > Timing(const LibUtilities::CommSharedPtr &comm, const int &count, const int &num, const ExchangeMethodSharedPtr &f)
Timing of the MPI exchange method.
void InitialiseStructure(const ExpList &locExp, const ExpListSharedPtr &trace, const Array< OneD, Array< OneD, LocalRegions::ExpansionSharedPtr > > &elmtToTrace, const Array< OneD, const ExpListSharedPtr > &bndCondExp, const Array< OneD, const SpatialDomains::BoundaryConditionShPtr > &bndCond, const PeriodicMap &perMap, const LibUtilities::CommSharedPtr &comm)
Initialises the structure for the MPI communication.
std::map< int, std::vector< int > > m_rankSharedEdges
Map of process to shared edge IDs.
ExchangeMethodSharedPtr m_exchange
Chosen exchange method (either fastest parallel or serial)
static void RealignTraceElement(Array< OneD, int > &toAlign, StdRegions::Orientation orient, int nquad1, int nquad2=0)
Base class for all multi-elemental spectral/hp expansions.
std::shared_ptr< LibUtilities::SessionReader > GetSession() const
Returns the session object.
const std::shared_ptr< LocalRegions::ExpansionVector > GetExp() const
This function returns the vector of elements in the expansion.
std::shared_ptr< LibUtilities::Comm > GetComm() const
Returns the comm object.
NeighborAllToAllV(const LibUtilities::CommSharedPtr &comm, const std::map< int, std::vector< int > > &rankSharedEdges, const std::map< int, std::vector< int > > &edgeToTrace)
Constructor taking the communicator and the rank-to-shared-edge and edge-to-trace maps.
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
std::vector< int > m_edgeTraceIndex
List of trace map indices of the quad points to exchange.
Array< OneD, int > m_sendDisp
List of displacements.
LibUtilities::CommSharedPtr m_comm
Communicator.
Array< OneD, int > m_sendCount
List of counts.
LibUtilities::CommRequestSharedPtr m_sendRequest
List of send requests.
Pairwise(const LibUtilities::CommSharedPtr &comm, const std::map< int, std::vector< int > > &rankSharedEdges, const std::map< int, std::vector< int > > &edgeToTrace)
Array< OneD, NekDouble > m_sendBuff
Send buffer for exchange.
Array< OneD, NekDouble > m_recvBuff
Receive buffer for exchange.
LibUtilities::CommRequestSharedPtr m_recvRequest
List of receive requests.
Array< OneD, int > m_edgeTraceIndex
List of trace index locations in recv/send buff.
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
LibUtilities::CommSharedPtr m_comm
Communicator.
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
std::shared_ptr< Expansion > ExpansionSharedPtr
std::vector< ExpansionSharedPtr > ExpansionVector
std::shared_ptr< ExchangeMethod > ExchangeMethodSharedPtr
std::shared_ptr< ExpList > ExpListSharedPtr
Shared pointer to an ExpList object.
std::map< int, std::vector< PeriodicEntity > > PeriodicMap
InputIterator find(InputIterator first, InputIterator last, InputIterator startingpoint, const EqualityComparable &value)
@ eDir1BwdDir2_Dir2FwdDir1
@ eDir1FwdDir2_Dir2BwdDir1
void Gathr(I n, const T *x, const I *y, T *z)
Gather vector z[i] = x[y[i]].
void Scatr(int n, const T *x, const int *y, T *z)
Scatter vector z[y[i]] = x[i].
int id
Geometry ID of entity.
bool isLocal
Flag specifying if this entity is local to this partition.