48 const std::map<
int, std::vector<int>> &rankSharedEdges,
49 const std::map<
int, std::vector<int>> &edgeToTrace)
50 : m_comm(comm), m_maxQuad(maxQuad), m_nRanks(nRanks)
52 for (
size_t i = 0; i < nRanks; ++i)
54 if (rankSharedEdges.find(i) != rankSharedEdges.end())
57 ?
static_cast<int>(rankSharedEdges.at(i).size())
66 for (
size_t i = 0; i < nRanks; ++i)
68 if (rankSharedEdges.find(i) != rankSharedEdges.end())
70 for (
size_t j = 0; j < rankSharedEdges.at(i).size(); ++j)
72 std::vector<int> edgeIndex =
73 edgeToTrace.at(rankSharedEdges.at(i)[j]);
74 if (edgeIndex.size() < maxQuad)
76 std::vector<int> diff(maxQuad - edgeIndex.size(), -1);
77 edgeIndex.insert(edgeIndex.end(), diff.begin(), diff.end());
86 std::vector<int> edgeIndex(
87 maxQuad * (
m_maxCount - rankSharedEdges.at(i).size()), -1);
94 std::vector<int> edgeIndex(maxQuad *
m_maxCount, -1);
102 const std::map<
int, std::vector<int>> &rankSharedEdges,
103 const std::map<
int, std::vector<int>> &edgeToTrace,
108 for (
size_t i = 0; i < nRanks; ++i)
110 if (rankSharedEdges.find(i) != rankSharedEdges.end())
112 for (
size_t j = 0; j < rankSharedEdges.at(i).size(); ++j)
114 std::vector<int> edgeIndex =
115 edgeToTrace.at(rankSharedEdges.at(i)[j]);
128 for (
size_t i = 1; i < nRanks; ++i)
136 const std::map<
int, std::vector<int>> &rankSharedEdges,
137 const std::map<
int, std::vector<int>> &edgeToTrace)
140 int nNeighbours = rankSharedEdges.size();
144 for (
auto &rankEdgeVec : rankSharedEdges)
146 destinations[cnt] = rankEdgeVec.first;
147 weights[cnt] = rankEdgeVec.second.size();
151 comm->DistGraphCreateAdjacent(destinations, weights, 1);
157 for (
const auto &rankEdgeSet : rankSharedEdges)
159 for (
size_t i : rankEdgeSet.second)
161 std::vector<int> edgeIndex = edgeToTrace.at(i);
171 for (
size_t i = 1; i < nNeighbours; ++i)
178 const std::map<
int, std::vector<int>> &rankSharedEdges,
179 const std::map<
int, std::vector<int>> &edgeToTrace)
183 int nNeighbours = rankSharedEdges.size();
187 std::vector<std::pair<int, std::vector<int>>> vecPairPartitionTrace;
188 for (
const auto &rankEdgeSet : rankSharedEdges)
190 std::vector<int> edgeTraceIndex;
191 for (
size_t i : rankEdgeSet.second)
193 std::vector<int> edgeIndex = edgeToTrace.at(i);
194 edgeTraceIndex.insert(edgeTraceIndex.end(), edgeIndex.begin(),
198 vecPairPartitionTrace.emplace_back(
199 std::make_pair(rankEdgeSet.first, edgeTraceIndex));
201 sendCount[cnt++] = edgeTraceIndex.size();
205 for (
size_t i = 1; i < nNeighbours; ++i)
207 sendDisp[i] = sendDisp[i - 1] + sendCount[i - 1];
210 size_t totSends = std::accumulate(sendCount.begin(), sendCount.end(), 0);
218 for (
auto &i : vecPairPartitionTrace)
220 for (
auto j : i.second)
230 for (
size_t i = 0; i < vecPairPartitionTrace.size(); ++i)
232 size_t len = vecPairPartitionTrace[i].second.size();
235 m_comm->RecvInit(vecPairPartitionTrace[i].first,
239 m_comm->SendInit(vecPairPartitionTrace[i].first,
251 for (
size_t j = 0; j < size; ++j)
263 m_comm->AlltoAll(sendBuff, recvBuff);
265 for (
size_t j = 0; j < size; ++j)
347 auto comm = locExp.
GetComm()->GetRowComm();
350 if (comm->IsSerial())
360 bndCondExp, bndCond, perMap, comm);
363 std::vector<ExchangeMethodSharedPtr> MPIFuncs;
364 std::vector<std::string> MPIFuncsNames;
368 if (locExp.
GetSession()->MatchSolverInfo(
"OverrideMPI",
"ON") ||
375 MPIFuncsNames.emplace_back(
"AllToAll");
380 MPIFuncsNames.emplace_back(
"AllToAllV");
383 MPIFuncs.emplace_back(
386 MPIFuncsNames.emplace_back(
"PairwiseSendRecv");
389 if (std::get<0>(comm->GetVersion()) >= 3)
394 MPIFuncsNames.emplace_back(
"NeighborAllToAllV");
397 int numPoints = trace->GetNpoints();
398 int warmup = 10, iter = 50;
400 std::vector<NekDouble> avg(MPIFuncs.size(), -1);
401 bool verbose = locExp.
GetSession()->DefinesCmdLineArgument(
"verbose");
403 if (verbose && comm->TreatAsRankZero())
405 std::cout <<
"MPI setup for trace exchange: " << std::endl;
410 for (
size_t i = 0; i < MPIFuncs.size(); ++i)
412 maxStrLen = MPIFuncsNames[i].size() > maxStrLen
413 ? MPIFuncsNames[i].size()
417 for (
size_t i = 0; i < MPIFuncs.size(); ++i)
419 Timing(comm, warmup, numPoints, MPIFuncs[i]);
420 std::tie(avg[i], min, max) =
421 Timing(comm, iter, numPoints, MPIFuncs[i]);
422 if (verbose && comm->TreatAsRankZero())
424 std::cout <<
" " << MPIFuncsNames[i]
425 <<
" times (avg, min, max)"
426 << std::string(maxStrLen - MPIFuncsNames[i].size(),
428 <<
": " << avg[i] <<
" " << min <<
" " << max
434 int fastestMPI = std::distance(
435 avg.begin(), std::min_element(avg.begin(), avg.end()));
437 if (verbose && comm->TreatAsRankZero())
439 std::cout <<
" Chosen fastest method: "
440 << MPIFuncsNames[fastestMPI] << std::endl;
479 int quad = 0, nDim = 0, eid = 0, offset = 0;
484 nDim = locExpVector[0]->GetShapeDimension();
489 for (
size_t i = 0; i < trace->GetExpSize(); ++i)
491 eid = trace->GetExp(i)->GetGeom()->GetGlobalID();
492 offset = trace->GetPhys_Offset(i);
496 auto it = perMap.find(eid);
497 if (perMap.count(eid) > 0)
502 eid = std::min(eid, ent.
id);
511 for (
size_t i = 0; i < trace->GetExpSize(); ++i)
513 eid = trace->GetExp(i)->GetGeom()->GetGlobalID();
514 offset = trace->GetPhys_Offset(i);
515 quad = trace->GetExp(i)->GetTotPoints();
521 auto it = perMap.find(eid);
522 bool realign =
false;
523 if (perMap.count(eid) > 0)
528 realign = eid == std::min(eid, ent.
id);
529 eid = std::min(eid, ent.
id);
533 for (
size_t j = 0; j < quad; ++j)
569 tmpArray, orient, trace->GetExp(i)->GetNumPoints(0),
570 trace->GetExp(i)->GetNumPoints(1));
583 std::vector<int> localEdgeIds;
584 for (eid = 0; eid < locExpVector.size(); ++eid)
588 for (
size_t j = 0; j < locExpansion->GetNtraces(); ++j)
590 int id = elmtToTrace[eid][j]->GetGeom()->GetGlobalID();
591 localEdgeIds.emplace_back(
id);
594 quad = locExpansion->GetTotPoints();
605 std::set<int> bndIdList;
606 for (
size_t i = 0; i < bndCond.size(); ++i)
609 if ((bndCond[i]->GetBoundaryConditionType() ==
616 for (
size_t j = 0; j < bndCondExp[i]->GetExpSize(); ++j)
618 eid = bndCondExp[i]->GetExp(j)->GetGeom()->GetGlobalID();
619 bndIdList.insert(eid);
625 std::vector<int> uniqueEdgeIds;
626 std::vector<bool> duplicated(localEdgeIds.size(),
false);
627 for (
size_t i = 0; i < localEdgeIds.size(); ++i)
629 eid = localEdgeIds[i];
630 for (
size_t j = i + 1; j < localEdgeIds.size(); ++j)
632 if (eid == localEdgeIds[j])
634 duplicated[i] = duplicated[j] =
true;
640 if (bndIdList.find(eid) == bndIdList.end())
643 auto it = perMap.find(eid);
644 if (it != perMap.end())
646 if (!it->second[0].isLocal)
648 uniqueEdgeIds.emplace_back(
649 std::min(eid, it->second[0].id));
654 uniqueEdgeIds.emplace_back(eid);
664 comm->AllGather(localEdgeSize, rankNumEdges);
667 for (
size_t i = 1; i <
m_nRanks; ++i)
669 rankLocalEdgeDisp[i] = rankLocalEdgeDisp[i - 1] + rankNumEdges[i - 1];
673 for (
size_t i = 0; i < uniqueEdgeIds.size(); ++i)
675 localEdgeIdsArray[i] = uniqueEdgeIds[i];
679 std::sort(localEdgeIdsArray.begin(), localEdgeIdsArray.end());
682 std::accumulate(rankNumEdges.begin(), rankNumEdges.end(), 0), 0);
685 comm->AllGatherv(localEdgeIdsArray, rankLocalEdgeIds, rankNumEdges,
689 size_t myRank = comm->GetRank();
690 for (
size_t i = 0; i <
m_nRanks; ++i)
697 for (
size_t j = 0; j < rankNumEdges[i]; ++j)
699 int edgeId = rankLocalEdgeIds[rankLocalEdgeDisp[i] + j];
700 if (
std::find(uniqueEdgeIds.begin(), uniqueEdgeIds.end(), edgeId) !=
729 for (
size_t i = 0; i < count; ++i)
731 f->PerformExchange(testFwd, testBwd);
745 NekDouble avgTime = sumTime[0] / comm->GetSize();
746 return std::make_tuple(avgTime, minTime[0], maxTime[0]);
NekDouble TimePerTest(unsigned int n)
Returns the number of seconds per iteration in a test with n iterations.
General purpose memory allocation routines with the ability to allocate from thread specific memory p...
LibUtilities::CommSharedPtr m_comm
Communicator.
std::vector< int > m_allEdgeIndex
List of trace map indices of the quad points to exchange.
AllToAll(const LibUtilities::CommSharedPtr &comm, const int &maxQuad, const int &nRanks, const std::map< int, std::vector< int > > &rankSharedEdges, const std::map< int, std::vector< int > > &edgeToTrace)
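Constructor taking the communicator, the maximum number of quadrature points, the number of ranks, and the rank-to-shared-edge and edge-to-trace maps.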
int m_nRanks
Number of ranks/processes/partitions.
int m_maxQuad
Max number of quadrature points in an element.
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
int m_maxCount
Largest shared partition edge.
AllToAllV(const LibUtilities::CommSharedPtr &comm, const std::map< int, std::vector< int > > &rankSharedEdges, const std::map< int, std::vector< int > > &edgeToTrace, const int &nRanks)
Constructor taking the communicator, the rank-to-shared-edge and edge-to-trace maps, and the number of ranks.
std::vector< int > m_allVEdgeIndex
List of trace map indices of the quad points to exchange.
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
LibUtilities::CommSharedPtr m_comm
Communicator.
Array< OneD, int > m_allVSendCount
List of counts for MPI_alltoallv.
Array< OneD, int > m_allVSendDisp
List of displacements for MPI_alltoallv.
int m_maxQuad
Max number of quadrature points in an element.
std::map< int, std::vector< int > > m_edgeToTrace
Map of edge ID to quad point trace indices.
AssemblyCommDG(const ExpList &locExp, const ExpListSharedPtr &trace, const Array< OneD, Array< OneD, LocalRegions::ExpansionSharedPtr > > &elmtToTrace, const Array< OneD, const ExpListSharedPtr > &bndCondExp, const Array< OneD, const SpatialDomains::BoundaryConditionShPtr > &bndCond, const PeriodicMap &perMap)
int m_nRanks
Number of ranks/processes/partitions.
static std::tuple< NekDouble, NekDouble, NekDouble > Timing(const LibUtilities::CommSharedPtr &comm, const int &count, const int &num, const ExchangeMethodSharedPtr &f)
Timing of the MPI exchange method.
void InitialiseStructure(const ExpList &locExp, const ExpListSharedPtr &trace, const Array< OneD, Array< OneD, LocalRegions::ExpansionSharedPtr > > &elmtToTrace, const Array< OneD, const ExpListSharedPtr > &bndCondExp, const Array< OneD, const SpatialDomains::BoundaryConditionShPtr > &bndCond, const PeriodicMap &perMap, const LibUtilities::CommSharedPtr &comm)
Initialises the structure for the MPI communication.
std::map< int, std::vector< int > > m_rankSharedEdges
Map of process to shared edge IDs.
ExchangeMethodSharedPtr m_exchange
Chosen exchange method (either fastest parallel or serial)
static void RealignTraceElement(Array< OneD, int > &toAlign, StdRegions::Orientation orient, int nquad1, int nquad2=0)
Base class for all multi-elemental spectral/hp expansions.
std::shared_ptr< LibUtilities::SessionReader > GetSession() const
Returns the session object.
const std::shared_ptr< LocalRegions::ExpansionVector > GetExp() const
This function returns the vector of elements in the expansion.
std::shared_ptr< LibUtilities::Comm > GetComm() const
Returns the comm object.
NeighborAllToAllV(const LibUtilities::CommSharedPtr &comm, const std::map< int, std::vector< int > > &rankSharedEdges, const std::map< int, std::vector< int > > &edgeToTrace)
Constructor taking the communicator and the rank-to-shared-edge and edge-to-trace maps.
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
std::vector< int > m_edgeTraceIndex
List of trace map indices of the quad points to exchange.
Array< OneD, int > m_sendDisp
List of displacements.
LibUtilities::CommSharedPtr m_comm
Communicator.
Array< OneD, int > m_sendCount
List of counts.
LibUtilities::CommRequestSharedPtr m_sendRequest
List of send requests.
Pairwise(const LibUtilities::CommSharedPtr &comm, const std::map< int, std::vector< int > > &rankSharedEdges, const std::map< int, std::vector< int > > &edgeToTrace)
Array< OneD, NekDouble > m_sendBuff
Send buffer for exchange.
Array< OneD, NekDouble > m_recvBuff
Receive buffer for exchange.
LibUtilities::CommRequestSharedPtr m_recvRequest
List of receive requests.
Array< OneD, int > m_edgeTraceIndex
List of trace index locations in recv/send buff.
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
LibUtilities::CommSharedPtr m_comm
Communicator.
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
std::shared_ptr< Expansion > ExpansionSharedPtr
std::vector< ExpansionSharedPtr > ExpansionVector
std::shared_ptr< ExchangeMethod > ExchangeMethodSharedPtr
std::shared_ptr< ExpList > ExpListSharedPtr
Shared pointer to an ExpList object.
std::map< int, std::vector< PeriodicEntity > > PeriodicMap
InputIterator find(InputIterator first, InputIterator last, InputIterator startingpoint, const EqualityComparable &value)
@ eDir1BwdDir2_Dir2FwdDir1
@ eDir1FwdDir2_Dir2BwdDir1
The above copyright notice and this permission notice shall be included.
int id
Geometry ID of entity.
bool isLocal
Flag specifying if this entity is local to this partition.