43 namespace MultiRegions
48 const std::map<
int, std::vector<int>> &rankSharedEdges,
49 const std::map<
int, std::vector<int>> &edgeToTrace)
50 : m_comm(comm), m_maxQuad(maxQuad), m_nRanks(nRanks)
52 for (
size_t i = 0; i < nRanks; ++i)
54 if (rankSharedEdges.find(i) != rankSharedEdges.end())
57 ?
static_cast<int>(rankSharedEdges.at(i).size())
66 for (
size_t i = 0; i < nRanks; ++i)
68 if (rankSharedEdges.find(i) != rankSharedEdges.end())
70 for (
size_t j = 0; j < rankSharedEdges.at(i).size(); ++j)
72 std::vector<int> edgeIndex =
73 edgeToTrace.at(rankSharedEdges.at(i)[j]);
74 if (edgeIndex.size() < maxQuad)
76 std::vector<int> diff(maxQuad - edgeIndex.size(), -1);
77 edgeIndex.insert(edgeIndex.end(), diff.begin(), diff.end());
86 std::vector<int> edgeIndex(
87 maxQuad * (
m_maxCount - rankSharedEdges.at(i).size()), -1);
94 std::vector<int> edgeIndex(maxQuad *
m_maxCount, -1);
102 const std::map<
int, std::vector<int>> &rankSharedEdges,
103 const std::map<
int, std::vector<int>> &edgeToTrace,
108 for (
size_t i = 0; i < nRanks; ++i)
110 if (rankSharedEdges.find(i) != rankSharedEdges.end())
112 for (
size_t j = 0; j < rankSharedEdges.at(i).size(); ++j)
114 std::vector<int> edgeIndex =
115 edgeToTrace.at(rankSharedEdges.at(i)[j]);
128 for (
size_t i = 1; i < nRanks; ++i)
136 const std::map<
int, std::vector<int>> &rankSharedEdges,
137 const std::map<
int, std::vector<int>> &edgeToTrace)
140 int nNeighbours = rankSharedEdges.size();
144 for (
auto &rankEdgeVec : rankSharedEdges)
146 destinations[cnt] = rankEdgeVec.first;
147 weights[cnt] = rankEdgeVec.second.size();
151 comm->DistGraphCreateAdjacent(destinations, weights, 1);
157 for (
const auto &rankEdgeSet : rankSharedEdges)
159 for (
size_t i : rankEdgeSet.second)
161 std::vector<int> edgeIndex = edgeToTrace.at(i);
171 for (
size_t i = 1; i < nNeighbours; ++i)
178 const std::map<
int, std::vector<int>> &rankSharedEdges,
179 const std::map<
int, std::vector<int>> &edgeToTrace)
183 int nNeighbours = rankSharedEdges.size();
186 for (
const auto &rankEdgeSet : rankSharedEdges)
188 std::vector<int> edgeTraceIndex;
189 for (
size_t i : rankEdgeSet.second)
191 std::vector<int> edgeIndex = edgeToTrace.at(i);
192 edgeTraceIndex.insert(edgeTraceIndex.end(), edgeIndex.begin(),
197 std::make_pair(rankEdgeSet.first, edgeTraceIndex));
199 sendCount[cnt++] = edgeTraceIndex.size();
204 for (
size_t i = 1; i < nNeighbours; ++i)
209 size_t totSends = std::accumulate(sendCount.begin(), sendCount.end(), 0);
239 for (
size_t j = 0; j < size; ++j)
251 m_comm->AlltoAll(sendBuff, recvBuff);
253 for (
size_t j = 0; j < size; ++j)
311 for (
size_t j = 0; j < len; ++j)
329 for (
size_t j = 0; j < len; ++j)
345 auto comm = locExp.
GetSession()->GetComm()->GetRowComm();
348 if (comm->IsSerial())
358 bndCondExp, bndCond, perMap, comm);
361 std::vector<ExchangeMethodSharedPtr> MPIFuncs;
362 std::vector<std::string> MPIFuncsNames;
366 if (locExp.
GetSession()->MatchSolverInfo(
"OverrideMPI",
"ON") ||
373 MPIFuncsNames.emplace_back(
"AllToAll");
378 MPIFuncsNames.emplace_back(
"AllToAllV");
381 MPIFuncs.emplace_back(
384 MPIFuncsNames.emplace_back(
"PairwiseSendRecv");
387 if (std::get<0>(comm->GetVersion()) >= 3)
392 MPIFuncsNames.emplace_back(
"NeighborAllToAllV");
395 int numPoints = trace->GetNpoints();
396 int warmup = 10, iter = 50;
398 std::vector<NekDouble> avg(MPIFuncs.size(), -1);
399 bool verbose = locExp.
GetSession()->DefinesCmdLineArgument(
"verbose");
401 if (verbose && comm->GetRank() == 0)
403 std::cout <<
"MPI setup for trace exchange: " << std::endl;
406 for (
size_t i = 0; i < MPIFuncs.size(); ++i)
408 Timing(comm, warmup, numPoints, MPIFuncs[i]);
409 std::tie(avg[i], min, max) =
410 Timing(comm, iter, numPoints, MPIFuncs[i]);
411 if (verbose && comm->GetRank() == 0)
413 std::cout <<
" " << MPIFuncsNames[i]
414 <<
" times (avg, min, max): " << avg[i] <<
" " << min
415 <<
" " << max << std::endl;
420 int fastestMPI = std::distance(
421 avg.begin(), std::min_element(avg.begin(), avg.end()));
423 if (verbose && comm->GetRank() == 0)
425 std::cout <<
" Chosen fastest method: "
426 << MPIFuncsNames[fastestMPI] << std::endl;
467 int quad = 0, nDim = 0, eid = 0, offset = 0;
472 nDim = locExpVector[0]->GetShapeDimension();
477 for (
size_t i = 0; i < trace->GetExpSize(); ++i)
479 eid = trace->GetExp(i)->GetGeom()->GetGlobalID();
480 offset = trace->GetPhys_Offset(i);
484 auto it = perMap.find(eid);
485 if (perMap.count(eid) > 0)
490 eid = std::min(eid, ent.
id);
499 for (
size_t i = 0; i < trace->GetExpSize(); ++i)
501 eid = trace->GetExp(i)->GetGeom()->GetGlobalID();
502 offset = trace->GetPhys_Offset(i);
503 quad = trace->GetExp(i)->GetTotPoints();
509 auto it = perMap.find(eid);
510 bool realign =
false;
511 if (perMap.count(eid) > 0)
516 realign = eid == std::min(eid, ent.
id);
517 eid = std::min(eid, ent.
id);
521 for (
size_t j = 0; j < quad; ++j)
557 tmpArray, orient, trace->GetExp(i)->GetNumPoints(0),
558 trace->GetExp(i)->GetNumPoints(1));
571 std::vector<int> localEdgeIds;
572 for (eid = 0; eid < locExpVector.size(); ++eid)
576 for (
size_t j = 0; j < locExpansion->GetNtraces(); ++j)
578 int id = elmtToTrace[eid][j]->GetGeom()->GetGlobalID();
579 localEdgeIds.emplace_back(
id);
582 quad = locExpansion->GetTotPoints();
593 std::set<int> bndIdList;
594 for (
size_t i = 0; i < bndCond.size(); ++i)
596 for (
size_t j = 0; j < bndCondExp[i]->GetExpSize(); ++j)
598 eid = bndCondExp[i]->GetExp(j)->GetGeom()->GetGlobalID();
599 if (perMap.find(eid) ==
602 bndIdList.insert(eid);
608 std::vector<int> uniqueEdgeIds;
609 std::vector<bool> duplicated(localEdgeIds.size(),
false);
610 for (
size_t i = 0; i < localEdgeIds.size(); ++i)
612 eid = localEdgeIds[i];
613 for (
size_t j = i + 1; j < localEdgeIds.size(); ++j)
615 if (eid == localEdgeIds[j])
617 duplicated[i] = duplicated[j] =
true;
623 if (bndIdList.find(eid) == bndIdList.end())
626 auto it = perMap.find(eid);
627 if (it != perMap.end())
629 if (!it->second[0].isLocal)
631 uniqueEdgeIds.emplace_back(
632 std::min(eid, it->second[0].id));
637 uniqueEdgeIds.emplace_back(eid);
647 comm->AllGather(localEdgeSize, rankNumEdges);
650 for (
size_t i = 1; i <
m_nRanks; ++i)
652 rankLocalEdgeDisp[i] = rankLocalEdgeDisp[i - 1] + rankNumEdges[i - 1];
656 for (
size_t i = 0; i < uniqueEdgeIds.size(); ++i)
658 localEdgeIdsArray[i] = uniqueEdgeIds[i];
662 std::sort(localEdgeIdsArray.begin(), localEdgeIdsArray.end());
665 std::accumulate(rankNumEdges.begin(), rankNumEdges.end(), 0), 0);
668 comm->AllGatherv(localEdgeIdsArray, rankLocalEdgeIds, rankNumEdges,
672 size_t myRank = comm->GetRank();
674 for (
size_t i = 0; i <
m_nRanks; ++i)
681 for (
size_t j = 0; j < rankNumEdges[i]; ++j)
683 int edgeId = rankLocalEdgeIds[rankLocalEdgeDisp[i] + j];
684 if (
std::find(uniqueEdgeIds.begin(), uniqueEdgeIds.end(), edgeId) !=
713 for (
size_t i = 0; i < count; ++i)
715 f->PerformExchange(testFwd, testBwd);
729 NekDouble avgTime = sumTime[0] / comm->GetSize();
730 return std::make_tuple(avgTime, minTime[0], maxTime[0]);
NekDouble TimePerTest(unsigned int n)
Returns amount of seconds per iteration in a test with n iterations.
General purpose memory allocation routines with the ability to allocate from thread specific memory p...
LibUtilities::CommSharedPtr m_comm
Communicator.
std::vector< int > m_allEdgeIndex
List of trace map indices of the quad points to exchange.
AllToAll(const LibUtilities::CommSharedPtr &comm, const int &maxQuad, const int &nRanks, const std::map< int, std::vector< int >> &rankSharedEdges, const std::map< int, std::vector< int >> &edgeToTrace)
Default constructor.
int m_nRanks
Number of ranks/processes/partitions.
int m_maxQuad
Max number of quadrature points in an element.
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
int m_maxCount
Largest shared partition edge.
std::vector< int > m_allVEdgeIndex
List of trace map indices of the quad points to exchange.
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
AllToAllV(const LibUtilities::CommSharedPtr &comm, const std::map< int, std::vector< int >> &rankSharedEdges, const std::map< int, std::vector< int >> &edgeToTrace, const int &nRanks)
Default constructor.
LibUtilities::CommSharedPtr m_comm
Communicator.
Array< OneD, int > m_allVSendCount
List of counts for MPI_alltoallv.
Array< OneD, int > m_allVSendDisp
List of displacements for MPI_alltoallv.
void InitialiseStructure(const ExpList &locExp, const ExpListSharedPtr &trace, const Array< OneD, Array< OneD, LocalRegions::ExpansionSharedPtr >> &elmtToTrace, const Array< OneD, const ExpListSharedPtr > &bndCondExp, const Array< OneD, const SpatialDomains::BoundaryConditionShPtr > &bndCond, const PeriodicMap &perMap, const LibUtilities::CommSharedPtr &comm)
Initialises the structure for the MPI communication.
int m_maxQuad
Max number of quadrature points in an element.
std::map< int, std::vector< int > > m_edgeToTrace
Map of edge ID to quad point trace indices.
int m_nRanks
Number of ranks/processes/partitions.
static std::tuple< NekDouble, NekDouble, NekDouble > Timing(const LibUtilities::CommSharedPtr &comm, const int &count, const int &num, const ExchangeMethodSharedPtr &f)
Timing of the MPI exchange method.
AssemblyCommDG(const ExpList &locExp, const ExpListSharedPtr &trace, const Array< OneD, Array< OneD, LocalRegions::ExpansionSharedPtr >> &elmtToTrace, const Array< OneD, const ExpListSharedPtr > &bndCondExp, const Array< OneD, const SpatialDomains::BoundaryConditionShPtr > &bndCond, const PeriodicMap &perMap)
std::map< int, std::vector< int > > m_rankSharedEdges
Map of process to shared edge IDs.
ExchangeMethodSharedPtr m_exchange
Chosen exchange method (either fastest parallel or serial)
static void RealignTraceElement(Array< OneD, int > &toAlign, StdRegions::Orientation orient, int nquad1, int nquad2=0)
Base class for all multi-elemental spectral/hp expansions.
const std::shared_ptr< LocalRegions::ExpansionVector > GetExp() const
This function returns the vector of elements in the expansion.
std::shared_ptr< LibUtilities::SessionReader > GetSession() const
Returns the session object.
NeighborAllToAllV(const LibUtilities::CommSharedPtr &comm, const std::map< int, std::vector< int >> &rankSharedEdges, const std::map< int, std::vector< int >> &edgeToTrace)
Default constructor.
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
std::vector< int > m_edgeTraceIndex
List of trace map indices of the quad points to exchange.
Array< OneD, int > m_sendDisp
List of displacements.
LibUtilities::CommSharedPtr m_comm
Communicator.
Array< OneD, int > m_sendCount
List of counts.
LibUtilities::CommRequestSharedPtr m_sendRequest
List of send requests.
Array< OneD, NekDouble > m_sendBuff
Send buffer for exchange.
Array< OneD, NekDouble > m_recvBuff
Receive buffer for exchange.
LibUtilities::CommRequestSharedPtr m_recvRequest
List of receive requests.
Array< OneD, int > m_sendDisp
List of displacements.
std::vector< std::pair< int, std::vector< int > > > m_vecPairPartitionTrace
List of partition to trace map indices of the quad points to exchange.
Pairwise(const LibUtilities::CommSharedPtr &comm, const std::map< int, std::vector< int >> &rankSharedEdges, const std::map< int, std::vector< int >> &edgeToTrace)
void PerformExchange(const Array< OneD, NekDouble > &testFwd, Array< OneD, NekDouble > &testBwd) final
LibUtilities::CommSharedPtr m_comm
Communicator.
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
std::shared_ptr< Expansion > ExpansionSharedPtr
std::vector< ExpansionSharedPtr > ExpansionVector
std::shared_ptr< ExchangeMethod > ExchangeMethodSharedPtr
std::shared_ptr< ExpList > ExpListSharedPtr
Shared pointer to an ExpList object.
std::map< int, std::vector< PeriodicEntity > > PeriodicMap
InputIterator find(InputIterator first, InputIterator last, InputIterator startingpoint, const EqualityComparable &value)
@ eDir1BwdDir2_Dir2FwdDir1
@ eDir1FwdDir2_Dir2BwdDir1
The above copyright notice and this permission notice shall be included.
int id
Geometry ID of entity.
bool isLocal
Flag specifying if this entity is local to this partition.