51 "Block Relaxed Jacobi Preconditioner for CFS.");
62 size_t nvariables = pFields.size();
67 for (
size_t i = 0; i < nvariables; i++)
91 boost::ignore_unused(flag);
101 const NekDouble OmBRJParam = 1.0 - BRJParam;
103 size_t nvariables = pFields.size();
104 size_t npoints = pFields[0]->GetNcoeffs();
105 size_t ntotpnt = inarray.size();
107 ASSERTL0(nvariables * npoints == ntotpnt,
108 "nvariables*npoints!=ntotpnt in PreconCoeff");
117 for (
size_t m = 0; m < nvariables; m++)
119 size_t moffset = m * npoints;
120 rhs2d[m] = rhs + moffset;
121 out_2d[m] = outarray + moffset;
122 outTmp_2d[m] = outTmp + moffset;
123 pFields[m]->MultiplyByMassMatrix(inarray + moffset, rhs2d[m]);
126 size_t nphysic = pFields[0]->GetNpoints();
127 size_t nTracePts = pFields[0]->GetTrace()->GetNpoints();
132 for (
size_t j = 0; j < nvariables; j++)
139 for (
size_t i = 0; i < ntmpTrace; i++)
142 for (
size_t j = 0; j < nvariables; j++)
149 for (
size_t j = 0; j < nvariables; j++)
155 bool flagUpdateDervFlux =
false;
157 const size_t nwspTraceDataType = nvariables + 1;
159 for (
size_t m = 0; m < nwspTraceDataType; m++)
170 for (
size_t nrelax = 0; nrelax < nBRJIterTot - 1; nrelax++)
172 Vmath::Smul(ntotpnt, OmBRJParam, outarray, 1, outN, 1);
176 pFields, nvariables, npoints, rhs2d, out_2d, flagUpdateDervFlux,
177 FwdFluxDeriv, BwdFluxDeriv, qfield, tmpTrace, wspTraceDataType,
188 Vmath::Svtvp(ntotpnt, BRJParam, outTmp, 1, outN, 1, outarray, 1);
201 boost::ignore_unused(pFields);
207 int nvariables = pFields.size();
208 int nelmts = pFields[0]->GetNumElmts();
210 for (
int i = 0; i < nelmts; i++)
212 matdim[i] = pFields[0]->GetExp(i)->GetNcoeffs() * nvariables;
224 cout <<
" ## CalcuPreconMat " << endl;
230 const auto vecwidth = vec_t::width;
232 alignas(vec_t::alignment) std::array<NekSingle, vec_t::width> tmp;
234 for (
int ne = 0; ne < nelmts; ne++)
236 const auto nElmtDof = matdim[ne];
237 const auto nblocks = nElmtDof / vecwidth;
240 PreconMatSingle->GetBlockPtr(ne, ne)->GetRawPtr();
242 for (
int i1 = 0; i1 < nblocks; ++i1)
244 for (
int j = 0; j < nElmtDof; ++j)
246 for (
int i = 0; i < vecwidth; ++i)
248 tmp[i] = mmat[j + (i1 * vecwidth + i) * nElmtDof];
255 const auto endwidth = nElmtDof - nblocks * vecwidth;
260 for (
int j = 0; j < nElmtDof; ++j)
262 for (
int i = 0; i < endwidth; ++i)
264 tmp[i] = mmat[j + (nblocks * vecwidth + i) * nElmtDof];
267 for (
int i = endwidth; i < vecwidth; ++i)
290 boost::ignore_unused(res);
315 boost::ignore_unused(flag);
326 unsigned int nvariables = pFields.size();
328 int nTotElmt = pFields[0]->GetNumElmts();
331 const auto vecwidth = vec_t::width;
334 std::vector<vec_t, tinysimd::allocator<vec_t>> Sinarray(
m_max_nblocks);
335 std::vector<vec_t, tinysimd::allocator<vec_t>> Soutarray(
m_max_nElmtDof);
338 alignas(vec_t::alignment) std::array<NekSingle, vec_t::width> tmp;
340 for (
int ne = 0, cnt = 0, icnt = 0, icnt1 = 0; ne < nTotElmt; ne++)
342 const auto nElmtCoef = pFields[0]->GetNcoeffs(ne);
343 const auto nElmtDof = nElmtCoef * nvariables;
344 const auto nblocks = (nElmtDof % vecwidth) ? nElmtDof / vecwidth + 1
345 : nElmtDof / vecwidth;
349 for (
int j = 0; j < nblocks; ++j, icnt += vecwidth)
351 for (
int i = 0; i < vecwidth; ++i)
356 Sinarray[j].load(tmp.data());
361 vec_t in = Sinarray[0];
362 for (
int i = 0; i < nElmtDof; ++i)
368 for (
int n = 1; n < nblocks; ++n)
371 for (
int i = 0; i < nElmtDof; ++i)
379 for (
int i = 0; i < nElmtDof; ++i)
382 Soutarray[i].store(tmp.data());
386 for (
int j = 1; j < vecwidth; ++j)
394 icnt1 += nblocks * vecwidth;
401template <
typename DataType>
404 const size_t nvariables,
const size_t nCoeffs,
416 boost::ignore_unused(flagUpdateDervFlux, qfield, TraceJacDerivArray,
417 TraceJacDerivSign, FwdFluxDeriv, BwdFluxDeriv,
420 size_t nTracePts = pFields[0]->GetTrace()->GetNpoints();
421 size_t npoints = pFields[0]->GetNpoints();
425 for (
size_t i = 0; i < nvariables; i++)
428 pFields[i]->BwdTrans(outarray[i], outpnts[i]);
438 size_t indexwspTrace = 0;
439 Fwd = wspTrace[indexwspTrace], indexwspTrace++;
440 Bwd = wspTrace[indexwspTrace], indexwspTrace++;
441 FwdFlux = wspTrace[indexwspTrace], indexwspTrace++;
442 BwdFlux = wspTrace[indexwspTrace], indexwspTrace++;
445 for (
size_t i = 0; i < nvariables; ++i)
448 pFields[i]->GetFwdBwdTracePhys(outpnts[i], Fwd[i], Bwd[i]);
453 size_t indexwspTraceDataType = 0;
455 for (
size_t m = 0; m < nvariables; ++m)
457 Fwdarray[m] = wspTraceDataType[indexwspTraceDataType],
458 indexwspTraceDataType++;
461 Fwdreslt = wspTraceDataType[indexwspTraceDataType], indexwspTraceDataType++;
463 for (
size_t m = 0; m < nvariables; ++m)
465 for (
size_t i = 0; i < nTracePts; ++i)
467 Fwdarray[m][i] = DataType(Fwd[m][i]);
470 for (
size_t m = 0; m < nvariables; ++m)
473 for (
size_t n = 0; n < nvariables; ++n)
475 for (
size_t p = 0;
p < nTracePts; ++
p)
477 Fwdreslt[
p] += TraceJacArray[0][m][n][
p] * Fwdarray[n][
p];
480 for (
size_t i = 0; i < nTracePts; ++i)
486 for (
size_t m = 0; m < nvariables; ++m)
488 for (
size_t i = 0; i < nTracePts; ++i)
490 Fwdarray[m][i] = DataType(Bwd[m][i]);
493 for (
size_t m = 0; m < nvariables; ++m)
496 for (
size_t n = 0; n < nvariables; ++n)
498 for (
size_t p = 0;
p < nTracePts; ++
p)
500 Fwdreslt[
p] += TraceJacArray[1][m][n][
p] * Fwdarray[n][
p];
503 for (
size_t i = 0; i < nTracePts; ++i)
509 for (
size_t i = 0; i < nvariables; ++i)
513 pFields[i]->AddTraceIntegralToOffDiag(FwdFlux[i], BwdFlux[i],
519 for (
size_t i = 0; i < nvariables; ++i)
521 for (
size_t p = 0;
p < nCoeffs; ++
p)
532template <
typename TypeNekBlkMatSharedPtr>
539 size_t nvars = pFields.size();
540 size_t nelmts = pFields[0]->GetNumElmts();
543 for (
size_t i = 0; i < nelmts; i++)
545 nelmtcoef = pFields[0]->GetExp(i)->GetNcoeffs();
546 nelmtmatdim[i] = nelmtcoef * nscale;
549 for (
size_t i = 0; i < nvars; i++)
551 for (
size_t j = 0; j < nvars; j++)
#define ASSERTL0(condition, msg)
tKey RegisterCreatorFunction(tKey idKey, CreatorFunction classCreator, std::string pDesc="")
Register a class with the factory.
void AccumulateRegion(std::string, int iolevel=0)
Accumulate elapsed time for a region.
void DoCalcPreconMatBRJCoeff(const Array< OneD, const Array< OneD, NekDouble > > &inarray, Array< OneD, Array< OneD, SNekBlkMatSharedPtr > > &gmtxarray, SNekBlkMatSharedPtr &gmtVar, Array< OneD, SNekBlkMatSharedPtr > &TraceJac, Array< OneD, SNekBlkMatSharedPtr > &TraceJacDeriv, Array< OneD, Array< OneD, NekSingle > > &TraceJacDerivSign, TensorOfArray4D< NekSingle > &TraceJacArray, TensorOfArray4D< NekSingle > &TraceJacDerivArray, TensorOfArray5D< NekSingle > &TraceIPSymJacArray)
Array< OneD, SNekBlkMatSharedPtr > m_TraceJacSingle
void MinusOffDiag2Rhs(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const size_t nvariables, const size_t nCoeffs, const Array< OneD, const Array< OneD, NekDouble > > &inarray, Array< OneD, Array< OneD, NekDouble > > &outarray, bool flagUpdateDervFlux, Array< OneD, Array< OneD, NekDouble > > &FwdFluxDeriv, Array< OneD, Array< OneD, NekDouble > > &BwdFluxDeriv, TensorOfArray3D< NekDouble > &qfield, TensorOfArray3D< NekDouble > &wspTrace, Array< OneD, Array< OneD, DataType > > &wspTraceDataType, const TensorOfArray4D< DataType > &TraceJacArray, const TensorOfArray4D< DataType > &TraceJacDerivArray, const Array< OneD, const Array< OneD, DataType > > &TraceJacDerivSign, const TensorOfArray5D< DataType > &TraceIPSymJacArray)
virtual void v_BuildPreconCfs(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const Array< OneD, const Array< OneD, NekDouble > > &intmp, const NekDouble time, const NekDouble lambda) override
virtual void v_DoPreconCfs(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const Array< OneD, NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const bool &flag) override
TensorOfArray4D< NekSingle > m_TraceJacArraySingle
Array< OneD, Array< OneD, SNekBlkMatSharedPtr > > m_PreconMatVarsSingle
static std::string className
Name of the class.
Array< OneD, SNekBlkMatSharedPtr > m_TraceJacDerivSingle
unsigned int m_max_nblocks
Array< OneD, Array< OneD, NekSingle > > m_TraceJacDerivSignSingle
static PreconCfsSharedPtr create(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vComm)
Creates an instance of this class.
PrecType m_PreconMatStorage
virtual void v_InitObject() override
TensorOfArray4D< NekSingle > m_TraceJacDerivArraySingle
PreconCfsBRJ(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vComm)
unsigned int m_max_nElmtDof
void AllocatePreconBlkDiagCoeff(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, Array< OneD, Array< OneD, TypeNekBlkMatSharedPtr > > &gmtxarray, const int &nscale=1)
TensorOfArray5D< NekSingle > m_TraceIPSymJacArraySingle
std::vector< int > m_inputIdx
void AllocateNekBlkMatDig(SNekBlkMatSharedPtr &mat, const Array< OneD, unsigned int > nrow, const Array< OneD, unsigned int > ncol)
void AllocateSIMDPreconBlkMatDiag(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields)
This function creates the matrix structure for the block diagonal operator. It organizes the way that...
void PreconBlkDiag(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const Array< OneD, NekDouble > &inarray, Array< OneD, NekDouble > &outarray)
virtual bool v_UpdatePreconMatCheck(const Array< OneD, const NekDouble > &res, const NekDouble dtLambda) override
std::vector< simd< NekSingle >, tinysimd::allocator< simd< NekSingle > > > m_sBlkDiagMat
void DoNullPrecon(const Array< OneD, NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const bool &flag)
NekPreconCfsOperators m_operator
NekDouble m_DtLambdaPreconMat
NekDouble m_BndEvaluateTime
LibUtilities::CommSharedPtr m_Comm
std::shared_ptr< SessionReader > SessionReaderSharedPtr
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
The above copyright notice and this permission notice shall be included.
PreconCfsFactory & GetPreconCfsFactory()
Declaration of the boundary condition factory singleton.
std::shared_ptr< SNekBlkMat > SNekBlkMatSharedPtr
tinysimd::simd< NekDouble > vec_t
void Svtvp(int n, const T alpha, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
svtvp (scalar times vector plus vector): z = alpha*x + y
void Smul(int n, const T alpha, const T *x, const int incx, T *y, const int incy)
Scalar multiply y = alpha*x.
void Zero(int n, T *x, const int incx)
Zero vector.
void Fill(int n, const T alpha, T *x, const int incx)
Fill a vector with a constant value.
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
typename abi< ScalarType, width >::type simd