48 std::string PreconCfsBRJ::className =
50 "PreconCfsBRJ", PreconCfsBRJ::create,
51 "Block Relaxed Jacobi Preconditioner for CFS.");
53 PreconCfsBRJ::PreconCfsBRJ(
62 size_t nvariables = pFields.size();
67 for (
size_t i = 0; i < nvariables; i++)
86 boost::ignore_unused(flag);
96 const NekDouble OmBRJParam = 1.0 - BRJParam;
98 size_t nvariables = pFields.size();
99 size_t npoints = pFields[0]->GetNcoeffs();
100 size_t ntotpnt = inarray.size();
102 ASSERTL0(nvariables * npoints == ntotpnt,
103 "nvariables*npoints!=ntotpnt in PreconCoeff");
112 for (
size_t m = 0; m < nvariables; m++)
114 size_t moffset = m * npoints;
115 rhs2d[m] = rhs + moffset;
116 out_2d[m] = outarray + moffset;
117 outTmp_2d[m] = outTmp + moffset;
118 pFields[m]->MultiplyByMassMatrix(inarray + moffset, rhs2d[m]);
121 size_t nphysic = pFields[0]->GetNpoints();
122 size_t nTracePts = pFields[0]->GetTrace()->GetNpoints();
127 for (
size_t j = 0; j < nvariables; j++)
134 for (
size_t i = 0; i < ntmpTrace; i++)
137 for (
size_t j = 0; j < nvariables; j++)
144 for (
size_t j = 0; j < nvariables; j++)
150 bool flagUpdateDervFlux =
false;
152 const size_t nwspTraceDataType = nvariables + 1;
154 for (
size_t m = 0; m < nwspTraceDataType; m++)
165 for (
size_t nrelax = 0; nrelax < nBRJIterTot - 1; nrelax++)
167 Vmath::Smul(ntotpnt, OmBRJParam, outarray, 1, outN, 1);
171 pFields, nvariables, npoints, rhs2d, out_2d, flagUpdateDervFlux,
172 FwdFluxDeriv, BwdFluxDeriv, qfield, tmpTrace, wspTraceDataType,
183 Vmath::Svtvp(ntotpnt, BRJParam, outTmp, 1, outN, 1, outarray, 1);
193 boost::ignore_unused(pFields);
199 int nvariables = pFields.size();
200 int nelmts = pFields[0]->GetNumElmts();
202 for (
int i = 0; i < nelmts; i++)
204 matdim[i] = pFields[0]->GetExp(i)->GetNcoeffs() * nvariables;
216 cout <<
" ## CalcuPreconMat " << endl;
222 const auto vecwidth = vec_t::width;
224 alignas(vec_t::alignment) std::array<NekSingle, vec_t::width> tmp;
226 for (
int ne = 0; ne < nelmts; ne++)
228 const auto nElmtDof = matdim[ne];
229 const auto nblocks = nElmtDof / vecwidth;
232 PreconMatSingle->GetBlockPtr(ne, ne)->GetRawPtr();
234 for (
int i1 = 0; i1 < nblocks; ++i1)
236 for (
int j = 0; j < nElmtDof; ++j)
238 for (
int i = 0; i < vecwidth; ++i)
240 tmp[i] = mmat[j + (i1 * vecwidth + i) * nElmtDof];
247 const auto endwidth = nElmtDof - nblocks * vecwidth;
252 for (
int j = 0; j < nElmtDof; ++j)
254 for (
int i = 0; i < endwidth; ++i)
256 tmp[i] = mmat[j + (nblocks * vecwidth + i) * nElmtDof];
259 for (
int i = endwidth; i < vecwidth; ++i)
279 boost::ignore_unused(res);
301 unsigned int nvariables = pFields.size();
303 int nTotElmt = pFields[0]->GetNumElmts();
306 const auto vecwidth = vec_t::width;
309 std::vector<vec_t, tinysimd::allocator<vec_t>> Sinarray(
m_max_nblocks);
310 std::vector<vec_t, tinysimd::allocator<vec_t>> Soutarray(
m_max_nElmtDof);
313 alignas(vec_t::alignment) std::array<NekSingle, vec_t::width> tmp;
315 for (
int ne = 0, cnt = 0, icnt = 0, icnt1 = 0; ne < nTotElmt; ne++)
317 const auto nElmtCoef = pFields[0]->GetNcoeffs(ne);
318 const auto nElmtDof = nElmtCoef * nvariables;
319 const auto nblocks = (nElmtDof % vecwidth) ? nElmtDof / vecwidth + 1
320 : nElmtDof / vecwidth;
324 for (
int j = 0; j < nblocks; ++j, icnt += vecwidth)
326 for (
int i = 0; i < vecwidth; ++i)
331 Sinarray[j].load(tmp.data());
336 vec_t in = Sinarray[0];
337 for (
int i = 0; i < nElmtDof; ++i)
343 for (
int n = 1; n < nblocks; ++n)
346 for (
int i = 0; i < nElmtDof; ++i)
354 for (
int i = 0; i < nElmtDof; ++i)
357 Soutarray[i].store(tmp.data());
361 for (
int j = 1; j < vecwidth; ++j)
369 icnt1 += nblocks * vecwidth;
373 template <
typename DataType>
376 const size_t nvariables,
const size_t nCoeffs,
388 boost::ignore_unused(flagUpdateDervFlux, qfield, TraceJacDerivArray,
389 TraceJacDerivSign, FwdFluxDeriv, BwdFluxDeriv,
392 size_t nTracePts = pFields[0]->GetTrace()->GetNpoints();
393 size_t npoints = pFields[0]->GetNpoints();
397 for (
size_t i = 0; i < nvariables; i++)
400 pFields[i]->BwdTrans(outarray[i], outpnts[i]);
410 size_t indexwspTrace = 0;
411 Fwd = wspTrace[indexwspTrace], indexwspTrace++;
412 Bwd = wspTrace[indexwspTrace], indexwspTrace++;
413 FwdFlux = wspTrace[indexwspTrace], indexwspTrace++;
414 BwdFlux = wspTrace[indexwspTrace], indexwspTrace++;
417 for (
size_t i = 0; i < nvariables; ++i)
420 pFields[i]->GetFwdBwdTracePhys(outpnts[i], Fwd[i], Bwd[i]);
425 size_t indexwspTraceDataType = 0;
427 for (
size_t m = 0; m < nvariables; ++m)
429 Fwdarray[m] = wspTraceDataType[indexwspTraceDataType],
430 indexwspTraceDataType++;
433 Fwdreslt = wspTraceDataType[indexwspTraceDataType], indexwspTraceDataType++;
435 for (
size_t m = 0; m < nvariables; ++m)
437 for (
size_t i = 0; i < nTracePts; ++i)
439 Fwdarray[m][i] = DataType(Fwd[m][i]);
442 for (
size_t m = 0; m < nvariables; ++m)
445 for (
size_t n = 0; n < nvariables; ++n)
447 for (
size_t p = 0;
p < nTracePts; ++
p)
449 Fwdreslt[
p] += TraceJacArray[0][m][n][
p] * Fwdarray[n][
p];
452 for (
size_t i = 0; i < nTracePts; ++i)
458 for (
size_t m = 0; m < nvariables; ++m)
460 for (
size_t i = 0; i < nTracePts; ++i)
462 Fwdarray[m][i] = DataType(Bwd[m][i]);
465 for (
size_t m = 0; m < nvariables; ++m)
468 for (
size_t n = 0; n < nvariables; ++n)
470 for (
size_t p = 0;
p < nTracePts; ++
p)
472 Fwdreslt[
p] += TraceJacArray[1][m][n][
p] * Fwdarray[n][
p];
475 for (
size_t i = 0; i < nTracePts; ++i)
481 for (
size_t i = 0; i < nvariables; ++i)
485 pFields[i]->AddTraceIntegralToOffDiag(FwdFlux[i], BwdFlux[i],
491 for (
size_t i = 0; i < nvariables; ++i)
493 for (
size_t p = 0;
p < nCoeffs; ++
p)
501 template <
typename TypeNekBlkMatSharedPtr>
508 size_t nvars = pFields.size();
509 size_t nelmts = pFields[0]->GetNumElmts();
512 for (
size_t i = 0; i < nelmts; i++)
514 nelmtcoef = pFields[0]->GetExp(i)->GetNcoeffs();
515 nelmtmatdim[i] = nelmtcoef * nscale;
518 for (
size_t i = 0; i < nvars; i++)
520 for (
size_t j = 0; j < nvars; j++)
#define ASSERTL0(condition, msg)
tKey RegisterCreatorFunction(tKey idKey, CreatorFunction classCreator, std::string pDesc="")
Register a class with the factory.
void AccumulateRegion(std::string, int iolevel=0)
Accumulate elapsed time for a region.
void DoCalcPreconMatBRJCoeff(const Array< OneD, const Array< OneD, NekDouble >> &inarray, Array< OneD, Array< OneD, SNekBlkMatSharedPtr >> &gmtxarray, SNekBlkMatSharedPtr &gmtVar, Array< OneD, SNekBlkMatSharedPtr > &TraceJac, Array< OneD, SNekBlkMatSharedPtr > &TraceJacDeriv, Array< OneD, Array< OneD, NekSingle >> &TraceJacDerivSign, TensorOfArray4D< NekSingle > &TraceJacArray, TensorOfArray4D< NekSingle > &TraceJacDerivArray, TensorOfArray5D< NekSingle > &TraceIPSymJacArray)
Array< OneD, SNekBlkMatSharedPtr > m_TraceJacSingle
virtual void v_DoPreconCfs(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const Array< OneD, NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const bool &flag) override
TensorOfArray4D< NekSingle > m_TraceJacArraySingle
Array< OneD, Array< OneD, SNekBlkMatSharedPtr > > m_PreconMatVarsSingle
Array< OneD, SNekBlkMatSharedPtr > m_TraceJacDerivSingle
unsigned int m_max_nblocks
Array< OneD, Array< OneD, NekSingle > > m_TraceJacDerivSignSingle
PrecType m_PreconMatStorage
virtual void v_InitObject() override
TensorOfArray4D< NekSingle > m_TraceJacDerivArraySingle
void MinusOffDiag2Rhs(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const size_t nvariables, const size_t nCoeffs, const Array< OneD, const Array< OneD, NekDouble >> &inarray, Array< OneD, Array< OneD, NekDouble >> &outarray, bool flagUpdateDervFlux, Array< OneD, Array< OneD, NekDouble >> &FwdFluxDeriv, Array< OneD, Array< OneD, NekDouble >> &BwdFluxDeriv, TensorOfArray3D< NekDouble > &qfield, TensorOfArray3D< NekDouble > &wspTrace, Array< OneD, Array< OneD, DataType >> &wspTraceDataType, const TensorOfArray4D< DataType > &TraceJacArray, const TensorOfArray4D< DataType > &TraceJacDerivArray, const Array< OneD, const Array< OneD, DataType >> &TraceJacDerivSign, const TensorOfArray5D< DataType > &TraceIPSymJacArray)
void AllocatePreconBlkDiagCoeff(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, Array< OneD, Array< OneD, TypeNekBlkMatSharedPtr >> &gmtxarray, const int &nscale=1)
unsigned int m_max_nElmtDof
TensorOfArray5D< NekSingle > m_TraceIPSymJacArraySingle
std::vector< int > m_inputIdx
void AllocateNekBlkMatDig(SNekBlkMatSharedPtr &mat, const Array< OneD, unsigned int > nrow, const Array< OneD, unsigned int > ncol)
virtual void v_BuildPreconCfs(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const Array< OneD, const Array< OneD, NekDouble >> &intmp, const NekDouble time, const NekDouble lambda) override
void AllocateSIMDPreconBlkMatDiag(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields)
This function creates the matrix structure for the block diagonal operator. It organizes the way that...
void PreconBlkDiag(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const Array< OneD, NekDouble > &inarray, Array< OneD, NekDouble > &outarray)
virtual bool v_UpdatePreconMatCheck(const Array< OneD, const NekDouble > &res, const NekDouble dtLambda) override
std::vector< simd< NekSingle >, tinysimd::allocator< simd< NekSingle > > > m_sBlkDiagMat
void DoNullPrecon(const Array< OneD, NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const bool &flag)
NekDouble m_DtLambdaPreconMat
NekDouble m_BndEvaluateTime
LibUtilities::CommSharedPtr m_Comm
virtual void v_InitObject() override
NekPreconCfsOperators m_operator
std::shared_ptr< SessionReader > SessionReaderSharedPtr
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
The above copyright notice and this permission notice shall be included.
std::shared_ptr< SNekBlkMat > SNekBlkMatSharedPtr
tinysimd::simd< NekDouble > vec_t
PreconCfsOpFactory & GetPreconCfsOpFactory()
Declaration of the boundary condition factory singleton.
void Svtvp(int n, const T alpha, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
svtvp (scalar times vector plus vector): z = alpha*x + y
void Smul(int n, const T alpha, const T *x, const int incx, T *y, const int incy)
Scalar multiply y = alpha*x.
void Zero(int n, T *x, const int incx)
Zero vector.
void Fill(int n, const T alpha, T *x, const int incx)
Fill a vector with a constant value.
typename abi< ScalarType, width >::type simd