Nektar++
PreconCfsBRJ.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: PreconCfsBRJ.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// License for the specific language governing rights and limitations under
14// Permission is hereby granted, free of charge, to any person obtaining a
15// copy of this software and associated documentation files (the "Software"),
16// to deal in the Software without restriction, including without limitation
17// the rights to use, copy, modify, merge, publish, distribute, sublicense,
18// and/or sell copies of the Software, and to permit persons to whom the
19// Software is furnished to do so, subject to the following conditions:
20//
21// The above copyright notice and this permission notice shall be included
22// in all copies or substantial portions of the Software.
23//
24// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
25// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30// DEALINGS IN THE SOFTWARE.
31//
32// Description: PreconCfsBRJ definition
33//
34///////////////////////////////////////////////////////////////////////////////
35
38
39namespace Nektar
40{
41
42/**
43 * @class PreconCfsBRJ
44 *
45 * Solves a linear system using iterative methods.
46 */
47std::string PreconCfsBRJ::className =
49 "PreconCfsBRJ", PreconCfsBRJ::create,
50 "Block Relaxed Jacobi Preconditioner for CFS.");
51
55 const LibUtilities::CommSharedPtr &vComm)
56 : PreconCfs(pFields, pSession, vComm)
57{
58 pSession->LoadParameter("PreconItsStep", m_PreconItsStep, 7);
59 pSession->LoadParameter("BRJRelaxParam", m_BRJRelaxParam, 1.0);
60
61 size_t nvariables = pFields.size();
62
65 for (size_t i = 0; i < nvariables; i++)
66 {
68 }
70
72}
73
74/**
75 *
76 */
78{
79}
80
81/**
82 *
83 */
86 const Array<OneD, NekDouble> &inarray, Array<OneD, NekDouble> &outarray,
87 [[maybe_unused]] const bool &flag)
88{
89 ASSERTL0(inarray.size() == outarray.size(),
90 "In and Out not the same size in DoPreconCfs");
91
92 size_t nBRJIterTot = m_PreconItsStep;
93 if (nBRJIterTot == 0)
94 {
95 DoNullPrecon(inarray, outarray, flag);
96 }
97 else
98 {
99 const NekDouble BRJParam = m_BRJRelaxParam;
100 const NekDouble OmBRJParam = 1.0 - BRJParam;
101
102 size_t nvariables = pFields.size();
103 size_t npoints = pFields[0]->GetNcoeffs();
104 size_t ntotpnt = inarray.size();
105
106 ASSERTL0(nvariables * npoints == ntotpnt,
107 "nvariables*npoints!=ntotpnt in PreconCoeff");
108
109 Array<OneD, NekDouble> rhs(ntotpnt);
110
111 Array<OneD, NekDouble> outN(ntotpnt);
112 Array<OneD, NekDouble> outTmp(ntotpnt);
113 Array<OneD, Array<OneD, NekDouble>> rhs2d(nvariables);
114 Array<OneD, Array<OneD, NekDouble>> out_2d(nvariables);
115 Array<OneD, Array<OneD, NekDouble>> outTmp_2d(nvariables);
116 for (size_t m = 0; m < nvariables; m++)
117 {
118 size_t moffset = m * npoints;
119 rhs2d[m] = rhs + moffset;
120 out_2d[m] = outarray + moffset;
121 outTmp_2d[m] = outTmp + moffset;
122 pFields[m]->MultiplyByMassMatrix(inarray + moffset, rhs2d[m]);
123 }
124
125 size_t nphysic = pFields[0]->GetNpoints();
126 size_t nTracePts = pFields[0]->GetTrace()->GetNpoints();
128 for (int i = 0; i < m_spacedim; i++)
129 {
130 qfield[i] = Array<OneD, Array<OneD, NekDouble>>(nvariables);
131 for (size_t j = 0; j < nvariables; j++)
132 {
133 qfield[i][j] = Array<OneD, NekDouble>(nphysic);
134 }
135 }
136 size_t ntmpTrace = 4 + 2 * m_spacedim;
137 TensorOfArray3D<NekDouble> tmpTrace(ntmpTrace);
138 for (size_t i = 0; i < ntmpTrace; i++)
139 {
140 tmpTrace[i] = Array<OneD, Array<OneD, NekDouble>>(nvariables);
141 for (size_t j = 0; j < nvariables; j++)
142 {
143 tmpTrace[i][j] = Array<OneD, NekDouble>(nTracePts);
144 }
145 }
146 Array<OneD, Array<OneD, NekDouble>> FwdFluxDeriv(nvariables);
147 Array<OneD, Array<OneD, NekDouble>> BwdFluxDeriv(nvariables);
148 for (size_t j = 0; j < nvariables; j++)
149 {
150 FwdFluxDeriv[j] = Array<OneD, NekDouble>(nTracePts);
151 BwdFluxDeriv[j] = Array<OneD, NekDouble>(nTracePts);
152 }
153
154 const size_t nwspTraceDataType = nvariables + 1;
155 Array<OneD, Array<OneD, NekSingle>> wspTraceDataType(nwspTraceDataType);
156 for (size_t m = 0; m < nwspTraceDataType; m++)
157 {
158 wspTraceDataType[m] = Array<OneD, NekSingle>(nTracePts);
159 }
160
162 timer.Start();
163 PreconBlkDiag(pFields, rhs, outarray);
164 timer.Stop();
165 timer.AccumulateRegion("PreconCfsBRJ::PreconBlkDiag", 2);
166
167 for (size_t nrelax = 0; nrelax < nBRJIterTot - 1; nrelax++)
168 {
169 Vmath::Smul(ntotpnt, OmBRJParam, outarray, 1, outN, 1);
170
171 timer.Start();
172 MinusOffDiag2Rhs(pFields, nvariables, npoints, rhs2d, out_2d,
173 tmpTrace, wspTraceDataType, m_TraceJacArraySingle);
174 timer.Stop();
175 timer.AccumulateRegion("PreconCfsBRJ::MinusOffDiag2Rhs", 2);
176
177 timer.Start();
178 PreconBlkDiag(pFields, outarray, outTmp);
179 timer.Stop();
180 timer.AccumulateRegion("PreconCfsBRJ::PreconBlkDiag", 2);
181
182 Vmath::Svtvp(ntotpnt, BRJParam, outTmp, 1, outN, 1, outarray, 1);
183 }
184 }
185
187}
188
189/**
190 *
191 */
193 [[maybe_unused]] const Array<OneD, MultiRegions::ExpListSharedPtr> &pFields,
194 const Array<OneD, const Array<OneD, NekDouble>> &intmp,
195 [[maybe_unused]] const NekDouble time, const NekDouble lambda)
196{
197 if (m_PreconItsStep > 0)
198 {
199 SNekBlkMatSharedPtr PreconMatSingle;
200 using vec_t = simd<NekSingle>;
201 int nvariables = pFields.size();
202 int nelmts = pFields[0]->GetNumElmts();
203 Array<OneD, unsigned int> matdim(nelmts);
204 for (int i = 0; i < nelmts; i++)
205 {
206 matdim[i] = pFields[0]->GetExp(i)->GetNcoeffs() * nvariables;
207 }
208 AllocateNekBlkMatDig(PreconMatSingle, matdim, matdim);
209
211 intmp, m_PreconMatVarsSingle, PreconMatSingle, m_TraceJacSingle,
215
216 if (m_verbose && m_Comm->GetRank() == 0)
217 {
218 std::cout << " ## CalcuPreconMat " << std::endl;
219 }
220
221 // copy matrix to simd layout
222 // load matrix
223 int cnt = 0;
224 const auto vecwidth = vec_t::width;
225
226 alignas(vec_t::alignment) std::array<NekSingle, vec_t::width> tmp;
227
228 for (int ne = 0; ne < nelmts; ne++)
229 {
230 const auto nElmtDof = matdim[ne];
231 const auto nblocks = nElmtDof / vecwidth;
232
233 const NekSingle *mmat =
234 PreconMatSingle->GetBlockPtr(ne, ne)->GetRawPtr();
235 /// Copy array into column major blocks of vector width
236 for (int i1 = 0; i1 < nblocks; ++i1)
237 {
238 for (int j = 0; j < nElmtDof; ++j)
239 {
240 for (int i = 0; i < vecwidth; ++i)
241 {
242 tmp[i] = mmat[j + (i1 * vecwidth + i) * nElmtDof];
243 }
244 // store contiguous vec_t array.
245 m_sBlkDiagMat[cnt++].load(tmp.data());
246 }
247 }
248
249 const auto endwidth = nElmtDof - nblocks * vecwidth;
250
251 // end rows that do not fit into vector widths
252 if (endwidth)
253 {
254 for (int j = 0; j < nElmtDof; ++j)
255 {
256 for (int i = 0; i < endwidth; ++i)
257 {
258 tmp[i] = mmat[j + (nblocks * vecwidth + i) * nElmtDof];
259 }
260
261 for (int i = endwidth; i < vecwidth; ++i)
262 {
263 tmp[i] = 0.0;
264 }
265 m_sBlkDiagMat[cnt++].load(tmp.data());
266 }
267 }
268 }
269 }
270
271 m_DtLambdaPreconMat = lambda;
272 m_CalcPreconMatFlag = false;
274}
275
276/**
277 *
278 */
280 [[maybe_unused]] const Array<OneD, const NekDouble> &res,
281 const NekDouble dtLambda)
282{
283 bool flag = (m_CalcPreconMatFlag || m_DtLambdaPreconMat != dtLambda ||
285 m_CalcPreconMatFlag = flag;
286 return flag;
287}
288
289/**
290 *
291 */
293 Array<OneD, NekDouble> &pOutput,
294 [[maybe_unused]] const bool &flag)
295{
296 Vmath::Vcopy(pInput.size(), pInput, 1, pOutput, 1);
297}
298
299/**
300 *
301 */
304 const Array<OneD, NekDouble> &inarray, Array<OneD, NekDouble> &outarray)
305{
306 unsigned int nvariables = pFields.size();
307
308 int nTotElmt = pFields[0]->GetNumElmts();
309
310 using vec_t = simd<NekSingle>;
311 const auto vecwidth = vec_t::width;
312
313 // vectorized matrix multiply
314 std::vector<vec_t, tinysimd::allocator<vec_t>> Sinarray(m_max_nblocks);
315 std::vector<vec_t, tinysimd::allocator<vec_t>> Soutarray(m_max_nElmtDof);
316
317 alignas(vec_t::alignment) std::array<NekSingle, vec_t::width> tmp;
318
319 for (int ne = 0, cnt = 0, icnt = 0, icnt1 = 0; ne < nTotElmt; ne++)
320 {
321 const auto nElmtCoef = pFields[0]->GetNcoeffs(ne);
322 const auto nElmtDof = nElmtCoef * nvariables;
323 const auto nblocks = (nElmtDof % vecwidth) ? nElmtDof / vecwidth + 1
324 : nElmtDof / vecwidth;
325
326 // gather data into blocks - could probably be done with a
327 // gather call? can be replaced with a gather op when working
328 for (int j = 0; j < nblocks; ++j, icnt += vecwidth)
329 {
330 for (int i = 0; i < vecwidth; ++i)
331 {
332 tmp[i] = inarray[m_inputIdx[icnt + i]];
333 }
334
335 Sinarray[j].load(tmp.data());
336 }
337
338 // Do matrix multiply
339 // first block just needs multiplying
340 vec_t in = Sinarray[0];
341 for (int i = 0; i < nElmtDof; ++i)
342 {
343 Soutarray[i] = m_sBlkDiagMat[cnt++] * in;
344 }
345
346 // rest of blocks are multiply add operations;
347 for (int n = 1; n < nblocks; ++n)
348 {
349 in = Sinarray[n];
350 for (int i = 0; i < nElmtDof; ++i)
351 {
352 Soutarray[i].fma(m_sBlkDiagMat[cnt++], in);
353 }
354 }
355
356 // get block aligned index for this expansion
357 NekSingle val;
358 for (int i = 0; i < nElmtDof; ++i)
359 {
360 // Get hold of data
361 Soutarray[i].store(tmp.data());
362
363 // Sum vector width
364 val = tmp[0];
365 for (int j = 1; j < vecwidth; ++j)
366 {
367 val += tmp[j];
368 }
369 // put data into outarray
370 outarray[m_inputIdx[icnt1 + i]] = NekDouble(val);
371 }
372
373 icnt1 += nblocks * vecwidth;
374 }
375}
376
377/**
378 *
379 */
380template <typename DataType>
383 const size_t nvariables, const size_t nCoeffs,
384 const Array<OneD, const Array<OneD, NekDouble>> &inarray,
387 Array<OneD, Array<OneD, DataType>> &wspTraceDataType,
388 const TensorOfArray4D<DataType> &TraceJacArray)
389{
390 size_t nTracePts = pFields[0]->GetTrace()->GetNpoints();
391 size_t npoints = pFields[0]->GetNpoints();
392 size_t nDim = m_spacedim;
393
394 Array<OneD, Array<OneD, NekDouble>> outpnts(nvariables);
395 for (size_t i = 0; i < nvariables; i++)
396 {
397 outpnts[i] = Array<OneD, NekDouble>(npoints, 0.0);
398 pFields[i]->BwdTrans(outarray[i], outpnts[i]);
399 }
400
401 // Store forwards/backwards space along trace space
406 TensorOfArray3D<NekDouble> numDerivBwd(nDim);
407 TensorOfArray3D<NekDouble> numDerivFwd(nDim);
408 size_t indexwspTrace = 0;
409 Fwd = wspTrace[indexwspTrace], indexwspTrace++;
410 Bwd = wspTrace[indexwspTrace], indexwspTrace++;
411 FwdFlux = wspTrace[indexwspTrace], indexwspTrace++;
412 BwdFlux = wspTrace[indexwspTrace], indexwspTrace++;
413
415 for (size_t i = 0; i < nvariables; ++i)
416 {
417 timer.Start();
418 pFields[i]->GetFwdBwdTracePhys(outpnts[i], Fwd[i], Bwd[i]);
419 timer.Stop();
420 timer.AccumulateRegion("ExpList::GetFwdBwdTracePhys", 10);
421 }
422
423 size_t indexwspTraceDataType = 0;
424 Array<OneD, Array<OneD, DataType>> Fwdarray(nvariables);
425 for (size_t m = 0; m < nvariables; ++m)
426 {
427 Fwdarray[m] = wspTraceDataType[indexwspTraceDataType],
428 indexwspTraceDataType++;
429 }
430 Array<OneD, DataType> Fwdreslt;
431 Fwdreslt = wspTraceDataType[indexwspTraceDataType], indexwspTraceDataType++;
432
433 for (size_t m = 0; m < nvariables; ++m)
434 {
435 for (size_t i = 0; i < nTracePts; ++i)
436 {
437 Fwdarray[m][i] = DataType(Fwd[m][i]);
438 }
439 }
440 for (size_t m = 0; m < nvariables; ++m)
441 {
442 Vmath::Zero(nTracePts, Fwdreslt, 1);
443 for (size_t n = 0; n < nvariables; ++n)
444 {
445 for (size_t p = 0; p < nTracePts; ++p)
446 {
447 Fwdreslt[p] += TraceJacArray[0][m][n][p] * Fwdarray[n][p];
448 }
449 }
450 for (size_t i = 0; i < nTracePts; ++i)
451 {
452 FwdFlux[m][i] = NekDouble(Fwdreslt[i]);
453 }
454 }
455
456 for (size_t m = 0; m < nvariables; ++m)
457 {
458 for (size_t i = 0; i < nTracePts; ++i)
459 {
460 Fwdarray[m][i] = DataType(Bwd[m][i]);
461 }
462 }
463 for (size_t m = 0; m < nvariables; ++m)
464 {
465 Vmath::Zero(nTracePts, Fwdreslt, 1);
466 for (size_t n = 0; n < nvariables; ++n)
467 {
468 for (size_t p = 0; p < nTracePts; ++p)
469 {
470 Fwdreslt[p] += TraceJacArray[1][m][n][p] * Fwdarray[n][p];
471 }
472 }
473 for (size_t i = 0; i < nTracePts; ++i)
474 {
475 BwdFlux[m][i] = NekDouble(Fwdreslt[i]);
476 }
477 }
478
479 for (size_t i = 0; i < nvariables; ++i)
480 {
481 Vmath::Fill(nCoeffs, 0.0, outarray[i], 1);
482 timer.Start();
483 pFields[i]->AddTraceIntegralToOffDiag(FwdFlux[i], BwdFlux[i],
484 outarray[i]);
485 timer.Stop();
486 timer.AccumulateRegion("ExpList::AddTraceIntegralToOffDiag", 10);
487 }
488
489 for (size_t i = 0; i < nvariables; ++i)
490 {
491 for (size_t p = 0; p < nCoeffs; ++p)
492 {
493 outarray[i][p] =
494 -m_DtLambdaPreconMat * outarray[i][p] + inarray[i][p];
495 }
496 }
497}
498
499/**
500 *
501 */
502template <typename TypeNekBlkMatSharedPtr>
506 const int &nscale)
507{
508
509 size_t nvars = pFields.size();
510 size_t nelmts = pFields[0]->GetNumElmts();
511 size_t nelmtcoef;
512 Array<OneD, unsigned int> nelmtmatdim(nelmts);
513 for (size_t i = 0; i < nelmts; i++)
514 {
515 nelmtcoef = pFields[0]->GetExp(i)->GetNcoeffs();
516 nelmtmatdim[i] = nelmtcoef * nscale;
517 }
518
519 for (size_t i = 0; i < nvars; i++)
520 {
521 for (size_t j = 0; j < nvars; j++)
522 {
523 AllocateNekBlkMatDig(gmtxarray[i][j], nelmtmatdim, nelmtmatdim);
524 }
525 }
526}
527
528} // namespace Nektar
#define ASSERTL0(condition, msg)
Definition: ErrorUtil.hpp:208
tKey RegisterCreatorFunction(tKey idKey, CreatorFunction classCreator, std::string pDesc="")
Register a class with the factory.
void AccumulateRegion(std::string, int iolevel=0)
Accumulate elapsed time for a region.
Definition: Timer.cpp:70
void DoCalcPreconMatBRJCoeff(const Array< OneD, const Array< OneD, NekDouble > > &inarray, Array< OneD, Array< OneD, SNekBlkMatSharedPtr > > &gmtxarray, SNekBlkMatSharedPtr &gmtVar, Array< OneD, SNekBlkMatSharedPtr > &TraceJac, Array< OneD, SNekBlkMatSharedPtr > &TraceJacDeriv, Array< OneD, Array< OneD, NekSingle > > &TraceJacDerivSign, TensorOfArray4D< NekSingle > &TraceJacArray, TensorOfArray4D< NekSingle > &TraceJacDerivArray, TensorOfArray5D< NekSingle > &TraceIPSymJacArray)
Definition: PreconCfsOp.h:97
Array< OneD, SNekBlkMatSharedPtr > m_TraceJacSingle
Definition: PreconCfsBRJ.h:85
void v_BuildPreconCfs(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const Array< OneD, const Array< OneD, NekDouble > > &intmp, const NekDouble time, const NekDouble lambda) override
void v_DoPreconCfs(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const Array< OneD, NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const bool &flag) override
TensorOfArray4D< NekSingle > m_TraceJacArraySingle
Definition: PreconCfsBRJ.h:86
void MinusOffDiag2Rhs(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const size_t nvariables, const size_t nCoeffs, const Array< OneD, const Array< OneD, NekDouble > > &inarray, Array< OneD, Array< OneD, NekDouble > > &outarray, TensorOfArray3D< NekDouble > &wspTrace, Array< OneD, Array< OneD, DataType > > &wspTraceDataType, const TensorOfArray4D< DataType > &TraceJacArray)
Array< OneD, Array< OneD, SNekBlkMatSharedPtr > > m_PreconMatVarsSingle
Definition: PreconCfsBRJ.h:77
static std::string className
Name of the class.
Definition: PreconCfsBRJ.h:66
Array< OneD, SNekBlkMatSharedPtr > m_TraceJacDerivSingle
Definition: PreconCfsBRJ.h:87
unsigned int m_max_nblocks
Definition: PreconCfsBRJ.h:79
Array< OneD, Array< OneD, NekSingle > > m_TraceJacDerivSignSingle
Definition: PreconCfsBRJ.h:89
static PreconCfsSharedPtr create(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vComm)
Creates an instance of this class.
Definition: PreconCfsBRJ.h:55
void v_InitObject() override
TensorOfArray4D< NekSingle > m_TraceJacDerivArraySingle
Definition: PreconCfsBRJ.h:88
PreconCfsBRJ(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vComm)
unsigned int m_max_nElmtDof
Definition: PreconCfsBRJ.h:80
void AllocatePreconBlkDiagCoeff(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, Array< OneD, Array< OneD, TypeNekBlkMatSharedPtr > > &gmtxarray, const int &nscale=1)
TensorOfArray5D< NekSingle > m_TraceIPSymJacArraySingle
Definition: PreconCfsBRJ.h:90
std::vector< int > m_inputIdx
Definition: PreconCfsBRJ.h:83
void AllocateNekBlkMatDig(SNekBlkMatSharedPtr &mat, const Array< OneD, unsigned int > nrow, const Array< OneD, unsigned int > ncol)
Definition: PreconCfsBRJ.h:236
void AllocateSIMDPreconBlkMatDiag(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields)
This function creates the matrix structure for the block diagonal operator. It organizes the way that...
Definition: PreconCfsBRJ.h:138
void PreconBlkDiag(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const Array< OneD, NekDouble > &inarray, Array< OneD, NekDouble > &outarray)
bool v_UpdatePreconMatCheck(const Array< OneD, const NekDouble > &res, const NekDouble dtLambda) override
std::vector< simd< NekSingle >, tinysimd::allocator< simd< NekSingle > > > m_sBlkDiagMat
Definition: PreconCfsBRJ.h:82
void DoNullPrecon(const Array< OneD, NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const bool &flag)
NekPreconCfsOperators m_operator
Definition: PreconCfs.h:104
int m_PreconTimesCounter
Definition: PreconCfs.h:106
int m_PreconMatFreezNumb
Definition: PreconCfs.h:105
NekDouble m_DtLambdaPreconMat
Definition: PreconCfs.h:107
bool m_CalcPreconMatFlag
Definition: PreconCfs.h:108
LibUtilities::CommSharedPtr m_Comm
Definition: PreconCfs.h:101
std::shared_ptr< SessionReader > SessionReaderSharedPtr
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
Definition: Comm.h:55
PreconCfsFactory & GetPreconCfsFactory()
Declaration of the boundary condition factory singleton.
Definition: PreconCfs.cpp:41
std::shared_ptr< SNekBlkMat > SNekBlkMatSharedPtr
tinysimd::simd< NekDouble > vec_t
double NekDouble
void Svtvp(int n, const T alpha, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Svtvp (scalar times vector plus vector): z = alpha*x + y.
Definition: Vmath.hpp:396
void Smul(int n, const T alpha, const T *x, const int incx, T *y, const int incy)
Scalar multiply y = alpha*x.
Definition: Vmath.hpp:100
void Zero(int n, T *x, const int incx)
Zero vector.
Definition: Vmath.hpp:273
void Fill(int n, const T alpha, T *x, const int incx)
Fill a vector with a constant value.
Definition: Vmath.hpp:54
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
Definition: Vmath.hpp:825
typename abi< ScalarType, width >::type simd
Definition: tinysimd.hpp:80