Nektar++
NekLinSysIterGMRESLoc.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: NekLinSysIterGMRESLoc.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// License for the specific language governing rights and limitations under
14// Permission is hereby granted, free of charge, to any person obtaining a
15// copy of this software and associated documentation files (the "Software"),
16// to deal in the Software without restriction, including without limitation
17// the rights to use, copy, modify, merge, publish, distribute, sublicense,
18// and/or sell copies of the Software, and to permit persons to whom the
19// Software is furnished to do so, subject to the following conditions:
20//
21// The above copyright notice and this permission notice shall be included
22// in all copies or substantial portions of the Software.
23//
24// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
25// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30// DEALINGS IN THE SOFTWARE.
31//
32// Description: NekLinSysIterGMRESLoc definition
33//
34///////////////////////////////////////////////////////////////////////////////
35
38
39using namespace std;
40
42{
43/**
44 * @class NekLinSysIterGMRESLoc
45 *
46 * Solves a linear system using iterative methods using local storage rather
47 * than global
48 */
52 "NekLinSysIterGMRES local storage solver.");
53
56 const LibUtilities::CommSharedPtr &vRowComm, const int nDimen,
57 const NekSysKey &pKey)
58 : NekLinSysIter(pSession, vRowComm, nDimen, pKey)
59{
62
64
67
70
71 m_isLocal = true;
72
73 // Allocate array storage of coefficients
74 // Hessenburg matrix
76 for (size_t nd = 0; nd < m_LinSysMaxStorage; nd++)
77 {
79 }
80 // Hesseburg matrix after rotation
82 for (size_t nd = 0; nd < m_LinSysMaxStorage; nd++)
83 {
85 }
86 // Total search directions
88}
89
91{
93}
94
95/**
96 *
97 */
99 const int nLocal, const Array<OneD, const NekDouble> &pInput,
100 Array<OneD, NekDouble> &pOutput, [[maybe_unused]] const int nDir,
101 const NekDouble tol, [[maybe_unused]] const NekDouble factor)
102{
103 m_tolerance = max(tol, 1.0E-15);
104 int niterations = DoGMRES(nLocal, pInput, pOutput);
105
106 return niterations;
107}
108
109/**  
110 * Solve a global linear system using the Gmres 
111 * We solve only for the non-Dirichlet modes. The operator is evaluated  
112 * using an auxiliary function v_DoMatrixMultiply defined by the  
113 * specific solver. Distributed math routines are used to support  
114 * parallel execution of the solver.  
115 *  
116 * @param pInput Input residual in local format.
117 * @param pOutput Solution vector in local format but with continuous
118 * values
119 */
120
122 const Array<OneD, const NekDouble> &pInput,
123 Array<OneD, NekDouble> &pOutput)
124{
127 {
128 Set_Rhs_Magnitude(pInput);
129 }
130
131 NekDouble eps = 0.0;
133
135 m_converged = false;
136
137 bool restarted = false;
138 bool truncted = false;
139
141 {
142 truncted = true;
143 }
144
145 for (int nrestart = 0; nrestart < m_maxrestart; ++nrestart)
146 {
147 eps = DoGmresRestart(restarted, truncted, nLocal, pInput, pOutput);
148
149 if (m_converged)
150 {
151 break;
152 }
153 restarted = true;
154 }
155
156 if (m_verbose)
157 {
158 Array<OneD, NekDouble> r0(nLocal);
159 Array<OneD, NekDouble> wk(nLocal);
160
161 // calculate difference in residual of solution
163
164 // Note this is finding the difference between the whole
165 // residual not jsut the non-Dirichlet values.
166 // Probably OK since just an monitoring output?
167 Vmath::Svtvp(nLocal, -1.0, r0, 1, pInput, 1, r0, 1);
168
169 m_operator.DoAssembleLoc(r0, wk, true);
170 NekDouble vExchange = Vmath::Dot(nLocal, wk, r0);
171
172 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
173 NekDouble eps1 = vExchange;
174
175 if (m_root)
176 {
177 int nwidthcolm = 13;
178
179 cout << std::scientific << std::setw(nwidthcolm)
180 << std::setprecision(nwidthcolm - 8)
181 << " GMRES iterations made = " << m_totalIterations
182 << " using tolerance of " << m_tolerance
183 << " (error = " << sqrt(eps * m_prec_factor / m_rhs_magnitude)
184 << ")";
185
186 cout << " WITH (GMRES eps = " << eps << " REAL eps= " << eps1
187 << ")";
188
189 if (m_converged)
190 {
191 cout << " CONVERGED" << endl;
192 }
193 else
194 {
195 cout << " WARNING: Exceeded maxIt" << endl;
196 }
197 }
198 }
199
200 if (m_FlagWarnings)
201 {
202 WARNINGL1(m_converged, "GMRES did not converge.");
203 }
204 return m_totalIterations;
205}
206
208 const bool restarted, const bool truncted, const int nLocal,
210{
211 // Allocate array storage of coefficients
212 // Residual
214 // Givens rotation c
216 // Givens rotation s
218 // Total coefficients, just for check
220 // Search direction order
224 // Temporary variables
225 int idtem;
226 int starttem;
227 int endtem;
228
229 NekDouble eps;
230 NekDouble beta, alpha;
231 NekDouble vExchange = 0;
232 // Temporary Array
233 Array<OneD, NekDouble> w(nLocal, 0.0);
234 Array<OneD, NekDouble> wk(nLocal, 0.0);
235 Array<OneD, NekDouble> r0(nLocal, 0.0);
240
241 if (restarted)
242 {
243 // This is A*x
245
246 // The first search direction
247 beta = -1.0;
248
249 // This is r0 = b-A*x
250 Vmath::Svtvp(nLocal, beta, r0, 1, pInput, 1, r0, 1);
251 }
252 else
253 {
254 // If not restarted, x0 should be zero
255 Vmath::Vcopy(nLocal, pInput, 1, r0, 1);
256 }
257
259 {
261 }
262
263 // Norm of (r0)
264 // The m_map tells how to connect
265 m_operator.DoAssembleLoc(r0, wk, true);
266 vExchange = Vmath::Dot(nLocal, wk, r0);
267 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
268 eps = vExchange;
269
270 if (!restarted)
271 {
273 {
275 {
276 // Evaluate initial residual error for exit check
277 ASSERTL0(false, "Need to set up/debugging");
278
279 m_operator.DoAssembleLoc(pInput, wk, true);
280 vExchange = Vmath::Dot(nLocal, wk, pInput);
281 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
282 m_prec_factor = vExchange / eps;
283 }
284 else
285 {
286 m_prec_factor = 1.0;
287 }
288 }
289 }
290
291 Vmath::Smul(nLocal, sqrt(m_prec_factor), r0, 1, r0, 1);
292 eps = eps * m_prec_factor;
293 eta[0] = sqrt(eps);
294
295 // Give an order for the entries in Hessenburg matrix
296 for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
297 {
298 id[nd] = nd;
299 id_end[nd] = nd + 1;
300 starttem = id_end[nd] - m_KrylovMaxHessMatBand;
301 if (truncted && (starttem) > 0)
302 {
303 id_start[nd] = starttem;
304 }
305 else
306 {
307 id_start[nd] = 0;
308 }
309 }
310
311 // Normlized by r0 norm V(:,1)=r0/norm(r0)
312 alpha = 1.0 / eta[0];
313
314 // Scalar multiplication
315 if (m_V_total[0].size() == 0)
316 {
317 m_V_total[0] = Array<OneD, NekDouble>(nLocal, 0.0);
318 }
319 Vmath::Smul(nLocal, alpha, r0, 1, m_V_total[0], 1);
320
321 // restarted Gmres(m) process
323 {
324 V1 = Array<OneD, NekDouble>(nLocal, 0.0);
325 }
326
327 int nswp = 0;
328 for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
329 {
330 if (m_V_total[nd + 1].size() == 0)
331 {
332 m_V_total[nd + 1] = Array<OneD, NekDouble>(nLocal, 0.0);
333 }
334 Vmath::Zero(nLocal, m_V_total[nd + 1], 1);
336 V2 = m_V_total[nd + 1];
337 h1 = m_hes[nd];
338
340 {
341 m_operator.DoNekSysPrecon(m_V_total[nd], V1, true);
342 }
343 else
344 {
345 V1 = m_V_total[nd];
346 }
347
348 // w here is no need to add nDir due to temporary Array
349 idtem = id[nd];
350 starttem = id_start[idtem];
351 endtem = id_end[idtem];
352
353 DoArnoldi(starttem, endtem, nLocal, w, wk, V1, V2, h1);
354
355 if (starttem > 0)
356 {
357 starttem = starttem - 1;
358 }
359
360 h2 = m_Upper[nd];
361 Vmath::Vcopy(m_LinSysMaxStorage + 1, &h1[0], 1, &h2[0], 1);
362 DoGivensRotation(starttem, endtem, cs, sn, h2, eta);
363
364 eps = eta[nd + 1] * eta[nd + 1];
365
366 // This Gmres merge truncted Gmres to accelerate.
367 // If truncted, cannot jump out because
368 // the last term of eta is not residual
369 if ((!truncted) || (nd < m_KrylovMaxHessMatBand))
370 {
371 if ((eps <
372 m_tolerance * m_tolerance * m_rhs_magnitude)) //&& nd > 0)
373 {
374 m_converged = true;
375 }
376 }
377 nswp++;
379
380 if (m_converged)
381 {
382 break;
383 }
384 }
385
386 DoBackward(nswp, m_Upper, eta, y_total);
387
388 // calculate output y_total*V_total
389 Array<OneD, NekDouble> solution(nLocal, 0.0);
390 for (int i = 0; i < nswp; ++i)
391 {
392 beta = y_total[i];
393 Vmath::Svtvp(nLocal, beta, m_V_total[i], 1, solution, 1, solution, 1);
394 }
395
397 {
398 m_operator.DoNekSysPrecon(solution, solution, true);
399 }
400
401 // Update output.
402 Vmath::Vadd(nLocal, solution, 1, pOutput, 1, pOutput, 1);
403
404 return eps;
405}
406
407// Arnoldi Subroutine
408void NekLinSysIterGMRESLoc::DoArnoldi(const int starttem, const int endtem,
409 const int nLocal,
415{
416 NekDouble alpha, beta, vExchange = 0.0;
418 timer.Start();
420 timer.Stop();
421 timer.AccumulateRegion("NekSysOperators::DoNekSysLhsEval", 10);
422
424 {
426 }
427
428 Vmath::Smul(nLocal, sqrt(m_prec_factor), w, 1, w, 1);
429
430 // Modified Gram-Schmidt
431 for (int i = starttem; i < endtem; ++i)
432 {
433 // Do inner product on equivalent of global values excluding
434 // Diriclet conditions. To do this need to assmble (and
435 // scatter back vector and then zero Dirichlet conditions.
436 m_operator.DoAssembleLoc(m_V_total[i], wk, true);
437 vExchange = Vmath::Dot(nLocal, wk, w);
438 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
439
440 h[i] = vExchange;
441
442 beta = -1.0 * vExchange;
443 Vmath::Svtvp(nLocal, beta, m_V_total[i], 1, w, 1, w, 1);
444 }
445 // end of Modified Gram-Schmidt
446
447 // calculate the L2 norm and normalize
448 m_operator.DoAssembleLoc(w, wk, true);
449 vExchange = Vmath::Dot(nLocal, wk, w);
450 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
451
452 h[endtem] = sqrt(vExchange);
453
454 alpha = 1.0 / h[endtem];
455 Vmath::Smul(nLocal, alpha, w, 1, V2, 1);
456}
457
458// QR factorization through Givens rotation -> Put into a helper class
460 const int endtem,
465{
466 NekDouble temp_dbl;
467 NekDouble dd;
468 NekDouble hh;
469 int idtem = endtem - 1;
470 // The starttem and endtem are beginning and ending order of Givens rotation
471 // They usually equal to the beginning position and ending position of
472 // Hessenburg matrix But sometimes starttem will change, like if it is
473 // initial 0 and becomes nonzero because previous Givens rotation See Yu
474 // Pan's User Guide
475 for (int i = starttem; i < idtem; ++i)
476 {
477 temp_dbl = c[i] * h[i] - s[i] * h[i + 1];
478 h[i + 1] = s[i] * h[i] + c[i] * h[i + 1];
479 h[i] = temp_dbl;
480 }
481 dd = h[idtem];
482 hh = h[endtem];
483 if (hh == 0.0)
484 {
485 c[idtem] = 1.0;
486 s[idtem] = 0.0;
487 }
488 else if (abs(hh) > abs(dd))
489 {
490 temp_dbl = -dd / hh;
491 s[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
492 c[idtem] = temp_dbl * s[idtem];
493 }
494 else
495 {
496 temp_dbl = -hh / dd;
497 c[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
498 s[idtem] = temp_dbl * c[idtem];
499 }
500
501 h[idtem] = c[idtem] * h[idtem] - s[idtem] * h[endtem];
502 h[endtem] = 0.0;
503
504 temp_dbl = c[idtem] * eta[idtem] - s[idtem] * eta[endtem];
505 eta[endtem] = s[idtem] * eta[idtem] + c[idtem] * eta[endtem];
506 eta[idtem] = temp_dbl;
507}
508
509// Backward calculation
510// To notice, Hesssenburg matrix's column
511// and row changes due to use Array<OneD,Array<OneD,NekDouble>> --> Put into a
512// helper class
517{
518 // Number is the entry number
519 // but C++'s order need to be one smaller
520 int maxid = number - 1;
521 NekDouble sum;
522 y[maxid] = b[maxid] / A[maxid][maxid];
523 for (int i = maxid - 1; i > -1; --i)
524 {
525 sum = b[i];
526 for (int j = i + 1; j < number; ++j)
527 {
528 // i and j changes due to use Array<OneD,Array<OneD,NekDouble>>
529 sum -= y[j] * A[j][i];
530 }
531 y[i] = sum / A[i][i];
532 }
533}
534} // namespace Nektar::LibUtilities
#define WARNINGL1(condition, msg)
Definition: ErrorUtil.hpp:243
#define ASSERTL0(condition, msg)
Definition: ErrorUtil.hpp:208
tKey RegisterCreatorFunction(tKey idKey, CreatorFunction classCreator, std::string pDesc="")
Register a class with the factory.
Definition: NekFactory.hpp:197
static NekLinSysIterSharedPtr create(const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vRowComm, const int nDimen, const NekSysKey &pKey)
Array< OneD, Array< OneD, NekDouble > > m_V_total
NekDouble DoGmresRestart(const bool restarted, const bool truncted, const int nLocal, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput)
Actual iterative gmres solver for one restart.
void DoArnoldi(const int starttem, const int endtem, const int nLocal, Array< OneD, NekDouble > &w, Array< OneD, NekDouble > &wk, Array< OneD, NekDouble > &V1, Array< OneD, NekDouble > &V2, Array< OneD, NekDouble > &h)
void DoBackward(const int number, Array< OneD, Array< OneD, NekDouble > > &A, const Array< OneD, const NekDouble > &b, Array< OneD, NekDouble > &y)
void DoGivensRotation(const int starttem, const int endtem, Array< OneD, NekDouble > &c, Array< OneD, NekDouble > &s, Array< OneD, NekDouble > &h, Array< OneD, NekDouble > &eta)
int v_SolveSystem(const int nLocal, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const int nDir, const NekDouble tol, const NekDouble factor) override
NekLinSysIterGMRESLoc(const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vRowComm, const int nDimen, const NekSysKey &pKey=NekSysKey())
Array< OneD, Array< OneD, NekDouble > > m_hes
int DoGMRES(const int pNumRows, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput)
Actual iterative solve-GMRES.
Array< OneD, Array< OneD, NekDouble > > m_Upper
NekDouble m_rhs_magnitude
Dot product of rhs to normalise stopping criterion.
void Set_Rhs_Magnitude(const NekVector< NekDouble > &pIn)
bool m_root
Root if parallel.
Definition: NekSys.h:297
LibUtilities::CommSharedPtr m_rowComm
Communicate.
Definition: NekSys.h:293
bool m_verbose
Verbose.
Definition: NekSys.h:299
NekDouble m_tolerance
Tolerance of iterative solver.
Definition: NekSys.h:291
NekSysOperators m_operator
Operators.
Definition: NekSys.h:302
bool m_converged
Whether the iteration has been converged.
Definition: NekSys.h:295
int m_maxiter
Maximum iterations.
Definition: NekSys.h:289
void DoAssembleLoc(InArrayType &xn, OutArrayType &xn1, const bool &flag=false) const
Definition: NekSys.h:161
void DoNekSysPrecon(InArrayType &inarray, OutArrayType &outarray, const bool &flag=false) const
Definition: NekSys.h:148
void DoNekSysLhsEval(InArrayType &inarray, OutArrayType &outarray, const bool &flag=false) const
Definition: NekSys.h:141
void AccumulateRegion(std::string, int iolevel=0)
Accumulate elapsed time for a region.
Definition: Timer.cpp:70
std::shared_ptr< SessionReader > SessionReaderSharedPtr
NekLinSysIterFactory & GetNekLinSysIterFactory()
@ beta
Gauss Radau pinned at x=-1,.
Definition: PointsType.h:59
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
Definition: Comm.h:55
static const NekDouble kNekUnsetDouble
std::vector< double > w(NPUPPER)
double NekDouble
void Svtvp(int n, const T alpha, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Svtvp (scalar times vector plus vector): z = alpha*x + y.
Definition: Vmath.hpp:396
T Dot(int n, const T *w, const T *x)
dot product
Definition: Vmath.hpp:761
void Vadd(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Add vector z = x+y.
Definition: Vmath.hpp:180
void Smul(int n, const T alpha, const T *x, const int incx, T *y, const int incy)
Scalar multiply y = alpha*x.
Definition: Vmath.hpp:100
void Zero(int n, T *x, const int incx)
Zero vector.
Definition: Vmath.hpp:273
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
Definition: Vmath.hpp:825
scalarT< T > abs(scalarT< T > in)
Definition: scalar.hpp:298
scalarT< T > sqrt(scalarT< T > in)
Definition: scalar.hpp:294