Nektar++
NekLinSysIterGMRES.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: NekLinSysIterGMRES.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// License for the specific language governing rights and limitations under
14// Permission is hereby granted, free of charge, to any person obtaining a
15// copy of this software and associated documentation files (the "Software"),
16// to deal in the Software without restriction, including without limitation
17// the rights to use, copy, modify, merge, publish, distribute, sublicense,
18// and/or sell copies of the Software, and to permit persons to whom the
19// Software is furnished to do so, subject to the following conditions:
20//
21// The above copyright notice and this permission notice shall be included
22// in all copies or substantial portions of the Software.
23//
24// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
25// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30// DEALINGS IN THE SOFTWARE.
31//
32// Description: NekLinSysIterGMRES definition
33//
34///////////////////////////////////////////////////////////////////////////////
35
38
39using namespace std;
40
42{
43/**
44 * @class NekLinSysIterGMRES
45 *
46 * Solves a linear system using iterative methods.
47 */
50 "GMRES", NekLinSysIterGMRES::create, "NekLinSysIterGMRES solver.");
51
54 const LibUtilities::CommSharedPtr &vRowComm, const int nDimen,
55 const NekSysKey &pKey)
56 : NekLinSysIter(pSession, vRowComm, nDimen, pKey)
57{
60
62
65
68
69 // Allocate array storage of coefficients
70 // Hessenburg matrix
72 for (size_t nd = 0; nd < m_LinSysMaxStorage; nd++)
73 {
75 }
76 // Hesseburg matrix after rotation
78 for (size_t nd = 0; nd < m_LinSysMaxStorage; nd++)
79 {
81 }
82 // Total search directions
84}
85
87{
89}
90
91/**
92 *
93 */
95 const int nGlobal, const Array<OneD, const NekDouble> &pInput,
96 Array<OneD, NekDouble> &pOutput, const int nDir, const NekDouble tol,
97 [[maybe_unused]] const NekDouble factor)
98{
99 m_tolerance = max(tol, 1.0E-15);
100 int niterations = DoGMRES(nGlobal, pInput, pOutput, nDir);
101
102 return niterations;
103}
104
105/**  
106 * Solve a global linear system using the Gmres 
107 * We solve only for the non-Dirichlet modes. The operator is evaluated  
108 * using an auxiliary function v_DoMatrixMultiply defined by the  
109 * specific solver. Distributed math routines are used to support  
110 * parallel execution of the solver.  
111 *  
112 * @param pInput Input residual of all DOFs.  
113 * @param pOutput Solution vector of all DOFs.  
114 */
115
116int NekLinSysIterGMRES::DoGMRES(const int nGlobal,
117 const Array<OneD, const NekDouble> &pInput,
118 Array<OneD, NekDouble> &pOutput, const int nDir)
119{
121
123 {
124 NekVector<NekDouble> inGlob(nGlobal, pInput, eWrapper);
125 Set_Rhs_Magnitude(inGlob);
126 }
127
128 // Get vector sizes
129 NekDouble eps = 0.0;
130 int nNonDir = nGlobal - nDir;
131
133
134 // zero homogeneous out array ready for solution updates
135 // Should not be earlier in case input vector is same as
136 // output and above copy has been peformed
137 Vmath::Zero(nNonDir, tmp = pOutput + nDir, 1);
138
140 m_converged = false;
141
142 bool restarted = false;
143 bool truncted = false;
144
146 {
147 truncted = true;
148 }
149
150 for (int nrestart = 0; nrestart < m_maxrestart; ++nrestart)
151 {
152 eps =
153 DoGmresRestart(restarted, truncted, nGlobal, pInput, pOutput, nDir);
154
155 if (m_converged)
156 {
157 break;
158 }
159 restarted = true;
160 }
161
162 if (m_verbose)
163 {
164 Array<OneD, NekDouble> r0(nGlobal, 0.0);
166 Vmath::Svtvp(nNonDir, -1.0, &r0[0] + nDir, 1, &pInput[0] + nDir, 1,
167 &r0[0] + nDir, 1);
168 NekDouble vExchange = Vmath::Dot2(nNonDir, &r0[0] + nDir, &r0[0] + nDir,
169 &m_map[0] + nDir);
170 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
171 NekDouble eps1 = vExchange;
172
173 if (m_root)
174 {
175 int nwidthcolm = 13;
176
177 cout << std::scientific << std::setw(nwidthcolm)
178 << std::setprecision(nwidthcolm - 8)
179 << " GMRES iterations made = " << m_totalIterations
180 << " using tolerance of " << m_tolerance
181 << " (error = " << sqrt(eps * m_prec_factor / m_rhs_magnitude)
182 << ")";
183
184 cout << " WITH (GMRES eps = " << eps << " REAL eps= " << eps1
185 << ")";
186
187 if (m_converged)
188 {
189 cout << " CONVERGED" << endl;
190 }
191 else
192 {
193 cout << " WARNING: Exceeded maxIt" << endl;
194 }
195 }
196 }
197
198 if (m_FlagWarnings)
199 {
200 WARNINGL1(m_converged, "GMRES did not converge.");
201 }
202 return m_totalIterations;
203}
204
206 const bool restarted, const bool truncted, const int nGlobal,
208 const int nDir)
209{
210 int nNonDir = nGlobal - nDir;
211
212 // Allocate array storage of coefficients
213 // Residual
215 // Givens rotation c
217 // Givens rotation s
219 // Total coefficients, just for check
221 // Search direction order
225 // Temporary variables
226 int idtem;
227 int starttem;
228 int endtem;
229
230 NekDouble eps;
231 NekDouble beta, alpha;
232 NekDouble vExchange = 0;
233 // Temporary Array
234 Array<OneD, NekDouble> w(nGlobal, 0.0);
235 Array<OneD, NekDouble> wk(nGlobal, 0.0);
236 Array<OneD, NekDouble> r0(nGlobal, 0.0);
238 Array<OneD, NekDouble> Vsingle1;
239 Array<OneD, NekDouble> Vsingle2;
240 Array<OneD, NekDouble> hsingle1;
241 Array<OneD, NekDouble> hsingle2;
242
243 if (restarted)
244 {
245 // This is A*x
247
248 // The first search direction
249 beta = -1.0;
250
251 // This is r0 = b-A*x
252 Vmath::Svtvp(nNonDir, beta, &r0[0] + nDir, 1, &pInput[0] + nDir, 1,
253 &r0[0] + nDir, 1);
254 }
255 else
256 {
257 // This is r0 = b, x = x0 assumed to be zero
258 Vmath::Vcopy(nNonDir, &pInput[0] + nDir, 1, &r0[0] + nDir, 1);
259 }
260
262 {
263 m_operator.DoNekSysPrecon(r0 + nDir, tmp = r0 + nDir);
264 }
265
266 // Norm of (r0)
267 // The m_map tells how to connect
268 vExchange =
269 Vmath::Dot2(nNonDir, &r0[0] + nDir, &r0[0] + nDir, &m_map[0] + nDir);
270 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
271 eps = vExchange;
272
273 // Detect zero input array
274 // Causes Arnoldi to breakdown, hence stop here
276 {
277 m_converged = true;
279 {
280 m_prec_factor = 1.0;
281 }
282 return eps;
283 }
284
285 if (!restarted)
286 {
289 {
290 // Evaluate initial residual error for exit check
291 vExchange = Vmath::Dot2(nNonDir, &pInput[0] + nDir,
292 &pInput[0] + nDir, &m_map[0] + nDir);
293 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
294 m_prec_factor = vExchange / eps;
295 }
296 else
297 {
298 m_prec_factor = 1.0;
299 }
300 }
301
302 Vmath::Smul(nNonDir, sqrt(m_prec_factor), r0 + nDir, 1, tmp = r0 + nDir, 1);
303 eps = eps * m_prec_factor;
304 eta[0] = sqrt(eps);
305
306 // Give an order for the entries in Hessenburg matrix
307 for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
308 {
309 id[nd] = nd;
310 id_end[nd] = nd + 1;
311 starttem = id_end[nd] - m_KrylovMaxHessMatBand;
312 if (truncted && (starttem) > 0)
313 {
314 id_start[nd] = starttem;
315 }
316 else
317 {
318 id_start[nd] = 0;
319 }
320 }
321
322 // Normlized by r0 norm V(:,1)=r0/norm(r0)
323 alpha = 1.0 / eta[0];
324
325 // Scalar multiplication
326 if (m_V_total[0].size() == 0)
327 {
328 m_V_total[0] = Array<OneD, NekDouble>(nGlobal, 0.0);
329 }
330 Vmath::Smul(nNonDir, alpha, &r0[0] + nDir, 1, &m_V_total[0][0] + nDir, 1);
331
332 // Restarted Gmres(m) process
334 {
335 Vsingle1 = Array<OneD, NekDouble>(nGlobal, 0.0);
336 }
337
338 int nswp = 0;
339 for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
340 {
341 if (m_V_total[nd + 1].size() == 0)
342 {
343 m_V_total[nd + 1] = Array<OneD, NekDouble>(nGlobal, 0.0);
344 }
345 Vmath::Zero(nGlobal, m_V_total[nd + 1], 1);
347 Vsingle2 = m_V_total[nd + 1];
348 hsingle1 = m_hes[nd];
349
351 {
353 tmp = Vsingle1 + nDir);
354 }
355 else
356 {
357 Vsingle1 = m_V_total[nd];
358 }
359
360 // w here is no need to add nDir due to temporary Array
361 idtem = id[nd];
362 starttem = id_start[idtem];
363 endtem = id_end[idtem];
364
365 DoArnoldi(starttem, endtem, nGlobal, nDir, w, Vsingle1, Vsingle2,
366 hsingle1);
367
368 if (starttem > 0)
369 {
370 starttem = starttem - 1;
371 }
372
373 hsingle2 = m_Upper[nd];
374 Vmath::Vcopy(m_LinSysMaxStorage + 1, &hsingle1[0], 1, &hsingle2[0], 1);
375 DoGivensRotation(starttem, endtem, nGlobal, nDir, cs, sn, hsingle2,
376 eta);
377
378 eps = eta[nd + 1] * eta[nd + 1];
379
380 // This Gmres merge truncted Gmres to accelerate.
381 // If truncted, cannot jump out because
382 // the last term of eta is not residual
383 if ((!truncted) || (nd < m_KrylovMaxHessMatBand))
384 {
385 if ((eps < m_tolerance * m_tolerance * m_rhs_magnitude) && nd > 0)
386 {
387 m_converged = true;
388 }
389 }
390 nswp++;
392
393 if (m_converged)
394 {
395 break;
396 }
397 }
398
399 DoBackward(nswp, m_Upper, eta, y_total);
400
401 // Calculate output V_total * y_total.
402 Array<OneD, NekDouble> solution(nNonDir, 0.0);
403 for (int i = 0; i < nswp; ++i)
404 {
405 Vmath::Svtvp(nNonDir, y_total[i], &m_V_total[i][0] + nDir, 1,
406 solution.get(), 1, solution.get(), 1);
407 }
408
410 {
411 m_operator.DoNekSysPrecon(solution, solution);
412 }
413
414 // Update output.
415 Vmath::Vadd(nNonDir, solution.get(), 1, &pOutput[0] + nDir, 1,
416 &pOutput[0] + nDir, 1);
417
418 return eps;
419}
420
421// Arnoldi Subroutine
422void NekLinSysIterGMRES::DoArnoldi(const int starttem, const int endtem,
423 const int nGlobal, const int nDir,
425 // V[nd]
426 Array<OneD, NekDouble> &Vsingle1,
427 // V[nd+1]
428 Array<OneD, NekDouble> &Vsingle2,
429 // h
430 Array<OneD, NekDouble> &hsingle)
431{
432 NekDouble alpha, beta, vExchange = 0.0;
434 int nNonDir = nGlobal - nDir;
436 timer.Start();
438 timer.Stop();
439 timer.AccumulateRegion("NekSysOperators::DoNekSysLhsEval", 10);
440
442 {
443 m_operator.DoNekSysPrecon(w + nDir, tmp = w + nDir);
444 }
445
446 Vmath::Smul(nNonDir, sqrt(m_prec_factor), w + nDir, 1, tmp = w + nDir, 1);
447
448 // Modified Gram-Schmidt
449 for (int i = starttem; i < endtem; ++i)
450 {
451 vExchange = Vmath::Dot2(nNonDir, &w[0] + nDir, &m_V_total[i][0] + nDir,
452 &m_map[0] + nDir);
453 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
454
455 hsingle[i] = vExchange;
456
457 beta = -1.0 * vExchange;
458 Vmath::Svtvp(nNonDir, beta, &m_V_total[i][0] + nDir, 1, &w[0] + nDir, 1,
459 &w[0] + nDir, 1);
460 }
461 // end of Modified Gram-Schmidt
462
463 // calculate the L2 norm and normalize
464 vExchange =
465 Vmath::Dot2(nNonDir, &w[0] + nDir, &w[0] + nDir, &m_map[0] + nDir);
466 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
467
468 hsingle[endtem] = sqrt(vExchange);
469
470 alpha = 1.0 / hsingle[endtem];
471 Vmath::Smul(nNonDir, alpha, &w[0] + nDir, 1, &Vsingle2[0] + nDir, 1);
472}
473
474// QR factorization through Givens rotation
475void NekLinSysIterGMRES::DoGivensRotation(const int starttem, const int endtem,
476 [[maybe_unused]] const int nGlobal,
477 [[maybe_unused]] const int nDir,
480 Array<OneD, NekDouble> &hsingle,
482{
483 NekDouble temp_dbl;
484 NekDouble dd;
485 NekDouble hh;
486 int idtem = endtem - 1;
487 // The starttem and endtem are beginning and ending order of Givens rotation
488 // They usually equal to the beginning position and ending position of
489 // Hessenburg matrix But sometimes starttem will change, like if it is
490 // initial 0 and becomes nonzero because previous Givens rotation See Yu
491 // Pan's User Guide
492 for (int i = starttem; i < idtem; ++i)
493 {
494 temp_dbl = c[i] * hsingle[i] - s[i] * hsingle[i + 1];
495 hsingle[i + 1] = s[i] * hsingle[i] + c[i] * hsingle[i + 1];
496 hsingle[i] = temp_dbl;
497 }
498 dd = hsingle[idtem];
499 hh = hsingle[endtem];
500 if (hh == 0.0)
501 {
502 c[idtem] = 1.0;
503 s[idtem] = 0.0;
504 }
505 else if (abs(hh) > abs(dd))
506 {
507 temp_dbl = -dd / hh;
508 s[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
509 c[idtem] = temp_dbl * s[idtem];
510 }
511 else
512 {
513 temp_dbl = -hh / dd;
514 c[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
515 s[idtem] = temp_dbl * c[idtem];
516 }
517
518 hsingle[idtem] = c[idtem] * hsingle[idtem] - s[idtem] * hsingle[endtem];
519 hsingle[endtem] = 0.0;
520
521 temp_dbl = c[idtem] * eta[idtem] - s[idtem] * eta[endtem];
522 eta[endtem] = s[idtem] * eta[idtem] + c[idtem] * eta[endtem];
523 eta[idtem] = temp_dbl;
524}
525
526// Backward calculation
527// To notice, Hesssenburg matrix's column
528// and row changes due to use Array<OneD,Array<OneD,NekDouble>>
529void NekLinSysIterGMRES::DoBackward(const int number,
533{
534 // Number is the entry number
535 // but C++'s order need to be one smaller
536 int maxid = number - 1;
537 NekDouble sum;
538 y[maxid] = b[maxid] / A[maxid][maxid];
539 for (int i = maxid - 1; i > -1; --i)
540 {
541 sum = b[i];
542 for (int j = i + 1; j < number; ++j)
543 {
544 // i and j changes due to use Array<OneD,Array<OneD,NekDouble>>
545 sum -= y[j] * A[j][i];
546 }
547 y[i] = sum / A[i][i];
548 }
549}
550} // namespace Nektar::LibUtilities
#define WARNINGL1(condition, msg)
Definition: ErrorUtil.hpp:243
tKey RegisterCreatorFunction(tKey idKey, CreatorFunction classCreator, std::string pDesc="")
Register a class with the factory.
Definition: NekFactory.hpp:197
NekLinSysIterGMRES(const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vRowComm, const int nDimen, const NekSysKey &pKey=NekSysKey())
int DoGMRES(const int pNumRows, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const int pNumDir)
Actual iterative solve-GMRES.
void DoBackward(const int number, Array< OneD, Array< OneD, NekDouble > > &A, const Array< OneD, const NekDouble > &b, Array< OneD, NekDouble > &y)
Array< OneD, Array< OneD, NekDouble > > m_V_total
Array< OneD, Array< OneD, NekDouble > > m_Upper
int v_SolveSystem(const int nGlobal, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const int nDir, const NekDouble tol, const NekDouble factor) override
Array< OneD, Array< OneD, NekDouble > > m_hes
void DoGivensRotation(const int starttem, const int endtem, const int nGlobal, const int nDir, Array< OneD, NekDouble > &c, Array< OneD, NekDouble > &s, Array< OneD, NekDouble > &hsingle, Array< OneD, NekDouble > &eta)
NekDouble DoGmresRestart(const bool restarted, const bool truncted, const int nGlobal, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const int nDir)
Actual iterative gmres solver for one restart.
void DoArnoldi(const int starttem, const int endtem, const int nGlobal, const int nDir, Array< OneD, NekDouble > &w, Array< OneD, NekDouble > &Vsingle1, Array< OneD, NekDouble > &Vsingle2, Array< OneD, NekDouble > &hsingle)
static NekLinSysIterSharedPtr create(const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vRowComm, const int nDimen, const NekSysKey &pKey)
NekDouble m_rhs_magnitude
Dot product of rhs to normalise stopping criterion.
void Set_Rhs_Magnitude(const NekVector< NekDouble > &pIn)
Array< OneD, int > m_map
Global to universal unique map.
bool m_root
Root if parallel.
Definition: NekSys.h:297
LibUtilities::CommSharedPtr m_rowComm
Communicate.
Definition: NekSys.h:293
bool m_verbose
Verbose.
Definition: NekSys.h:299
NekDouble m_tolerance
Tolerance of iterative solver.
Definition: NekSys.h:291
NekSysOperators m_operator
Operators.
Definition: NekSys.h:302
bool m_converged
Whether the iteration has been converged.
Definition: NekSys.h:295
int m_maxiter
Maximum iterations.
Definition: NekSys.h:289
void DoNekSysPrecon(InArrayType &inarray, OutArrayType &outarray, const bool &flag=false) const
Definition: NekSys.h:148
void DoNekSysLhsEval(InArrayType &inarray, OutArrayType &outarray, const bool &flag=false) const
Definition: NekSys.h:141
void AccumulateRegion(std::string, int iolevel=0)
Accumulate elapsed time for a region.
Definition: Timer.cpp:70
std::shared_ptr< SessionReader > SessionReaderSharedPtr
NekLinSysIterFactory & GetNekLinSysIterFactory()
@ beta
Gauss Radau pinned at x=-1,.
Definition: PointsType.h:59
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
Definition: Comm.h:55
static const NekDouble kNekUnsetDouble
std::vector< double > w(NPUPPER)
double NekDouble
void Svtvp(int n, const T alpha, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Svtvp (scalar times vector plus vector): z = alpha*x + y.
Definition: Vmath.hpp:396
T Dot2(int n, const T *w, const T *x, const int *y)
dot product
Definition: Vmath.hpp:790
void Vadd(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Add vector z = x+y.
Definition: Vmath.hpp:180
void Smul(int n, const T alpha, const T *x, const int incx, T *y, const int incy)
Scalar multiply y = alpha*x.
Definition: Vmath.hpp:100
void Zero(int n, T *x, const int incx)
Zero vector.
Definition: Vmath.hpp:273
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
Definition: Vmath.hpp:825
scalarT< T > abs(scalarT< T > in)
Definition: scalar.hpp:298
scalarT< T > sqrt(scalarT< T > in)
Definition: scalar.hpp:294