Nektar++
NekLinSysIterGMRES.cpp
///////////////////////////////////////////////////////////////////////////////
//
// File: NekLinSysIterGMRES.cpp
//
// For more information, please see: http://www.nektar.info
//
// The MIT License
//
// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
// Department of Aeronautics, Imperial College London (UK), and Scientific
// Computing and Imaging Institute, University of Utah (USA).
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//
// Description: NekLinSysIterGMRES definition
//
///////////////////////////////////////////////////////////////////////////////
#include <LibUtilities/LinearAlgebra/NekLinSysIterGMRES.h>

using namespace std;

namespace Nektar
{
namespace LibUtilities
{
/**
 * @class NekLinSysIterGMRES
 *
 * Solves a linear system using the preconditioned, restarted GMRES
 * iterative method.
 */
string NekLinSysIterGMRES::className =
    LibUtilities::GetNekLinSysIterFactory().RegisterCreatorFunction(
        "GMRES", NekLinSysIterGMRES::create, "NekLinSysIterGMRES solver.");

NekLinSysIterGMRES::NekLinSysIterGMRES(
    const LibUtilities::SessionReaderSharedPtr &pSession,
    const LibUtilities::CommSharedPtr &vRowComm, const int nDimen,
    const NekSysKey &pKey)
    : NekLinSysIter(pSession, vRowComm, nDimen, pKey)
{
    std::vector<std::string> variables(1);
    variables[0]    = pSession->GetVariable(0);
    string variable = variables[0];

    pSession->MatchSolverInfo("GMRESLeftPrecon", "True", m_NekLinSysLeftPrecon,
                              pKey.m_NekLinSysLeftPrecon);
    pSession->MatchSolverInfo("GMRESRightPrecon", "False",
                              m_NekLinSysRightPrecon,
                              pKey.m_NekLinSysRightPrecon);

    if (pSession->DefinesGlobalSysSolnInfo(variable, "GMRESMaxHessMatBand"))
    {
        m_KrylovMaxHessMatBand = boost::lexical_cast<int>(
            pSession->GetGlobalSysSolnInfo(variable, "GMRESMaxHessMatBand")
                .c_str());
    }
    else
    {
        pSession->LoadParameter("GMRESMaxHessMatBand", m_KrylovMaxHessMatBand,
                                pKey.m_KrylovMaxHessMatBand);
    }

    // Maximum number of restarts needed to reach m_maxiter total iterations
    m_maxrestart = ceil(NekDouble(m_maxiter) / NekDouble(m_LinSysMaxStorage));

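    // The options above are typically supplied through the session file, e.g.
    // (illustrative values only):
    //   <I PROPERTY="GMRESLeftPrecon"  VALUE="True"/>
    //   <P> GMRESMaxHessMatBand = 30 </P>
    // using the names passed to MatchSolverInfo/LoadParameter in this
    // constructor.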
    int GMRESCentralDifference = 0;
    pSession->LoadParameter("GMRESCentralDifference", GMRESCentralDifference,
                            0);

    switch (GMRESCentralDifference)
    {
        case 1:
            m_DifferenceFlag0 = true;
            m_DifferenceFlag1 = false;
            break;
        case 2:
            m_DifferenceFlag0 = true;
            m_DifferenceFlag1 = true;
            break;

        default:
            m_DifferenceFlag0 = false;
            m_DifferenceFlag1 = false;
            break;
    }
}

void NekLinSysIterGMRES::v_InitObject()
{
    NekLinSysIter::v_InitObject();
}

NekLinSysIterGMRES::~NekLinSysIterGMRES()
{
}

/**
 *
 */
int NekLinSysIterGMRES::v_SolveSystem(
    const int nGlobal, const Array<OneD, const NekDouble> &pInput,
    Array<OneD, NekDouble> &pOutput, const int nDir, const NekDouble tol,
    const NekDouble factor)
{
    m_tolerance     = max(tol, 1.0E-16);
    m_prec_factor   = factor;
    int niterations = DoGMRES(nGlobal, pInput, pOutput, nDir);

    return niterations;
}

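// Note: throughout this solver, eps stores squared 2-norms, so convergence
// checks compare eps against m_tolerance * m_tolerance * m_rhs_magnitude.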
/**
 * Solve a global linear system using the restarted GMRES method.
 * We solve only for the non-Dirichlet modes. The operator is evaluated
 * using an auxiliary function v_DoMatrixMultiply defined by the
 * specific solver. Distributed math routines are used to support
 * parallel execution of the solver.
 *
 * @param pInput  Input residual of all DOFs.
 * @param pOutput Solution vector of all DOFs.
 */
int NekLinSysIterGMRES::DoGMRES(const int nGlobal,
                                const Array<OneD, const NekDouble> &pInput,
                                Array<OneD, NekDouble> &pOutput, const int nDir)
{
    m_rhs_magnitude = 1.0;

    // Get vector sizes
    NekDouble eps = 0.0;
    int nNonDir   = nGlobal - nDir;
    Array<OneD, NekDouble> tmp;

    // Zero homogeneous out array ready for solution updates
    // Should not be done earlier in case the input vector is the same as
    // the output and the above copy has been performed
    Vmath::Zero(nNonDir, tmp = pOutput + nDir, 1);

    m_totalIterations = 0;
    m_converged       = false;

    bool restarted = false;
    bool truncted  = false;

    if (m_KrylovMaxHessMatBand > 0)
    {
        truncted = true;
    }

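    // Outer restart loop: each DoGmresRestart call builds a Krylov space of
    // at most m_LinSysMaxStorage directions and restarts from the updated
    // solution until convergence or m_maxrestart sweeps are reached.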
    for (int nrestart = 0; nrestart < m_maxrestart; ++nrestart)
    {
        eps =
            DoGmresRestart(restarted, truncted, nGlobal, pInput, pOutput, nDir);

        if (m_converged)
        {
            break;
        }
        restarted = true;
    }

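    // In verbose mode the true residual r = b - A x is recomputed and both
    // the GMRES estimate and the exact value are reported on the root rank.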
    if (m_verbose)
    {
        Array<OneD, NekDouble> r0(nGlobal, 0.0);
        m_operator.DoNekSysLhsEval(pOutput, r0, m_DifferenceFlag1);
        Vmath::Svtvp(nNonDir, -1.0, &r0[0] + nDir, 1, &pInput[0] + nDir, 1,
                     &r0[0] + nDir, 1);
        NekDouble vExchange = Vmath::Dot2(nNonDir, &r0[0] + nDir, &r0[0] + nDir,
                                          &m_map[0] + nDir);
        m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
        NekDouble eps1 = vExchange;

        if (m_root)
        {
            int nwidthcolm = 13;

            cout << std::scientific << std::setw(nwidthcolm)
                 << std::setprecision(nwidthcolm - 8)
                 << " GMRES iterations made = " << m_totalIterations
                 << " using tolerance of " << m_tolerance
                 << " (error = " << sqrt(eps * m_prec_factor / m_rhs_magnitude)
                 << ")";

            cout << " WITH (GMRES eps = " << eps << " REAL eps= " << eps1
                 << ")";

            if (m_converged)
            {
                cout << " CONVERGED" << endl;
            }
            else
            {
                cout << " WARNING: Exceeded maxIt" << endl;
            }
        }
    }

    if (m_FlagWarnings)
    {
        WARNINGL1(m_converged, "GMRES did not converge.");
    }
    return m_totalIterations;
}

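/**
 * Perform a single GMRES(m) cycle: build at most m_LinSysMaxStorage Krylov
 * directions, reduce the Hessenberg least-squares system with Givens
 * rotations, update the solution and return the squared residual estimate.
 */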
NekDouble NekLinSysIterGMRES::DoGmresRestart(
    const bool restarted, const bool truncted, const int nGlobal,
    const Array<OneD, const NekDouble> &pInput,
    Array<OneD, NekDouble> &pOutput, const int nDir)
{
    int nNonDir = nGlobal - nDir;

    // Allocate array storage of coefficients
    // Hessenberg matrix
    Array<OneD, Array<OneD, NekDouble>> hes(m_LinSysMaxStorage);
    // Hessenberg matrix after rotation
    Array<OneD, Array<OneD, NekDouble>> Upper(m_LinSysMaxStorage);
    // Total search directions
    Array<OneD, Array<OneD, NekDouble>> V_total(m_LinSysMaxStorage + 1);
    // Residual
    Array<OneD, NekDouble> eta(m_LinSysMaxStorage + 1, 0.0);
    // Givens rotation c
    Array<OneD, NekDouble> cs(m_LinSysMaxStorage, 0.0);
    // Givens rotation s
    Array<OneD, NekDouble> sn(m_LinSysMaxStorage, 0.0);
    // Total coefficients, just for check
    Array<OneD, NekDouble> y_total(m_LinSysMaxStorage, 0.0);
    // Residual
    NekDouble eps;
    // Search direction order
    Array<OneD, int> id(m_LinSysMaxStorage, 0);
    Array<OneD, int> id_start(m_LinSysMaxStorage, 0);
    Array<OneD, int> id_end(m_LinSysMaxStorage, 0);
    // Temporary variables
    int idtem;
    int starttem;
    int endtem;

    NekDouble beta, alpha;
    NekDouble vExchange = 0;
    // Temporary Array
    Array<OneD, NekDouble> r0(nGlobal, 0.0);
    Array<OneD, NekDouble> tmp1;
    Array<OneD, NekDouble> tmp2;
    Array<OneD, NekDouble> Vsingle1;
    Array<OneD, NekDouble> Vsingle2;
    Array<OneD, NekDouble> hsingle1;
    Array<OneD, NekDouble> hsingle2;

    if (restarted)
    {
        // This is A*x
        m_operator.DoNekSysLhsEval(pOutput, r0, m_DifferenceFlag1);

        // The first search direction
        beta = -1.0;

        // This is r0 = b-A*x
        Vmath::Svtvp(nNonDir, beta, &r0[0] + nDir, 1, &pInput[0] + nDir, 1,
                     &r0[0] + nDir, 1);
    }
    else
    {
        // This is r0 = b, x = x0 assumed to be zero
        Vmath::Vcopy(nNonDir, &pInput[0] + nDir, 1, &r0[0] + nDir, 1);
    }

    if (m_NekLinSysLeftPrecon)
    {
        tmp1 = r0 + nDir;
        tmp2 = r0 + nDir;
        m_operator.DoNekSysPrecon(tmp1, tmp2);
    }

    // Norm of (r0)
    // The m_map tells how to connect
    vExchange =
        Vmath::Dot2(nNonDir, &r0[0] + nDir, &r0[0] + nDir, &m_map[0] + nDir);
    m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
    eps = vExchange;

    // Detect a zero input array, which causes the Arnoldi process to break
    // down; stop here if so
    if (eps < NekConstants::kNekZeroTol)
    {
        m_converged = true;
        if (m_prec_factor == NekConstants::kNekUnsetDouble)
        {
            m_prec_factor = 1.0;
        }
        return eps;
    }

    if (!restarted)
    {
        if (m_NekLinSysLeftPrecon &&
            m_prec_factor == NekConstants::kNekUnsetDouble)
        {
            // Evaluate initial residual error for exit check
            vExchange = Vmath::Dot2(nNonDir, &pInput[0] + nDir,
                                    &pInput[0] + nDir, &m_map[0] + nDir);
            m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
            m_prec_factor = vExchange / eps;
        }
        else
        {
            m_prec_factor = 1.0;
        }
    }

    tmp2 = r0 + nDir;
    Vmath::Smul(nNonDir, sqrt(m_prec_factor), tmp2, 1, tmp2, 1);
    eps    = eps * m_prec_factor;
    eta[0] = sqrt(eps);

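    // For truncated GMRES each new direction is orthogonalized only against
    // the last m_KrylovMaxHessMatBand vectors; id_start/id_end below record
    // the band of Hessenberg entries that is actually filled at each step.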
    // Give an order for the entries in the Hessenberg matrix
    for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
    {
        id[nd]     = nd;
        id_end[nd] = nd + 1;
        starttem   = id_end[nd] - m_KrylovMaxHessMatBand;
        if (truncted && (starttem) > 0)
        {
            id_start[nd] = starttem;
        }
        else
        {
            id_start[nd] = 0;
        }
    }

    // Normalized by the r0 norm: V(:,1) = r0/norm(r0)
    alpha = 1.0 / eta[0];

    // Scalar multiplication
    V_total[0] = Array<OneD, NekDouble>(nGlobal, 0.0);
    Vmath::Smul(nNonDir, alpha, &r0[0] + nDir, 1, &V_total[0][0] + nDir, 1);

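    // Main GMRES(m) loop: apply the operator through DoArnoldi to extend the
    // Krylov basis, copy the new Hessenberg column and rotate it with the
    // stored Givens rotations; eta[nd + 1]^2 then estimates the residual.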
    // Restarted GMRES(m) process
    if (m_NekLinSysRightPrecon)
    {
        Vsingle1 = Array<OneD, NekDouble>(nGlobal, 0.0);
    }

    int nswp = 0;
    for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
    {
        hes[nd]         = Array<OneD, NekDouble>(m_LinSysMaxStorage + 1, 0.0);
        Upper[nd]       = Array<OneD, NekDouble>(m_LinSysMaxStorage + 1, 0.0);
        V_total[nd + 1] = Array<OneD, NekDouble>(nGlobal, 0.0);
        Vsingle2        = V_total[nd + 1];
        hsingle1        = hes[nd];

        if (m_NekLinSysRightPrecon)
        {
            tmp1 = V_total[nd] + nDir;
            tmp2 = Vsingle1 + nDir;
            m_operator.DoNekSysPrecon(tmp1, tmp2);
        }
        else
        {
            Vsingle1 = V_total[nd];
        }

        // No need to add nDir to w here since it is a temporary array
        idtem    = id[nd];
        starttem = id_start[idtem];
        endtem   = id_end[idtem];

        DoArnoldi(starttem, endtem, nGlobal, nDir, V_total, Vsingle1, Vsingle2,
                  hsingle1);

        if (starttem > 0)
        {
            starttem = starttem - 1;
        }

        hsingle2 = Upper[nd];
        Vmath::Vcopy(m_LinSysMaxStorage + 1, &hsingle1[0], 1, &hsingle2[0], 1);
        DoGivensRotation(starttem, endtem, nGlobal, nDir, cs, sn, hsingle2,
                         eta);

        eps = eta[nd + 1] * eta[nd + 1];

        // This GMRES merges truncated GMRES to accelerate convergence.
        // If truncated, we cannot exit here because the last entry of eta
        // is not the residual.
        if ((!truncted) || (nd < m_KrylovMaxHessMatBand))
        {
            if ((eps < m_tolerance * m_tolerance * m_rhs_magnitude) && nd > 0)
            {
                m_converged = true;
            }
            NekDouble tolmin = 1.0E-15;
            if (eps < tolmin * tolmin * m_rhs_magnitude)
            {
                m_converged = true;
            }
        }
        nswp++;
        m_totalIterations++;

        if (m_converged)
        {
            break;
        }
    }

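    // The rotated Hessenberg system is upper triangular, so the least-squares
    // coefficients y_total follow from back substitution; the update is then
    // x += V * y (preconditioned on the right if requested).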
    DoBackward(nswp, Upper, eta, y_total);

    // Calculate output V_total * y_total.
    Array<OneD, NekDouble> solution(nNonDir, 0.0);
    for (int i = 0; i < nswp; ++i)
    {
        Vmath::Svtvp(nNonDir, y_total[i], &V_total[i][0] + nDir, 1,
                     solution.get(), 1, solution.get(), 1);
    }

    if (m_NekLinSysRightPrecon)
    {
        m_operator.DoNekSysPrecon(solution, solution);
    }

    // Update output.
    Vmath::Vadd(nNonDir, solution.get(), 1, &pOutput[0] + nDir, 1,
                &pOutput[0] + nDir, 1);

    return eps;
}

// Arnoldi Subroutine
void NekLinSysIterGMRES::DoArnoldi(const int starttem, const int endtem,
                                   const int nGlobal, const int nDir,
                                   // V_total(:,1:nd)
                                   Array<OneD, Array<OneD, NekDouble>> &V_local,
                                   // V[nd]
                                   Array<OneD, NekDouble> &Vsingle1,
                                   // V[nd+1]
                                   Array<OneD, NekDouble> &Vsingle2,
                                   // h
                                   Array<OneD, NekDouble> &hsingle)
{
    // Note: the ordering of V_local does not necessarily match
    // starttem:endtem; starttem:endtem are the entry positions in the
    // Hessenberg matrix.
    NekDouble alpha, beta;
    Array<OneD, NekDouble> tmp1, tmp2;
    int numbertem;
    int nNonDir = nGlobal - nDir;
    // Later for parallel
    NekDouble vExchange = 0.0;
    // w = A*V(:,nd)
    Array<OneD, NekDouble> w(nGlobal, 0.0);

    LibUtilities::Timer timer;
    timer.Start();
    m_operator.DoNekSysLhsEval(Vsingle1, w, m_DifferenceFlag0);
    timer.Stop();
    timer.AccumulateRegion("NekSysOperators::DoNekSysLhsEval", 10);

    tmp1 = w + nDir;
    tmp2 = w + nDir;
    if (m_NekLinSysLeftPrecon)
    {
        m_operator.DoNekSysPrecon(tmp1, tmp2);
    }

    Vmath::Smul(nNonDir, sqrt(m_prec_factor), tmp2, 1, tmp2, 1);

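    // Orthogonalization below uses the recurrence h(i) = <w, v_i>,
    // w <- w - h(i) v_i. Vmath::Dot2 weighted by m_map gives each rank's
    // contribution to the global inner product, which AllReduce then sums.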
    // Modified Gram-Schmidt
    // Note: the index does not necessarily equal starttem; for example,
    // deep GMRES initially needs numbertem = 0
    numbertem = starttem;
    for (int i = starttem; i < endtem; ++i)
    {
        vExchange =
            Vmath::Dot2(nNonDir, &w[0] + nDir, &V_local[numbertem][0] + nDir,
                        &m_map[0] + nDir);
        m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);

        hsingle[i] = vExchange;

        beta = -1.0 * vExchange;
        Vmath::Svtvp(nNonDir, beta, &V_local[numbertem][0] + nDir, 1,
                     &w[0] + nDir, 1, &w[0] + nDir, 1);
        numbertem = numbertem + 1;
    }
    // end of Modified Gram-Schmidt

    // Calculate the L2 norm and normalize
    vExchange =
        Vmath::Dot2(nNonDir, &w[0] + nDir, &w[0] + nDir, &m_map[0] + nDir);

    m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);

    hsingle[endtem] = sqrt(vExchange);

    alpha = 1.0 / hsingle[endtem];
    Vmath::Smul(nNonDir, alpha, &w[0] + nDir, 1, &Vsingle2[0] + nDir, 1);
}

// QR factorization through Givens rotation
void NekLinSysIterGMRES::DoGivensRotation(const int starttem, const int endtem,
                                          const int nGlobal, const int nDir,
                                          Array<OneD, NekDouble> &c,
                                          Array<OneD, NekDouble> &s,
                                          Array<OneD, NekDouble> &hsingle,
                                          Array<OneD, NekDouble> &eta)
{
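    // Each stored rotation G_i = [c_i, -s_i; s_i, c_i] is first applied to
    // the new Hessenberg column; a new rotation is then chosen to zero the
    // sub-diagonal entry hsingle[endtem], and applying it to eta keeps
    // |eta[endtem]| equal to the current least-squares residual norm.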
    boost::ignore_unused(nGlobal, nDir);
    NekDouble temp_dbl;
    NekDouble dd;
    NekDouble hh;
    int idtem = endtem - 1;
    // starttem and endtem are the beginning and ending indices of the Givens
    // rotation. They usually equal the beginning and ending positions of the
    // Hessenberg matrix, but sometimes starttem changes, e.g. it is initially
    // 0 and becomes nonzero because of a previous Givens rotation; see Yu
    // Pan's User Guide.
    for (int i = starttem; i < idtem; ++i)
    {
        temp_dbl       = c[i] * hsingle[i] - s[i] * hsingle[i + 1];
        hsingle[i + 1] = s[i] * hsingle[i] + c[i] * hsingle[i + 1];
        hsingle[i]     = temp_dbl;
    }
    dd = hsingle[idtem];
    hh = hsingle[endtem];
    if (hh == 0.0)
    {
        c[idtem] = 1.0;
        s[idtem] = 0.0;
    }
    else if (abs(hh) > abs(dd))
    {
        temp_dbl = -dd / hh;
        s[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
        c[idtem] = temp_dbl * s[idtem];
    }
    else
    {
        temp_dbl = -hh / dd;
        c[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
        s[idtem] = temp_dbl * c[idtem];
    }

    hsingle[idtem]  = c[idtem] * hsingle[idtem] - s[idtem] * hsingle[endtem];
    hsingle[endtem] = 0.0;

    temp_dbl    = c[idtem] * eta[idtem] - s[idtem] * eta[endtem];
    eta[endtem] = s[idtem] * eta[idtem] + c[idtem] * eta[endtem];
    eta[idtem]  = temp_dbl;
}

// Backward calculation
// Note: the Hessenberg matrix's rows and columns are swapped because it is
// stored as Array<OneD, Array<OneD, NekDouble>> (each inner array holds one
// column)
void NekLinSysIterGMRES::DoBackward(const int number,
                                    Array<OneD, Array<OneD, NekDouble>> &A,
                                    const Array<OneD, const NekDouble> &b,
                                    Array<OneD, NekDouble> &y)
{
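    // Solves the upper-triangular system R y = g by back substitution,
    // starting from the last computed entry; A[j][i] addresses row i of
    // column j because each inner array holds one Hessenberg column.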
    // number is the count of entries, so the largest C++ index is one smaller
    int maxid = number - 1;
    NekDouble sum;
    y[maxid] = b[maxid] / A[maxid][maxid];

    for (int i = maxid - 1; i > -1; --i)
    {
        sum = b[i];

        for (int j = i + 1; j < number; ++j)
        {
            // i and j are swapped because of the
            // Array<OneD, Array<OneD, NekDouble>> storage
            sum = sum - y[j] * A[j][i];
        }
        y[i] = sum / A[i][i];
    }
}
} // namespace LibUtilities
} // namespace Nektar