Nektar++
NekLinSysIterGMRES.cpp
Go to the documentation of this file.
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 // File: NekLinSysIterGMRES.cpp
4 //
5 // For more information, please see: http://www.nektar.info
6 //
7 // The MIT License
8 //
9 // Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10 // Department of Aeronautics, Imperial College London (UK), and Scientific
11 // Computing and Imaging Institute, University of Utah (USA).
12 //
13 // License for the specific language governing rights and limitations under
14 // Permission is hereby granted, free of charge, to any person obtaining a
15 // copy of this software and associated documentation files (the "Software"),
16 // to deal in the Software without restriction, including without limitation
17 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
18 // and/or sell copies of the Software, and to permit persons to whom the
19 // Software is furnished to do so, subject to the following conditions:
20 //
21 // The above copyright notice and this permission notice shall be included
22 // in all copies or substantial portions of the Software.
23 //
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
25 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30 // DEALINGS IN THE SOFTWARE.
31 //
32 // Description: NekLinSysIterGMRES definition
33 //
34 ///////////////////////////////////////////////////////////////////////////////
35 
38 
39 using namespace std;
40 
41 namespace Nektar
42 {
43 namespace LibUtilities
44 {
45 /**
46  * @class NekLinSysIterGMRES
47  *
48  * Solves a linear system using iterative methods.
49  */
50 string NekLinSysIterGMRES::className =
52  "GMRES", NekLinSysIterGMRES::create, "NekLinSysIterGMRES solver.");
53 
54 NekLinSysIterGMRES::NekLinSysIterGMRES(
56  const LibUtilities::CommSharedPtr &vComm, const int nDimen,
57  const NekSysKey &pKey)
58  : NekLinSysIter(pSession, vComm, nDimen, pKey)
59 {
60  std::vector<std::string> variables(1);
61  variables[0] = pSession->GetVariable(0);
62  string variable = variables[0];
63 
64  pSession->MatchSolverInfo("GMRESLeftPrecon", "True", m_NekLinSysLeftPrecon,
66  pSession->MatchSolverInfo("GMRESRightPrecon", "False", m_NekLinSysRightPrecon,
68 
69  if (pSession->DefinesGlobalSysSolnInfo(variable, "GMRESMaxHessMatBand"))
70  {
71  m_KrylovMaxHessMatBand = boost::lexical_cast<int>(
72  pSession->GetGlobalSysSolnInfo(variable, "GMRESMaxHessMatBand")
73  .c_str());
74  }
75  else
76  {
77  pSession->LoadParameter("GMRESMaxHessMatBand", m_KrylovMaxHessMatBand,
78  m_LinSysMaxStorage + 1);
79  }
80 
83 
84  int GMRESCentralDifference = 0;
85  pSession->LoadParameter("GMRESCentralDifference", GMRESCentralDifference,
86  0);
87 
88  switch (GMRESCentralDifference)
89  {
90  case 1:
91  m_DifferenceFlag0 = true;
92  m_DifferenceFlag1 = false;
93  break;
94  case 2:
95  m_DifferenceFlag0 = true;
96  m_DifferenceFlag1 = true;
97  break;
98 
99  default:
100  m_DifferenceFlag0 = false;
101  m_DifferenceFlag1 = false;
102  break;
103  }
104 }
105 
107 {
109 }
110 
112 {
113 }
114 
115 /**
116  *
117  */
119  const int nGlobal, const Array<OneD, const NekDouble> &pInput,
120  Array<OneD, NekDouble> &pOutput, const int nDir, const NekDouble tol,
121  const NekDouble factor)
122 {
123  boost::ignore_unused(tol);
124 
125  m_tolerance = max(tol, 1.0E-16);
126  m_prec_factor = factor;
127  int niterations = DoGMRES(nGlobal, pInput, pOutput, nDir);
128 
129  return niterations;
130 }
131 
132 /**  
133  * Solve a global linear system using the Gmres 
134  * We solve only for the non-Dirichlet modes. The operator is evaluated  
135  * using an auxiliary function v_DoMatrixMultiply defined by the  
136  * specific solver. Distributed math routines are used to support  
137  * parallel execution of the solver.  
138  *  
139  * @param pInput Input residual of all DOFs.  
140  * @param pOutput Solution vector of all DOFs.  
141  */
142 
143 int NekLinSysIterGMRES::DoGMRES(const int nGlobal,
144  const Array<OneD, const NekDouble> &pInput,
145  Array<OneD, NekDouble> &pOutput, const int nDir)
146 {
148 
150  {
151  NekVector<NekDouble> inGlob(nGlobal, pInput, eWrapper);
152  Set_Rhs_Magnitude(inGlob);
153  }
154 
155  m_rhs_magnitude = 1.0;
156 
157  // Get vector sizes
158  NekDouble eps = 0.0;
159  int nNonDir = nGlobal - nDir;
160 
162 
163  // zero homogeneous out array ready for solution updates
164  // Should not be earlier in case input vector is same as
165  // output and above copy has been peformed
166  Vmath::Zero(nNonDir, tmp = pOutput + nDir, 1);
167 
168  m_totalIterations = 0;
169  m_converged = false;
170 
171  bool restarted = false;
172  bool truncted = false;
173 
174  if (m_KrylovMaxHessMatBand > 0)
175  {
176  truncted = true;
177  }
178 
179  int nwidthcolm = 13;
180 
181  for (int nrestart = 0; nrestart < m_maxrestart; ++nrestart)
182  {
183  eps =
184  DoGmresRestart(restarted, truncted, nGlobal, pInput, pOutput, nDir);
185 
186  if (m_converged)
187  {
188  break;
189  }
190  restarted = true;
191  }
192 
193  if (m_verbose)
194  {
195  Array<OneD, NekDouble> r0(nGlobal, 0.0);
197  Vmath::Svtvp(nNonDir, -1.0, &r0[0] + nDir, 1, &pInput[0] + nDir, 1,
198  &r0[0] + nDir, 1);
199  NekDouble vExchange = Vmath::Dot2(nNonDir, &r0[0] + nDir, &r0[0] + nDir,
200  &m_map[0] + nDir);
201  m_Comm->AllReduce(vExchange, LibUtilities::ReduceSum);
202  NekDouble eps1 = vExchange;
203 
204  if (m_root)
205  {
206  cout << std::scientific << std::setw(nwidthcolm)
207  << std::setprecision(nwidthcolm - 8)
208  << " GMRES iterations made = " << m_totalIterations
209  << " using tolerance of " << m_tolerance
210  << " (error = " << sqrt(eps * m_prec_factor / m_rhs_magnitude)
211  << ")";
212 
213  cout << " WITH (GMRES eps = " << eps << " REAL eps= " << eps1
214  << ")";
215 
216  if (m_converged)
217  {
218  cout << " CONVERGED" << endl;
219  }
220  else
221  {
222  cout << " WARNING: Exceeded maxIt" << endl;
223  }
224  }
225  }
226 
227  if (m_FlagWarnings)
228  {
229  WARNINGL1(m_converged, "GMRES did not converge.");
230  }
231  return m_totalIterations;
232 }
233 
235  const bool restarted, const bool truncted, const int nGlobal,
237  const int nDir)
238 {
239  int nNonDir = nGlobal - nDir;
240 
241  // Allocate array storage of coefficients
242  // Hessenburg matrix
244  for (int i = 0; i < m_LinSysMaxStorage; ++i)
245  {
246  hes[i] = Array<OneD, NekDouble>(m_LinSysMaxStorage + 1, 0.0);
247  }
248  // Hesseburg matrix after rotation
250  for (int i = 0; i < m_LinSysMaxStorage; ++i)
251  {
252  Upper[i] = Array<OneD, NekDouble>(m_LinSysMaxStorage + 1, 0.0);
253  }
254  // Total search directions
256  for (int i = 0; i < m_LinSysMaxStorage + 1; ++i)
257  {
258  V_total[i] = Array<OneD, NekDouble>(nGlobal, 0.0);
259  }
260  // Residual
262  // Givens rotation c
264  // Givens rotation s
266  // Total coefficients, just for check
268  // Residual
269  NekDouble eps;
270  // Search direction order
274  // Temporary variables
275  int idtem;
276  int starttem;
277  int endtem;
278 
279  NekDouble beta, alpha;
280  NekDouble vExchange = 0;
281  // Temporary Array
282  Array<OneD, NekDouble> r0(nGlobal, 0.0);
285  Array<OneD, NekDouble> Vsingle1;
286  Array<OneD, NekDouble> Vsingle2;
287  Array<OneD, NekDouble> hsingle1;
288  Array<OneD, NekDouble> hsingle2;
289 
290  if (restarted)
291  {
292  // This is tmp2=Ax
294 
295  // The first search direction
296  beta = -1.0;
297  // This is r0=b-AX
298  Vmath::Svtvp(nNonDir, beta, &r0[0] + nDir, 1, &pInput[0] + nDir, 1,
299  &r0[0] + nDir, 1);
300  }
301  else
302  {
303  // If not restarted, x0 should be zero
304  Vmath::Vcopy(nNonDir, &pInput[0] + nDir, 1, &r0[0] + nDir, 1);
305  }
306 
308  {
309  tmp1 = r0 + nDir;
310  tmp2 = r0 + nDir;
311  m_operator.DoNekSysPrecon(tmp1, tmp2);
312  }
313 
314  // Norm of (r0)
315  // The m_map tells how to connect
316  vExchange =
317  Vmath::Dot2(nNonDir, &r0[0] + nDir, &r0[0] + nDir, &m_map[0] + nDir);
318  m_Comm->AllReduce(vExchange, LibUtilities::ReduceSum);
319  eps = vExchange;
320 
321  if (!restarted)
322  {
324  {
326  {
327  // Evaluate initial residual error for exit check
328  vExchange = Vmath::Dot2(nNonDir, &pInput[0] + nDir,
329  &pInput[0] + nDir, &m_map[0] + nDir);
330  m_Comm->AllReduce(vExchange, LibUtilities::ReduceSum);
331  m_prec_factor = vExchange / eps;
332  }
333  else
334  {
335  m_prec_factor = 1.0;
336  }
337  }
338  }
339 
340  tmp2 = r0 + nDir;
341  Vmath::Smul(nNonDir, sqrt(m_prec_factor), tmp2, 1, tmp2, 1);
342  eps = eps * m_prec_factor;
343  eta[0] = sqrt(eps);
344 
345  // Give an order for the entries in Hessenburg matrix
346  for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
347  {
348  id[nd] = nd;
349  id_end[nd] = nd + 1;
350  starttem = id_end[nd] - m_KrylovMaxHessMatBand;
351  if (truncted && (starttem) > 0)
352  {
353  id_start[nd] = starttem;
354  }
355  else
356  {
357  id_start[nd] = 0;
358  }
359  }
360 
361  // Normlized by r0 norm V(:,1)=r0/norm(r0)
362  alpha = 1.0 / eta[0];
363  // Scalar multiplication
364  Vmath::Smul(nNonDir, alpha, &r0[0] + nDir, 1, &V_total[0][0] + nDir, 1);
365 
366  // restarted Gmres(m) process
367  int nswp = 0;
369  {
370  Vsingle1 = Array<OneD, NekDouble>(nGlobal, 0.0);
371  }
372 
373  for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
374  {
375  Vsingle2 = V_total[nd + 1];
376  hsingle1 = hes[nd];
377 
379  {
380  tmp1 = V_total[nd] + nDir;
381  tmp2 = Vsingle1 + nDir;
382  m_operator.DoNekSysPrecon(tmp1, tmp2);
383  }
384  else
385  {
386  Vsingle1 = V_total[nd];
387  }
388  // w here is no need to add nDir due to temporary Array
389  idtem = id[nd];
390  starttem = id_start[idtem];
391  endtem = id_end[idtem];
392 
393  DoArnoldi(starttem, endtem, nGlobal, nDir, V_total, Vsingle1, Vsingle2,
394  hsingle1);
395 
396  if (starttem > 0)
397  {
398  starttem = starttem - 1;
399  }
400 
401  hsingle2 = Upper[nd];
402  Vmath::Vcopy(m_LinSysMaxStorage + 1, &hsingle1[0], 1, &hsingle2[0], 1);
403  DoGivensRotation(starttem, endtem, nGlobal, nDir, cs, sn, hsingle2,
404  eta);
405 
406  eps = eta[nd + 1] * eta[nd + 1];
407  // This Gmres merge truncted Gmres to accelerate.
408  // If truncted, cannot jump out because
409  // the last term of eta is not residual
410  if ((!truncted) || (nd < m_KrylovMaxHessMatBand))
411  {
412  // If (eps * m_prec_factor < m_tolerance *
413  // m_tolerance * m_rhs_magnitude )
414  if ((eps < m_tolerance * m_tolerance * m_rhs_magnitude) && nd > 0)
415  {
416  m_converged = true;
417  }
418  NekDouble tolmin = 1.0E-15;
419  if (eps < tolmin * tolmin * m_rhs_magnitude)
420  {
421  m_converged = true;
422  }
423  }
424  nswp++;
426 
427  if (m_converged)
428  {
429  break;
430  }
431  }
432 
433  DoBackward(nswp, Upper, eta, y_total);
434  // calculate output y_total*V_total
435  Array<OneD, NekDouble> solution(nGlobal, 0.0);
436  for (int i = 0; i < nswp; ++i)
437  {
438  beta = y_total[i];
439  Vmath::Svtvp(nNonDir, beta, &V_total[i][0] + nDir, 1,
440  &solution[0] + nDir, 1, &solution[0] + nDir, 1);
441  }
442 
444  {
445  tmp1 = solution + nDir;
446  tmp2 = solution + nDir;
447  m_operator.DoNekSysPrecon(tmp1, tmp2);
448  }
449  Vmath::Vadd(nNonDir, &solution[0] + nDir, 1, &pOutput[0] + nDir, 1,
450  &pOutput[0] + nDir, 1);
451 
452  return eps;
453 }
454 
455 // Arnoldi Subroutine
456 void NekLinSysIterGMRES::DoArnoldi(const int starttem, const int endtem,
457  const int nGlobal, const int nDir,
458  // V_total(:,1:nd)
459  Array<OneD, Array<OneD, NekDouble>> &V_local,
460  // V[nd]
461  Array<OneD, NekDouble> &Vsingle1,
462  // V[nd+1]
463  Array<OneD, NekDouble> &Vsingle2,
464  // h
465  Array<OneD, NekDouble> &hsingle)
466 {
467  // To notice, V_local's order not certainly equal to starttem:endtem
468  // starttem:endtem is the entry position in Hessenburg matrix
469  NekDouble alpha, beta;
470  Array<OneD, NekDouble> tmp1, tmp2;
471  int numbertem;
472  int nNonDir = nGlobal - nDir;
473  // Later for parallel
474  NekDouble vExchange = 0.0;
475  // w=AV(:,nd)
476  Array<OneD, NekDouble> w(nGlobal, 0.0);
477 
479 
480  tmp1 = w + nDir;
481  tmp2 = w + nDir;
483  {
484  m_operator.DoNekSysPrecon(tmp1, tmp2);
485  }
486 
487  Vmath::Smul(nNonDir, sqrt(m_prec_factor), tmp2, 1, tmp2, 1);
488 
489  // Modified Gram-Schmidt
490  // The pointer not certainly equal to starttem.
491  // Like initially, Gmres-deep need to use numbertem=0
492  numbertem = starttem;
493  for (int i = starttem; i < endtem; ++i)
494  {
495  vExchange =
496  Vmath::Dot2(nNonDir, &w[0] + nDir, &V_local[numbertem][0] + nDir,
497  &m_map[0] + nDir);
498  m_Comm->AllReduce(vExchange, LibUtilities::ReduceSum);
499 
500  hsingle[i] = vExchange;
501 
502  beta = -1.0 * vExchange;
503  Vmath::Svtvp(nNonDir, beta, &V_local[numbertem][0] + nDir, 1,
504  &w[0] + nDir, 1, &w[0] + nDir, 1);
505  numbertem = numbertem + 1;
506  }
507  // end of Modified Gram-Schmidt
508 
509  // calculate the L2 norm and normalize
510  vExchange =
511  Vmath::Dot2(nNonDir, &w[0] + nDir, &w[0] + nDir, &m_map[0] + nDir);
512 
513  m_Comm->AllReduce(vExchange, LibUtilities::ReduceSum);
514 
515  hsingle[endtem] = sqrt(vExchange);
516 
517  alpha = 1.0 / hsingle[endtem];
518  Vmath::Smul(nNonDir, alpha, &w[0] + nDir, 1, &Vsingle2[0] + nDir, 1);
519 }
520 
521 // QR factorization through Givens rotation
522 void NekLinSysIterGMRES::DoGivensRotation(const int starttem, const int endtem,
523  const int nGlobal, const int nDir,
526  Array<OneD, NekDouble> &hsingle,
528 {
529  boost::ignore_unused(nGlobal, nDir);
530  NekDouble temp_dbl;
531  NekDouble dd;
532  NekDouble hh;
533  int idtem = endtem - 1;
534  // The starttem and endtem are beginning and ending order of Givens rotation
535  // They usually equal to the beginning position and ending position of
536  // Hessenburg matrix But sometimes starttem will change, like if it is
537  // initial 0 and becomes nonzero because previous Givens rotation See Yu
538  // Pan's User Guide
539  for (int i = starttem; i < idtem; ++i)
540  {
541  temp_dbl = c[i] * hsingle[i] - s[i] * hsingle[i + 1];
542  hsingle[i + 1] = s[i] * hsingle[i] + c[i] * hsingle[i + 1];
543  hsingle[i] = temp_dbl;
544  }
545  dd = hsingle[idtem];
546  hh = hsingle[endtem];
547  if (hh == 0.0)
548  {
549  c[idtem] = 1.0;
550  s[idtem] = 0.0;
551  }
552  else if (abs(hh) > abs(dd))
553  {
554  temp_dbl = -dd / hh;
555  s[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
556  c[idtem] = temp_dbl * s[idtem];
557  }
558  else
559  {
560  temp_dbl = -hh / dd;
561  c[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
562  s[idtem] = temp_dbl * c[idtem];
563  }
564 
565  hsingle[idtem] = c[idtem] * hsingle[idtem] - s[idtem] * hsingle[endtem];
566  hsingle[endtem] = 0.0;
567 
568  temp_dbl = c[idtem] * eta[idtem] - s[idtem] * eta[endtem];
569  eta[endtem] = s[idtem] * eta[idtem] + c[idtem] * eta[endtem];
570  eta[idtem] = temp_dbl;
571 }
572 
573 // Backward calculation
574 // To notice, Hesssenburg matrix's column
575 // and row changes due to use Array<OneD,Array<OneD,NekDouble>>
576 void NekLinSysIterGMRES::DoBackward(const int number,
580 {
581  // Number is the entry number
582  // but C++'s order need to be one smaller
583  int maxid = number - 1;
584  NekDouble sum;
585  y[maxid] = b[maxid] / A[maxid][maxid];
586 
587  for (int i = maxid - 1; i > -1; --i)
588  {
589  sum = b[i];
590 
591  for (int j = i + 1; j < number; ++j)
592  {
593  // i and j changes due to use Array<OneD,Array<OneD,NekDouble>>
594  sum = sum - y[j] * A[j][i];
595  }
596  y[i] = sum / A[i][i];
597  }
598 }
599 } // namespace LibUtilities
600 } // namespace Nektar
#define WARNINGL1(condition, msg)
Definition: ErrorUtil.hpp:251
tKey RegisterCreatorFunction(tKey idKey, CreatorFunction classCreator, std::string pDesc="")
Register a class with the factory.
Definition: NekFactory.hpp:200
void DoArnoldi(const int starttem, const int endtem, const int nGlobal, const int nDir, Array< OneD, Array< OneD, NekDouble >> &V_local, Array< OneD, NekDouble > &Vsingle1, Array< OneD, NekDouble > &Vsingle2, Array< OneD, NekDouble > &hsingle)
int DoGMRES(const int pNumRows, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const int pNumDir)
Actual iterative solve-GMRES.
void DoBackward(const int number, Array< OneD, Array< OneD, NekDouble >> &A, const Array< OneD, const NekDouble > &b, Array< OneD, NekDouble > &y)
virtual int v_SolveSystem(const int nGlobal, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const int nDir, const NekDouble tol, const NekDouble factor)
void DoGivensRotation(const int starttem, const int endtem, const int nGlobal, const int nDir, Array< OneD, NekDouble > &c, Array< OneD, NekDouble > &s, Array< OneD, NekDouble > &hsingle, Array< OneD, NekDouble > &eta)
NekDouble DoGmresRestart(const bool restarted, const bool truncted, const int nGlobal, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const int nDir)
Actual iterative gmres solver for one restart.
NekDouble m_rhs_magnitude
Dot product of rhs to normalise stopping criterion.
void Set_Rhs_Magnitude(const NekVector< NekDouble > &pIn)
Array< OneD, int > m_map
Global to universal unique map.
bool m_root
Root if parallel.
Definition: NekSys.h:289
bool m_verbose
Verbose.
Definition: NekSys.h:291
NekDouble m_tolerance
Tolerance of iterative solver.
Definition: NekSys.h:283
NekSysOperators m_operator
Operators.
Definition: NekSys.h:294
LibUtilities::CommSharedPtr m_Comm
Communicate.
Definition: NekSys.h:285
bool m_converged
Whether the iteration has been converged.
Definition: NekSys.h:287
int m_maxiter
Maximum iterations.
Definition: NekSys.h:281
void DoNekSysPrecon(InArrayType &inarray, OutArrayType &outarray, const bool &flag=false) const
Definition: NekSys.h:142
void DoNekSysLhsEval(InArrayType &inarray, OutArrayType &outarray, const bool &flag=false) const
Definition: NekSys.h:135
std::shared_ptr< SessionReader > SessionReaderSharedPtr
NekLinSysIterFactory & GetNekLinSysIterFactory()
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
Definition: Comm.h:54
static const NekDouble kNekUnsetDouble
The above copyright notice and this permission notice shall be included.
Definition: CoupledSolver.h:1
double NekDouble
void Svtvp(int n, const T alpha, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
svtvp (scalar times vector plus vector): z = alpha*x + y
Definition: Vmath.cpp:565
T Dot2(int n, const T *w, const T *x, const int *y)
vvtvp (vector times vector times vector): z = w*x*y
Definition: Vmath.cpp:1084
void Vadd(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Add vector z = x+y.
Definition: Vmath.cpp:322
void Smul(int n, const T alpha, const T *x, const int incx, T *y, const int incy)
Scalar multiply y = alpha*x.
Definition: Vmath.cpp:225
void Zero(int n, T *x, const int incx)
Zero vector.
Definition: Vmath.cpp:436
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
Definition: Vmath.cpp:1199
scalarT< T > abs(scalarT< T > in)
Definition: scalar.hpp:272
scalarT< T > sqrt(scalarT< T > in)
Definition: scalar.hpp:267