Nektar++
Loading...
Searching...
No Matches
NekLinSysIterGMRES.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: NekLinSysIterGMRES.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// License for the specific language governing rights and limitations under
14// Permission is hereby granted, free of charge, to any person obtaining a
15// copy of this software and associated documentation files (the "Software"),
16// to deal in the Software without restriction, including without limitation
17// the rights to use, copy, modify, merge, publish, distribute, sublicense,
18// and/or sell copies of the Software, and to permit persons to whom the
19// Software is furnished to do so, subject to the following conditions:
20//
21// The above copyright notice and this permission notice shall be included
22// in all copies or substantial portions of the Software.
23//
24// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
25// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30// DEALINGS IN THE SOFTWARE.
31//
32// Description: NekLinSysIterGMRES definition
33//
34///////////////////////////////////////////////////////////////////////////////
35
38
39using namespace std;
40
42{
43/**
44 * @class NekLinSysIterGMRES
45 *
46 * Solves a linear system using iterative methods.
47 */
50 "GMRES", NekLinSysIterGMRES::create, "NekLinSysIterGMRES solver.");
51
54 const LibUtilities::CommSharedPtr &vRowComm, const int nDimen,
55 const NekSysKey &pKey)
56 : NekLinSysIter(pSession, vRowComm, nDimen, pKey)
57{
60
62
66
68
69 // Allocate array storage of coefficients
70 // Hessenburg matrix
72 for (size_t nd = 0; nd < m_LinSysMaxStorage; nd++)
73 {
75 }
76 // Hesseburg matrix after rotation
78 for (size_t nd = 0; nd < m_LinSysMaxStorage; nd++)
79 {
81 }
82 // Total search directions
84}
85
90
91/**
92 *
93 */
95 const int nGlobal, const Array<OneD, const NekDouble> &pInput,
96 Array<OneD, NekDouble> &pOutput, const int nDir)
97{
98 int niterations = DoGMRES(nGlobal, pInput, pOutput, nDir);
99
100 return niterations;
101}
102
103void NekLinSysIterGMRES::v_DoIterate(const int nGlobal,
104 const Array<OneD, NekDouble> &rhs,
105 Array<OneD, NekDouble> &x, const int nDir,
106 NekDouble &err, int &iter)
107{
108 iter = DoGMRES(nGlobal, rhs, x, nDir);
109 err = m_finalError;
110}
111
112/**  
113 * Solve a global linear system using the Gmres 
114 * We solve only for the non-Dirichlet modes. The operator is evaluated  
115 * using an auxiliary function v_DoMatrixMultiply defined by the  
116 * specific solver. Distributed math routines are used to support  
117 * parallel execution of the solver.  
118 *  
119 * @param pInput Input residual of all DOFs.  
120 * @param pOutput Solution vector of all DOFs.  
121 */
122
123int NekLinSysIterGMRES::DoGMRES(const int nGlobal,
124 const Array<OneD, const NekDouble> &pInput,
125 Array<OneD, NekDouble> &pOutput, const int nDir)
126{
128
130 {
131 Set_Rhs_Magnitude(pInput);
132 }
133
134 // Get vector sizes
135 int nNonDir = nGlobal - nDir;
136 NekDouble eps = 0.0;
137
139
140 // zero homogeneous out array ready for solution updates
141 // Should not be earlier in case input vector is same as
142 // output and above copy has been peformed
143 Vmath::Zero(nNonDir, tmp = pOutput + nDir, 1);
144
146 m_converged = false;
147
148 bool restarted = false;
149 bool truncted = false;
150
152 {
153 truncted = true;
154 }
155
156 for (int nrestart = 0; nrestart < m_maxrestart; ++nrestart)
157 {
158 eps =
159 DoGmresRestart(restarted, truncted, nGlobal, pInput, pOutput, nDir);
160
161 if (m_converged)
162 {
163 break;
164 }
165 restarted = true;
166 }
167
168 // Verbose print error, iteration count, tolerance, ..
169 if (m_verbose)
170 {
171 // Compute "real eps" based on solution x as r = Ax - b
172 Array<OneD, NekDouble> r0(nGlobal, 0.0);
174 Vmath::Vsub(nNonDir, &pInput[0] + nDir, 1, &r0[0] + nDir, 1,
175 &r0[0] + nDir, 1);
176 NekDouble vExchange = Vmath::Dot2(nNonDir, &r0[0] + nDir, &r0[0] + nDir,
177 &m_map[0] + nDir);
178 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
179 NekDouble eps1 = vExchange;
180
181 if (m_root)
182 {
183 cout << "GMRES iterations made = " << m_totalIterations
184 << " using tolerance of " << m_NekLinSysTolerance
185 << " (error = " << m_finalError
186 << ", rhs_mag = " << sqrt(m_rhs_magnitude)
187 << " with (GMRES eps = " << eps << " REAL eps= " << eps1
188 << ")";
189
190 // Append appropriate message when finalising GMRES
191 if (m_converged)
192 {
193 cout << " CONVERGED" << endl;
194 }
195 else
196 {
197 cout << " WARNING: Exceeded maxIt" << endl;
198 }
199 }
200 }
201
202 if (m_FlagWarnings)
203 {
204 WARNINGL1(m_converged, "GMRES did not converge.");
205 }
206 return m_totalIterations;
207}
208
210 const bool restarted, const bool truncted, const int nGlobal,
212 const int nDir)
213{
214 int nNonDir = nGlobal - nDir;
215
216 // Allocate array storage of coefficients
217 // Residual
219 // Givens rotation c
221 // Givens rotation s
223 // Total coefficients, just for check
225 // Search direction order
229 // Temporary variables
230 int idtem;
231 int starttem;
232 int endtem;
233
234 NekDouble eps;
235 NekDouble beta, alpha;
236 NekDouble vExchange = 0;
237 // Temporary Array
238 Array<OneD, NekDouble> w(nGlobal, 0.0);
239 Array<OneD, NekDouble> wk(nGlobal, 0.0);
240 Array<OneD, NekDouble> r0(nGlobal, 0.0);
242 Array<OneD, NekDouble> Vsingle1;
243 Array<OneD, NekDouble> Vsingle2;
244 Array<OneD, NekDouble> hsingle1;
245 Array<OneD, NekDouble> hsingle2;
246
247 if (restarted)
248 {
249 // This is A*x
251
252 // The first search direction
253 beta = -1.0;
254
255 // This is r0 = b-A*x
256 Vmath::Svtvp(nNonDir, beta, &r0[0] + nDir, 1, &pInput[0] + nDir, 1,
257 &r0[0] + nDir, 1);
258 }
259 else
260 {
261 // This is r0 = b, x = x0 assumed to be zero
262 Vmath::Vcopy(nNonDir, &pInput[0] + nDir, 1, &r0[0] + nDir, 1);
263 }
264
266 {
267 m_operator.DoNekSysPrecon(r0 + nDir, tmp = r0 + nDir);
268 }
269
270 // Norm of (r0)
271 // The m_map tells how to connect
272 vExchange =
273 Vmath::Dot2(nNonDir, &r0[0] + nDir, &r0[0] + nDir, &m_map[0] + nDir);
274 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
275 eps = vExchange;
276
277 // Detect zero input array
278 // Causes Arnoldi to breakdown, hence stop here
280 {
281 m_converged = true;
283 {
284 m_prec_factor = 1.0;
285 }
286 return eps;
287 }
288
289 if (!restarted)
290 {
293 {
294 // Evaluate initial residual error for exit check
295 vExchange = Vmath::Dot2(nNonDir, &pInput[0] + nDir,
296 &pInput[0] + nDir, &m_map[0] + nDir);
297 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
298 m_prec_factor = vExchange / eps;
299 }
300 else
301 {
302 m_prec_factor = 1.0;
303 }
304 }
305
306 Vmath::Smul(nNonDir, sqrt(m_prec_factor), r0 + nDir, 1, tmp = r0 + nDir, 1);
307 eps = eps * m_prec_factor;
308 eta[0] = sqrt(eps);
309
310 // Give an order for the entries in Hessenburg matrix
311 for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
312 {
313 id[nd] = nd;
314 id_end[nd] = nd + 1;
315 starttem = id_end[nd] - m_KrylovMaxHessMatBand;
316 if (truncted && (starttem) > 0)
317 {
318 id_start[nd] = starttem;
319 }
320 else
321 {
322 id_start[nd] = 0;
323 }
324 }
325
326 // Normlized by r0 norm V(:,1)=r0/norm(r0)
327 alpha = 1.0 / eta[0];
328
329 // Scalar multiplication
330 if (m_V_total[0].size() == 0)
331 {
332 m_V_total[0] = Array<OneD, NekDouble>(nGlobal, 0.0);
333 }
334 Vmath::Smul(nNonDir, alpha, &r0[0] + nDir, 1, &m_V_total[0][0] + nDir, 1);
335
336 // Restarted Gmres(m) process
338 {
339 Vsingle1 = Array<OneD, NekDouble>(nGlobal, 0.0);
340 }
341
342 int nswp = 0;
343 for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
344 {
345 if (m_V_total[nd + 1].size() == 0)
346 {
347 m_V_total[nd + 1] = Array<OneD, NekDouble>(nGlobal, 0.0);
348 }
349 Vmath::Zero(nGlobal, m_V_total[nd + 1], 1);
351 Vsingle2 = m_V_total[nd + 1];
352 hsingle1 = m_hes[nd];
353
355 {
357 tmp = Vsingle1 + nDir);
358 }
359 else
360 {
361 Vsingle1 = m_V_total[nd];
362 }
363
364 // w here is no need to add nDir due to temporary Array
365 idtem = id[nd];
366 starttem = id_start[idtem];
367 endtem = id_end[idtem];
368
369 DoArnoldi(starttem, endtem, nGlobal, nDir, w, Vsingle1, Vsingle2,
370 hsingle1);
371
372 if (starttem > 0)
373 {
374 starttem = starttem - 1;
375 }
376
377 hsingle2 = m_Upper[nd];
378 Vmath::Vcopy(m_LinSysMaxStorage + 1, &hsingle1[0], 1, &hsingle2[0], 1);
379 DoGivensRotation(starttem, endtem, nGlobal, nDir, cs, sn, hsingle2,
380 eta);
381
382 eps = eta[nd + 1] * eta[nd + 1];
383
384 // This Gmres merge truncted Gmres to accelerate.
385 // If truncted, cannot jump out because
386 // the last term of eta is not residual
387 if ((!truncted) || (nd < m_KrylovMaxHessMatBand))
388 {
391 nd > 0)
392 {
393 m_converged = true;
394 }
395 }
396
397 nswp++;
399
400 if (m_converged)
401 {
402 break;
403 }
404 }
405
406 DoBackward(nswp, m_Upper, eta, y_total);
407
408 // Calculate output V_total * y_total.
409 Array<OneD, NekDouble> solution(nNonDir, 0.0);
410 for (int i = 0; i < nswp; ++i)
411 {
412 Vmath::Svtvp(nNonDir, y_total[i], &m_V_total[i][0] + nDir, 1,
413 solution.data(), 1, solution.data(), 1);
414 }
415
417 {
418 m_operator.DoNekSysPrecon(solution, solution);
419 }
420
421 // Update output.
422 Vmath::Vadd(nNonDir, solution.data(), 1, &pOutput[0] + nDir, 1,
423 &pOutput[0] + nDir, 1);
424
425 return eps;
426}
427
428// Arnoldi Subroutine
429void NekLinSysIterGMRES::DoArnoldi(const int starttem, const int endtem,
430 const int nGlobal, const int nDir,
432 // V[nd]
433 Array<OneD, NekDouble> &Vsingle1,
434 // V[nd+1]
435 Array<OneD, NekDouble> &Vsingle2,
436 // h
437 Array<OneD, NekDouble> &hsingle)
438{
439 NekDouble alpha, beta, vExchange = 0.0;
441 int nNonDir = nGlobal - nDir;
443 timer.Start();
445 timer.Stop();
446 timer.AccumulateRegion("NekSysOperators::DoNekSysLhsEval", 10);
447
449 {
450 m_operator.DoNekSysPrecon(w + nDir, tmp = w + nDir);
451 }
452
453 Vmath::Smul(nNonDir, sqrt(m_prec_factor), w + nDir, 1, tmp = w + nDir, 1);
454
455 // Modified Gram-Schmidt
456 for (int i = starttem; i < endtem; ++i)
457 {
458 vExchange = Vmath::Dot2(nNonDir, &w[0] + nDir, &m_V_total[i][0] + nDir,
459 &m_map[0] + nDir);
460 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
461
462 hsingle[i] = vExchange;
463
464 beta = -1.0 * vExchange;
465 Vmath::Svtvp(nNonDir, beta, &m_V_total[i][0] + nDir, 1, &w[0] + nDir, 1,
466 &w[0] + nDir, 1);
467 }
468 // end of Modified Gram-Schmidt
469
470 // calculate the L2 norm and normalize
471 vExchange =
472 Vmath::Dot2(nNonDir, &w[0] + nDir, &w[0] + nDir, &m_map[0] + nDir);
473 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
474
475 hsingle[endtem] = sqrt(vExchange);
476
477 alpha = 1.0 / hsingle[endtem];
478 Vmath::Smul(nNonDir, alpha, &w[0] + nDir, 1, &Vsingle2[0] + nDir, 1);
479}
480
481// QR factorization through Givens rotation
482void NekLinSysIterGMRES::DoGivensRotation(const int starttem, const int endtem,
483 [[maybe_unused]] const int nGlobal,
484 [[maybe_unused]] const int nDir,
487 Array<OneD, NekDouble> &hsingle,
489{
490 NekDouble temp_dbl;
491 NekDouble dd;
492 NekDouble hh;
493 int idtem = endtem - 1;
494 // The starttem and endtem are beginning and ending order of Givens rotation
495 // They usually equal to the beginning position and ending position of
496 // Hessenburg matrix But sometimes starttem will change, like if it is
497 // initial 0 and becomes nonzero because previous Givens rotation See Yu
498 // Pan's User Guide
499 for (int i = starttem; i < idtem; ++i)
500 {
501 temp_dbl = c[i] * hsingle[i] - s[i] * hsingle[i + 1];
502 hsingle[i + 1] = s[i] * hsingle[i] + c[i] * hsingle[i + 1];
503 hsingle[i] = temp_dbl;
504 }
505 dd = hsingle[idtem];
506 hh = hsingle[endtem];
507 if (hh == 0.0)
508 {
509 c[idtem] = 1.0;
510 s[idtem] = 0.0;
511 }
512 else if (abs(hh) > abs(dd))
513 {
514 temp_dbl = -dd / hh;
515 s[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
516 c[idtem] = temp_dbl * s[idtem];
517 }
518 else
519 {
520 temp_dbl = -hh / dd;
521 c[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
522 s[idtem] = temp_dbl * c[idtem];
523 }
524
525 hsingle[idtem] = c[idtem] * hsingle[idtem] - s[idtem] * hsingle[endtem];
526 hsingle[endtem] = 0.0;
527
528 temp_dbl = c[idtem] * eta[idtem] - s[idtem] * eta[endtem];
529 eta[endtem] = s[idtem] * eta[idtem] + c[idtem] * eta[endtem];
530 eta[idtem] = temp_dbl;
531}
532
533// Backward calculation
534// To notice, Hesssenburg matrix's column
535// and row changes due to use Array<OneD,Array<OneD,NekDouble>>
536void NekLinSysIterGMRES::DoBackward(const int number,
540{
541 // Number is the entry number
542 // but C++'s order need to be one smaller
543 int maxid = number - 1;
544 NekDouble sum;
545 y[maxid] = b[maxid] / A[maxid][maxid];
546 for (int i = maxid - 1; i > -1; --i)
547 {
548 sum = b[i];
549 for (int j = i + 1; j < number; ++j)
550 {
551 // i and j changes due to use Array<OneD,Array<OneD,NekDouble>>
552 sum -= y[j] * A[j][i];
553 }
554 y[i] = sum / A[i][i];
555 }
556}
557} // namespace Nektar::LibUtilities
#define WARNINGL1(condition, msg)
tKey RegisterCreatorFunction(tKey idKey, CreatorFunction classCreator, std::string pDesc="")
Register a class with the factory.
int v_SolveSystem(const int nGlobal, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const int nDir) override
NekLinSysIterGMRES(const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vRowComm, const int nDimen, const NekSysKey &pKey=NekSysKey())
int DoGMRES(const int pNumRows, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const int pNumDir)
Actual iterative solve-GMRES.
void DoBackward(const int number, Array< OneD, Array< OneD, NekDouble > > &A, const Array< OneD, const NekDouble > &b, Array< OneD, NekDouble > &y)
Array< OneD, Array< OneD, NekDouble > > m_V_total
Array< OneD, Array< OneD, NekDouble > > m_Upper
Array< OneD, Array< OneD, NekDouble > > m_hes
void DoGivensRotation(const int starttem, const int endtem, const int nGlobal, const int nDir, Array< OneD, NekDouble > &c, Array< OneD, NekDouble > &s, Array< OneD, NekDouble > &hsingle, Array< OneD, NekDouble > &eta)
NekDouble DoGmresRestart(const bool restarted, const bool truncted, const int nGlobal, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const int nDir)
Actual iterative gmres solver for one restart.
void DoArnoldi(const int starttem, const int endtem, const int nGlobal, const int nDir, Array< OneD, NekDouble > &w, Array< OneD, NekDouble > &Vsingle1, Array< OneD, NekDouble > &Vsingle2, Array< OneD, NekDouble > &hsingle)
static NekLinSysIterSharedPtr create(const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vRowComm, const int nDimen, const NekSysKey &pKey)
void v_DoIterate(const int nGlobal, const Array< OneD, NekDouble > &rhs, Array< OneD, NekDouble > &x, const int nDir, NekDouble &err, int &iter) override
void Set_Rhs_Magnitude(const Array< OneD, NekDouble > &pIn)
Array< OneD, int > m_map
Global to universal unique map.
LibUtilities::CommSharedPtr m_rowComm
Definition NekSys.h:299
NekSysOperators m_operator
Definition NekSys.h:306
void DoNekSysPrecon(InArrayType &inarray, OutArrayType &outarray, const bool &flag=false) const
Definition NekSys.h:155
void DoNekSysLhsEval(InArrayType &inarray, OutArrayType &outarray, const bool &flag=false) const
Definition NekSys.h:148
void AccumulateRegion(std::string, int iolevel=0)
Accumulate elapsed time for a region.
Definition Timer.cpp:70
std::shared_ptr< SessionReader > SessionReaderSharedPtr
NekLinSysIterFactory & GetNekLinSysIterFactory()
@ beta
Gauss Radau pinned at x=-1,.
Definition PointsType.h:59
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
Definition Comm.h:55
static const NekDouble kNekUnsetDouble
void Svtvp(int n, const T alpha, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Svtvp (scalar times vector plus vector): z = alpha*x + y.
Definition Vmath.hpp:396
T Dot2(int n, const T *w, const T *x, const int *y)
dot product
Definition Vmath.hpp:790
void Vadd(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Add vector z = x+y.
Definition Vmath.hpp:180
void Smul(int n, const T alpha, const T *x, const int incx, T *y, const int incy)
Scalar multiply y = alpha*x.
Definition Vmath.hpp:100
void Zero(int n, T *x, const int incx)
Zero vector.
Definition Vmath.hpp:273
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
Definition Vmath.hpp:825
void Vsub(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Subtract vector z = x-y.
Definition Vmath.hpp:220
STL namespace.
scalarT< T > abs(scalarT< T > in)
Definition scalar.hpp:295
scalarT< T > min(scalarT< T > lhs, scalarT< T > rhs)
Definition scalar.hpp:300
scalarT< T > sqrt(scalarT< T > in)
Definition scalar.hpp:290