Nektar++
NekLinSysIterGMRES.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: NekLinSysIterGMRES.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// License for the specific language governing rights and limitations under
14// Permission is hereby granted, free of charge, to any person obtaining a
15// copy of this software and associated documentation files (the "Software"),
16// to deal in the Software without restriction, including without limitation
17// the rights to use, copy, modify, merge, publish, distribute, sublicense,
18// and/or sell copies of the Software, and to permit persons to whom the
19// Software is furnished to do so, subject to the following conditions:
20//
21// The above copyright notice and this permission notice shall be included
22// in all copies or substantial portions of the Software.
23//
24// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
25// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30// DEALINGS IN THE SOFTWARE.
31//
32// Description: NekLinSysIterGMRES definition
33//
34///////////////////////////////////////////////////////////////////////////////
35
38
39using namespace std;
40
42{
43/**
44 * @class NekLinSysIterGMRES
45 *
46 * Solves a linear system using iterative methods.
47 */
50 "GMRES", NekLinSysIterGMRES::create, "NekLinSysIterGMRES solver.");
51
54 const LibUtilities::CommSharedPtr &vRowComm, const int nDimen,
55 const NekSysKey &pKey)
56 : NekLinSysIter(pSession, vRowComm, nDimen, pKey)
57{
60
62
66
68
69 // Allocate array storage of coefficients
70 // Hessenburg matrix
72 for (size_t nd = 0; nd < m_LinSysMaxStorage; nd++)
73 {
75 }
76 // Hesseburg matrix after rotation
78 for (size_t nd = 0; nd < m_LinSysMaxStorage; nd++)
79 {
81 }
82 // Total search directions
84}
85
87{
89}
90
91/**
92 *
93 */
95 const int nGlobal, const Array<OneD, const NekDouble> &pInput,
96 Array<OneD, NekDouble> &pOutput, const int nDir)
97{
98 int niterations = DoGMRES(nGlobal, pInput, pOutput, nDir);
99
100 return niterations;
101}
102
103/**  
104 * Solve a global linear system using the Gmres 
105 * We solve only for the non-Dirichlet modes. The operator is evaluated  
106 * using an auxiliary function v_DoMatrixMultiply defined by the  
107 * specific solver. Distributed math routines are used to support  
108 * parallel execution of the solver.  
109 *  
110 * @param pInput Input residual of all DOFs.  
111 * @param pOutput Solution vector of all DOFs.  
112 */
113
114int NekLinSysIterGMRES::DoGMRES(const int nGlobal,
115 const Array<OneD, const NekDouble> &pInput,
116 Array<OneD, NekDouble> &pOutput, const int nDir)
117{
119
121 {
122 Set_Rhs_Magnitude(pInput);
123 }
124
125 // Get vector sizes
126 NekDouble eps = 0.0;
127 int nNonDir = nGlobal - nDir;
128
130
131 // zero homogeneous out array ready for solution updates
132 // Should not be earlier in case input vector is same as
133 // output and above copy has been peformed
134 Vmath::Zero(nNonDir, tmp = pOutput + nDir, 1);
135
137 m_converged = false;
138
139 bool restarted = false;
140 bool truncted = false;
141
143 {
144 truncted = true;
145 }
146
147 for (int nrestart = 0; nrestart < m_maxrestart; ++nrestart)
148 {
149 eps =
150 DoGmresRestart(restarted, truncted, nGlobal, pInput, pOutput, nDir);
151
152 if (m_converged)
153 {
154 break;
155 }
156 restarted = true;
157 }
158
159 if (m_verbose)
160 {
161 Array<OneD, NekDouble> r0(nGlobal, 0.0);
163 Vmath::Vsub(nNonDir, &pInput[0] + nDir, 1, &r0[0] + nDir, 1,
164 &r0[0] + nDir, 1);
165 NekDouble vExchange = Vmath::Dot2(nNonDir, &r0[0] + nDir, &r0[0] + nDir,
166 &m_map[0] + nDir);
167 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
168 NekDouble eps1 = vExchange;
169
170 if (m_root)
171 {
172 int nwidthcolm = 13;
173
174 cout << std::scientific << std::setw(nwidthcolm)
175 << std::setprecision(nwidthcolm - 8)
176 << " GMRES iterations made = " << m_totalIterations
177 << " using tolerance of " << m_NekLinSysTolerance
178 << " (error = " << sqrt(eps * m_prec_factor / m_rhs_magnitude)
179 << ")";
180
181 cout << " WITH (GMRES eps = " << eps << " REAL eps= " << eps1
182 << ")";
183
184 if (m_converged)
185 {
186 cout << " CONVERGED" << endl;
187 }
188 else
189 {
190 cout << " WARNING: Exceeded maxIt" << endl;
191 }
192 }
193 }
194
195 if (m_FlagWarnings)
196 {
197 WARNINGL1(m_converged, "GMRES did not converge.");
198 }
199 return m_totalIterations;
200}
201
203 const bool restarted, const bool truncted, const int nGlobal,
205 const int nDir)
206{
207 int nNonDir = nGlobal - nDir;
208
209 // Allocate array storage of coefficients
210 // Residual
212 // Givens rotation c
214 // Givens rotation s
216 // Total coefficients, just for check
218 // Search direction order
222 // Temporary variables
223 int idtem;
224 int starttem;
225 int endtem;
226
227 NekDouble eps;
228 NekDouble beta, alpha;
229 NekDouble vExchange = 0;
230 // Temporary Array
231 Array<OneD, NekDouble> w(nGlobal, 0.0);
232 Array<OneD, NekDouble> wk(nGlobal, 0.0);
233 Array<OneD, NekDouble> r0(nGlobal, 0.0);
235 Array<OneD, NekDouble> Vsingle1;
236 Array<OneD, NekDouble> Vsingle2;
237 Array<OneD, NekDouble> hsingle1;
238 Array<OneD, NekDouble> hsingle2;
239
240 if (restarted)
241 {
242 // This is A*x
244
245 // The first search direction
246 beta = -1.0;
247
248 // This is r0 = b-A*x
249 Vmath::Svtvp(nNonDir, beta, &r0[0] + nDir, 1, &pInput[0] + nDir, 1,
250 &r0[0] + nDir, 1);
251 }
252 else
253 {
254 // This is r0 = b, x = x0 assumed to be zero
255 Vmath::Vcopy(nNonDir, &pInput[0] + nDir, 1, &r0[0] + nDir, 1);
256 }
257
259 {
260 m_operator.DoNekSysPrecon(r0 + nDir, tmp = r0 + nDir);
261 }
262
263 // Norm of (r0)
264 // The m_map tells how to connect
265 vExchange =
266 Vmath::Dot2(nNonDir, &r0[0] + nDir, &r0[0] + nDir, &m_map[0] + nDir);
267 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
268 eps = vExchange;
269
270 // Detect zero input array
271 // Causes Arnoldi to breakdown, hence stop here
273 {
274 m_converged = true;
276 {
277 m_prec_factor = 1.0;
278 }
279 return eps;
280 }
281
282 if (!restarted)
283 {
286 {
287 // Evaluate initial residual error for exit check
288 vExchange = Vmath::Dot2(nNonDir, &pInput[0] + nDir,
289 &pInput[0] + nDir, &m_map[0] + nDir);
290 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
291 m_prec_factor = vExchange / eps;
292 }
293 else
294 {
295 m_prec_factor = 1.0;
296 }
297 }
298
299 Vmath::Smul(nNonDir, sqrt(m_prec_factor), r0 + nDir, 1, tmp = r0 + nDir, 1);
300 eps = eps * m_prec_factor;
301 eta[0] = sqrt(eps);
302
303 // Give an order for the entries in Hessenburg matrix
304 for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
305 {
306 id[nd] = nd;
307 id_end[nd] = nd + 1;
308 starttem = id_end[nd] - m_KrylovMaxHessMatBand;
309 if (truncted && (starttem) > 0)
310 {
311 id_start[nd] = starttem;
312 }
313 else
314 {
315 id_start[nd] = 0;
316 }
317 }
318
319 // Normlized by r0 norm V(:,1)=r0/norm(r0)
320 alpha = 1.0 / eta[0];
321
322 // Scalar multiplication
323 if (m_V_total[0].size() == 0)
324 {
325 m_V_total[0] = Array<OneD, NekDouble>(nGlobal, 0.0);
326 }
327 Vmath::Smul(nNonDir, alpha, &r0[0] + nDir, 1, &m_V_total[0][0] + nDir, 1);
328
329 // Restarted Gmres(m) process
331 {
332 Vsingle1 = Array<OneD, NekDouble>(nGlobal, 0.0);
333 }
334
335 int nswp = 0;
336 for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
337 {
338 if (m_V_total[nd + 1].size() == 0)
339 {
340 m_V_total[nd + 1] = Array<OneD, NekDouble>(nGlobal, 0.0);
341 }
342 Vmath::Zero(nGlobal, m_V_total[nd + 1], 1);
344 Vsingle2 = m_V_total[nd + 1];
345 hsingle1 = m_hes[nd];
346
348 {
350 tmp = Vsingle1 + nDir);
351 }
352 else
353 {
354 Vsingle1 = m_V_total[nd];
355 }
356
357 // w here is no need to add nDir due to temporary Array
358 idtem = id[nd];
359 starttem = id_start[idtem];
360 endtem = id_end[idtem];
361
362 DoArnoldi(starttem, endtem, nGlobal, nDir, w, Vsingle1, Vsingle2,
363 hsingle1);
364
365 if (starttem > 0)
366 {
367 starttem = starttem - 1;
368 }
369
370 hsingle2 = m_Upper[nd];
371 Vmath::Vcopy(m_LinSysMaxStorage + 1, &hsingle1[0], 1, &hsingle2[0], 1);
372 DoGivensRotation(starttem, endtem, nGlobal, nDir, cs, sn, hsingle2,
373 eta);
374
375 eps = eta[nd + 1] * eta[nd + 1];
376
377 // This Gmres merge truncted Gmres to accelerate.
378 // If truncted, cannot jump out because
379 // the last term of eta is not residual
380 if ((!truncted) || (nd < m_KrylovMaxHessMatBand))
381 {
384 nd > 0)
385 {
386 m_converged = true;
387 }
388 }
389 nswp++;
391
392 if (m_converged)
393 {
394 break;
395 }
396 }
397
398 DoBackward(nswp, m_Upper, eta, y_total);
399
400 // Calculate output V_total * y_total.
401 Array<OneD, NekDouble> solution(nNonDir, 0.0);
402 for (int i = 0; i < nswp; ++i)
403 {
404 Vmath::Svtvp(nNonDir, y_total[i], &m_V_total[i][0] + nDir, 1,
405 solution.get(), 1, solution.get(), 1);
406 }
407
409 {
410 m_operator.DoNekSysPrecon(solution, solution);
411 }
412
413 // Update output.
414 Vmath::Vadd(nNonDir, solution.get(), 1, &pOutput[0] + nDir, 1,
415 &pOutput[0] + nDir, 1);
416
417 return eps;
418}
419
420// Arnoldi Subroutine
421void NekLinSysIterGMRES::DoArnoldi(const int starttem, const int endtem,
422 const int nGlobal, const int nDir,
424 // V[nd]
425 Array<OneD, NekDouble> &Vsingle1,
426 // V[nd+1]
427 Array<OneD, NekDouble> &Vsingle2,
428 // h
429 Array<OneD, NekDouble> &hsingle)
430{
431 NekDouble alpha, beta, vExchange = 0.0;
433 int nNonDir = nGlobal - nDir;
435 timer.Start();
437 timer.Stop();
438 timer.AccumulateRegion("NekSysOperators::DoNekSysLhsEval", 10);
439
441 {
442 m_operator.DoNekSysPrecon(w + nDir, tmp = w + nDir);
443 }
444
445 Vmath::Smul(nNonDir, sqrt(m_prec_factor), w + nDir, 1, tmp = w + nDir, 1);
446
447 // Modified Gram-Schmidt
448 for (int i = starttem; i < endtem; ++i)
449 {
450 vExchange = Vmath::Dot2(nNonDir, &w[0] + nDir, &m_V_total[i][0] + nDir,
451 &m_map[0] + nDir);
452 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
453
454 hsingle[i] = vExchange;
455
456 beta = -1.0 * vExchange;
457 Vmath::Svtvp(nNonDir, beta, &m_V_total[i][0] + nDir, 1, &w[0] + nDir, 1,
458 &w[0] + nDir, 1);
459 }
460 // end of Modified Gram-Schmidt
461
462 // calculate the L2 norm and normalize
463 vExchange =
464 Vmath::Dot2(nNonDir, &w[0] + nDir, &w[0] + nDir, &m_map[0] + nDir);
465 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
466
467 hsingle[endtem] = sqrt(vExchange);
468
469 alpha = 1.0 / hsingle[endtem];
470 Vmath::Smul(nNonDir, alpha, &w[0] + nDir, 1, &Vsingle2[0] + nDir, 1);
471}
472
473// QR factorization through Givens rotation
474void NekLinSysIterGMRES::DoGivensRotation(const int starttem, const int endtem,
475 [[maybe_unused]] const int nGlobal,
476 [[maybe_unused]] const int nDir,
479 Array<OneD, NekDouble> &hsingle,
481{
482 NekDouble temp_dbl;
483 NekDouble dd;
484 NekDouble hh;
485 int idtem = endtem - 1;
486 // The starttem and endtem are beginning and ending order of Givens rotation
487 // They usually equal to the beginning position and ending position of
488 // Hessenburg matrix But sometimes starttem will change, like if it is
489 // initial 0 and becomes nonzero because previous Givens rotation See Yu
490 // Pan's User Guide
491 for (int i = starttem; i < idtem; ++i)
492 {
493 temp_dbl = c[i] * hsingle[i] - s[i] * hsingle[i + 1];
494 hsingle[i + 1] = s[i] * hsingle[i] + c[i] * hsingle[i + 1];
495 hsingle[i] = temp_dbl;
496 }
497 dd = hsingle[idtem];
498 hh = hsingle[endtem];
499 if (hh == 0.0)
500 {
501 c[idtem] = 1.0;
502 s[idtem] = 0.0;
503 }
504 else if (abs(hh) > abs(dd))
505 {
506 temp_dbl = -dd / hh;
507 s[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
508 c[idtem] = temp_dbl * s[idtem];
509 }
510 else
511 {
512 temp_dbl = -hh / dd;
513 c[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
514 s[idtem] = temp_dbl * c[idtem];
515 }
516
517 hsingle[idtem] = c[idtem] * hsingle[idtem] - s[idtem] * hsingle[endtem];
518 hsingle[endtem] = 0.0;
519
520 temp_dbl = c[idtem] * eta[idtem] - s[idtem] * eta[endtem];
521 eta[endtem] = s[idtem] * eta[idtem] + c[idtem] * eta[endtem];
522 eta[idtem] = temp_dbl;
523}
524
525// Backward calculation
526// To notice, Hesssenburg matrix's column
527// and row changes due to use Array<OneD,Array<OneD,NekDouble>>
528void NekLinSysIterGMRES::DoBackward(const int number,
532{
533 // Number is the entry number
534 // but C++'s order need to be one smaller
535 int maxid = number - 1;
536 NekDouble sum;
537 y[maxid] = b[maxid] / A[maxid][maxid];
538 for (int i = maxid - 1; i > -1; --i)
539 {
540 sum = b[i];
541 for (int j = i + 1; j < number; ++j)
542 {
543 // i and j changes due to use Array<OneD,Array<OneD,NekDouble>>
544 sum -= y[j] * A[j][i];
545 }
546 y[i] = sum / A[i][i];
547 }
548}
549} // namespace Nektar::LibUtilities
#define WARNINGL1(condition, msg)
Definition: ErrorUtil.hpp:243
tKey RegisterCreatorFunction(tKey idKey, CreatorFunction classCreator, std::string pDesc="")
Register a class with the factory.
int v_SolveSystem(const int nGlobal, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const int nDir) override
NekLinSysIterGMRES(const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vRowComm, const int nDimen, const NekSysKey &pKey=NekSysKey())
int DoGMRES(const int pNumRows, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const int pNumDir)
Actual iterative solve-GMRES.
void DoBackward(const int number, Array< OneD, Array< OneD, NekDouble > > &A, const Array< OneD, const NekDouble > &b, Array< OneD, NekDouble > &y)
Array< OneD, Array< OneD, NekDouble > > m_V_total
Array< OneD, Array< OneD, NekDouble > > m_Upper
Array< OneD, Array< OneD, NekDouble > > m_hes
void DoGivensRotation(const int starttem, const int endtem, const int nGlobal, const int nDir, Array< OneD, NekDouble > &c, Array< OneD, NekDouble > &s, Array< OneD, NekDouble > &hsingle, Array< OneD, NekDouble > &eta)
NekDouble DoGmresRestart(const bool restarted, const bool truncted, const int nGlobal, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const int nDir)
Actual iterative gmres solver for one restart.
void DoArnoldi(const int starttem, const int endtem, const int nGlobal, const int nDir, Array< OneD, NekDouble > &w, Array< OneD, NekDouble > &Vsingle1, Array< OneD, NekDouble > &Vsingle2, Array< OneD, NekDouble > &hsingle)
static NekLinSysIterSharedPtr create(const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vRowComm, const int nDimen, const NekSysKey &pKey)
void Set_Rhs_Magnitude(const Array< OneD, NekDouble > &pIn)
Array< OneD, int > m_map
Global to universal unique map.
LibUtilities::CommSharedPtr m_rowComm
Definition: NekSys.h:291
NekSysOperators m_operator
Definition: NekSys.h:298
void DoNekSysPrecon(InArrayType &inarray, OutArrayType &outarray, const bool &flag=false) const
Definition: NekSys.h:149
void DoNekSysLhsEval(InArrayType &inarray, OutArrayType &outarray, const bool &flag=false) const
Definition: NekSys.h:142
void AccumulateRegion(std::string, int iolevel=0)
Accumulate elapsed time for a region.
Definition: Timer.cpp:70
std::shared_ptr< SessionReader > SessionReaderSharedPtr
NekLinSysIterFactory & GetNekLinSysIterFactory()
@ beta
Gauss Radau pinned at x=-1,.
Definition: PointsType.h:59
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
Definition: Comm.h:55
static const NekDouble kNekUnsetDouble
std::vector< double > w(NPUPPER)
double NekDouble
void Svtvp(int n, const T alpha, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Svtvp (scalar times vector plus vector): z = alpha*x + y.
Definition: Vmath.hpp:396
T Dot2(int n, const T *w, const T *x, const int *y)
dot product
Definition: Vmath.hpp:790
void Vadd(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Add vector z = x+y.
Definition: Vmath.hpp:180
void Smul(int n, const T alpha, const T *x, const int incx, T *y, const int incy)
Scalar multiply y = alpha*x.
Definition: Vmath.hpp:100
void Zero(int n, T *x, const int incx)
Zero vector.
Definition: Vmath.hpp:273
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
Definition: Vmath.hpp:825
void Vsub(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Subtract vector z = x-y.
Definition: Vmath.hpp:220
STL namespace.
scalarT< T > abs(scalarT< T > in)
Definition: scalar.hpp:298
scalarT< T > sqrt(scalarT< T > in)
Definition: scalar.hpp:294