Nektar++
Loading...
Searching...
No Matches
NekLinSysIterGMRESLoc.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: NekLinSysIterGMRESLoc.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// License for the specific language governing rights and limitations under
14// Permission is hereby granted, free of charge, to any person obtaining a
15// copy of this software and associated documentation files (the "Software"),
16// to deal in the Software without restriction, including without limitation
17// the rights to use, copy, modify, merge, publish, distribute, sublicense,
18// and/or sell copies of the Software, and to permit persons to whom the
19// Software is furnished to do so, subject to the following conditions:
20//
21// The above copyright notice and this permission notice shall be included
22// in all copies or substantial portions of the Software.
23//
24// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
25// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30// DEALINGS IN THE SOFTWARE.
31//
32// Description: NekLinSysIterGMRESLoc definition
33//
34///////////////////////////////////////////////////////////////////////////////
35
38
39using namespace std;
40
42{
43/**
44 * @class NekLinSysIterGMRESLoc
45 *
46 * Solves a linear system using iterative methods using local storage rather
47 * than global
48 */
52 "NekLinSysIterGMRES local storage solver.");
53
56 const LibUtilities::CommSharedPtr &vRowComm, const int nDimen,
57 const NekSysKey &pKey)
58 : NekLinSysIter(pSession, vRowComm, nDimen, pKey)
59{
62
64
68
70
71 m_isLocal = true;
72
73 // Allocate array storage of coefficients
74 // Hessenburg matrix
76 for (size_t nd = 0; nd < m_LinSysMaxStorage; nd++)
77 {
79 }
80 // Hesseburg matrix after rotation
82 for (size_t nd = 0; nd < m_LinSysMaxStorage; nd++)
83 {
85 }
86 // Total search directions
88}
89
94
95/**
96 *
97 */
99 const int nLocal, const Array<OneD, const NekDouble> &pInput,
100 Array<OneD, NekDouble> &pOutput, [[maybe_unused]] const int nDir)
101{
102 int niterations = DoGMRES(nLocal, pInput, pOutput);
103
104 return niterations;
105}
106
108 const Array<OneD, NekDouble> &rhs,
110 [[maybe_unused]] const int nDir,
111 NekDouble &err, int &iter)
112{
113 iter = DoGMRES(nGlobal, rhs, x);
114 err = m_finalError;
115}
116
117/**  
118 * Solve a global linear system using the Gmres 
119 * We solve only for the non-Dirichlet modes. The operator is evaluated  
120 * using an auxiliary function v_DoMatrixMultiply defined by the  
121 * specific solver. Distributed math routines are used to support  
122 * parallel execution of the solver.  
123 *  
124 * @param pInput Input residual in local format.
125 * @param pOutput Solution vector in local format but with continuous
126 * values
127 */
128
130 const Array<OneD, const NekDouble> &pInput,
131 Array<OneD, NekDouble> &pOutput)
132{
135 {
136 Set_Rhs_Magnitude(pInput);
137 }
138
139 NekDouble eps = 0.0;
141
143 m_converged = false;
144
145 bool restarted = false;
146 bool truncted = false;
147
149 {
150 truncted = true;
151 }
152
153 for (int nrestart = 0; nrestart < m_maxrestart; ++nrestart)
154 {
155 eps = DoGmresRestart(restarted, truncted, nLocal, pInput, pOutput);
156
157 if (m_converged)
158 {
159 break;
160 }
161 restarted = true;
162 }
163
164 if (m_verbose)
165 {
166 Array<OneD, NekDouble> r0(nLocal);
167 Array<OneD, NekDouble> wk(nLocal);
168
169 // calculate difference in residual of solution
171
172 // Note this is finding the difference between the whole
173 // residual not jsut the non-Dirichlet values.
174 // Probably OK since just an monitoring output?
175 Vmath::Vsub(nLocal, pInput, 1, r0, 1, r0, 1);
176
177 m_operator.DoAssembleLoc(r0, wk, true);
178 NekDouble vExchange = Vmath::Dot(nLocal, wk, r0);
179
180 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
181 NekDouble eps1 = vExchange;
182
183 if (m_root)
184 {
185 cout << "GMRES iterations made = " << m_totalIterations
186 << " using tolerance of " << m_NekLinSysTolerance
187 << " (error = " << m_finalError
188 << ", rhs_mag = " << sqrt(m_rhs_magnitude)
189 << " with (GMRES eps = " << eps << " REAL eps= " << eps1
190 << ")";
191
192 // Append appropriate message when finalising GMRES
193 if (m_converged)
194 {
195 cout << " CONVERGED" << endl;
196 }
197 else
198 {
199 cout << " WARNING: Exceeded maxIt" << endl;
200 }
201 }
202 }
203
204 if (m_FlagWarnings)
205 {
206 WARNINGL1(m_converged, "GMRES did not converge.");
207 }
208 return m_totalIterations;
209}
210
212 const bool restarted, const bool truncted, const int nLocal,
214{
215 // Allocate array storage of coefficients
216 // Residual
218 // Givens rotation c
220 // Givens rotation s
222 // Total coefficients, just for check
224 // Search direction order
228 // Temporary variables
229 int idtem;
230 int starttem;
231 int endtem;
232
233 NekDouble eps;
234 NekDouble beta, alpha;
235 NekDouble vExchange = 0;
236 // Temporary Array
237 Array<OneD, NekDouble> w(nLocal, 0.0);
238 Array<OneD, NekDouble> wk(nLocal, 0.0);
239 Array<OneD, NekDouble> r0(nLocal, 0.0);
244
245 if (restarted)
246 {
247 // This is A*x
249
250 // The first search direction
251 beta = -1.0;
252
253 // This is r0 = b-A*x
254 Vmath::Svtvp(nLocal, beta, r0, 1, pInput, 1, r0, 1);
255 }
256 else
257 {
258 // If not restarted, x0 should be zero
259 Vmath::Vcopy(nLocal, pInput, 1, r0, 1);
260 }
261
263 {
265 }
266
267 // Norm of (r0)
268 // The m_map tells how to connect
269 m_operator.DoAssembleLoc(r0, wk, true);
270 vExchange = Vmath::Dot(nLocal, wk, r0);
271 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
272 eps = vExchange;
273
274 if (!restarted)
275 {
277 {
279 {
280 // Evaluate initial residual error for exit check
281 ASSERTL0(false, "Need to set up/debugging");
282
283 m_operator.DoAssembleLoc(pInput, wk, true);
284 vExchange = Vmath::Dot(nLocal, wk, pInput);
285 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
286 m_prec_factor = vExchange / eps;
287 }
288 else
289 {
290 m_prec_factor = 1.0;
291 }
292 }
293 }
294
295 Vmath::Smul(nLocal, sqrt(m_prec_factor), r0, 1, r0, 1);
296 eps = eps * m_prec_factor;
297 eta[0] = sqrt(eps);
298
299 // Give an order for the entries in Hessenburg matrix
300 for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
301 {
302 id[nd] = nd;
303 id_end[nd] = nd + 1;
304 starttem = id_end[nd] - m_KrylovMaxHessMatBand;
305 if (truncted && (starttem) > 0)
306 {
307 id_start[nd] = starttem;
308 }
309 else
310 {
311 id_start[nd] = 0;
312 }
313 }
314
315 // Normlized by r0 norm V(:,1)=r0/norm(r0)
316 alpha = 1.0 / eta[0];
317
318 // Scalar multiplication
319 if (m_V_total[0].size() == 0)
320 {
321 m_V_total[0] = Array<OneD, NekDouble>(nLocal, 0.0);
322 }
323 Vmath::Smul(nLocal, alpha, r0, 1, m_V_total[0], 1);
324
325 // restarted Gmres(m) process
327 {
328 V1 = Array<OneD, NekDouble>(nLocal, 0.0);
329 }
330
331 int nswp = 0;
332 for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
333 {
334 if (m_V_total[nd + 1].size() == 0)
335 {
336 m_V_total[nd + 1] = Array<OneD, NekDouble>(nLocal, 0.0);
337 }
338 Vmath::Zero(nLocal, m_V_total[nd + 1], 1);
340 V2 = m_V_total[nd + 1];
341 h1 = m_hes[nd];
342
344 {
345 m_operator.DoNekSysPrecon(m_V_total[nd], V1, true);
346 }
347 else
348 {
349 V1 = m_V_total[nd];
350 }
351
352 // w here is no need to add nDir due to temporary Array
353 idtem = id[nd];
354 starttem = id_start[idtem];
355 endtem = id_end[idtem];
356
357 DoArnoldi(starttem, endtem, nLocal, w, wk, V1, V2, h1);
358
359 if (starttem > 0)
360 {
361 starttem = starttem - 1;
362 }
363
364 h2 = m_Upper[nd];
365 Vmath::Vcopy(m_LinSysMaxStorage + 1, &h1[0], 1, &h2[0], 1);
366 DoGivensRotation(starttem, endtem, cs, sn, h2, eta);
367
368 eps = eta[nd + 1] * eta[nd + 1];
369
370 // This Gmres merge truncted Gmres to accelerate.
371 // If truncted, cannot jump out because
372 // the last term of eta is not residual
373 if ((!truncted) || (nd < m_KrylovMaxHessMatBand))
374 {
376 m_rhs_magnitude)) //&& nd > 0)
377 {
378 m_converged = true;
379 }
380 }
381 nswp++;
383
384 if (m_converged)
385 {
386 break;
387 }
388 }
389
390 DoBackward(nswp, m_Upper, eta, y_total);
391
392 // calculate output y_total*V_total
393 Array<OneD, NekDouble> solution(nLocal, 0.0);
394 for (int i = 0; i < nswp; ++i)
395 {
396 beta = y_total[i];
397 Vmath::Svtvp(nLocal, beta, m_V_total[i], 1, solution, 1, solution, 1);
398 }
399
401 {
402 m_operator.DoNekSysPrecon(solution, solution, true);
403 }
404
405 // Update output.
406 Vmath::Vadd(nLocal, solution, 1, pOutput, 1, pOutput, 1);
407
408 return eps;
409}
410
411// Arnoldi Subroutine
412void NekLinSysIterGMRESLoc::DoArnoldi(const int starttem, const int endtem,
413 const int nLocal,
419{
420 NekDouble alpha, beta, vExchange = 0.0;
422 timer.Start();
424 timer.Stop();
425 timer.AccumulateRegion("NekSysOperators::DoNekSysLhsEval", 10);
426
428 {
430 }
431
432 Vmath::Smul(nLocal, sqrt(m_prec_factor), w, 1, w, 1);
433
434 // Modified Gram-Schmidt
435 for (int i = starttem; i < endtem; ++i)
436 {
437 // Do inner product on equivalent of global values excluding
438 // Diriclet conditions. To do this need to assmble (and
439 // scatter back vector and then zero Dirichlet conditions.
440 m_operator.DoAssembleLoc(m_V_total[i], wk, true);
441 vExchange = Vmath::Dot(nLocal, wk, w);
442 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
443
444 h[i] = vExchange;
445
446 beta = -1.0 * vExchange;
447 Vmath::Svtvp(nLocal, beta, m_V_total[i], 1, w, 1, w, 1);
448 }
449 // end of Modified Gram-Schmidt
450
451 // calculate the L2 norm and normalize
452 m_operator.DoAssembleLoc(w, wk, true);
453 vExchange = Vmath::Dot(nLocal, wk, w);
454 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
455
456 h[endtem] = sqrt(vExchange);
457
458 alpha = 1.0 / h[endtem];
459 Vmath::Smul(nLocal, alpha, w, 1, V2, 1);
460}
461
462// QR factorization through Givens rotation -> Put into a helper class
464 const int endtem,
469{
470 NekDouble temp_dbl;
471 NekDouble dd;
472 NekDouble hh;
473 int idtem = endtem - 1;
474 // The starttem and endtem are beginning and ending order of Givens rotation
475 // They usually equal to the beginning position and ending position of
476 // Hessenburg matrix But sometimes starttem will change, like if it is
477 // initial 0 and becomes nonzero because previous Givens rotation See Yu
478 // Pan's User Guide
479 for (int i = starttem; i < idtem; ++i)
480 {
481 temp_dbl = c[i] * h[i] - s[i] * h[i + 1];
482 h[i + 1] = s[i] * h[i] + c[i] * h[i + 1];
483 h[i] = temp_dbl;
484 }
485 dd = h[idtem];
486 hh = h[endtem];
487 if (hh == 0.0)
488 {
489 c[idtem] = 1.0;
490 s[idtem] = 0.0;
491 }
492 else if (abs(hh) > abs(dd))
493 {
494 temp_dbl = -dd / hh;
495 s[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
496 c[idtem] = temp_dbl * s[idtem];
497 }
498 else
499 {
500 temp_dbl = -hh / dd;
501 c[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
502 s[idtem] = temp_dbl * c[idtem];
503 }
504
505 h[idtem] = c[idtem] * h[idtem] - s[idtem] * h[endtem];
506 h[endtem] = 0.0;
507
508 temp_dbl = c[idtem] * eta[idtem] - s[idtem] * eta[endtem];
509 eta[endtem] = s[idtem] * eta[idtem] + c[idtem] * eta[endtem];
510 eta[idtem] = temp_dbl;
511}
512
513// Backward calculation
514// To notice, Hesssenburg matrix's column
515// and row changes due to use Array<OneD,Array<OneD,NekDouble>> --> Put into a
516// helper class
521{
522 // Number is the entry number
523 // but C++'s order need to be one smaller
524 int maxid = number - 1;
525 NekDouble sum;
526 y[maxid] = b[maxid] / A[maxid][maxid];
527 for (int i = maxid - 1; i > -1; --i)
528 {
529 sum = b[i];
530 for (int j = i + 1; j < number; ++j)
531 {
532 // i and j changes due to use Array<OneD,Array<OneD,NekDouble>>
533 sum -= y[j] * A[j][i];
534 }
535 y[i] = sum / A[i][i];
536 }
537}
538} // namespace Nektar::LibUtilities
#define WARNINGL1(condition, msg)
#define ASSERTL0(condition, msg)
tKey RegisterCreatorFunction(tKey idKey, CreatorFunction classCreator, std::string pDesc="")
Register a class with the factory.
static NekLinSysIterSharedPtr create(const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vRowComm, const int nDimen, const NekSysKey &pKey)
Array< OneD, Array< OneD, NekDouble > > m_V_total
NekDouble DoGmresRestart(const bool restarted, const bool truncted, const int nLocal, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput)
Actual iterative gmres solver for one restart.
void DoArnoldi(const int starttem, const int endtem, const int nLocal, Array< OneD, NekDouble > &w, Array< OneD, NekDouble > &wk, Array< OneD, NekDouble > &V1, Array< OneD, NekDouble > &V2, Array< OneD, NekDouble > &h)
void DoBackward(const int number, Array< OneD, Array< OneD, NekDouble > > &A, const Array< OneD, const NekDouble > &b, Array< OneD, NekDouble > &y)
void DoGivensRotation(const int starttem, const int endtem, Array< OneD, NekDouble > &c, Array< OneD, NekDouble > &s, Array< OneD, NekDouble > &h, Array< OneD, NekDouble > &eta)
void v_DoIterate(const int nGlobal, const Array< OneD, NekDouble > &rhs, Array< OneD, NekDouble > &x, const int nDir, NekDouble &err, int &iter) override
int v_SolveSystem(const int nLocal, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const int nDir) override
NekLinSysIterGMRESLoc(const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vRowComm, const int nDimen, const NekSysKey &pKey=NekSysKey())
Array< OneD, Array< OneD, NekDouble > > m_hes
int DoGMRES(const int pNumRows, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput)
Actual iterative solve-GMRES.
Array< OneD, Array< OneD, NekDouble > > m_Upper
void Set_Rhs_Magnitude(const Array< OneD, NekDouble > &pIn)
LibUtilities::CommSharedPtr m_rowComm
Definition NekSys.h:299
NekSysOperators m_operator
Definition NekSys.h:306
void DoAssembleLoc(InArrayType &xn, OutArrayType &xn1, const bool &flag=false) const
Definition NekSys.h:168
void DoNekSysPrecon(InArrayType &inarray, OutArrayType &outarray, const bool &flag=false) const
Definition NekSys.h:155
void DoNekSysLhsEval(InArrayType &inarray, OutArrayType &outarray, const bool &flag=false) const
Definition NekSys.h:148
void AccumulateRegion(std::string, int iolevel=0)
Accumulate elapsed time for a region.
Definition Timer.cpp:70
std::shared_ptr< SessionReader > SessionReaderSharedPtr
NekLinSysIterFactory & GetNekLinSysIterFactory()
@ beta
Gauss Radau pinned at x=-1,.
Definition PointsType.h:59
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
Definition Comm.h:55
static const NekDouble kNekUnsetDouble
void Svtvp(int n, const T alpha, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Svtvp (scalar times vector plus vector): z = alpha*x + y.
Definition Vmath.hpp:396
T Dot(int n, const T *w, const T *x)
dot product
Definition Vmath.hpp:761
void Vadd(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Add vector z = x+y.
Definition Vmath.hpp:180
void Smul(int n, const T alpha, const T *x, const int incx, T *y, const int incy)
Scalar multiply y = alpha*x.
Definition Vmath.hpp:100
void Zero(int n, T *x, const int incx)
Zero vector.
Definition Vmath.hpp:273
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
Definition Vmath.hpp:825
void Vsub(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Subtract vector z = x-y.
Definition Vmath.hpp:220
STL namespace.
scalarT< T > abs(scalarT< T > in)
Definition scalar.hpp:295
scalarT< T > min(scalarT< T > lhs, scalarT< T > rhs)
Definition scalar.hpp:300
scalarT< T > sqrt(scalarT< T > in)
Definition scalar.hpp:290