Nektar++
NekLinSysIterGMRESLoc.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: NekLinSysIterGMRESLoc.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// License for the specific language governing rights and limitations under
14// Permission is hereby granted, free of charge, to any person obtaining a
15// copy of this software and associated documentation files (the "Software"),
16// to deal in the Software without restriction, including without limitation
17// the rights to use, copy, modify, merge, publish, distribute, sublicense,
18// and/or sell copies of the Software, and to permit persons to whom the
19// Software is furnished to do so, subject to the following conditions:
20//
21// The above copyright notice and this permission notice shall be included
22// in all copies or substantial portions of the Software.
23//
24// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
25// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30// DEALINGS IN THE SOFTWARE.
31//
32// Description: NekLinSysIterGMRESLoc definition
33//
34///////////////////////////////////////////////////////////////////////////////
35
38
39using namespace std;
40
42{
43/**
44 * @class NekLinSysIterGMRESLoc
45 *
46 * Solves a linear system using iterative methods using local storage rather
47 * than global
48 */
52 "NekLinSysIterGMRES local storage solver.");
53
56 const LibUtilities::CommSharedPtr &vRowComm, const int nDimen,
57 const NekSysKey &pKey)
58 : NekLinSysIter(pSession, vRowComm, nDimen, pKey)
59{
62
64
68
70
71 m_isLocal = true;
72
73 // Allocate array storage of coefficients
74 // Hessenburg matrix
76 for (size_t nd = 0; nd < m_LinSysMaxStorage; nd++)
77 {
79 }
80 // Hesseburg matrix after rotation
82 for (size_t nd = 0; nd < m_LinSysMaxStorage; nd++)
83 {
85 }
86 // Total search directions
88}
89
91{
93}
94
95/**
96 *
97 */
99 const int nLocal, const Array<OneD, const NekDouble> &pInput,
100 Array<OneD, NekDouble> &pOutput, [[maybe_unused]] const int nDir)
101{
102 int niterations = DoGMRES(nLocal, pInput, pOutput);
103
104 return niterations;
105}
106
107/**  
108 * Solve a global linear system using the Gmres 
109 * We solve only for the non-Dirichlet modes. The operator is evaluated  
110 * using an auxiliary function v_DoMatrixMultiply defined by the  
111 * specific solver. Distributed math routines are used to support  
112 * parallel execution of the solver.  
113 *  
114 * @param pInput Input residual in local format.
115 * @param pOutput Solution vector in local format but with continuous
116 * values
117 */
118
120 const Array<OneD, const NekDouble> &pInput,
121 Array<OneD, NekDouble> &pOutput)
122{
125 {
126 Set_Rhs_Magnitude(pInput);
127 }
128
129 NekDouble eps = 0.0;
131
133 m_converged = false;
134
135 bool restarted = false;
136 bool truncted = false;
137
139 {
140 truncted = true;
141 }
142
143 for (int nrestart = 0; nrestart < m_maxrestart; ++nrestart)
144 {
145 eps = DoGmresRestart(restarted, truncted, nLocal, pInput, pOutput);
146
147 if (m_converged)
148 {
149 break;
150 }
151 restarted = true;
152 }
153
154 if (m_verbose)
155 {
156 Array<OneD, NekDouble> r0(nLocal);
157 Array<OneD, NekDouble> wk(nLocal);
158
159 // calculate difference in residual of solution
161
162 // Note this is finding the difference between the whole
163 // residual not jsut the non-Dirichlet values.
164 // Probably OK since just an monitoring output?
165 Vmath::Vsub(nLocal, pInput, 1, r0, 1, r0, 1);
166
167 m_operator.DoAssembleLoc(r0, wk, true);
168 NekDouble vExchange = Vmath::Dot(nLocal, wk, r0);
169
170 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
171 NekDouble eps1 = vExchange;
172
173 if (m_root)
174 {
175 int nwidthcolm = 13;
176
177 cout << std::scientific << std::setw(nwidthcolm)
178 << std::setprecision(nwidthcolm - 8)
179 << " GMRES iterations made = " << m_totalIterations
180 << " using tolerance of " << m_NekLinSysTolerance
181 << " (error = " << sqrt(eps * m_prec_factor / m_rhs_magnitude)
182 << ")";
183
184 cout << " WITH (GMRES eps = " << eps << " REAL eps= " << eps1
185 << ")";
186
187 if (m_converged)
188 {
189 cout << " CONVERGED" << endl;
190 }
191 else
192 {
193 cout << " WARNING: Exceeded maxIt" << endl;
194 }
195 }
196 }
197
198 if (m_FlagWarnings)
199 {
200 WARNINGL1(m_converged, "GMRES did not converge.");
201 }
202 return m_totalIterations;
203}
204
206 const bool restarted, const bool truncted, const int nLocal,
208{
209 // Allocate array storage of coefficients
210 // Residual
212 // Givens rotation c
214 // Givens rotation s
216 // Total coefficients, just for check
218 // Search direction order
222 // Temporary variables
223 int idtem;
224 int starttem;
225 int endtem;
226
227 NekDouble eps;
228 NekDouble beta, alpha;
229 NekDouble vExchange = 0;
230 // Temporary Array
231 Array<OneD, NekDouble> w(nLocal, 0.0);
232 Array<OneD, NekDouble> wk(nLocal, 0.0);
233 Array<OneD, NekDouble> r0(nLocal, 0.0);
238
239 if (restarted)
240 {
241 // This is A*x
243
244 // The first search direction
245 beta = -1.0;
246
247 // This is r0 = b-A*x
248 Vmath::Svtvp(nLocal, beta, r0, 1, pInput, 1, r0, 1);
249 }
250 else
251 {
252 // If not restarted, x0 should be zero
253 Vmath::Vcopy(nLocal, pInput, 1, r0, 1);
254 }
255
257 {
259 }
260
261 // Norm of (r0)
262 // The m_map tells how to connect
263 m_operator.DoAssembleLoc(r0, wk, true);
264 vExchange = Vmath::Dot(nLocal, wk, r0);
265 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
266 eps = vExchange;
267
268 if (!restarted)
269 {
271 {
273 {
274 // Evaluate initial residual error for exit check
275 ASSERTL0(false, "Need to set up/debugging");
276
277 m_operator.DoAssembleLoc(pInput, wk, true);
278 vExchange = Vmath::Dot(nLocal, wk, pInput);
279 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
280 m_prec_factor = vExchange / eps;
281 }
282 else
283 {
284 m_prec_factor = 1.0;
285 }
286 }
287 }
288
289 Vmath::Smul(nLocal, sqrt(m_prec_factor), r0, 1, r0, 1);
290 eps = eps * m_prec_factor;
291 eta[0] = sqrt(eps);
292
293 // Give an order for the entries in Hessenburg matrix
294 for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
295 {
296 id[nd] = nd;
297 id_end[nd] = nd + 1;
298 starttem = id_end[nd] - m_KrylovMaxHessMatBand;
299 if (truncted && (starttem) > 0)
300 {
301 id_start[nd] = starttem;
302 }
303 else
304 {
305 id_start[nd] = 0;
306 }
307 }
308
309 // Normlized by r0 norm V(:,1)=r0/norm(r0)
310 alpha = 1.0 / eta[0];
311
312 // Scalar multiplication
313 if (m_V_total[0].size() == 0)
314 {
315 m_V_total[0] = Array<OneD, NekDouble>(nLocal, 0.0);
316 }
317 Vmath::Smul(nLocal, alpha, r0, 1, m_V_total[0], 1);
318
319 // restarted Gmres(m) process
321 {
322 V1 = Array<OneD, NekDouble>(nLocal, 0.0);
323 }
324
325 int nswp = 0;
326 for (int nd = 0; nd < m_LinSysMaxStorage; ++nd)
327 {
328 if (m_V_total[nd + 1].size() == 0)
329 {
330 m_V_total[nd + 1] = Array<OneD, NekDouble>(nLocal, 0.0);
331 }
332 Vmath::Zero(nLocal, m_V_total[nd + 1], 1);
334 V2 = m_V_total[nd + 1];
335 h1 = m_hes[nd];
336
338 {
339 m_operator.DoNekSysPrecon(m_V_total[nd], V1, true);
340 }
341 else
342 {
343 V1 = m_V_total[nd];
344 }
345
346 // w here is no need to add nDir due to temporary Array
347 idtem = id[nd];
348 starttem = id_start[idtem];
349 endtem = id_end[idtem];
350
351 DoArnoldi(starttem, endtem, nLocal, w, wk, V1, V2, h1);
352
353 if (starttem > 0)
354 {
355 starttem = starttem - 1;
356 }
357
358 h2 = m_Upper[nd];
359 Vmath::Vcopy(m_LinSysMaxStorage + 1, &h1[0], 1, &h2[0], 1);
360 DoGivensRotation(starttem, endtem, cs, sn, h2, eta);
361
362 eps = eta[nd + 1] * eta[nd + 1];
363
364 // This Gmres merge truncted Gmres to accelerate.
365 // If truncted, cannot jump out because
366 // the last term of eta is not residual
367 if ((!truncted) || (nd < m_KrylovMaxHessMatBand))
368 {
370 m_rhs_magnitude)) //&& nd > 0)
371 {
372 m_converged = true;
373 }
374 }
375 nswp++;
377
378 if (m_converged)
379 {
380 break;
381 }
382 }
383
384 DoBackward(nswp, m_Upper, eta, y_total);
385
386 // calculate output y_total*V_total
387 Array<OneD, NekDouble> solution(nLocal, 0.0);
388 for (int i = 0; i < nswp; ++i)
389 {
390 beta = y_total[i];
391 Vmath::Svtvp(nLocal, beta, m_V_total[i], 1, solution, 1, solution, 1);
392 }
393
395 {
396 m_operator.DoNekSysPrecon(solution, solution, true);
397 }
398
399 // Update output.
400 Vmath::Vadd(nLocal, solution, 1, pOutput, 1, pOutput, 1);
401
402 return eps;
403}
404
405// Arnoldi Subroutine
406void NekLinSysIterGMRESLoc::DoArnoldi(const int starttem, const int endtem,
407 const int nLocal,
413{
414 NekDouble alpha, beta, vExchange = 0.0;
416 timer.Start();
418 timer.Stop();
419 timer.AccumulateRegion("NekSysOperators::DoNekSysLhsEval", 10);
420
422 {
424 }
425
426 Vmath::Smul(nLocal, sqrt(m_prec_factor), w, 1, w, 1);
427
428 // Modified Gram-Schmidt
429 for (int i = starttem; i < endtem; ++i)
430 {
431 // Do inner product on equivalent of global values excluding
432 // Diriclet conditions. To do this need to assmble (and
433 // scatter back vector and then zero Dirichlet conditions.
434 m_operator.DoAssembleLoc(m_V_total[i], wk, true);
435 vExchange = Vmath::Dot(nLocal, wk, w);
436 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
437
438 h[i] = vExchange;
439
440 beta = -1.0 * vExchange;
441 Vmath::Svtvp(nLocal, beta, m_V_total[i], 1, w, 1, w, 1);
442 }
443 // end of Modified Gram-Schmidt
444
445 // calculate the L2 norm and normalize
446 m_operator.DoAssembleLoc(w, wk, true);
447 vExchange = Vmath::Dot(nLocal, wk, w);
448 m_rowComm->AllReduce(vExchange, LibUtilities::ReduceSum);
449
450 h[endtem] = sqrt(vExchange);
451
452 alpha = 1.0 / h[endtem];
453 Vmath::Smul(nLocal, alpha, w, 1, V2, 1);
454}
455
456// QR factorization through Givens rotation -> Put into a helper class
458 const int endtem,
463{
464 NekDouble temp_dbl;
465 NekDouble dd;
466 NekDouble hh;
467 int idtem = endtem - 1;
468 // The starttem and endtem are beginning and ending order of Givens rotation
469 // They usually equal to the beginning position and ending position of
470 // Hessenburg matrix But sometimes starttem will change, like if it is
471 // initial 0 and becomes nonzero because previous Givens rotation See Yu
472 // Pan's User Guide
473 for (int i = starttem; i < idtem; ++i)
474 {
475 temp_dbl = c[i] * h[i] - s[i] * h[i + 1];
476 h[i + 1] = s[i] * h[i] + c[i] * h[i + 1];
477 h[i] = temp_dbl;
478 }
479 dd = h[idtem];
480 hh = h[endtem];
481 if (hh == 0.0)
482 {
483 c[idtem] = 1.0;
484 s[idtem] = 0.0;
485 }
486 else if (abs(hh) > abs(dd))
487 {
488 temp_dbl = -dd / hh;
489 s[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
490 c[idtem] = temp_dbl * s[idtem];
491 }
492 else
493 {
494 temp_dbl = -hh / dd;
495 c[idtem] = 1.0 / sqrt(1.0 + temp_dbl * temp_dbl);
496 s[idtem] = temp_dbl * c[idtem];
497 }
498
499 h[idtem] = c[idtem] * h[idtem] - s[idtem] * h[endtem];
500 h[endtem] = 0.0;
501
502 temp_dbl = c[idtem] * eta[idtem] - s[idtem] * eta[endtem];
503 eta[endtem] = s[idtem] * eta[idtem] + c[idtem] * eta[endtem];
504 eta[idtem] = temp_dbl;
505}
506
507// Backward calculation
508// To notice, Hesssenburg matrix's column
509// and row changes due to use Array<OneD,Array<OneD,NekDouble>> --> Put into a
510// helper class
515{
516 // Number is the entry number
517 // but C++'s order need to be one smaller
518 int maxid = number - 1;
519 NekDouble sum;
520 y[maxid] = b[maxid] / A[maxid][maxid];
521 for (int i = maxid - 1; i > -1; --i)
522 {
523 sum = b[i];
524 for (int j = i + 1; j < number; ++j)
525 {
526 // i and j changes due to use Array<OneD,Array<OneD,NekDouble>>
527 sum -= y[j] * A[j][i];
528 }
529 y[i] = sum / A[i][i];
530 }
531}
532} // namespace Nektar::LibUtilities
#define WARNINGL1(condition, msg)
Definition: ErrorUtil.hpp:243
#define ASSERTL0(condition, msg)
Definition: ErrorUtil.hpp:208
tKey RegisterCreatorFunction(tKey idKey, CreatorFunction classCreator, std::string pDesc="")
Register a class with the factory.
static NekLinSysIterSharedPtr create(const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vRowComm, const int nDimen, const NekSysKey &pKey)
Array< OneD, Array< OneD, NekDouble > > m_V_total
NekDouble DoGmresRestart(const bool restarted, const bool truncted, const int nLocal, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput)
Actual iterative gmres solver for one restart.
void DoArnoldi(const int starttem, const int endtem, const int nLocal, Array< OneD, NekDouble > &w, Array< OneD, NekDouble > &wk, Array< OneD, NekDouble > &V1, Array< OneD, NekDouble > &V2, Array< OneD, NekDouble > &h)
void DoBackward(const int number, Array< OneD, Array< OneD, NekDouble > > &A, const Array< OneD, const NekDouble > &b, Array< OneD, NekDouble > &y)
void DoGivensRotation(const int starttem, const int endtem, Array< OneD, NekDouble > &c, Array< OneD, NekDouble > &s, Array< OneD, NekDouble > &h, Array< OneD, NekDouble > &eta)
int v_SolveSystem(const int nLocal, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput, const int nDir) override
NekLinSysIterGMRESLoc(const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vRowComm, const int nDimen, const NekSysKey &pKey=NekSysKey())
Array< OneD, Array< OneD, NekDouble > > m_hes
int DoGMRES(const int pNumRows, const Array< OneD, const NekDouble > &pInput, Array< OneD, NekDouble > &pOutput)
Actual iterative solve-GMRES.
Array< OneD, Array< OneD, NekDouble > > m_Upper
void Set_Rhs_Magnitude(const Array< OneD, NekDouble > &pIn)
LibUtilities::CommSharedPtr m_rowComm
Definition: NekSys.h:291
NekSysOperators m_operator
Definition: NekSys.h:298
void DoAssembleLoc(InArrayType &xn, OutArrayType &xn1, const bool &flag=false) const
Definition: NekSys.h:162
void DoNekSysPrecon(InArrayType &inarray, OutArrayType &outarray, const bool &flag=false) const
Definition: NekSys.h:149
void DoNekSysLhsEval(InArrayType &inarray, OutArrayType &outarray, const bool &flag=false) const
Definition: NekSys.h:142
void AccumulateRegion(std::string, int iolevel=0)
Accumulate elapsed time for a region.
Definition: Timer.cpp:70
std::shared_ptr< SessionReader > SessionReaderSharedPtr
NekLinSysIterFactory & GetNekLinSysIterFactory()
@ beta
Gauss Radau pinned at x=-1,.
Definition: PointsType.h:59
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
Definition: Comm.h:55
static const NekDouble kNekUnsetDouble
std::vector< double > w(NPUPPER)
double NekDouble
void Svtvp(int n, const T alpha, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Svtvp (scalar times vector plus vector): z = alpha*x + y.
Definition: Vmath.hpp:396
T Dot(int n, const T *w, const T *x)
dot product
Definition: Vmath.hpp:761
void Vadd(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Add vector z = x+y.
Definition: Vmath.hpp:180
void Smul(int n, const T alpha, const T *x, const int incx, T *y, const int incy)
Scalar multiply y = alpha*x.
Definition: Vmath.hpp:100
void Zero(int n, T *x, const int incx)
Zero vector.
Definition: Vmath.hpp:273
void Vcopy(int n, const T *x, const int incx, T *y, const int incy)
Definition: Vmath.hpp:825
void Vsub(int n, const T *x, const int incx, const T *y, const int incy, T *z, const int incz)
Subtract vector z = x-y.
Definition: Vmath.hpp:220
STL namespace.
scalarT< T > abs(scalarT< T > in)
Definition: scalar.hpp:289
scalarT< T > sqrt(scalarT< T > in)
Definition: scalar.hpp:285