Nektar++
PreconCfsBRJ.h
Go to the documentation of this file.
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 // File: PreconCfsBRJ.h
4 //
5 // For more information, please see: http://www.nektar.info
6 //
7 // The MIT License
8 //
9 // Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10 // Department of Aeronautics, Imperial College London (UK), and Scientific
11 // Computing and Imaging Institute, University of Utah (USA).
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining a
14 // copy of this software and associated documentation files (the "Software"),
15 // to deal in the Software without restriction, including without limitation
16 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
17 // and/or sell copies of the Software, and to permit persons to whom the
18 // Software is furnished to do so, subject to the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be included
21 // in all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29 // DEALINGS IN THE SOFTWARE.
30 //
31 // Description: PreconCfsBRJ header
32 //
33 ///////////////////////////////////////////////////////////////////////////////
34 
35 #ifndef NEKTAR_SOLVERS_COMPRESSIBLEFLOWSOLVER_PRECONCFSBRJ
36 #define NEKTAR_SOLVERS_COMPRESSIBLEFLOWSOLVER_PRECONCFSBRJ
37 
39 
40 namespace Nektar
41 {
42 
43 using namespace tinysimd;
44 
45 /**
46  * Block Relaxed(weighted) Jacobi iterative (BRJ) Preconditioner for CFS
47  *
48  */
49 class PreconCfsBRJ : public PreconCfsOp
50 {
51 public:
52  friend class MemoryManager<PreconCfsBRJ>;
53 
54  /// Creates an instance of this class
58  const LibUtilities::CommSharedPtr &vComm)
59  {
61  pFields, pSession, vComm);
62  return p;
63  }
64 
65  /// Name of the class
66  static std::string className;
67 
70  const LibUtilities::CommSharedPtr &vComm);
72 
73  virtual bool v_UpdatePreconMatCheck(const Array<OneD, const NekDouble> &res,
74  const NekDouble dtLambda) override;
75 
76 protected:
79 
81 
82  unsigned int m_max_nblocks;
83  unsigned int m_max_nElmtDof;
84  std::vector<simd<NekSingle>, tinysimd::allocator<simd<NekSingle>>>
86  std::vector<int> m_inputIdx;
87 
94 
96 
97  virtual void v_InitObject() override;
98 
99 private:
100  virtual void v_DoPreconCfs(
102  const Array<OneD, NekDouble> &pInput, Array<OneD, NekDouble> &pOutput,
103  const bool &flag) override;
104 
105  virtual void v_BuildPreconCfs(
107  const Array<OneD, const Array<OneD, NekDouble>> &intmp,
108  const NekDouble time, const NekDouble lambda) override;
109 
110  void PreconBlkDiag(
112  const Array<OneD, NekDouble> &inarray,
113  Array<OneD, NekDouble> &outarray);
114 
115  template <typename DataType>
116  void MinusOffDiag2Rhs(
118  const size_t nvariables, const size_t nCoeffs,
119  const Array<OneD, const Array<OneD, NekDouble>> &inarray,
120  Array<OneD, Array<OneD, NekDouble>> &outarray, bool flagUpdateDervFlux,
121  Array<OneD, Array<OneD, NekDouble>> &FwdFluxDeriv,
122  Array<OneD, Array<OneD, NekDouble>> &BwdFluxDeriv,
124  TensorOfArray3D<NekDouble> &wspTrace,
125  Array<OneD, Array<OneD, DataType>> &wspTraceDataType,
126  const TensorOfArray4D<DataType> &TraceJacArray,
127  const TensorOfArray4D<DataType> &TraceJacDerivArray,
128  const Array<OneD, const Array<OneD, DataType>> &TraceJacDerivSign,
129  const TensorOfArray5D<DataType> &TraceIPSymJacArray);
130 
131  template <typename TypeNekBlkMatSharedPtr>
132  void AllocatePreconBlkDiagCoeff(
135  const int &nscale = 1);
136 
137  /// This function creates the matrix structure for the block
138  /// diagonal operator. It organizes the way that the matrix
139  // is loaded, multiplied and unpacked in order to be operated
140  /// by SIMD instructions. The degrees of freedom of each element
141  /// are reorganized, so they are placed in the correct location
142  /// to perfom SIMD instructions.
145  {
146  using vec_t = simd<NekSingle>;
147 
148  int TotMatLen = 0;
149  int TotLen = 0;
150  const auto nTotElmt = pFields[0]->GetNumElmts();
151  const auto nvariables = pFields.size();
152  const auto vecwidth = vec_t::width;
153 
154  m_max_nblocks = 0;
155  m_max_nElmtDof = 0;
156 
157  for (int ne = 0; ne < nTotElmt; ne++)
158  {
159  const auto nElmtDof = pFields[0]->GetNcoeffs(ne) * nvariables;
160  const auto nblocks = nElmtDof / vecwidth;
161  unsigned int totblocks =
162  (nElmtDof % vecwidth) ? nblocks + 1 : nblocks;
163 
164  m_max_nblocks =
165  (m_max_nblocks > totblocks) ? m_max_nblocks : totblocks;
166  m_max_nElmtDof =
167  (m_max_nElmtDof > nElmtDof) ? m_max_nElmtDof : nElmtDof;
168 
169  TotLen += totblocks * vecwidth;
170  TotMatLen += nElmtDof * totblocks;
171  }
172 
173  m_sBlkDiagMat.resize(TotMatLen);
174  m_inputIdx.resize(TotLen);
175 
176  // generate a index list of vector width aware mapping from
177  // local coeff storage over all variables to elemental storage
178  // over variables
179  unsigned int ncoeffs = pFields[0]->GetNcoeffs();
180  for (int ne = 0, cnt1 = 0; ne < nTotElmt; ne++)
181  {
182  const auto nElmtCoeff = pFields[0]->GetNcoeffs(ne);
183  const auto nElmtDof = nElmtCoeff * nvariables;
184  const auto nblocks = nElmtDof / vecwidth;
185  const auto nCoefOffset = pFields[0]->GetCoeff_Offset(ne);
186  int i = 0;
187  int i0 = 0;
188  int inOffset, j;
189 
190  for (int m = 0; m < nvariables; m++)
191  {
192  inOffset = m * ncoeffs + nCoefOffset;
193 
194  if (m && (vecwidth - i0 < nElmtCoeff))
195  {
196  // May need to add entries from later variables to
197  // remainder of last variable if the vector width
198  // was not exact multiple of number of elemental
199  // coeffs
200  for (i = 0; i0 < vecwidth; ++i, ++i0)
201  {
202  m_inputIdx[cnt1++] = inOffset + i;
203  }
204  }
205  else
206  {
207  i = 0;
208  }
209 
210  // load up other vectors in variable that fit into vector
211  // width
212  for (j = 0; (j + 1) * vecwidth < nElmtCoeff - i; ++j)
213  {
214  for (i0 = 0; i0 < vecwidth; ++i0)
215  {
216  m_inputIdx[cnt1++] = inOffset + i + j * vecwidth + i0;
217  }
218  }
219 
220  // load up any residual data for this variable
221  for (i0 = 0, j = j * vecwidth; j < nElmtCoeff - i; ++j, ++i0)
222  {
223  m_inputIdx[cnt1++] = inOffset + i + j;
224  }
225  }
226 
227  const auto endwidth = nElmtDof - nblocks * vecwidth;
228 
229  // fill out rest of index to match vector width with last entry
230  if (endwidth)
231  {
232  for (i0 = endwidth; i0 < vecwidth; ++i0)
233  {
234  m_inputIdx[cnt1++] = inOffset + i + j - 1;
235  }
236  }
237  ASSERTL1(cnt1 <= TotLen, "m_inputIdx over extended");
238  }
239  }
240 
242  const Array<OneD, unsigned int> nrow,
243  const Array<OneD, unsigned int> ncol)
244  {
245  mat =
247  SNekMatSharedPtr loc_matNvar;
248  for (size_t nelm = 0; nelm < nrow.size(); ++nelm)
249  {
250  int nrowsVars = nrow[nelm];
251  int ncolsVars = ncol[nelm];
252 
254  nrowsVars, ncolsVars, 0.0);
255  mat->SetBlock(nelm, nelm, loc_matNvar);
256  }
257  }
258 };
259 } // namespace Nektar
260 
261 #endif
#define ASSERTL1(condition, msg)
Assert Level 1 – Debugging which is used whether in FULLDEBUG or DEBUG compilation mode....
Definition: ErrorUtil.hpp:249
General purpose memory allocation routines with the ability to allocate from thread specific memory p...
static std::shared_ptr< DataType > AllocateSharedPtr(const Args &...args)
Allocate a shared pointer from the memory pool.
Array< OneD, SNekBlkMatSharedPtr > m_TraceJacSingle
Definition: PreconCfsBRJ.h:88
TensorOfArray4D< NekSingle > m_TraceJacArraySingle
Definition: PreconCfsBRJ.h:89
Array< OneD, Array< OneD, SNekBlkMatSharedPtr > > m_PreconMatVarsSingle
Definition: PreconCfsBRJ.h:80
static std::string className
Name of the class.
Definition: PreconCfsBRJ.h:66
Array< OneD, SNekBlkMatSharedPtr > m_TraceJacDerivSingle
Definition: PreconCfsBRJ.h:90
unsigned int m_max_nblocks
Definition: PreconCfsBRJ.h:82
Array< OneD, Array< OneD, NekSingle > > m_TraceJacDerivSignSingle
Definition: PreconCfsBRJ.h:92
PrecType m_PreconMatStorage
Definition: PreconCfsBRJ.h:95
TensorOfArray4D< NekSingle > m_TraceJacDerivArraySingle
Definition: PreconCfsBRJ.h:91
static PreconCfsOpSharedPtr create(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields, const LibUtilities::SessionReaderSharedPtr &pSession, const LibUtilities::CommSharedPtr &vComm)
Creates an instance of this class.
Definition: PreconCfsBRJ.h:55
unsigned int m_max_nElmtDof
Definition: PreconCfsBRJ.h:83
TensorOfArray5D< NekSingle > m_TraceIPSymJacArraySingle
Definition: PreconCfsBRJ.h:93
std::vector< int > m_inputIdx
Definition: PreconCfsBRJ.h:86
void AllocateNekBlkMatDig(SNekBlkMatSharedPtr &mat, const Array< OneD, unsigned int > nrow, const Array< OneD, unsigned int > ncol)
Definition: PreconCfsBRJ.h:241
void AllocateSIMDPreconBlkMatDiag(const Array< OneD, MultiRegions::ExpListSharedPtr > &pFields)
This function creates the matrix structure for the block diagonal operator. It organizes the way that...
Definition: PreconCfsBRJ.h:143
std::vector< simd< NekSingle >, tinysimd::allocator< simd< NekSingle > > > m_sBlkDiagMat
Definition: PreconCfsBRJ.h:85
std::shared_ptr< SessionReader > SessionReaderSharedPtr
std::shared_ptr< Comm > CommSharedPtr
Pointer to a Communicator object.
Definition: Comm.h:54
The above copyright notice and this permission notice shall be included.
Definition: CoupledSolver.h:2
std::shared_ptr< SNekBlkMat > SNekBlkMatSharedPtr
std::shared_ptr< SNekMat > SNekMatSharedPtr
PrecType
Definition: PreconCfs.h:46
tinysimd::simd< NekDouble > vec_t
std::shared_ptr< PreconCfsOp > PreconCfsOpSharedPtr
Definition: PreconCfsOp.h:152
double NekDouble
typename abi< ScalarType, width >::type simd
Definition: tinysimd.hpp:80
boost::alignment::aligned_allocator< T, T::alignment > allocator
Definition: allocator.hpp:49