Nektar++
StorageSmvBsr.hpp
Go to the documentation of this file.
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 // File: StorageSmvBsr.hpp
4 //
5 // For more information, please see: http://www.nektar.info
6 //
7 // The MIT License
8 //
9 // Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10 // Department of Aeronautics, Imperial College London (UK), and Scientific
11 // Computing and Imaging Institute, University of Utah (USA).
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining a
14 // copy of this software and associated documentation files (the "Software"),
15 // to deal in the Software without restriction, including without limitation
16 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
17 // and/or sell copies of the Software, and to permit persons to whom the
18 // Software is furnished to do so, subject to the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be included
21 // in all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29 // DEALINGS IN THE SOFTWARE.
30 //
31 // Description: 0-based sparse BSR storage class with own unrolled multiply
32 // kernels.
33 //
34 ///////////////////////////////////////////////////////////////////////////////
35 
36 #ifndef NEKTAR_LIB_UTILITIES_LINEAR_ALGEBRA_STORAGE_SMV_BSR_HPP
37 #define NEKTAR_LIB_UTILITIES_LINEAR_ALGEBRA_STORAGE_SMV_BSR_HPP
38 
39 #include <map>
40 #include <vector>
41 #include <utility>
42 #include <fstream>
43 
46 
47 namespace Nektar
48 {
49  /*
50  * Zero-based BSR (Block Sparse Row) storage class whose sparse
51  * multiply kernels are built upon its own dense unrolled multiply
52  * kernels for blocks up to 4x4. When the blocks are larger than 4x4,
53  * the multiply kernel calls dense dgemv from BLAS.
54  *
55  * The BSR sparse format assumes that the sparse matrix is a CSR
56  * collection of dense square blocks of the same size. In contrast with
57  * the NIST BSR class, this one uses zero-based storage. The constructor
58  * takes the input matrix in block coordinate storage (BCO) sparse format.
59  *
60  */
61 
62  template<typename T>
64  {
65 
66  public:
67  typedef T DataType;
71 
/// Pointer-to-function type for a multiply kernel: takes two read-only
/// double arrays and one writable double array, and returns nothing.
72  typedef void (*MultiplyKernel)(const double*, const double*, double*);
73 
74  /// \internal
75  /// \brief Forward iterator through nonzero (double) elements of the matrix
76  /// that mimics forward iteration of BCOMatType.
77  /// It's a dirty hack, not a real iterator in the C++ sense.
79  {
/// Record returned by the iterator's operator* and operator->.
/// The first/second member names mirror the key/value naming of a
/// std::map entry, so iteration looks like iteration over BCOMatType.
80  struct IterType
81  {
82  CoordType first; ///< (row, column)
83  DataType second; ///< value
84  IndexType nnzindex; ///< index of this nnz entry
85  IndexType storageindex;///< offset of this nnz entry in the storage
86  };
87 
88  public:
91  IndexType end,
92  IndexType blkDim,
93  const DataVectorType& val,
94  const IndexVectorType& indx,
95  const IndexVectorType& pntr);
/// Copy constructor: builds a new iterator from \a src.
96  const_iterator(const const_iterator& src);
98 
/// Dereference operators: expose the iterator's IterType record by
/// const reference and by const pointer respectively.
/// NOTE(review): none of the four operators below is const-qualified, so
/// they cannot be invoked on a const const_iterator object.
101  const IterType& operator*();
102  const IterType* operator->();
/// Equality / inequality comparison against another const_iterator.
103  bool operator==(const const_iterator& rhs);
104  bool operator!=(const const_iterator& rhs);
105 
106  // one way conversion: iterator -> const_iterator
107  // operator const_iterator<T const, Tag>() const;
108 
109  private:
/// Private helper with no arguments and no return value.
/// NOTE(review): presumably advances the iterator to the next stored
/// entry - confirm against the implementation file.
110  void forward();
112 
121  };
122 
123 
124  public:
125  // Constructs zero-based BSR sparse matrix based on input BCO storage
127  const IndexType blkCols,
128  const IndexType blkDim,
129  const BCOMatType& bcoMat,
130  const MatrixStorage matType = eFULL);
131 
132  // Copy constructor
134 
136 
/// Const query returning a size_t.
/// NOTE(review): presumably the memory footprint of the stored arrays in
/// bytes - TODO confirm units against the implementation.
143  LIB_UTILITIES_EXPORT size_t GetMemoryUsage() const;
144 
147 
149  IndexType row, IndexType column) const;
150 
/// Raw-pointer multiply overload: \a in is read-only, \a out is written.
/// NOTE(review): presumably computes out = A * in for this matrix; the
/// required lengths of both arrays are not visible in this header -
/// confirm against the implementation.
151  LIB_UTILITIES_EXPORT void Multiply(const DataType* in,
152  DataType* out);
154  DataVectorType &out);
156  DataVectorType &out);
157 
158 
159  protected:
160 
161  // Converts the input BCO matrix (a map from block coordinates to
162  // blocks) to the internal zero-based BSR representation.
// NOTE(review): only the number of block rows and the block dimension are
// passed in; the number of block columns is not an argument of this helper.
163  void processBcoInput(
164  const IndexType blkRows,
165  const IndexType blkDim,
166  const BCOMatType& bcoMat);
167 
168 
/// Zero-based CSR multiply (the 1x1-block case).
/// NOTE(review): parameter roles are not documented in this header;
/// presumably mb is the number of (block) rows, val/bindx/bpntrb/bpntre
/// are the sparse storage arrays, b is the input vector and c the
/// output vector - confirm against the implementation.
169  void Multiply_1x1(const int mb, const double* val,
170  const int* bindx, const int* bpntrb, const int* bpntre,
171  const double* b, double* c);
172 
/// Zero-based BSR multiply unrolled for 2x2 blocks.
/// NOTE(review): parameter roles are not documented in this header;
/// presumably mb is the number of block rows, val/bindx/bpntrb/bpntre
/// are the BSR storage arrays, b is the input vector and c the output
/// vector - confirm against the implementation.
173  void Multiply_2x2(const int mb, const double* val,
174  const int* bindx, const int* bpntrb, const int* bpntre,
175  const double* b, double* c);
176 
/// Zero-based BSR multiply unrolled for 3x3 blocks.
/// NOTE(review): parameter roles are not documented in this header;
/// presumably mb is the number of block rows, val/bindx/bpntrb/bpntre
/// are the BSR storage arrays, b is the input vector and c the output
/// vector - confirm against the implementation.
177  void Multiply_3x3(const int mb, const double* val,
178  const int* bindx, const int* bpntrb, const int* bpntre,
179  const double* b, double* c);
180 
/// Zero-based BSR multiply unrolled for 4x4 blocks.
/// NOTE(review): parameter roles are not documented in this header;
/// presumably mb is the number of block rows, val/bindx/bpntrb/bpntre
/// are the BSR storage arrays, b is the input vector and c the output
/// vector - confirm against the implementation.
181  void Multiply_4x4(const int mb, const double* val,
182  const int* bindx, const int* bpntrb, const int* bpntre,
183  const double* b, double* c);
184 
/// Generic zero-based BSR multiply for higher matrix ranks, i.e. block
/// sizes not covered by the unrolled Multiply_NxN kernels.
/// NOTE(review): parameter roles are not documented in this header;
/// presumably mb is the number of block rows, val/bindx/bpntrb/bpntre
/// are the BSR storage arrays, b is the input vector and c the output
/// vector - confirm against the implementation.
185  void Multiply_generic(const int mb, const double* val,
186  const int* bindx, const int* bpntrb, const int* bpntre,
187  const double* b, double* c);
188 
189  // interface to lowest level LibSMV multiply kernels
191 
193 
194  IndexType m_blkRows; ///< number of block rows
195  IndexType m_blkCols; ///< number of block columns
196  IndexType m_blkDim; ///< rank of each block
197 
198  IndexType m_bnnz; ///< number of nonzero blocks
199  IndexType m_nnz; ///< number of actual nonzero entries in the sparse matrix
200 
201  DataVectorType m_val; ///< values of non-zero entries
202  IndexVectorType m_indx; ///< column indices of non-zero entries
203  IndexVectorType m_pntr; ///< m_pntr(i) contains index in m_val of first non-zero element in row i
204 
205  private:
206 
207  };
208 
209 
210 
211 } // namespace
212 
213 #endif //NEKTAR_LIB_UTILITIES_LINEAR_ALGEBRA_STORAGE_SMV_BSR_HPP
#define LIB_UTILITIES_EXPORT
1D Array of constant elements with garbage collection and bounds checking.
Definition: SharedArray.hpp:59
bool operator!=(const const_iterator &rhs)
CoordType storageIndexToFullCoord(IndexType storageIndex)
bool operator==(const const_iterator &rhs)
const_iterator(MatrixStorage matType, IndexType begin, IndexType end, IndexType blkDim, const DataVectorType &val, const IndexVectorType &indx, const IndexVectorType &pntr)
StorageSmvBsr(const IndexType blkRows, const IndexType blkCols, const IndexType blkDim, const BCOMatType &bcoMat, const MatrixStorage matType=eFULL)
const_iterator end() const
IndexType GetNumNonZeroEntries() const
void MultiplyLight(const DataVectorType &in, DataVectorType &out)
IndexVectorType m_indx
void processBcoInput(const IndexType blkRows, const IndexType blkDim, const BCOMatType &bcoMat)
Array< OneD, const DataType > ConstDataVectorType
DataType GetFillInRatio() const
size_t GetMemoryUsage() const
void Multiply(const DataType *in, DataType *out)
IndexType GetColumns() const
void Multiply_generic(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Generic zero-based BSR multiply for higher matrix ranks.
void Multiply_1x1(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based CSR multiply. Essentially this is slightly modified copy-paste from NIST Sparse Blas 0....
IndexType GetBlkSize() const
void Multiply_2x2(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 2x2 blocks. Essentially this is slightly optimised copy-paste fr...
void Multiply_4x4(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 4x4 blocks.
void(* MultiplyKernel)(const double *, const double *, double *)
MultiplyKernel m_mvKernel
Array< OneD, IndexType > IndexVectorType
void Multiply_3x3(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 3x3 blocks.
Array< OneD, DataType > DataVectorType
IndexType GetRows() const
const DataType & GetValue(IndexType row, IndexType column) const
const_iterator begin() const
IndexVectorType m_pntr
IndexType GetNumStoredDoubles() const
The above copyright notice and this permission notice shall be included.
Definition: CoupledSolver.h:1
unsigned int IndexType
std::pair< IndexType, IndexType > CoordType
std::map< CoordType, BCOEntryType > BCOMatType