Nektar++
StorageSmvBsr.hpp
Go to the documentation of this file.
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 // File: StorageSmvBsr.hpp
4 //
5 // For more information, please see: http://www.nektar.info
6 //
7 // The MIT License
8 //
9 // Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10 // Department of Aeronautics, Imperial College London (UK), and Scientific
11 // Computing and Imaging Institute, University of Utah (USA).
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining a
14 // copy of this software and associated documentation files (the "Software"),
15 // to deal in the Software without restriction, including without limitation
16 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
17 // and/or sell copies of the Software, and to permit persons to whom the
18 // Software is furnished to do so, subject to the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be included
21 // in all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29 // DEALINGS IN THE SOFTWARE.
30 //
31 // Description: 0-based sparse BSR storage class with own unrolled multiply
32 // kernels.
33 //
34 ///////////////////////////////////////////////////////////////////////////////
35 
36 #ifndef NEKTAR_LIB_UTILITIES_LINEAR_ALGEBRA_STORAGE_SMV_BSR_HPP
37 #define NEKTAR_LIB_UTILITIES_LINEAR_ALGEBRA_STORAGE_SMV_BSR_HPP
38 
39 #include <fstream>
40 #include <map>
41 #include <utility>
42 #include <vector>
43 
46 
47 namespace Nektar
48 {
49 /*
50  * Zero-based BSR (Block Sparse Row) storage class whose sparse
51  * multiply kernels are built upon its own dense unrolled multiply
52  * kernels for blocks up to 4x4. When the blocks are larger than 4x4,
53  * the multiply kernel calls dense dgemv from BLAS.
54  *
55  * The BSR sparse format assumes the sparse matrix is a CSR collection of
56  * dense square blocks of the same size. In contrast with the NIST BSR
57  * class, this one uses zero-based storage. The constructor takes the
58  * input matrix in block coordinate storage (BCO) sparse format.
59  *
60  */
61 
// Class template declaring a zero-based BSR sparse matrix storage: public
// typedefs, a nested forward-iteration helper, the constructor taking BCO
// input, Multiply entry points, protected per-block-size multiply kernels and
// the BSR data members (values, column indices, row pointers).
// NOTE(review): this listing skips a number of original line numbers (e.g.
// 67-69, 77, 88, 121, 128, 179), so several declarations -- presumably the
// remaining typedefs, the `class const_iterator` header, the constructor
// heads and the kernel function-pointer member -- are not visible here.
62 template <typename T> class StorageSmvBsr
63 {
64 
65 public:
66  typedef T DataType;
70 
  // Pointer to a free function taking two read-only double arrays and one
  // writable double array. Presumably (block values, input, output) -- TODO
  // confirm against the kernel definitions.
71  typedef void (*MultiplyKernel)(const double *, const double *, double *);
72 
73  /// \internal
74  /// \brief Forward iterator over the nonzero (double) elements of the matrix
75  /// that mimics forward iteration of BCOMatType.
76  /// It is a dirty hack, not a real iterator in the C++ sense.
78  {
  // Value exposed by dereferencing the iterator; the first/second members
  // mirror the layout of a std::pair so it can be used like a map entry.
79  struct IterType
80  {
81  CoordType first; ///< (row, column)
82  DataType second; ///< value
83  IndexType nnzindex; ///< index of this nnz entry
84  IndexType storageindex; ///< offset of this nnz entry in the storage
85  };
86 
87  public:
  // Tail of the main constructor's parameter list (its first line is not
  // shown in this listing): block dimension plus the three BSR arrays.
89  IndexType blkDim, const DataVectorType &val,
90  const IndexVectorType &indx,
91  const IndexVectorType &pntr);
  // Copy constructor.
92  const_iterator(const const_iterator &src);
94 
  // Dereference and comparison operators.
  // NOTE(review): none of these are declared const, so they cannot be
  // called on a const const_iterator object.
97  const IterType &operator*();
98  const IterType *operator->();
99  bool operator==(const const_iterator &rhs);
100  bool operator!=(const const_iterator &rhs);
101 
102  // one-way conversion: iterator -> const_iterator (disabled)
103  // operator const_iterator<T const, Tag>() const;
104 
105  private:
  // Internal stepping helper; presumably advances to the next stored
  // entry -- TODO confirm in the implementation file.
106  void forward();
108 
117  };
118 
119 public:
120  // Constructs a zero-based BSR sparse matrix from input BCO storage.
  // Visible parameters: number of block columns, block dimension, the BCO
  // input matrix and the matrix storage type (defaults to eFULL).
122  const IndexType blkCols,
123  const IndexType blkDim,
124  const BCOMatType &bcoMat,
125  const MatrixStorage matType = eFULL);
126 
127  // Copy constructor (declaration not shown in this listing)
129 
131 
  // Returns a memory usage figure as size_t; units are not stated in this
  // header -- presumably bytes, TODO confirm.
138  LIB_UTILITIES_EXPORT size_t GetMemoryUsage() const;
139 
142 
144  IndexType column) const;
145 
  // Multiply entry point operating on raw DataType arrays: reads `in`,
  // writes `out`.
146  LIB_UTILITIES_EXPORT void Multiply(const DataType *in, DataType *out);
148  DataVectorType &out);
150  DataVectorType &out);
151 
152 protected:
153  // Converts the input map of BCO blocks
154  // to the internal zero-based BSR representation.
155  void processBcoInput(const IndexType blkRows, const IndexType blkDim,
156  const BCOMatType &bcoMat);
157 
  // NOTE(review): all kernels below take double/int pointers regardless of
  // the template parameter T and of IndexType -- confirm that this is
  // intended for every instantiation of the class.

  /// Zero-based CSR multiply (1x1 blocks).
158  void Multiply_1x1(const int mb, const double *val, const int *bindx,
159  const int *bpntrb, const int *bpntre, const double *b,
160  double *c);
161 
  /// Zero-based BSR multiply unrolled for 2x2 blocks.
162  void Multiply_2x2(const int mb, const double *val, const int *bindx,
163  const int *bpntrb, const int *bpntre, const double *b,
164  double *c);
165 
  /// Zero-based BSR multiply unrolled for 3x3 blocks.
166  void Multiply_3x3(const int mb, const double *val, const int *bindx,
167  const int *bpntrb, const int *bpntre, const double *b,
168  double *c);
169 
  /// Zero-based BSR multiply unrolled for 4x4 blocks.
170  void Multiply_4x4(const int mb, const double *val, const int *bindx,
171  const int *bpntrb, const int *bpntre, const double *b,
172  double *c);
173 
  /// Generic zero-based BSR multiply for higher matrix ranks.
174  void Multiply_generic(const int mb, const double *val, const int *bindx,
175  const int *bpntrb, const int *bpntre, const double *b,
176  double *c);
177 
178  // Interface to the lowest-level LibSMV multiply kernels
  // (the member declaration itself is not shown in this listing).
180 
182 
183  IndexType m_blkRows; ///< number of block rows
184  IndexType m_blkCols; ///< number of block columns
185  IndexType m_blkDim; ///< rank of each block
186 
187  IndexType m_bnnz; ///< number of nonzero blocks
188  IndexType m_nnz; ///< number of actual nonzero entries in the sparse matrix
189 
190  DataVectorType m_val; ///< values of non-zero entries
191  IndexVectorType m_indx; ///< column indices of non-zero entries
192  IndexVectorType m_pntr; ///< m_pntr(i) contains the index in m_val of the
193  ///< first non-zero element in row i
194 
195 private:
196 };
197 
198 } // namespace Nektar
199 
200 #endif // NEKTAR_LIB_UTILITIES_LINEAR_ALGEBRA_STORAGE_SMV_BSR_HPP
#define LIB_UTILITIES_EXPORT
1D Array of constant elements with garbage collection and bounds checking.
Definition: SharedArray.hpp:58
bool operator!=(const const_iterator &rhs)
CoordType storageIndexToFullCoord(IndexType storageIndex)
bool operator==(const const_iterator &rhs)
const_iterator(MatrixStorage matType, IndexType begin, IndexType end, IndexType blkDim, const DataVectorType &val, const IndexVectorType &indx, const IndexVectorType &pntr)
StorageSmvBsr(const IndexType blkRows, const IndexType blkCols, const IndexType blkDim, const BCOMatType &bcoMat, const MatrixStorage matType=eFULL)
const_iterator end() const
IndexType GetNumNonZeroEntries() const
void MultiplyLight(const DataVectorType &in, DataVectorType &out)
IndexVectorType m_indx
void processBcoInput(const IndexType blkRows, const IndexType blkDim, const BCOMatType &bcoMat)
Array< OneD, const DataType > ConstDataVectorType
DataType GetFillInRatio() const
size_t GetMemoryUsage() const
void Multiply(const DataType *in, DataType *out)
IndexType GetColumns() const
void Multiply_generic(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Generic zero-based BSR multiply for higher matrix ranks.
void Multiply_1x1(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based CSR multiply. Essentially this is slightly modified copy-paste from NIST Sparse Blas 0....
IndexType GetBlkSize() const
void Multiply_2x2(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 2x2 blocks. Essentially this is slightly optimised copy-paste fr...
void Multiply_4x4(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 4x4 blocks.
void(* MultiplyKernel)(const double *, const double *, double *)
MultiplyKernel m_mvKernel
Array< OneD, IndexType > IndexVectorType
void Multiply_3x3(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 3x3 blocks.
Array< OneD, DataType > DataVectorType
IndexType GetRows() const
const DataType & GetValue(IndexType row, IndexType column) const
const_iterator begin() const
IndexVectorType m_pntr
IndexType GetNumStoredDoubles() const
The above copyright notice and this permission notice shall be included.
Definition: CoupledSolver.h:2
unsigned int IndexType
std::map< CoordType, BCOEntryType > BCOMatType
std::pair< IndexType, IndexType > CoordType