Nektar++
StorageSmvBsr.hpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: StorageSmvBsr.hpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// Permission is hereby granted, free of charge, to any person obtaining a
14// copy of this software and associated documentation files (the "Software"),
15// to deal in the Software without restriction, including without limitation
16// the rights to use, copy, modify, merge, publish, distribute, sublicense,
17// and/or sell copies of the Software, and to permit persons to whom the
18// Software is furnished to do so, subject to the following conditions:
19//
20// The above copyright notice and this permission notice shall be included
21// in all copies or substantial portions of the Software.
22//
23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29// DEALINGS IN THE SOFTWARE.
30//
31// Description: 0-based sparse BSR storage class with own unrolled multiply
32// kernels.
33//
34///////////////////////////////////////////////////////////////////////////////
35
36#ifndef NEKTAR_LIB_UTILITIES_LINEAR_ALGEBRA_STORAGE_SMV_BSR_HPP
37#define NEKTAR_LIB_UTILITIES_LINEAR_ALGEBRA_STORAGE_SMV_BSR_HPP
38
39#include <fstream>
40#include <map>
41#include <utility>
42#include <vector>
43
46
47namespace Nektar
48{
49/*
50 * Zero-based BSR (Block Sparse Row) storage class whose sparse
51 * multiply kernels are built upon its own dense unrolled multiply
52 * kernels for blocks up to 4x4. When the blocks are larger than 4x4,
53 * the multiply kernel calls dense dgemv from BLAS.
54 *
55 * The BSR sparse format assumes the sparse matrix is a CSR collection of
56 * dense square blocks of the same size. In contrast with the NIST BSR
57 * class, this one uses zero-based storage. The constructor takes the
58 * input matrix in block coordinate storage (BCO) sparse format.
59 *
60 */
61
62template <typename T> class StorageSmvBsr
63{
64
65public:
66 typedef T DataType;
70
 /// Function-pointer type for a low-level multiply kernel taking two
 /// read-only double arrays and one writable double array.
 /// NOTE(review): fixed to double regardless of DataType -- confirm that
 /// the class is only ever instantiated with T = double.
71 typedef void (*MultiplyKernel)(const double *, const double *, double *);
72
73 /// \internal
74 /// \brief Forward iterator over the nonzero (double) elements of the
75 /// matrix that mimics forward iteration over a BCOMatType.
76 /// It is a deliberate shortcut, not a real iterator in the C++ sense.
78 {
79 struct IterType
80 {
81 CoordType first; ///< (row, column)
82 DataType second; ///< value
83 IndexType nnzindex; ///< index of this nnz entry
84 IndexType storageindex; ///< offset of this nnz entry in the storage
85 };
86
87 public:
89 IndexType blkDim, const DataVectorType &val,
90 const IndexVectorType &indx,
91 const IndexVectorType &pntr);
94
97 const IterType &operator*();
98 const IterType *operator->();
99 bool operator==(const const_iterator &rhs);
100 bool operator!=(const const_iterator &rhs);
101
102 private:
103 void forward();
105
114 };
115
116public:
117 // Constructs a zero-based BSR sparse matrix from input BCO storage.
119 const IndexType blkCols,
120 const IndexType blkDim,
121 const BCOMatType &bcoMat,
122 const MatrixStorage matType = eFULL);
123
124 // Copy constructor
126
128
136
139
141 IndexType column) const;
142
143 LIB_UTILITIES_EXPORT void Multiply(const DataType *in, DataType *out);
145 DataVectorType &out);
147 DataVectorType &out);
148
149protected:
150 // Converts the input BCO blocks (bcoMat)
151 // to the internal zero-based BSR representation.
152 void processBcoInput(const IndexType blkRows, const IndexType blkDim,
153 const BCOMatType &bcoMat);
154
 // Block-size-specific multiply kernels. They all share one signature and
 // are declared on raw double/int pointers rather than on
 // DataType/IndexType.
 // NOTE(review): the parameters presumably are: mb = number of block rows,
 // val = stored block values, bindx = block column indices,
 // bpntrb/bpntre = per-block-row begin/end offsets, b = input vector,
 // c = output vector -- confirm against the definitions.

 // Kernel for 1x1 blocks (i.e. plain zero-based CSR multiply).
155 void Multiply_1x1(const int mb, const double *val, const int *bindx,
156 const int *bpntrb, const int *bpntre, const double *b,
157 double *c);
158
 // Kernel unrolled for 2x2 blocks.
159 void Multiply_2x2(const int mb, const double *val, const int *bindx,
160 const int *bpntrb, const int *bpntre, const double *b,
161 double *c);
162
 // Kernel unrolled for 3x3 blocks.
163 void Multiply_3x3(const int mb, const double *val, const int *bindx,
164 const int *bpntrb, const int *bpntre, const double *b,
165 double *c);
166
 // Kernel unrolled for 4x4 blocks.
167 void Multiply_4x4(const int mb, const double *val, const int *bindx,
168 const int *bpntrb, const int *bpntre, const double *b,
169 double *c);
170
 // Generic kernel for higher block ranks.
171 void Multiply_generic(const int mb, const double *val, const int *bindx,
172 const int *bpntrb, const int *bpntre, const double *b,
173 double *c);
174
175 // Interface to the lowest-level LibSMV multiply kernels.
177
179
180 IndexType m_blkRows; ///< number of block rows
181 IndexType m_blkCols; ///< number of block columns
182 IndexType m_blkDim; ///< rank of each block
183
184 IndexType m_bnnz; ///< number of nonzero blocks
185 IndexType m_nnz; ///< number of actual nonzero entries in the sparse matrix
186
187 DataVectorType m_val; ///< values of non-zero entries
188 IndexVectorType m_indx; ///< column indices of non-zero entries
189 IndexVectorType m_pntr; ///< m_pntr(i) contains the index in m_val of the
190 ///< first non-zero element in row i
191
192private:
193};
194
195} // namespace Nektar
196
197#endif // NEKTAR_LIB_UTILITIES_LINEAR_ALGEBRA_STORAGE_SMV_BSR_HPP
#define LIB_UTILITIES_EXPORT
1D Array of constant elements with garbage collection and bounds checking.
Definition: SharedArray.hpp:56
bool operator!=(const const_iterator &rhs)
CoordType storageIndexToFullCoord(IndexType storageIndex)
bool operator==(const const_iterator &rhs)
const_iterator(MatrixStorage matType, IndexType begin, IndexType end, IndexType blkDim, const DataVectorType &val, const IndexVectorType &indx, const IndexVectorType &pntr)
StorageSmvBsr(const IndexType blkRows, const IndexType blkCols, const IndexType blkDim, const BCOMatType &bcoMat, const MatrixStorage matType=eFULL)
const_iterator end() const
IndexType GetNumNonZeroEntries() const
void MultiplyLight(const DataVectorType &in, DataVectorType &out)
IndexVectorType m_indx
void processBcoInput(const IndexType blkRows, const IndexType blkDim, const BCOMatType &bcoMat)
Array< OneD, const DataType > ConstDataVectorType
DataType GetFillInRatio() const
size_t GetMemoryUsage() const
void Multiply(const DataType *in, DataType *out)
IndexType GetColumns() const
void Multiply_generic(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Generic zero-based BSR multiply for higher matrix ranks.
void Multiply_1x1(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based CSR multiply. Essentially this is slightly modified copy-paste from NIST Sparse Blas 0....
IndexType GetBlkSize() const
void Multiply_2x2(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 2x2 blocks. Essentially this is slightly optimised copy-paste fr...
void Multiply_4x4(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 4x4 blocks.
void(* MultiplyKernel)(const double *, const double *, double *)
MultiplyKernel m_mvKernel
Array< OneD, IndexType > IndexVectorType
void Multiply_3x3(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 3x3 blocks.
Array< OneD, DataType > DataVectorType
IndexType GetRows() const
const DataType & GetValue(IndexType row, IndexType column) const
const_iterator begin() const
IndexVectorType m_pntr
IndexType GetNumStoredDoubles() const
unsigned int IndexType
std::map< CoordType, BCOEntryType > BCOMatType
std::pair< IndexType, IndexType > CoordType