50template <
typename DataType>
55 : m_matType(matType), m_iter(), m_begin(begin), m_end(end),
56 m_blkDim(blkDim), m_val(val), m_indx(indx), m_pntr(pntr)
81 std::cout <<
"const_iterator: 'begin' out stored values bounds"
94template <
typename DataType>
101 const IndexType block = storageIndex / elms;
102 const IndexType loc_offset = storageIndex % elms;
110 while (block >=
m_pntr[block_row + 1])
118 coord.first = block_row *
m_blkDim + loc_row;
119 coord.second = block_col *
m_blkDim + loc_col;
123template <
typename DataType>
131template <
typename DataType>
136template <
typename DataType>
145template <
typename DataType>
153template <
typename DataType>
160template <
typename DataType>
167template <
typename DataType>
174template <
typename DataType>
181template <
typename DataType>
184 while ((m_iter.storageindex + 1 <
m_val.
size()) &&
192 m_iter.nnzindex = m_end;
196 m_iter.second =
m_val[m_iter.storageindex];
198 CoordType c = storageIndexToFullCoord(m_iter.storageindex);
199 m_iter.first.first = c.first;
200 m_iter.first.second = c.second;
203template <
typename DataType>
219 std::cout <<
"matrix type not implemented" << std::endl;
226template <
typename DataType>
228 : m_matType(src.m_matType), m_blkRows(src.m_blkRows),
229 m_blkCols(src.m_blkCols), m_blkDim(src.m_blkDim), m_bnnz(src.m_bnnz),
230 m_nnz(src.m_nnz), m_val(src.m_val), m_indx(src.m_indx), m_pntr(src.m_pntr)
240 return m_blkRows * m_blkDim;
243template <
typename DataType>
246 return m_blkCols * m_blkDim;
249template <
typename DataType>
255template <
typename DataType>
261template <
typename DataType>
264 return m_bnnz * m_blkDim * m_blkDim;
267template <
typename DataType>
273template <
typename DataType>
276 return sizeof(
DataType) * m_val.capacity() +
283template <
typename DataType>
294 IndexType offset = m_pntr[brow] * m_blkDim * m_blkDim;
298 for (i = m_pntr[brow]; i < m_pntr[brow + 1]; i++)
300 if (bcol == m_indx[i])
302 return m_val[offset + lrow + lcol * m_blkDim];
304 offset += m_blkDim * m_blkDim;
307 return defaultReturnValue;
310template <
typename DataType>
314 return const_iterator(m_matType, 0, m_nnz, m_blkDim, m_val, m_indx, m_pntr);
317template <
typename DataType>
321 return const_iterator(m_matType, m_nnz, m_nnz, m_blkDim, m_val, m_indx,
328template <
typename DataType>
332 const double *b = &in[0];
334 const double *val = &m_val[0];
335 const int *bindx = (
int *)&m_indx[0];
336 const int *bpntrb = (
int *)&m_pntr[0];
337 const int *bpntre = (
int *)&m_pntr[0] + 1;
338 const int mb = m_blkRows;
343 Multiply_1x1(mb, val, bindx, bpntrb, bpntre, b, c);
346 Multiply_2x2(mb, val, bindx, bpntrb, bpntre, b, c);
350 Multiply_generic(mb, val, bindx, bpntrb, bpntre, b, c);
356template <
typename DataType>
359 const double *b = &in[0];
361 const double *val = &m_val[0];
362 const int *bindx = (
int *)&m_indx[0];
363 const int *bpntrb = (
int *)&m_pntr[0];
364 const int *bpntre = (
int *)&m_pntr[0] + 1;
365 const int mb = m_blkRows;
370 Multiply_1x1(mb, val, bindx, bpntrb, bpntre, b, c);
373 Multiply_2x2(mb, val, bindx, bpntrb, bpntre, b, c);
377 Multiply_generic(mb, val, bindx, bpntrb, bpntre, b, c);
385template <
typename DataType>
389 const double *b = &in[0];
391 const double *val = &m_val[0];
392 const int *bindx = (
int *)&m_indx[0];
393 const int *bpntrb = (
int *)&m_pntr[0];
394 const int *bpntre = (
int *)&m_pntr[0] + 1;
395 const int mb = m_blkRows;
400 Multiply_1x1(mb, val, bindx, bpntrb, bpntre, b, c);
403 Multiply_2x2(mb, val, bindx, bpntrb, bpntre, b, c);
407 Multiply_generic(mb, val, bindx, bpntrb, bpntre, b, c);
416template <
typename DataType>
418 const int *bindx,
const int *bpntrb,
419 const int *bpntre,
const double *b,
422 for (
int i = 0; i != mb; i++)
427 for (
int j = jb; j != je; j++)
429 t += b[bindx[j]] * (*val++);
438template <
typename DataType>
440 const int *bindx,
const int *bpntrb,
441 const int *bpntre,
const double *b,
446 const double *pval = val;
449 for (
int i = 0; i != mb; i++)
455 for (
int j = jb; j != je; j++)
457 int bs = bindx[j] * lb;
458 const double *pb = &b[bs];
460 pc[0] += pb[0] * pval[0] + pb[1] * pval[2];
461 pc[1] += pb[0] * pval[1] + pb[1] * pval[3];
469template <
typename DataType>
471 const int *bindx,
const int *bpntrb,
472 const int *bpntre,
const double *b,
477 const double *pval = val;
480 for (
int i = 0; i != mb; i++)
487 for (
int j = jb; j != je; j++)
489 int bs = bindx[j] * lb;
490 const double *pb = &b[bs];
492 pc[0] += pb[0] * pval[0] + pb[1] * pval[3] + pb[2] * pval[6];
493 pc[1] += pb[0] * pval[1] + pb[1] * pval[4] + pb[2] * pval[7];
494 pc[2] += pb[0] * pval[2] + pb[1] * pval[5] + pb[2] * pval[8];
502template <
typename DataType>
504 const int *bindx,
const int *bpntrb,
505 const int *bpntre,
const double *b,
510 const double *pval = val;
513 for (
int i = 0; i != mb; i++)
521 for (
int j = jb; j != je; j++)
523 int bs = bindx[j] * lb;
524 const double *pb = &b[bs];
526 pc[0] += pb[0] * pval[0] + pb[1] * pval[4] + pb[2] * pval[8] +
528 pc[1] += pb[0] * pval[1] + pb[1] * pval[5] + pb[2] * pval[9] +
530 pc[2] += pb[0] * pval[2] + pb[1] * pval[6] + pb[2] * pval[10] +
532 pc[3] += pb[0] * pval[3] + pb[1] * pval[7] + pb[2] * pval[11] +
541template <
typename DataType>
546 const double *b,
double *c)
548 const int lb = m_blkDim;
549 const double *pval = val;
550 const int mm = lb * lb;
552 for (
int i = 0; i != mb * lb; i++)
556 for (
int i = 0; i != mb; i++)
560 for (
int j = jb; j != je; j++)
562 Blas::Dgemv(
'N', lb, lb, 1.0, pval, lb, &b[bindx[j] * lb], 1, 1.0,
572template <
typename DataType>
583 std::vector<IndexType> tmp(blkRows + 1, 0);
587 for (entry = bcoMat.begin(); entry != bcoMat.end(); entry++)
589 rowcoord = (entry->first).first;
596 for (i = 0; i < blkRows; i++)
598 m_pntr[i + 1] = m_pntr[i] + tmp[i];
603 std::copy(&m_pntr[0], &m_pntr[0] + blkRows + 1, &tmp[0]);
606 for (entry = bcoMat.begin(); entry != bcoMat.end(); entry++)
608 rowcoord = (entry->first).first;
609 colcoord = (entry->first).second;
610 value = entry->second;
611 int blkSize = blkDim * blkDim;
613 for (i = 0; i < blkSize; i++)
615 m_val[blkSize * (tmp[rowcoord]) + i] = value[i];
620 m_indx[tmp[rowcoord]] = colcoord;
size_type size() const
Returns the array's size.
bool operator!=(const const_iterator &rhs)
CoordType storageIndexToFullCoord(IndexType storageIndex)
const DataVectorType & m_val
bool operator==(const const_iterator &rhs)
const_iterator(MatrixStorage matType, IndexType begin, IndexType end, IndexType blkDim, const DataVectorType &val, const IndexVectorType &indx, const IndexVectorType &pntr)
StorageSmvBsr(const IndexType blkRows, const IndexType blkCols, const IndexType blkDim, const BCOMatType &bcoMat, const MatrixStorage matType=eFULL)
const_iterator end() const
IndexType GetNumNonZeroEntries() const
void MultiplyLight(const DataVectorType &in, DataVectorType &out)
void processBcoInput(const IndexType blkRows, const IndexType blkDim, const BCOMatType &bcoMat)
DataType GetFillInRatio() const
size_t GetMemoryUsage() const
void Multiply(const DataType *in, DataType *out)
IndexType GetColumns() const
void Multiply_generic(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Generic zero-based BSR multiply for higher matrix ranks.
void Multiply_1x1(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based CSR multiply. Essentially this is slightly modified copy-paste from NIST Sparse Blas 0....
IndexType GetBlkSize() const
void Multiply_2x2(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 2x2 blocks. Essentially this is slightly optimised copy-paste fr...
void Multiply_4x4(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 4x4 blocks.
void Multiply_3x3(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 3x3 blocks.
IndexType GetRows() const
const DataType & GetValue(IndexType row, IndexType column) const
const_iterator begin() const
IndexType GetNumStoredDoubles() const
static void Dgemv(const char &trans, const int &m, const int &n, const double &alpha, const double *a, const int &lda, const double *x, const int &incx, const double &beta, double *y, const int &incy)
BLAS level 2: Matrix vector multiply y = alpha A x plus beta y where A[m x n].
static const NekDouble kNekSparseNonZeroTol
The above copyright notice and this permission notice shall be included.
std::map< CoordType, BCOEntryType > BCOMatType
std::pair< IndexType, IndexType > CoordType
BCOMatType::const_iterator BCOMatTypeConstIt
scalarT< T > abs(scalarT< T > in)