Nektar++
StorageSmvBsr.cpp
Go to the documentation of this file.
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 // File: StorageSmvBsr.cpp
4 //
5 // For more information, please see: http://www.nektar.info
6 //
7 // The MIT License
8 //
9 // Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10 // Department of Aeronautics, Imperial College London (UK), and Scientific
11 // Computing and Imaging Institute, University of Utah (USA).
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining a
14 // copy of this software and associated documentation files (the "Software"),
15 // to deal in the Software without restriction, including without limitation
16 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
17 // and/or sell copies of the Software, and to permit persons to whom the
18 // Software is furnished to do so, subject to the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be included
21 // in all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29 // DEALINGS IN THE SOFTWARE.
30 //
31 // Description: 0-based sparse BSR storage class with own unrolled multiply
32 // kernels.
33 //
34 ///////////////////////////////////////////////////////////////////////////////
35 
36 #include <cmath>
37 #include <fstream>
38 #include <map>
39 #include <utility>
40 #include <vector>
41 
46 
47 namespace Nektar
48 {
49 
50 template <typename DataType>
52  MatrixStorage matType, IndexType begin, IndexType end, IndexType blkDim,
53  const DataVectorType &val, const IndexVectorType &indx,
54  const IndexVectorType &pntr)
55  : m_matType(matType), m_iter(), m_begin(begin), m_end(end),
56  m_blkDim(blkDim), m_val(val), m_indx(indx), m_pntr(pntr)
57 {
59 
60  // Here we rewind the iterator to the 'begin'-th
61  // nonzero double value. Consecutive nonzeros are
62  // to lie within dense block and when it's done,
63  // jump to the next dense block.
64  // NB: m_val stores some explicit zeros that need to be skept.
65  if (begin < end)
66  {
67  // determine offset of 'begin'-th nonzero value
68 
69  m_iter.storageindex = 0;
70  m_iter.nnzindex = 0;
71  while (m_iter.nnzindex < begin)
72  {
73  // explicit zero?
75  {
76  m_iter.nnzindex++;
77  }
79  if (m_iter.storageindex >= m_val.size())
80  {
81  std::cout << "const_iterator: 'begin' out stored values bounds"
82  << std::endl;
83  throw 1;
84  }
85  }
87 
89  m_iter.first.first = c.first;
90  m_iter.first.second = c.second;
91  }
92 }
93 
94 template <typename DataType>
96  IndexType storageIndex)
97 {
98  // find local row and column indices within this block
99 
100  const IndexType elms = m_blkDim * m_blkDim;
101  const IndexType block = storageIndex / elms;
102  const IndexType loc_offset = storageIndex % elms;
103  const IndexType loc_row = loc_offset % m_blkDim;
104  const IndexType loc_col = loc_offset / m_blkDim;
105 
106  // find block row and block column
107 
108  IndexType block_col = m_indx[block];
109  IndexType block_row = 0;
110  while (block >= m_pntr[block_row + 1])
111  {
112  block_row++;
113  }
114 
115  // full matrix coordinates of this nonzero entry
116 
117  CoordType coord;
118  coord.first = block_row * m_blkDim + loc_row;
119  coord.second = block_col * m_blkDim + loc_col;
120  return coord;
121 }
122 
123 template <typename DataType>
125  const const_iterator &src)
126  : m_matType(src.m_matType), m_iter(src.m_iter), m_begin(src.m_begin),
127  m_end(src.m_end), m_val(src.m_val), m_indx(src.m_indx), m_pntr(src.m_pntr)
128 {
129 }
130 
131 template <typename DataType>
133 {
134 }
135 
136 template <typename DataType>
138  DataType>::const_iterator::operator++(int)
139 {
140  const_iterator out = *this;
141  forward();
142  return out;
143 }
144 
145 template <typename DataType>
147  DataType>::const_iterator::operator++()
148 {
149  forward();
150  return *this;
151 }
152 
153 template <typename DataType>
155  DataType>::const_iterator::operator*()
156 {
157  return m_iter;
158 }
159 
160 template <typename DataType>
162  DataType>::const_iterator::operator->()
163 {
164  return &m_iter;
165 }
166 
167 template <typename DataType>
169  const const_iterator &rhs)
170 {
171  return m_iter.nnzindex == rhs.m_iter.nnzindex;
172 }
173 
174 template <typename DataType>
176  const const_iterator &rhs)
177 {
178  return !(m_iter.nnzindex == rhs.m_iter.nnzindex);
179 }
180 
181 template <typename DataType>
183 {
184  while ((m_iter.storageindex + 1 < m_val.size()) &&
185  (m_val[++m_iter.storageindex] <= NekConstants::kNekSparseNonZeroTol))
186  ;
187 
188  m_iter.nnzindex++;
189 
190  if (m_iter.storageindex >= m_val.size())
191  {
192  m_iter.nnzindex = m_end;
193  return;
194  }
195 
196  m_iter.second = m_val[m_iter.storageindex];
197 
198  CoordType c = storageIndexToFullCoord(m_iter.storageindex);
199  m_iter.first.first = c.first;
200  m_iter.first.second = c.second;
201 }
202 
203 template <typename DataType>
205  const IndexType blkCols,
206  const IndexType blkDim,
207  const BCOMatType &bcoMat,
208  const MatrixStorage matType)
209  : m_matType(matType), m_blkRows(blkRows), m_blkCols(blkCols),
210  m_blkDim(blkDim), m_bnnz(bcoMat.size()), m_nnz(0),
211  m_val(m_bnnz * blkDim * blkDim), m_indx(m_bnnz + 1), m_pntr(blkRows + 1)
212 {
213  if (matType != Nektar::eFULL)
214  {
215  /// \todo: - iterators over strictly lower-triangular part
216  /// - number off-diagonal elements calculation
217  /// - clear distinction between stored and factual nonzeros
218  /// (row density)
219  std::cout << "matrix type not implemented" << std::endl;
220  throw 1;
221  }
222 
223  processBcoInput(blkRows, blkDim, bcoMat);
224 }
225 
226 template <typename DataType>
228  : m_matType(src.m_matType), m_blkRows(src.m_blkRows),
229  m_blkCols(src.m_blkCols), m_blkDim(src.m_blkDim), m_bnnz(src.m_bnnz),
230  m_nnz(src.m_nnz), m_val(src.m_val), m_indx(src.m_indx), m_pntr(src.m_pntr)
231 {
232 }
233 
234 template <typename DataType> StorageSmvBsr<DataType>::~StorageSmvBsr()
235 {
236 }
237 
238 template <typename DataType> IndexType StorageSmvBsr<DataType>::GetRows() const
239 {
240  return m_blkRows * m_blkDim;
241 }
242 
243 template <typename DataType>
245 {
246  return m_blkCols * m_blkDim;
247 }
248 
249 template <typename DataType>
251 {
252  return m_nnz;
253 }
254 
255 template <typename DataType>
257 {
258  return m_blkDim;
259 }
260 
261 template <typename DataType>
263 {
264  return m_bnnz * m_blkDim * m_blkDim;
265 }
266 
267 template <typename DataType>
269 {
270  return (DataType)(m_bnnz * m_blkDim * m_blkDim) / (DataType)m_nnz;
271 }
272 
273 template <typename DataType>
275 {
276  return sizeof(DataType) * m_val.capacity() +
277  sizeof(IndexType) * m_indx.capacity() +
278  sizeof(IndexType) * m_pntr.capacity() +
279  sizeof(IndexType) * 5 + //< blkRows + blkCols + blkDim + nnz + bnnz
280  sizeof(MatrixStorage);
281 }
282 
283 template <typename DataType>
285  IndexType gcolumn) const
286 {
287  IndexType brow = grow / m_blkDim;
288  IndexType bcol = gcolumn / m_blkDim;
289  IndexType lrow = grow % m_blkDim;
290  IndexType lcol = gcolumn % m_blkDim;
291 
292  // rewind to the first entry of the first
293  // block in the current block row
294  IndexType offset = m_pntr[brow] * m_blkDim * m_blkDim;
295 
296  IndexType i;
297  static DataType defaultReturnValue;
298  for (i = m_pntr[brow]; i < m_pntr[brow + 1]; i++)
299  {
300  if (bcol == m_indx[i])
301  {
302  return m_val[offset + lrow + lcol * m_blkDim];
303  }
304  offset += m_blkDim * m_blkDim;
305  }
306 
307  return defaultReturnValue;
308 }
309 
310 template <typename DataType>
312  DataType>::begin() const
313 {
314  return const_iterator(m_matType, 0, m_nnz, m_blkDim, m_val, m_indx, m_pntr);
315 }
316 
317 template <typename DataType>
319  const
320 {
321  return const_iterator(m_matType, m_nnz, m_nnz, m_blkDim, m_val, m_indx,
322  m_pntr);
323 }
324 
325 // General non-symmetric zero-based BSR multiply
326 // C = A*B where A is matrix and B is vector.
327 // No scaling. Previous content of C is discarded.
328 template <typename DataType>
330  DataVectorType &out)
331 {
332  const double *b = &in[0];
333  double *c = &out[0];
334  const double *val = &m_val[0];
335  const int *bindx = (int *)&m_indx[0];
336  const int *bpntrb = (int *)&m_pntr[0];
337  const int *bpntre = (int *)&m_pntr[0] + 1;
338  const int mb = m_blkRows;
339 
340  switch (m_blkDim)
341  {
342  case 1:
343  Multiply_1x1(mb, val, bindx, bpntrb, bpntre, b, c);
344  return;
345  case 2:
346  Multiply_2x2(mb, val, bindx, bpntrb, bpntre, b, c);
347  return;
348  default:
349  {
350  Multiply_generic(mb, val, bindx, bpntrb, bpntre, b, c);
351  return;
352  }
353  }
354 }
355 
356 template <typename DataType>
358 {
359  const double *b = &in[0];
360  double *c = &out[0];
361  const double *val = &m_val[0];
362  const int *bindx = (int *)&m_indx[0];
363  const int *bpntrb = (int *)&m_pntr[0];
364  const int *bpntre = (int *)&m_pntr[0] + 1;
365  const int mb = m_blkRows;
366 
367  switch (m_blkDim)
368  {
369  case 1:
370  Multiply_1x1(mb, val, bindx, bpntrb, bpntre, b, c);
371  return;
372  case 2:
373  Multiply_2x2(mb, val, bindx, bpntrb, bpntre, b, c);
374  return;
375  default:
376  {
377  Multiply_generic(mb, val, bindx, bpntrb, bpntre, b, c);
378  return;
379  }
380  }
381 }
382 
383 // This function is defined for backward compatibility with
384 // NIST storage classes
385 template <typename DataType>
387  DataVectorType &out)
388 {
389  const double *b = &in[0];
390  double *c = &out[0];
391  const double *val = &m_val[0];
392  const int *bindx = (int *)&m_indx[0];
393  const int *bpntrb = (int *)&m_pntr[0];
394  const int *bpntre = (int *)&m_pntr[0] + 1;
395  const int mb = m_blkRows;
396 
397  switch (m_blkDim)
398  {
399  case 1:
400  Multiply_1x1(mb, val, bindx, bpntrb, bpntre, b, c);
401  return;
402  case 2:
403  Multiply_2x2(mb, val, bindx, bpntrb, bpntre, b, c);
404  return;
405  default:
406  {
407  Multiply_generic(mb, val, bindx, bpntrb, bpntre, b, c);
408  return;
409  }
410  }
411 }
412 
413 /// Zero-based CSR multiply.
414 /// Essentially this is slightly modified copy-paste from
415 /// NIST Sparse Blas 0.9b routine CSR_VecMult_CAB_double()
416 template <typename DataType>
417 void StorageSmvBsr<DataType>::Multiply_1x1(const int mb, const double *val,
418  const int *bindx, const int *bpntrb,
419  const int *bpntre, const double *b,
420  double *c)
421 {
422  for (int i = 0; i != mb; i++)
423  {
424  double t = 0;
425  int jb = bpntrb[i];
426  int je = bpntre[i];
427  for (int j = jb; j != je; j++)
428  {
429  t += b[bindx[j]] * (*val++);
430  }
431  c[i] = t;
432  }
433 }
434 
435 /// Zero-based BSR multiply unrolled for 2x2 blocks.
436 /// Essentially this is slightly optimised copy-paste from
437 /// NIST Sparse Blas 0.9b routine BSR_VecMult_BAB_double()
438 template <typename DataType>
439 void StorageSmvBsr<DataType>::Multiply_2x2(const int mb, const double *val,
440  const int *bindx, const int *bpntrb,
441  const int *bpntre, const double *b,
442  double *c)
443 {
444  const int lb = 2;
445 
446  const double *pval = val;
447  double *pc = c;
448 
449  for (int i = 0; i != mb; i++)
450  {
451  int jb = bpntrb[i];
452  int je = bpntre[i];
453  pc[0] = 0;
454  pc[1] = 0;
455  for (int j = jb; j != je; j++)
456  {
457  int bs = bindx[j] * lb;
458  const double *pb = &b[bs];
459 
460  pc[0] += pb[0] * pval[0] + pb[1] * pval[2];
461  pc[1] += pb[0] * pval[1] + pb[1] * pval[3];
462  pval += 4;
463  }
464  pc += 2;
465  }
466 }
467 
468 /// Zero-based BSR multiply unrolled for 3x3 blocks
469 template <typename DataType>
470 void StorageSmvBsr<DataType>::Multiply_3x3(const int mb, const double *val,
471  const int *bindx, const int *bpntrb,
472  const int *bpntre, const double *b,
473  double *c)
474 {
475  const int lb = 3;
476 
477  const double *pval = val;
478  double *pc = c;
479 
480  for (int i = 0; i != mb; i++)
481  {
482  int jb = bpntrb[i];
483  int je = bpntre[i];
484  pc[0] = 0;
485  pc[1] = 0;
486  pc[2] = 0;
487  for (int j = jb; j != je; j++)
488  {
489  int bs = bindx[j] * lb;
490  const double *pb = &b[bs];
491 
492  pc[0] += pb[0] * pval[0] + pb[1] * pval[3] + pb[2] * pval[6];
493  pc[1] += pb[0] * pval[1] + pb[1] * pval[4] + pb[2] * pval[7];
494  pc[2] += pb[0] * pval[2] + pb[1] * pval[5] + pb[2] * pval[8];
495  pval += 9;
496  }
497  pc += 3;
498  }
499 }
500 
501 /// Zero-based BSR multiply unrolled for 4x4 blocks
502 template <typename DataType>
503 void StorageSmvBsr<DataType>::Multiply_4x4(const int mb, const double *val,
504  const int *bindx, const int *bpntrb,
505  const int *bpntre, const double *b,
506  double *c)
507 {
508  const int lb = 4;
509 
510  const double *pval = val;
511  double *pc = c;
512 
513  for (int i = 0; i != mb; i++)
514  {
515  int jb = bpntrb[i];
516  int je = bpntre[i];
517  pc[0] = 0;
518  pc[1] = 0;
519  pc[2] = 0;
520  pc[3] = 0;
521  for (int j = jb; j != je; j++)
522  {
523  int bs = bindx[j] * lb;
524  const double *pb = &b[bs];
525 
526  pc[0] += pb[0] * pval[0] + pb[1] * pval[4] + pb[2] * pval[8] +
527  pb[3] * pval[12];
528  pc[1] += pb[0] * pval[1] + pb[1] * pval[5] + pb[2] * pval[9] +
529  pb[3] * pval[13];
530  pc[2] += pb[0] * pval[2] + pb[1] * pval[6] + pb[2] * pval[10] +
531  pb[3] * pval[14];
532  pc[3] += pb[0] * pval[3] + pb[1] * pval[7] + pb[2] * pval[11] +
533  pb[3] * pval[15];
534  pval += 16;
535  }
536  pc += 4;
537  }
538 }
539 
540 /// Generic zero-based BSR multiply for higher matrix ranks
541 template <typename DataType>
542 void StorageSmvBsr<DataType>::Multiply_generic(const int mb, const double *val,
543  const int *bindx,
544  const int *bpntrb,
545  const int *bpntre,
546  const double *b, double *c)
547 {
548  const int lb = m_blkDim;
549  const double *pval = val;
550  const int mm = lb * lb;
551  double *pc = c;
552  for (int i = 0; i != mb * lb; i++)
553  *pc++ = 0;
554 
555  pc = c;
556  for (int i = 0; i != mb; i++)
557  {
558  int jb = bpntrb[i];
559  int je = bpntre[i];
560  for (int j = jb; j != je; j++)
561  {
562  Blas::Dgemv('N', lb, lb, 1.0, pval, lb, &b[bindx[j] * lb], 1, 1.0,
563  pc, 1);
564  pval += mm;
565  }
566  pc += lb;
567  }
568 }
569 
570 // converts input vector of BCO blocks
571 // to the internal BSR representation
572 template <typename DataType>
574  const IndexType blkDim,
575  const BCOMatType &bcoMat)
576 {
577  IndexType i;
578  BCOMatTypeConstIt entry;
579  IndexType rowcoord;
580  IndexType colcoord;
581  BCOEntryType value;
582 
583  std::vector<IndexType> tmp(blkRows + 1, 0);
584 
585  // calculate the number of entries on each row
586  // and store the result in tmp
587  for (entry = bcoMat.begin(); entry != bcoMat.end(); entry++)
588  {
589  rowcoord = (entry->first).first;
590  tmp[rowcoord]++;
591  }
592  // Based upon this information, fill the array m_pntr
593  // which basically contains the offset of each row's
594  // first entry in the other arrays m_val and m_indx
595  m_pntr[0] = 0;
596  for (i = 0; i < blkRows; i++)
597  {
598  m_pntr[i + 1] = m_pntr[i] + tmp[i];
599  }
600 
601  // Copy the values of m_pntr into tmp as this will be needed
602  // in the following step
603  std::copy(&m_pntr[0], &m_pntr[0] + blkRows + 1, &tmp[0]);
604 
605  // Now, fill in index and value entries.
606  for (entry = bcoMat.begin(); entry != bcoMat.end(); entry++)
607  {
608  rowcoord = (entry->first).first;
609  colcoord = (entry->first).second;
610  value = entry->second;
611  int blkSize = blkDim * blkDim;
612 
613  for (i = 0; i < blkSize; i++)
614  {
615  m_val[blkSize * (tmp[rowcoord]) + i] = value[i];
617  m_nnz++;
618  }
619 
620  m_indx[tmp[rowcoord]] = colcoord;
621  tmp[rowcoord]++;
622  }
623 }
624 
625 // explicit instantiation
626 template class StorageSmvBsr<NekDouble>;
627 
628 } // namespace Nektar
size_type size() const
Returns the array's size.
bool operator!=(const const_iterator &rhs)
CoordType storageIndexToFullCoord(IndexType storageIndex)
bool operator==(const const_iterator &rhs)
const_iterator(MatrixStorage matType, IndexType begin, IndexType end, IndexType blkDim, const DataVectorType &val, const IndexVectorType &indx, const IndexVectorType &pntr)
StorageSmvBsr(const IndexType blkRows, const IndexType blkCols, const IndexType blkDim, const BCOMatType &bcoMat, const MatrixStorage matType=eFULL)
const_iterator end() const
IndexType GetNumNonZeroEntries() const
void MultiplyLight(const DataVectorType &in, DataVectorType &out)
IndexVectorType m_indx
void processBcoInput(const IndexType blkRows, const IndexType blkDim, const BCOMatType &bcoMat)
DataType GetFillInRatio() const
size_t GetMemoryUsage() const
void Multiply(const DataType *in, DataType *out)
IndexType GetColumns() const
void Multiply_generic(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Generic zero-based BSR multiply for higher matrix ranks.
void Multiply_1x1(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based CSR multiply. Essentially this is slightly modified copy-paste from NIST Sparse Blas 0....
IndexType GetBlkSize() const
void Multiply_2x2(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 2x2 blocks. Essentially this is slightly optimised copy-paste fr...
void Multiply_4x4(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 4x4 blocks.
void Multiply_3x3(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 3x3 blocks.
IndexType GetRows() const
const DataType & GetValue(IndexType row, IndexType column) const
const_iterator begin() const
IndexVectorType m_pntr
IndexType GetNumStoredDoubles() const
static void Dgemv(const char &trans, const int &m, const int &n, const double &alpha, const double *a, const int &lda, const double *x, const int &incx, const double &beta, double *y, const int &incy)
BLAS level 2: Matrix vector multiply y = A x where A[m x n].
Definition: Blas.hpp:246
def copy(self)
Definition: pycml.py:2663
static const NekDouble kNekSparseNonZeroTol
The above copyright notice and this permission notice shall be included.
Definition: CoupledSolver.h:2
unsigned int IndexType
std::map< CoordType, BCOEntryType > BCOMatType
std::pair< IndexType, IndexType > CoordType
BCOMatType::const_iterator BCOMatTypeConstIt
scalarT< T > abs(scalarT< T > in)
Definition: scalar.hpp:298