Nektar++
StorageSmvBsr.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 // File: StorageSmvBsr.cpp
4 //
5 // For more information, please see: http://www.nektar.info
6 //
7 // The MIT License
8 //
9 // Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10 // Department of Aeronautics, Imperial College London (UK), and Scientific
11 // Computing and Imaging Institute, University of Utah (USA).
12 //
13 // Permission is hereby granted, free of charge, to any person obtaining a
14 // copy of this software and associated documentation files (the "Software"),
15 // to deal in the Software without restriction, including without limitation
16 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
17 // and/or sell copies of the Software, and to permit persons to whom the
18 // Software is furnished to do so, subject to the following conditions:
19 //
20 // The above copyright notice and this permission notice shall be included
21 // in all copies or substantial portions of the Software.
22 //
23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29 // DEALINGS IN THE SOFTWARE.
30 //
31 // Description: 0-based sparse BSR storage class with its own unrolled multiply
32 // kernels.
33 //
34 ///////////////////////////////////////////////////////////////////////////////
35 
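The block-sparse-row (BSR) layout used throughout this file keeps, for every stored block, all blkDim*blkDim doubles contiguously in column-major order; m_pntr holds the offset of each block row's first block and m_indx the block-column index of each stored block. The following is a small editorial illustration (not part of the original source) of the three arrays for a 4x4 matrix split into 2x2 blocks:

// Illustrative sketch only: BSR arrays for a 4x4 matrix with 2x2 blocks and
// nonzero blocks at block coordinates (0,0), (0,1) and (1,1).
// Each block is stored column-major, matching the unrolled kernels below.
#include <vector>

const unsigned int blkDim = 2;
const std::vector<unsigned int> pntr = {0, 2, 3};      // block-row offsets into indx
const std::vector<unsigned int> indx = {0, 1, 1};      // block column of each stored block
const std::vector<double>       val  = {1, 4, 2, 5,    // block (0,0), column-major
                                        0, 0, 3, 0,    // block (0,1): explicit zeros are stored
                                        6, 7, 0, 8};   // block (1,1)

The explicit zeros stored inside the (0,1) block are exactly the entries that the const_iterator below skips using kNekSparseNonZeroTol.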
36 #include <map>
37 #include <vector>
38 #include <utility>
39 #include <fstream>
40 #include <cmath>
41 
42 #include <LibUtilities/LinearAlgebra/MatrixStorageType.h>
43 #include <LibUtilities/LinearAlgebra/StorageSmvBsr.hpp>
44 #include <LibUtilities/LinearAlgebra/NistSparseDescriptors.hpp>
45 
46 
47 namespace Nektar
48 {
49 
50  template<typename DataType>
51  StorageSmvBsr<DataType>::const_iterator::const_iterator(
52  MatrixStorage matType,
53  IndexType begin,
54  IndexType end,
55  IndexType blkDim,
56  const DataVectorType& val,
57  const IndexVectorType& indx,
58  const IndexVectorType& pntr) :
59  m_matType(matType),
60  m_iter(),
61  m_begin(begin),
62  m_end(end),
63  m_blkDim(blkDim),
64  m_val(val),
65  m_indx(indx),
66  m_pntr(pntr)
67  {
68  m_iter.nnzindex = begin;
69 
70  // Here we rewind the iterator to the 'begin'-th
71  // nonzero double value. Consecutive nonzeros lie
72  // within a dense block; when a block is exhausted,
73  // we jump to the next dense block.
74  // NB: m_val stores some explicit zeros that need to be skipped.
75  if (begin < end)
76  {
77  // determine offset of 'begin'-th nonzero value
78 
79  m_iter.storageindex = 0;
80  m_iter.nnzindex = 0;
81  while(m_iter.nnzindex < begin)
82  {
83  // explicit zero?
84  if (std::abs(m_val[m_iter.storageindex]) > NekConstants::kNekSparseNonZeroTol)
85  {
86  m_iter.nnzindex++;
87  }
88  m_iter.storageindex++;
89  if (m_iter.storageindex >= m_val.num_elements())
90  {
91  std::cout << "const_iterator: 'begin' is out of the stored values bounds" << std::endl;
92  throw 1;
93  }
94  }
95  m_iter.second = m_val[m_iter.storageindex];
96 
97  CoordType c = storageIndexToFullCoord(m_iter.storageindex);
98  m_iter.first.first = c.first;
99  m_iter.first.second = c.second;
100  }
101  }
102 
103  template<typename DataType>
104  CoordType StorageSmvBsr<DataType>::const_iterator::storageIndexToFullCoord(IndexType storageIndex)
105  {
106  // find local row and column indices within this block
107 
108  const IndexType elms = m_blkDim*m_blkDim;
109  const IndexType block = storageIndex / elms;
110  const IndexType loc_offset = storageIndex % elms;
111  const IndexType loc_row = loc_offset % m_blkDim;
112  const IndexType loc_col = loc_offset / m_blkDim;
113 
114  // find block row and block column
115 
116  IndexType block_col = m_indx[block];
117  IndexType block_row = 0;
118  while(block >= m_pntr[block_row+1])
119  {
120  block_row++;
121  }
122 
123  // full matrix coordinates of this nonzero entry
124 
125  CoordType coord;
126  coord.first = block_row*m_blkDim + loc_row;
127  coord.second = block_col*m_blkDim + loc_col;
128  return coord;
129  }
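A worked example of the index arithmetic above (editorial, using the sample 2x2-block arrays from the illustration near the top of the file):

// Worked example (illustrative): map storageIndex = 6 back to full coordinates
// with blkDim = 2, pntr = {0, 2, 3}, indx = {0, 1, 1}.
//   elms       = 2*2 = 4
//   block      = 6 / 4 = 1            -> second stored block
//   loc_offset = 6 % 4 = 2
//   loc_row    = 2 % 2 = 0,  loc_col = 2 / 2 = 1
//   block_col  = indx[1] = 1,  block_row = 0   (since block 1 < pntr[1] = 2)
//   coord      = (0*2 + 0, 1*2 + 1) = (0, 3)   -> val[6] = 3 is entry A(0,3)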
130 
131 
132  template<typename DataType>
133  StorageSmvBsr<DataType>::const_iterator::const_iterator(const const_iterator& src):
134  m_matType(src.m_matType),
135  m_iter(src.m_iter),
136  m_begin(src.m_begin),
137  m_end(src.m_end), m_blkDim(src.m_blkDim),
138  m_val(src.m_val),
139  m_indx(src.m_indx),
140  m_pntr(src.m_pntr)
141  {
142  }
143 
144  template<typename DataType>
145  StorageSmvBsr<DataType>::const_iterator::~const_iterator()
146  {
147  }
148 
149  template<typename DataType>
150  typename StorageSmvBsr<DataType>::const_iterator StorageSmvBsr<DataType>::const_iterator::operator++(int)
151  {
152  const_iterator out = *this;
153  forward();
154  return out;
155  }
156 
157  template<typename DataType>
158  typename StorageSmvBsr<DataType>::const_iterator& StorageSmvBsr<DataType>::const_iterator::operator++()
159  {
160  forward();
161  return *this;
162  }
163 
164  template<typename DataType>
165  const typename StorageSmvBsr<DataType>::const_iterator::IterType& StorageSmvBsr<DataType>::const_iterator::operator*()
166  {
167  return m_iter;
168  }
169 
170  template<typename DataType>
171  const typename StorageSmvBsr<DataType>::const_iterator::IterType* StorageSmvBsr<DataType>::const_iterator::operator->()
172  {
173  return &m_iter;
174  }
175 
176  template<typename DataType>
177  bool StorageSmvBsr<DataType>::const_iterator::operator==(const const_iterator& rhs)
178  {
179  return m_iter.nnzindex == rhs.m_iter.nnzindex;
180  }
181 
182  template<typename DataType>
183  bool StorageSmvBsr<DataType>::const_iterator::operator!=(const const_iterator& rhs)
184  {
185  return !(m_iter.nnzindex == rhs.m_iter.nnzindex);
186  }
187 
188  template<typename DataType>
189  void StorageSmvBsr<DataType>::const_iterator::forward()
190  {
191  while((m_iter.storageindex+1 < m_val.num_elements()) &&
192  (std::abs(m_val[++m_iter.storageindex]) <= NekConstants::kNekSparseNonZeroTol));
193 
194  m_iter.nnzindex++;
195 
196  if (m_iter.storageindex+1 >= m_val.num_elements())
197  {
198  m_iter.nnzindex = m_end;
199  return;
200  }
201 
202  m_iter.second = m_val[m_iter.storageindex];
203 
204  CoordType c = storageIndexToFullCoord(m_iter.storageindex);
205  m_iter.first.first = c.first;
206  m_iter.first.second = c.second;
207  }
208 
209 
210 
211 
212 
213  template<typename DataType>
214  StorageSmvBsr<DataType>::StorageSmvBsr(
215  const IndexType blkRows,
216  const IndexType blkCols,
217  const IndexType blkDim,
218  const BCOMatType& bcoMat,
219  const MatrixStorage matType):
220  m_matType (matType),
221  m_blkRows (blkRows),
222  m_blkCols (blkCols),
223  m_blkDim (blkDim),
224  m_bnnz (bcoMat.size()),
225  m_nnz (0),
226  m_val (m_bnnz * blkDim*blkDim),
227  m_indx (m_bnnz+1),
228  m_pntr (blkRows+1)
229  {
230  if (matType != Nektar::eFULL)
231  {
232  /// \todo: - iterators over strictly lower-triangular part
233  /// - calculation of the number of off-diagonal elements
234  /// - clear distinction between stored and factual nonzeros
235  /// (row density)
236  std::cout << "matrix type not implemented" << std::endl;
237  throw 1;
238  }
239 
240  processBcoInput(blkRows,blkDim,bcoMat);
241  }
242 
243 
244  template<typename DataType>
245  StorageSmvBsr<DataType>::StorageSmvBsr(const StorageSmvBsr& src):
246  m_matType(src.m_matType),
247  m_blkRows (src.m_blkRows),
248  m_blkCols (src.m_blkCols),
249  m_blkDim(src.m_blkDim),
250  m_bnnz(src.m_bnnz),
251  m_nnz(src.m_nnz),
252  m_val(src.m_val),
253  m_indx(src.m_indx),
254  m_pntr(src.m_pntr)
255  {
256  }
257 
258  template<typename DataType>
259  StorageSmvBsr<DataType>::~StorageSmvBsr()
260  {
261  }
262 
263 
264  template<typename DataType>
265  IndexType StorageSmvBsr<DataType>::GetRows() const
266  {
267  return m_blkRows*m_blkDim;
268  }
269 
270  template<typename DataType>
271  IndexType StorageSmvBsr<DataType>::GetColumns() const
272  {
273  return m_blkCols*m_blkDim;
274  }
275 
276  template<typename DataType>
277  IndexType StorageSmvBsr<DataType>::GetNumNonZeroEntries() const
278  {
279  return m_nnz;
280  }
281 
282  template<typename DataType>
283  IndexType StorageSmvBsr<DataType>::GetBlkSize() const
284  {
285  return m_blkDim;
286  }
287 
288 
289  template<typename DataType>
290  IndexType StorageSmvBsr<DataType>::GetNumStoredDoubles() const
291  {
292  return m_bnnz*m_blkDim*m_blkDim;
293  }
294 
295  template<typename DataType>
296  DataType StorageSmvBsr<DataType>::GetFillInRatio() const
297  {
298  return (DataType)(m_bnnz*m_blkDim*m_blkDim)/(DataType)m_nnz;
299  }
300 
301 
302  template<typename DataType>
303  size_t StorageSmvBsr<DataType>::GetMemoryUsage() const
304  {
305  return sizeof(DataType) *m_val.capacity() +
306  sizeof(IndexType)*m_indx.capacity() +
307  sizeof(IndexType)*m_pntr.capacity() +
308  sizeof(IndexType)*5 + //< blkRows + blkCols + blkDim + nnz + bnnz
309  sizeof(MatrixStorage);
310  }
311 
312 
313  template<typename DataType>
314  const DataType& StorageSmvBsr<DataType>::GetValue(IndexType grow, IndexType gcolumn) const
315  {
316  IndexType brow = grow / m_blkDim;
317  IndexType bcol = gcolumn / m_blkDim;
318  IndexType lrow = grow % m_blkDim;
319  IndexType lcol = gcolumn % m_blkDim;
320 
321  // rewind to the first entry of the first
322  // block in the current block row
323  IndexType offset = m_pntr[brow]*m_blkDim*m_blkDim;
324 
325  IndexType i;
326  static DataType defaultReturnValue;
327  for( i = m_pntr[brow]; i < m_pntr[brow+1]; i++)
328  {
329  if(bcol == m_indx[i])
330  {
331  return m_val[offset+lrow + lcol*m_blkDim];
332  }
333  offset += m_blkDim*m_blkDim;
334  }
335 
336  return defaultReturnValue;
337  }
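For the same sample data used in the earlier illustration, a worked GetValue lookup (editorial):

// Worked example (illustrative), with blkDim = 2, pntr = {0, 2, 3},
// indx = {0, 1, 1} and val as sketched near the top of the file:
//   GetValue(grow = 0, gcolumn = 3):
//     brow = 0, bcol = 1, lrow = 0, lcol = 1
//     offset = pntr[0]*4 = 0; indx[0] = 0 != bcol, so offset becomes 4;
//     indx[1] = 1 == bcol, so the result is val[4 + 0 + 1*2] = val[6] = 3,
//     i.e. the entry A(0,3).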
338 
339 
340  template<typename DataType>
341  typename StorageSmvBsr<DataType>::const_iterator StorageSmvBsr<DataType>::begin() const
342  {
343  return const_iterator(m_matType, 0, m_nnz, m_blkDim, m_val, m_indx, m_pntr);
344  }
345 
346  template<typename DataType>
347  typename StorageSmvBsr<DataType>::const_iterator StorageSmvBsr<DataType>::end() const
348  {
349  return const_iterator(m_matType, m_nnz, m_nnz, m_blkDim, m_val, m_indx, m_pntr);
350  }
351 
352 
353 
354  // General non-symmetric zero-based BSR multiply
355  // C = A*B where A is a matrix and B is a vector.
356  // No scaling. Previous content of C is discarded.
357  template<typename DataType>
358  void StorageSmvBsr<DataType>::Multiply(
359  const DataVectorType &in,
360  DataVectorType &out)
361  {
362  const double* b = &in[0];
363  double* c = &out[0];
364  const double* val = &m_val[0];
365  const int* bindx = (int*)&m_indx[0];
366  const int* bpntrb = (int*)&m_pntr[0];
367  const int* bpntre = (int*)&m_pntr[0]+1;
368  const int mb = m_blkRows;
369 
370  switch(m_blkDim)
371  {
372  case 1: Multiply_1x1(mb,val,bindx,bpntrb,bpntre,b,c); return;
373  case 2: Multiply_2x2(mb,val,bindx,bpntrb,bpntre,b,c); return;
374  default:
375  {
376  Multiply_generic(mb,val,bindx,bpntrb,bpntre,b,c); return;
377  }
378  }
379  }
380 
381 
382  template<typename DataType>
383  void StorageSmvBsr<DataType>::Multiply(
384  const DataType* in,
385  DataType* out)
386  {
387  const double* b = &in[0];
388  double* c = &out[0];
389  const double* val = &m_val[0];
390  const int* bindx = (int*)&m_indx[0];
391  const int* bpntrb = (int*)&m_pntr[0];
392  const int* bpntre = (int*)&m_pntr[0]+1;
393  const int mb = m_blkRows;
394 
395  switch(m_blkDim)
396  {
397  case 1: Multiply_1x1(mb,val,bindx,bpntrb,bpntre,b,c); return;
398  case 2: Multiply_2x2(mb,val,bindx,bpntrb,bpntre,b,c); return;
399  default:
400  {
401  Multiply_generic(mb,val,bindx,bpntrb,bpntre,b,c); return;
402  }
403  }
404  }
405 
406 
407  // This function is defined for backward compatibility with
408  // NIST storage classes
409  template<typename DataType>
410  void StorageSmvBsr<DataType>::MultiplyLight(
411  const DataVectorType &in,
412  DataVectorType &out)
413  {
414  const double* b = &in[0];
415  double* c = &out[0];
416  const double* val = &m_val[0];
417  const int* bindx = (int*)&m_indx[0];
418  const int* bpntrb = (int*)&m_pntr[0];
419  const int* bpntre = (int*)&m_pntr[0]+1;
420  const int mb = m_blkRows;
421 
422  switch(m_blkDim)
423  {
424  case 1: Multiply_1x1(mb,val,bindx,bpntrb,bpntre,b,c); return;
425  case 2: Multiply_2x2(mb,val,bindx,bpntrb,bpntre,b,c); return;
426  default:
427  {
428  Multiply_generic(mb,val,bindx,bpntrb,bpntre,b,c); return;
429  }
430  }
431  }
432 
433  /// Zero-based CSR multiply.
434  /// Essentially this is slightly modified copy-paste from
435  /// NIST Sparse Blas 0.9b routine CSR_VecMult_CAB_double()
436  template<typename DataType>
437  void StorageSmvBsr<DataType>::Multiply_1x1(
438  const int mb,
439  const double* val,
440  const int* bindx,
441  const int* bpntrb,
442  const int* bpntre,
443  const double* b,
444  double* c)
445  {
446  for (int i=0;i!=mb;i++)
447  {
448  double t = 0;
449  int jb = bpntrb[i];
450  int je = bpntre[i];
451  for (int j=jb;j!=je;j++)
452  {
453  t += b[bindx[j]] * (*val++);
454  }
455  c[i] = t;
456  }
457  }
458 
459  /// Zero-based BSR multiply unrolled for 2x2 blocks.
460  /// Essentially this is slightly optimised copy-paste from
461  /// NIST Sparse Blas 0.9b routine BSR_VecMult_BAB_double()
462  template<typename DataType>
463  void StorageSmvBsr<DataType>::Multiply_2x2(
464  const int mb,
465  const double* val,
466  const int* bindx,
467  const int* bpntrb,
468  const int* bpntre,
469  const double* b,
470  double* c)
471  {
472  const int lb = 2;
473 
474  const double *pval = val;
475  double *pc=c;
476 
477  for (int i=0;i!=mb;i++)
478  {
479  int jb = bpntrb[i];
480  int je = bpntre[i];
481  pc[0] = 0;
482  pc[1] = 0;
483  for (int j=jb;j!=je;j++)
484  {
485  int bs=bindx[j]*lb;
486  const double *pb = &b[bs];
487 
488  pc[0] += pb[0] * pval[0] + pb[1] * pval[2];
489  pc[1] += pb[0] * pval[1] + pb[1] * pval[3];
490  pval += 4;
491  }
492  pc += 2;
493  }
494  }
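The kernel above relies on each dense block being stored column-major, so pval = {a11, a21, a12, a22} for a block [[a11, a12], [a21, a22]]. The following standalone editorial sketch (not part of the file) reproduces a single block update, i.e. one iteration of the inner loop:

// Illustrative, standalone: one 2x2 block update as performed by Multiply_2x2.
#include <cstdio>

int main()
{
    const double pval[4] = {1, 4, 2, 5};   // block (0,0) from the earlier sketch, column-major
    const double pb[2]   = {10, 20};       // input vector segment selected by bindx[j]
    double       pc[2]   = {0, 0};         // output segment for this block row

    pc[0] += pb[0]*pval[0] + pb[1]*pval[2];    // 1*10 + 2*20 = 50
    pc[1] += pb[0]*pval[1] + pb[1]*pval[3];    // 4*10 + 5*20 = 140
    std::printf("%g %g\n", pc[0], pc[1]);      // prints: 50 140
    return 0;
}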
495 
496  /// Zero-based BSR multiply unrolled for 3x3 blocks
497  template<typename DataType>
498  void StorageSmvBsr<DataType>::Multiply_3x3(
499  const int mb,
500  const double* val,
501  const int* bindx,
502  const int* bpntrb,
503  const int* bpntre,
504  const double* b,
505  double* c)
506  {
507  const int lb = 3;
508 
509  const double *pval = val;
510  double *pc=c;
511 
512  for (int i=0;i!=mb;i++)
513  {
514  int jb = bpntrb[i];
515  int je = bpntre[i];
516  pc[0] = 0;
517  pc[1] = 0;
518  pc[2] = 0;
519  for (int j=jb;j!=je;j++)
520  {
521  int bs=bindx[j]*lb;
522  const double *pb = &b[bs];
523 
524  pc[0] += pb[0] * pval[0] + pb[1] * pval[3] + pb[2] * pval[6];
525  pc[1] += pb[0] * pval[1] + pb[1] * pval[4] + pb[2] * pval[7];
526  pc[2] += pb[0] * pval[2] + pb[1] * pval[5] + pb[2] * pval[8];
527  pval += 9;
528  }
529  pc += 3;
530  }
531  }
532 
533  /// Zero-based BSR multiply unrolled for 4x4 blocks
534  template<typename DataType>
535  void StorageSmvBsr<DataType>::Multiply_4x4(
536  const int mb,
537  const double* val,
538  const int* bindx,
539  const int* bpntrb,
540  const int* bpntre,
541  const double* b,
542  double* c)
543  {
544  const int lb = 4;
545 
546  const double *pval = val;
547  double *pc=c;
548 
549  for (int i=0;i!=mb;i++)
550  {
551  int jb = bpntrb[i];
552  int je = bpntre[i];
553  pc[0] = 0;
554  pc[1] = 0;
555  pc[2] = 0;
556  pc[3] = 0;
557  for (int j=jb;j!=je;j++)
558  {
559  int bs=bindx[j]*lb;
560  const double *pb = &b[bs];
561 
562  pc[0] += pb[0] * pval[0] + pb[1] * pval[4] + pb[2] * pval[ 8] + pb[3] * pval[12];
563  pc[1] += pb[0] * pval[1] + pb[1] * pval[5] + pb[2] * pval[ 9] + pb[3] * pval[13];
564  pc[2] += pb[0] * pval[2] + pb[1] * pval[6] + pb[2] * pval[10] + pb[3] * pval[14];
565  pc[3] += pb[0] * pval[3] + pb[1] * pval[7] + pb[2] * pval[11] + pb[3] * pval[15];
566  pval += 16;
567  }
568  pc += 4;
569  }
570  }
571 
572  /// Generic zero-based BSR multiply for larger block sizes
573  template<typename DataType>
574  void StorageSmvBsr<DataType>::Multiply_generic(
575  const int mb,
576  const double* val,
577  const int* bindx,
578  const int* bpntrb,
579  const int* bpntre,
580  const double* b,
581  double* c)
582  {
583  const int lb = m_blkDim;
584  const double *pval = val;
585  const int mm=lb*lb;
586  double *pc=c;
587  for (int i=0;i!=mb*lb;i++) *pc++ = 0;
588 
589  pc=c;
590  for (int i=0;i!=mb;i++)
591  {
592  int jb = bpntrb[i];
593  int je = bpntre[i];
594  for (int j=jb;j!=je;j++)
595  {
596  Blas::Dgemv('N',lb,lb,1.0,pval,lb,&b[bindx[j]*lb],1,1.0,pc,1);
597  pval+=mm;
598  }
599  pc += lb;
600  }
601  }
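An editorial note on the BLAS call above: with the arguments used here, each call performs an accumulating dense block-times-segment product.

// Parameter mapping of the call above (illustrative comment, not in the file):
//   Blas::Dgemv('N', lb, lb, 1.0, pval, lb, &b[bindx[j]*lb], 1, 1.0, pc, 1)
//   computes   pc := 1.0 * A * x + 1.0 * pc
//   where A is the current lb x lb block (column-major, leading dimension lb)
//   and x is the length-lb slice of b selected by the block column bindx[j];
//   beta = 1.0 makes the contributions of successive blocks in the same block
//   row accumulate into pc, matching the unrolled kernels above.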
602 
603 
604 
605  // converts the input map of BCO blocks
606  // to the internal BSR representation
607  template<typename DataType>
608  void StorageSmvBsr<DataType>::processBcoInput(
609  const IndexType blkRows,
610  const IndexType blkDim,
611  const BCOMatType& bcoMat)
612  {
613  IndexType i;
614  BCOMatTypeConstIt entry;
615  IndexType rowcoord;
616  IndexType colcoord;
617  BCOEntryType value;
618 
619  std::vector<IndexType> tmp(blkRows+1,0);
620 
621  // calculate the number of block entries in each block row
622  // and store the result in tmp
623  for(entry = bcoMat.begin(); entry != bcoMat.end(); entry++)
624  {
625  rowcoord = (entry->first).first;
626  tmp[rowcoord]++;
627  }
628  // Based upon this information, fill the array m_pntr,
629  // which contains the offset of each block row's first
630  // entry in m_indx (and, scaled by blkDim*blkDim, in m_val)
631  m_pntr[0] = 0;
632  for(i = 0; i < blkRows; i++)
633  {
634  m_pntr[i+1] = m_pntr[i] + tmp[i];
635  }
636 
637  // Copy the values of m_pntr into tmp as this will be needed
638  // in the following step
639  std::copy(&m_pntr[0],&m_pntr[0]+blkRows+1,&tmp[0]);
640 
641  // Now, fill in index and value entries.
642  for(entry = bcoMat.begin(); entry != bcoMat.end(); entry++)
643  {
644  rowcoord = (entry->first).first;
645  colcoord = (entry->first).second;
646  value = entry->second;
647  int blkSize = blkDim*blkDim;
648 
649  for (i = 0; i < blkSize; i++)
650  {
651  m_val [ blkSize*(tmp[rowcoord]) + i ] = value[i];
652  if (std::abs(value[i]) > NekConstants::kNekSparseNonZeroTol) m_nnz++;
653  }
654 
655  m_indx[ tmp[rowcoord] ] = colcoord;
656  tmp[rowcoord]++;
657  }
658  }
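A worked run of the two-pass conversion above (editorial, for the three sample blocks used in the earlier illustration):

// Worked example (illustrative): blocks (0,0), (0,1), (1,1) with blkRows = 2.
//   counting pass:  tmp = {2, 1, 0}           (blocks per block row)
//   prefix sum:     m_pntr = {0, 2, 3}
//   copy:           tmp = {0, 2, 3}           (next free slot per block row)
//   fill pass (the std::map iterates in (row, col) order):
//     (0,0) -> slot 0: m_indx[0] = 0;  (0,1) -> slot 1: m_indx[1] = 1;
//     (1,1) -> slot 2: m_indx[2] = 1;
//   giving m_indx = {0, 1, 1}; each block's blkDim*blkDim values are copied
//   into m_val at offset blkSize*slot, and m_nnz counts the entries whose
//   magnitude exceeds kNekSparseNonZeroTol.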
659 
660 
661  // explicit instantiation
662  template class StorageSmvBsr<NekDouble>;
663 
664 
665 } // namespace
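A minimal usage sketch (editorial, not part of this file). It assumes BCOEntryType is an Array<OneD, NekDouble> holding the blkDim*blkDim block values in column-major order, as processBcoInput above expects, and that the typedefs come from StorageSmvBsr.hpp; check those declarations before relying on the exact names.

// Usage sketch under the assumptions stated above.
#include <LibUtilities/LinearAlgebra/StorageSmvBsr.hpp>

using namespace Nektar;

void example()
{
    const IndexType blkDim = 2;
    BCOMatType blocks;

    BCOEntryType blk(blkDim*blkDim, 0.0);
    blk[0] = 1.0; blk[1] = 4.0; blk[2] = 2.0; blk[3] = 5.0;   // column-major 2x2 block
    blocks[CoordType(0, 0)] = blk;                            // block row 0, block column 0

    StorageSmvBsr<NekDouble> A(2, 2, blkDim, blocks);         // 2x2 block grid => 4x4 matrix

    Array<OneD, NekDouble> x(4, 1.0), y(4, 0.0);
    A.Multiply(&x[0], &y[0]);                                 // y = A * x
}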
666 