Nektar++
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
StorageSmvBsr.cpp
Go to the documentation of this file.
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 // File: StorageSmvBsr.cpp
4 //
5 // For more information, please see: http://www.nektar.info
6 //
7 // The MIT License
8 //
9 // Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10 // Department of Aeronautics, Imperial College London (UK), and Scientific
11 // Computing and Imaging Institute, University of Utah (USA).
12 //
13 // License for the specific language governing rights and limitations under
14 // Permission is hereby granted, free of charge, to any person obtaining a
15 // copy of this software and associated documentation files (the "Software"),
16 // to deal in the Software without restriction, including without limitation
17 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
18 // and/or sell copies of the Software, and to permit persons to whom the
19 // Software is furnished to do so, subject to the following conditions:
20 //
21 // The above copyright notice and this permission notice shall be included
22 // in all copies or substantial portions of the Software.
23 //
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
25 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
27 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30 // DEALINGS IN THE SOFTWARE.
31 //
32 // Description: 0-based sparse BSR storage class with own unrolled and
33 // LibSMV multiply kernels.
34 //
35 ///////////////////////////////////////////////////////////////////////////////
36 
37 #include <map>
38 #include <vector>
39 #include <utility>
40 #include <fstream>
41 #include <cmath>
42 
47 
49 
50 #include <boost/preprocessor/iteration/local.hpp>
51 #include <boost/lexical_cast.hpp>
52 
53 namespace Nektar
54 {
55 
56  template<typename DataType>
58  MatrixStorage matType,
59  IndexType begin,
60  IndexType end,
61  IndexType blkDim,
62  const DataVectorType& val,
63  const IndexVectorType& indx,
64  const IndexVectorType& pntr) :
65  m_matType(matType),
66  m_iter(),
67  m_begin(begin),
68  m_end(end),
69  m_blkDim(blkDim),
70  m_val(val),
71  m_indx(indx),
72  m_pntr(pntr)
73  {
75 
76  // Here we rewind the iterator to the 'begin'-th
77  // nonzero double value. Consecutive nonzeros are
78  // to lie within dense block and when it's done,
79  // jump to the next dense block.
80  // NB: m_val stores some explicit zeros that need to be skept.
81  if (begin < end)
82  {
83  // determine offset of 'begin'-th nonzero value
84 
85  m_iter.storageindex = 0;
86  m_iter.nnzindex = 0;
87  while(m_iter.nnzindex < begin)
88  {
89  // explicit zero?
91  {
92  m_iter.nnzindex++;
93  }
96  {
97  std::cout << "const_iterator: 'begin' out stored values bounds" << std::endl;
98  throw 1;
99  }
100  }
102 
104  m_iter.first.first = c.first;
105  m_iter.first.second = c.second;
106  }
107  }
108 
109  template<typename DataType>
111  {
112  // find local row and column indices within this block
113 
114  const IndexType elms = m_blkDim*m_blkDim;
115  const IndexType block = storageIndex / elms;
116  const IndexType loc_offset = storageIndex % elms;
117  const IndexType loc_row = loc_offset % m_blkDim;
118  const IndexType loc_col = loc_offset / m_blkDim;
119 
120  // find block row and block column
121 
122  IndexType block_col = m_indx[block];
123  IndexType block_row = 0;
124  while(block >= m_pntr[block_row+1])
125  {
126  block_row++;
127  }
128 
129  // full matrix coordinates of this nonzero entry
130 
131  CoordType coord;
132  coord.first = block_row*m_blkDim + loc_row;
133  coord.second = block_col*m_blkDim + loc_col;
134  return coord;
135  }
136 
137 
138  template<typename DataType>
140  m_matType(src.m_matType),
141  m_iter(src.m_iter),
142  m_begin(src.m_begin),
143  m_end(src.m_end),
144  m_val(src.m_val),
145  m_indx(src.m_indx),
146  m_pntr(src.m_pntr)
147  {
148  }
149 
150  template<typename DataType>
152  {
153  }
154 
155  template<typename DataType>
157  {
158  const_iterator out = *this;
159  forward();
160  return out;
161  }
162 
163  template<typename DataType>
165  {
166  forward();
167  return *this;
168  }
169 
170  template<typename DataType>
172  {
173  return m_iter;
174  }
175 
176  template<typename DataType>
178  {
179  return &m_iter;
180  }
181 
182  template<typename DataType>
184  {
185  return m_iter.nnzindex == rhs.m_iter.nnzindex;
186  }
187 
188  template<typename DataType>
190  {
191  return !(m_iter.nnzindex == rhs.m_iter.nnzindex);
192  }
193 
194  template<typename DataType>
196  {
197  while((m_iter.storageindex+1 < m_val.num_elements()) &&
198  (m_val[++m_iter.storageindex] <= NekConstants::kNekSparseNonZeroTol));
199 
200  m_iter.nnzindex++;
201 
202  if (m_iter.storageindex >= m_val.num_elements())
203  {
204  m_iter.nnzindex = m_end;
205  return;
206  }
207 
208  m_iter.second = m_val[m_iter.storageindex];
209 
210  CoordType c = storageIndexToFullCoord(m_iter.storageindex);
211  m_iter.first.first = c.first;
212  m_iter.first.second = c.second;
213  }
214 
215 
216 
217 
218 
219  template<typename DataType>
221  const IndexType blkRows,
222  const IndexType blkCols,
223  const IndexType blkDim,
224  const BCOMatType& bcoMat,
225  const MatrixStorage matType):
226  m_matType (matType),
227  m_blkRows (blkRows),
228  m_blkCols (blkCols),
229  m_blkDim (blkDim),
230  m_bnnz (bcoMat.size()),
231  m_nnz (0),
232  m_val (m_bnnz * blkDim*blkDim),
233  m_indx (m_bnnz+1),
234  m_pntr (blkRows+1)
235  {
236  if (matType != Nektar::eFULL)
237  {
238  /// \todo: - iterators over strictly lower-triangular part
239  /// - number off-diagonal elements calculation
240  /// - clear distinction between stored and factual nonzeros
241  /// (row density)
242  std::cout << "matrix type not implemented" << std::endl;
243  throw 1;
244  }
245 
246 #ifdef NEKTAR_USING_SMV
247  // Set pointer to rank-specific matrix-vector multiply kernel.
248  // Number of ranks is defined by LibSMV library
249  switch (blkDim)
250  {
251  #define BOOST_PP_LOCAL_MACRO(n) case n: m_mvKernel = Smv::F77NAME(smv_##n); break;
252  #define BOOST_PP_LOCAL_LIMITS (1, LIBSMV_MAX_RANK)
253  #include BOOST_PP_LOCAL_ITERATE()
254  }
255 #endif
256 
257  processBcoInput(blkRows,blkCols,blkDim,bcoMat);
258  }
259 
260 
261  template<typename DataType>
263  m_matType(src.m_matType),
264  m_blkRows (src.m_blkRows),
265  m_blkCols (src.m_blkCols),
266  m_blkDim(src.m_blkDim),
267  m_bnnz(src.m_bnnz),
268  m_nnz(src.m_nnz),
269  m_val(src.m_val),
270  m_indx(src.m_indx),
271  m_pntr(src.m_pntr)
272  {
273  }
274 
275  template<typename DataType>
277  {
278  }
279 
280 
281  template<typename DataType>
283  {
284  return m_blkRows*m_blkDim;
285  }
286 
287  template<typename DataType>
289  {
290  return m_blkCols*m_blkDim;
291  }
292 
293  template<typename DataType>
295  {
296  return m_nnz;
297  }
298 
299  template<typename DataType>
301  {
302  return m_blkDim;
303  }
304 
305 
306  template<typename DataType>
308  {
309  return m_bnnz*m_blkDim*m_blkDim;
310  }
311 
312  template<typename DataType>
314  {
315  return (DataType)(m_bnnz*m_blkDim*m_blkDim)/(DataType)m_nnz;
316  }
317 
318 
319  template<typename DataType>
321  {
322  return sizeof(DataType) *m_val.capacity() +
323  sizeof(IndexType)*m_indx.capacity() +
324  sizeof(IndexType)*m_pntr.capacity() +
325  sizeof(IndexType)*5 + //< blkRows + blkCols + blkDim + nnz + bnnz
326  sizeof(MatrixStorage);
327  }
328 
329 
330  template<typename DataType>
331  const typename boost::call_traits<DataType>::const_reference StorageSmvBsr<DataType>::GetValue(IndexType grow, IndexType gcolumn) const
332  {
333  IndexType brow = grow / m_blkDim;
334  IndexType bcol = gcolumn / m_blkDim;
335  IndexType lrow = grow % m_blkDim;
336  IndexType lcol = gcolumn % m_blkDim;
337 
338  // rewind to the first entry of the first
339  // block in the current block row
340  IndexType offset = m_pntr[brow]*m_blkDim*m_blkDim;
341 
342  IndexType i;
343  static DataType defaultReturnValue;
344  for( i = m_pntr[brow]; i < m_pntr[brow+1]; i++)
345  {
346  if(bcol == m_indx[i])
347  {
348  return m_val[offset+lrow + lcol*m_blkDim];
349  }
350  offset += m_blkDim*m_blkDim;
351  }
352 
353  return defaultReturnValue;
354  }
355 
356 
357  template<typename DataType>
359  {
360  return const_iterator(m_matType, 0, m_nnz, m_blkDim, m_val, m_indx, m_pntr);
361  }
362 
363  template<typename DataType>
365  {
366  return const_iterator(m_matType, m_nnz, m_nnz, m_blkDim, m_val, m_indx, m_pntr);
367  }
368 
369 
370 
371  // General non-symmetric zero-based BSR multiply
372  // C = A*B where A is matrix and B is vector.
373  // No scaling. Previous content of C is discarded.
374  template<typename DataType>
376  const DataVectorType &in,
377  DataVectorType &out)
378  {
379  const double* b = &in[0];
380  double* c = &out[0];
381  const double* val = &m_val[0];
382  const int* bindx = (int*)&m_indx[0];
383  const int* bpntrb = (int*)&m_pntr[0];
384  const int* bpntre = (int*)&m_pntr[0]+1;
385  const int mb = m_blkRows;
386  const int kb = m_blkCols;
387 
388  switch(m_blkDim)
389  {
390  case 1: Multiply_1x1(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
391  case 2: Multiply_2x2(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
392 #ifndef NEKTAR_USING_SMV
393  case 3: Multiply_3x3(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
394  case 4: Multiply_4x4(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
395 #endif
396  default:
397 #ifdef NEKTAR_USING_SMV
398  if (m_blkDim <= LIBSMV_MAX_RANK)
399  {
400  Multiply_libsmv(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
401  }
402  else
403 #endif
404  {
405  Multiply_generic(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
406  }
407  }
408  }
409 
410 
411  template<typename DataType>
413  const DataType* in,
414  DataType* out)
415  {
416  const double* b = &in[0];
417  double* c = &out[0];
418  const double* val = &m_val[0];
419  const int* bindx = (int*)&m_indx[0];
420  const int* bpntrb = (int*)&m_pntr[0];
421  const int* bpntre = (int*)&m_pntr[0]+1;
422  const int mb = m_blkRows;
423  const int kb = m_blkCols;
424 
425  switch(m_blkDim)
426  {
427  case 1: Multiply_1x1(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
428  case 2: Multiply_2x2(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
429 #ifndef NEKTAR_USING_SMV
430  case 3: Multiply_3x3(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
431  case 4: Multiply_4x4(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
432 #endif
433  default:
434 #ifdef NEKTAR_USING_SMV
435  if (m_blkDim <= LIBSMV_MAX_RANK)
436  {
437  Multiply_libsmv(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
438  }
439  else
440 #endif
441  {
442  Multiply_generic(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
443  }
444  }
445  }
446 
447 
448  // This function is defined for backward compatibility with
449  // NIST storage classes
450  template<typename DataType>
452  const DataVectorType &in,
453  DataVectorType &out)
454  {
455  const double* b = &in[0];
456  double* c = &out[0];
457  const double* val = &m_val[0];
458  const int* bindx = (int*)&m_indx[0];
459  const int* bpntrb = (int*)&m_pntr[0];
460  const int* bpntre = (int*)&m_pntr[0]+1;
461  const int mb = m_blkRows;
462  const int kb = m_blkCols;
463 
464  switch(m_blkDim)
465  {
466  case 1: Multiply_1x1(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
467  case 2: Multiply_2x2(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
468 #ifndef NEKTAR_USING_SMV
469  case 3: Multiply_3x3(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
470  case 4: Multiply_4x4(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
471 #endif
472  default:
473 #ifdef NEKTAR_USING_SMV
474  if (m_blkDim <= LIBSMV_MAX_RANK)
475  {
476  Multiply_libsmv(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
477  }
478  else
479 #endif
480  {
481  Multiply_generic(mb,kb,val,bindx,bpntrb,bpntre,b,c); return;
482  }
483  }
484  }
485 
486  /// Zero-based CSR multiply.
487  /// Essentially this is slightly modified copy-paste from
488  /// NIST Sparse Blas 0.9b routine CSR_VecMult_CAB_double()
489  template<typename DataType>
491  const int mb,
492  const int kb,
493  const double* val,
494  const int* bindx,
495  const int* bpntrb,
496  const int* bpntre,
497  const double* b,
498  double* c)
499  {
500  for (int i=0;i!=mb;i++)
501  {
502  double t = 0;
503  int jb = bpntrb[i];
504  int je = bpntre[i];
505  for (int j=jb;j!=je;j++)
506  {
507  t += b[bindx[j]] * (*val++);
508  }
509  c[i] = t;
510  }
511  }
512 
513  /// Zero-based BSR multiply unrolled for 2x2 blocks.
514  /// Essentially this is slightly optimised copy-paste from
515  /// NIST Sparse Blas 0.9b routine BSR_VecMult_BAB_double()
516  template<typename DataType>
518  const int mb,
519  const int kb,
520  const double* val,
521  const int* bindx,
522  const int* bpntrb,
523  const int* bpntre,
524  const double* b,
525  double* c)
526  {
527  const int lb = 2;
528 
529  const double *pval = val;
530  double *pc=c;
531 
532  for (int i=0;i!=mb;i++)
533  {
534  int jb = bpntrb[i];
535  int je = bpntre[i];
536  pc[0] = 0;
537  pc[1] = 0;
538  for (int j=jb;j!=je;j++)
539  {
540  int bs=bindx[j]*lb;
541  const double *pb = &b[bs];
542 
543  pc[0] += pb[0] * pval[0] + pb[1] * pval[2];
544  pc[1] += pb[0] * pval[1] + pb[1] * pval[3];
545  pval += 4;
546  }
547  pc += 2;
548  }
549  }
550 
551  /// Zero-based BSR multiply unrolled for 3x3 blocks
552  template<typename DataType>
554  const int mb,
555  const int kb,
556  const double* val,
557  const int* bindx,
558  const int* bpntrb,
559  const int* bpntre,
560  const double* b,
561  double* c)
562  {
563  const int lb = 3;
564 
565  const double *pval = val;
566  double *pc=c;
567 
568  for (int i=0;i!=mb;i++)
569  {
570  int jb = bpntrb[i];
571  int je = bpntre[i];
572  pc[0] = 0;
573  pc[1] = 0;
574  pc[2] = 0;
575  for (int j=jb;j!=je;j++)
576  {
577  int bs=bindx[j]*lb;
578  const double *pb = &b[bs];
579 
580  pc[0] += pb[0] * pval[0] + pb[1] * pval[3] + pb[2] * pval[6];
581  pc[1] += pb[0] * pval[1] + pb[1] * pval[4] + pb[2] * pval[7];
582  pc[2] += pb[0] * pval[2] + pb[1] * pval[5] + pb[2] * pval[8];
583  pval += 9;
584  }
585  pc += 3;
586  }
587  }
588 
589  /// Zero-based BSR multiply unrolled for 4x4 blocks
590  template<typename DataType>
592  const int mb,
593  const int kb,
594  const double* val,
595  const int* bindx,
596  const int* bpntrb,
597  const int* bpntre,
598  const double* b,
599  double* c)
600  {
601  const int lb = 4;
602 
603  const double *pval = val;
604  double *pc=c;
605 
606  for (int i=0;i!=mb;i++)
607  {
608  int jb = bpntrb[i];
609  int je = bpntre[i];
610  pc[0] = 0;
611  pc[1] = 0;
612  pc[2] = 0;
613  pc[3] = 0;
614  for (int j=jb;j!=je;j++)
615  {
616  int bs=bindx[j]*lb;
617  const double *pb = &b[bs];
618 
619  pc[0] += pb[0] * pval[0] + pb[1] * pval[4] + pb[2] * pval[ 8] + pb[3] * pval[12];
620  pc[1] += pb[0] * pval[1] + pb[1] * pval[5] + pb[2] * pval[ 9] + pb[3] * pval[13];
621  pc[2] += pb[0] * pval[2] + pb[1] * pval[6] + pb[2] * pval[10] + pb[3] * pval[14];
622  pc[3] += pb[0] * pval[3] + pb[1] * pval[7] + pb[2] * pval[11] + pb[3] * pval[15];
623  pval += 16;
624  }
625  pc += 4;
626  }
627  }
628 
629 #ifdef NEKTAR_USING_SMV
630  /// Generic zero-based BSR multiply
631  template<typename DataType>
633  const int mb,
634  const int kb,
635  const double* val,
636  const int* bindx,
637  const int* bpntrb,
638  const int* bpntre,
639  const double* b,
640  double* c)
641  {
642  const int lb = m_blkDim;
643 
644  const double *pval = val;
645  const int mm=lb*lb;
646 
647  double *pc=c;
648  for (int i=0;i!=mb*lb;i++) *pc++ = 0;
649 
650  pc=c;
651  for (int i=0;i!=mb;i++)
652  {
653  int jb = bpntrb[i];
654  int je = bpntre[i];
655  for (int j=jb;j!=je;j++)
656  {
657  m_mvKernel(pval,&b[bindx[j]*lb],pc);
658  pval+=mm;
659  }
660  pc += lb;
661  }
662  }
663 #endif
664 
665  /// Generic zero-based BSR multiply for higher matrix ranks
666  template<typename DataType>
668  const int mb,
669  const int kb,
670  const double* val,
671  const int* bindx,
672  const int* bpntrb,
673  const int* bpntre,
674  const double* b,
675  double* c)
676  {
677  const int lb = m_blkDim;
678  const double *pval = val;
679  const int mm=lb*lb;
680  double *pc=c;
681  for (int i=0;i!=mb*lb;i++) *pc++ = 0;
682 
683  pc=c;
684  for (int i=0;i!=mb;i++)
685  {
686  int jb = bpntrb[i];
687  int je = bpntre[i];
688  for (int j=jb;j!=je;j++)
689  {
690  Blas::Dgemv('N',lb,lb,1.0,pval,lb,&b[bindx[j]*lb],1,1.0,pc,1);
691  pval+=mm;
692  }
693  pc += lb;
694  }
695  }
696 
697 
698 
699  // converts input vector of BCO blocks
700  // to the internal BSR representation
701  template<typename DataType>
703  const IndexType blkRows,
704  const IndexType blkColumns,
705  const IndexType blkDim,
706  const BCOMatType& bcoMat)
707  {
708  IndexType i;
709  BCOMatTypeConstIt entry;
710  IndexType rowcoord;
711  IndexType colcoord;
712  BCOEntryType value;
713 
714  std::vector<IndexType> tmp(blkRows+1,0);
715 
716  // calculate the number of entries on each row
717  // and store the result in tmp
718  for(entry = bcoMat.begin(); entry != bcoMat.end(); entry++)
719  {
720  rowcoord = (entry->first).first;
721  tmp[rowcoord]++;
722  }
723  // Based upon this information, fill the array m_pntr
724  // which basically contains the offset of each row's
725  // first entry in the other arrays m_val and m_indx
726  m_pntr[0] = 0;
727  for(i = 0; i < blkRows; i++)
728  {
729  m_pntr[i+1] = m_pntr[i] + tmp[i];
730  }
731 
732  // Copy the values of m_pntr into tmp as this will be needed
733  // in the following step
734  std::copy(&m_pntr[0],&m_pntr[0]+blkRows+1,&tmp[0]);
735 
736  // Now, fill in index and value entries.
737  for(entry = bcoMat.begin(); entry != bcoMat.end(); entry++)
738  {
739  rowcoord = (entry->first).first;
740  colcoord = (entry->first).second;
741  value = entry->second;
742  int blkSize = blkDim*blkDim;
743 
744  for (i = 0; i < blkSize; i++)
745  {
746  m_val [ blkSize*(tmp[rowcoord]) + i ] = value[i];
747  if (std::abs(value[i]) > NekConstants::kNekSparseNonZeroTol) m_nnz++;
748  }
749 
750  m_indx[ tmp[rowcoord] ] = colcoord;
751  tmp[rowcoord]++;
752  }
753  }
754 
755 
756  // explicit instantiation
757  template class StorageSmvBsr<NekDouble>;
758 
759 
760 } // namespace
761