50 #include <boost/preprocessor/iteration/local.hpp>
51 #include <boost/lexical_cast.hpp>
56 template<
typename DataType>
97 std::cout <<
"const_iterator: 'begin' out stored values bounds" << std::endl;
109 template<
typename DataType>
115 const IndexType block = storageIndex / elms;
116 const IndexType loc_offset = storageIndex % elms;
124 while(block >=
m_pntr[block_row+1])
132 coord.first = block_row*m_blkDim + loc_row;
133 coord.second = block_col*m_blkDim + loc_col;
138 template<
typename DataType>
142 m_begin(src.m_begin),
150 template<
typename DataType>
155 template<
typename DataType>
163 template<
typename DataType>
170 template<
typename DataType>
176 template<
typename DataType>
182 template<
typename DataType>
188 template<
typename DataType>
194 template<
typename DataType>
204 m_iter.nnzindex = m_end;
208 m_iter.second =
m_val[m_iter.storageindex];
210 CoordType c = storageIndexToFullCoord(m_iter.storageindex);
211 m_iter.first.first = c.first;
212 m_iter.first.second = c.second;
219 template<
typename DataType>
242 std::cout <<
"matrix type not implemented" << std::endl;
246 #ifdef NEKTAR_USING_SMV
251 #define BOOST_PP_LOCAL_MACRO(n) case n: m_mvKernel = Smv::F77NAME(smv_##n); break;
252 #define BOOST_PP_LOCAL_LIMITS (1, LIBSMV_MAX_RANK)
253 #include BOOST_PP_LOCAL_ITERATE()
261 template<
typename DataType>
263 m_matType(src.m_matType),
264 m_blkRows (src.m_blkRows),
265 m_blkCols (src.m_blkCols),
266 m_blkDim(src.m_blkDim),
275 template<
typename DataType>
281 template<
typename DataType>
284 return m_blkRows*m_blkDim;
287 template<
typename DataType>
290 return m_blkCols*m_blkDim;
293 template<
typename DataType>
299 template<
typename DataType>
306 template<
typename DataType>
309 return m_bnnz*m_blkDim*m_blkDim;
312 template<
typename DataType>
319 template<
typename DataType>
322 return sizeof(
DataType) *m_val.capacity() +
330 template<
typename DataType>
340 IndexType offset = m_pntr[brow]*m_blkDim*m_blkDim;
344 for( i = m_pntr[brow]; i < m_pntr[brow+1]; i++)
346 if(bcol == m_indx[i])
348 return m_val[offset+lrow + lcol*m_blkDim];
350 offset += m_blkDim*m_blkDim;
353 return defaultReturnValue;
357 template<
typename DataType>
360 return const_iterator(m_matType, 0, m_nnz, m_blkDim, m_val, m_indx, m_pntr);
363 template<
typename DataType>
366 return const_iterator(m_matType, m_nnz, m_nnz, m_blkDim, m_val, m_indx, m_pntr);
374 template<
typename DataType>
379 const double* b = &in[0];
381 const double* val = &m_val[0];
382 const int* bindx = (
int*)&m_indx[0];
383 const int* bpntrb = (
int*)&m_pntr[0];
384 const int* bpntre = (
int*)&m_pntr[0]+1;
385 const int mb = m_blkRows;
386 const int kb = m_blkCols;
390 case 1: Multiply_1x1(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
391 case 2: Multiply_2x2(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
392 #ifndef NEKTAR_USING_SMV
393 case 3: Multiply_3x3(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
394 case 4: Multiply_4x4(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
397 #ifdef NEKTAR_USING_SMV
398 if (m_blkDim <= LIBSMV_MAX_RANK)
400 Multiply_libsmv(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
405 Multiply_generic(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
411 template<
typename DataType>
416 const double* b = &in[0];
418 const double* val = &m_val[0];
419 const int* bindx = (
int*)&m_indx[0];
420 const int* bpntrb = (
int*)&m_pntr[0];
421 const int* bpntre = (
int*)&m_pntr[0]+1;
422 const int mb = m_blkRows;
423 const int kb = m_blkCols;
427 case 1: Multiply_1x1(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
428 case 2: Multiply_2x2(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
429 #ifndef NEKTAR_USING_SMV
430 case 3: Multiply_3x3(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
431 case 4: Multiply_4x4(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
434 #ifdef NEKTAR_USING_SMV
435 if (m_blkDim <= LIBSMV_MAX_RANK)
437 Multiply_libsmv(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
442 Multiply_generic(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
450 template<
typename DataType>
455 const double* b = &in[0];
457 const double* val = &m_val[0];
458 const int* bindx = (
int*)&m_indx[0];
459 const int* bpntrb = (
int*)&m_pntr[0];
460 const int* bpntre = (
int*)&m_pntr[0]+1;
461 const int mb = m_blkRows;
462 const int kb = m_blkCols;
466 case 1: Multiply_1x1(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
467 case 2: Multiply_2x2(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
468 #ifndef NEKTAR_USING_SMV
469 case 3: Multiply_3x3(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
470 case 4: Multiply_4x4(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
473 #ifdef NEKTAR_USING_SMV
474 if (m_blkDim <= LIBSMV_MAX_RANK)
476 Multiply_libsmv(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
481 Multiply_generic(mb,kb,val,bindx,bpntrb,bpntre,b,c);
return;
489 template<
typename DataType>
500 for (
int i=0;i!=mb;i++)
505 for (
int j=jb;j!=je;j++)
507 t += b[bindx[j]] * (*val++);
516 template<
typename DataType>
529 const double *pval = val;
532 for (
int i=0;i!=mb;i++)
538 for (
int j=jb;j!=je;j++)
541 const double *pb = &b[bs];
543 pc[0] += pb[0] * pval[0] + pb[1] * pval[2];
544 pc[1] += pb[0] * pval[1] + pb[1] * pval[3];
552 template<
typename DataType>
565 const double *pval = val;
568 for (
int i=0;i!=mb;i++)
575 for (
int j=jb;j!=je;j++)
578 const double *pb = &b[bs];
580 pc[0] += pb[0] * pval[0] + pb[1] * pval[3] + pb[2] * pval[6];
581 pc[1] += pb[0] * pval[1] + pb[1] * pval[4] + pb[2] * pval[7];
582 pc[2] += pb[0] * pval[2] + pb[1] * pval[5] + pb[2] * pval[8];
590 template<
typename DataType>
603 const double *pval = val;
606 for (
int i=0;i!=mb;i++)
614 for (
int j=jb;j!=je;j++)
617 const double *pb = &b[bs];
619 pc[0] += pb[0] * pval[0] + pb[1] * pval[4] + pb[2] * pval[ 8] + pb[3] * pval[12];
620 pc[1] += pb[0] * pval[1] + pb[1] * pval[5] + pb[2] * pval[ 9] + pb[3] * pval[13];
621 pc[2] += pb[0] * pval[2] + pb[1] * pval[6] + pb[2] * pval[10] + pb[3] * pval[14];
622 pc[3] += pb[0] * pval[3] + pb[1] * pval[7] + pb[2] * pval[11] + pb[3] * pval[15];
629 #ifdef NEKTAR_USING_SMV
631 template<
typename DataType>
642 const int lb = m_blkDim;
644 const double *pval = val;
648 for (
int i=0;i!=mb*lb;i++) *pc++ = 0;
651 for (
int i=0;i!=mb;i++)
655 for (
int j=jb;j!=je;j++)
657 m_mvKernel(pval,&b[bindx[j]*lb],pc);
666 template<
typename DataType>
677 const int lb = m_blkDim;
678 const double *pval = val;
681 for (
int i=0;i!=mb*lb;i++) *pc++ = 0;
684 for (
int i=0;i!=mb;i++)
688 for (
int j=jb;j!=je;j++)
690 Blas::Dgemv(
'N',lb,lb,1.0,pval,lb,&b[bindx[j]*lb],1,1.0,pc,1);
701 template<
typename DataType>
714 std::vector<IndexType> tmp(blkRows+1,0);
718 for(entry = bcoMat.begin(); entry != bcoMat.end(); entry++)
720 rowcoord = (entry->first).first;
727 for(i = 0; i < blkRows; i++)
729 m_pntr[i+1] = m_pntr[i] + tmp[i];
734 std::copy(&m_pntr[0],&m_pntr[0]+blkRows+1,&tmp[0]);
737 for(entry = bcoMat.begin(); entry != bcoMat.end(); entry++)
739 rowcoord = (entry->first).first;
740 colcoord = (entry->first).second;
741 value = entry->second;
742 int blkSize = blkDim*blkDim;
744 for (i = 0; i < blkSize; i++)
746 m_val [ blkSize*(tmp[rowcoord]) + i ] = value[i];
750 m_indx[ tmp[rowcoord] ] = colcoord;
const IterType & operator*()
const DataVectorType & m_val
void Multiply_generic(const int mb, const int kb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Generic zero-based BSR multiply for higher matrix ranks.
void MultiplyLight(const DataVectorType &in, DataVectorType &out)
const_iterator end() const
const_iterator & operator++()
BCOMatType::const_iterator BCOMatTypeConstIt
size_type num_elements() const
Returns the array's size.
const_iterator begin() const
const DataType GetFillInRatio() const
const IterType * operator->()
std::map< CoordType, BCOEntryType > BCOMatType
void Multiply_2x2(const int mb, const int kb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 2x2 blocks. Essentially this is slightly optimised copy-paste fr...
const IndexType GetNumNonZeroEntries() const
const IndexType GetRows() const
const IndexType GetNumStoredDoubles() const
const bool operator!=(const const_iterator &rhs)
void Multiply_1x1(const int mb, const int kb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based CSR multiply. Essentially this is slightly modified copy-paste from NIST Sparse Blas 0...
CoordType storageIndexToFullCoord(IndexType storageIndex)
const IndexType GetBlkSize() const
const boost::call_traits< DataType >::const_reference GetValue(IndexType row, IndexType column) const
const IndexType GetColumns() const
const_iterator(MatrixStorage matType, IndexType begin, IndexType end, IndexType blkDim, const DataVectorType &val, const IndexVectorType &indx, const IndexVectorType &pntr)
const bool operator==(const const_iterator &rhs)
static const NekDouble kNekSparseNonZeroTol
void Multiply_4x4(const int mb, const int kb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 4x4 blocks.
const size_t GetMemoryUsage(IndexType nnz, IndexType nRows) const
void Multiply_3x3(const int mb, const int kb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 3x3 blocks.
void Multiply(const DataType *in, DataType *out)
std::pair< IndexType, IndexType > CoordType
StorageSmvBsr(const IndexType blkRows, const IndexType blkCols, const IndexType blkDim, const BCOMatType &bcoMat, const MatrixStorage matType=eFULL)
void processBcoInput(const IndexType blkRows, const IndexType blkColumns, const IndexType blkDim, const BCOMatType &bcoMat)