Nektar++
StorageSmvBsr.cpp
///////////////////////////////////////////////////////////////////////////////
//
// File: StorageSmvBsr.cpp
//
// For more information, please see: http://www.nektar.info
//
// The MIT License
//
// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
// Department of Aeronautics, Imperial College London (UK), and Scientific
// Computing and Imaging Institute, University of Utah (USA).
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//
// Description: 0-based sparse BSR storage class with own unrolled multiply
//              kernels.
//
///////////////////////////////////////////////////////////////////////////////

#include <cmath>
#include <fstream>
#include <map>
#include <utility>
#include <vector>

#include <LibUtilities/LinearAlgebra/Blas.hpp>
#include <LibUtilities/LinearAlgebra/StorageSmvBsr.hpp>

namespace Nektar
{

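// The storage layout used throughout this file is zero-based BSR:
//   m_pntr[brow] - offset of block row 'brow' into m_indx (blkRows+1 entries),
//   m_indx[b]    - block column index of the b-th stored dense block,
//   m_val        - the dense blocks themselves, blkDim*blkDim doubles each,
//                  stored contiguously and column-major within a block.
// As an illustration, for blkDim = 2 with nonzero blocks at block coordinates
// (0,0) and (1,1), this gives m_pntr = {0, 1, 2}, m_indx = {0, 1} and m_val
// holding eight doubles (the two 2x2 blocks back to back).
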
template <typename DataType>
StorageSmvBsr<DataType>::const_iterator::const_iterator(
    MatrixStorage matType, IndexType begin, IndexType end, IndexType blkDim,
    const DataVectorType &val, const IndexVectorType &indx,
    const IndexVectorType &pntr)
    : m_matType(matType), m_iter(), m_begin(begin), m_end(end),
      m_blkDim(blkDim), m_val(val), m_indx(indx), m_pntr(pntr)
{
    m_iter.nnzindex = begin;

    // Here we rewind the iterator to the 'begin'-th
    // nonzero double value. Consecutive nonzeros lie
    // within a dense block; when a block is exhausted,
    // we jump to the next dense block.
    // NB: m_val stores some explicit zeros that need to be skipped.
    if (begin < end)
    {
        // determine offset of 'begin'-th nonzero value

        m_iter.storageindex = 0;
        m_iter.nnzindex = 0;
        while (m_iter.nnzindex < begin)
        {
            // explicit zero?
            if (m_val[m_iter.storageindex] > NekConstants::kNekSparseNonZeroTol)
            {
                m_iter.nnzindex++;
            }
            m_iter.storageindex++;
            if (m_iter.storageindex >= m_val.size())
            {
                std::cout << "const_iterator: 'begin' is out of stored "
                             "values bounds"
                          << std::endl;
                throw 1;
            }
        }
        m_iter.second = m_val[m_iter.storageindex];

        CoordType c = storageIndexToFullCoord(m_iter.storageindex);
        m_iter.first.first = c.first;
        m_iter.first.second = c.second;
    }
}

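// Maps an offset into m_val back to the (row, column) coordinates of that
// entry in the full (unblocked) matrix. Worked example for blkDim = 2:
// storageIndex 5 gives block = 5/4 = 1 and loc_offset = 1, hence loc_row = 1
// and loc_col = 0; the block column is then m_indx[1] and the block row is
// found by scanning m_pntr.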
template <typename DataType>
CoordType StorageSmvBsr<DataType>::const_iterator::storageIndexToFullCoord(
    IndexType storageIndex)
{
    // find local row and column indices within this block

    const IndexType elms = m_blkDim * m_blkDim;
    const IndexType block = storageIndex / elms;
    const IndexType loc_offset = storageIndex % elms;
    const IndexType loc_row = loc_offset % m_blkDim;
    const IndexType loc_col = loc_offset / m_blkDim;

    // find block row and block column

    IndexType block_col = m_indx[block];
    IndexType block_row = 0;
    while (block >= m_pntr[block_row + 1])
    {
        block_row++;
    }

    // full matrix coordinates of this nonzero entry

    CoordType coord;
    coord.first = block_row * m_blkDim + loc_row;
    coord.second = block_col * m_blkDim + loc_col;
    return coord;
}

template <typename DataType>
StorageSmvBsr<DataType>::const_iterator::const_iterator(
    const const_iterator &src)
    : m_matType(src.m_matType), m_iter(src.m_iter), m_begin(src.m_begin),
      m_end(src.m_end), m_blkDim(src.m_blkDim), m_val(src.m_val),
      m_indx(src.m_indx), m_pntr(src.m_pntr)
{
}

template <typename DataType>
StorageSmvBsr<DataType>::const_iterator::~const_iterator()
{
}

template <typename DataType>
typename StorageSmvBsr<DataType>::const_iterator StorageSmvBsr<
    DataType>::const_iterator::operator++(int)
{
    const_iterator out = *this;
    forward();
    return out;
}

template <typename DataType>
typename StorageSmvBsr<DataType>::const_iterator &StorageSmvBsr<
    DataType>::const_iterator::operator++()
{
    forward();
    return *this;
}

template <typename DataType>
const typename StorageSmvBsr<DataType>::const_iterator::IterType &StorageSmvBsr<
    DataType>::const_iterator::operator*()
{
    return m_iter;
}

template <typename DataType>
const typename StorageSmvBsr<DataType>::const_iterator::IterType *StorageSmvBsr<
    DataType>::const_iterator::operator->()
{
    return &m_iter;
}

template <typename DataType>
bool StorageSmvBsr<DataType>::const_iterator::operator==(
    const const_iterator &rhs)
{
    return m_iter.nnzindex == rhs.m_iter.nnzindex;
}

template <typename DataType>
bool StorageSmvBsr<DataType>::const_iterator::operator!=(
    const const_iterator &rhs)
{
    return !(m_iter.nnzindex == rhs.m_iter.nnzindex);
}

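// Advances the iterator to the next stored value above
// NekConstants::kNekSparseNonZeroTol, i.e. explicit zeros kept inside the
// dense blocks are skipped; once the stored values are exhausted, nnzindex
// is set to m_end so the iterator compares equal to end().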
template <typename DataType>
void StorageSmvBsr<DataType>::const_iterator::forward()
{
    while ((m_iter.storageindex + 1 < m_val.size()) &&
           (m_val[++m_iter.storageindex] <= NekConstants::kNekSparseNonZeroTol))
    {
        ;
    }

    m_iter.nnzindex++;

    if (m_iter.storageindex >= m_val.size())
    {
        m_iter.nnzindex = m_end;
        return;
    }

    m_iter.second = m_val[m_iter.storageindex];

    CoordType c = storageIndexToFullCoord(m_iter.storageindex);
    m_iter.first.first = c.first;
    m_iter.first.second = c.second;
}

template <typename DataType>
StorageSmvBsr<DataType>::StorageSmvBsr(const IndexType blkRows,
                                       const IndexType blkCols,
                                       const IndexType blkDim,
                                       const BCOMatType &bcoMat,
                                       const MatrixStorage matType)
    : m_matType(matType), m_blkRows(blkRows), m_blkCols(blkCols),
      m_blkDim(blkDim), m_bnnz(bcoMat.size()), m_nnz(0),
      m_val(m_bnnz * blkDim * blkDim), m_indx(m_bnnz + 1), m_pntr(blkRows + 1)
{
    if (matType != Nektar::eFULL)
    {
        /// \todo: - iterators over strictly lower-triangular part
        ///        - number of off-diagonal elements calculation
        ///        - clear distinction between stored and factual nonzeros
        ///          (row density)
        std::cout << "matrix type not implemented" << std::endl;
        throw 1;
    }

    processBcoInput(blkRows, blkDim, bcoMat);
}

template <typename DataType>
StorageSmvBsr<DataType>::StorageSmvBsr(const StorageSmvBsr<DataType> &src)
    : m_matType(src.m_matType), m_blkRows(src.m_blkRows),
      m_blkCols(src.m_blkCols), m_blkDim(src.m_blkDim), m_bnnz(src.m_bnnz),
      m_nnz(src.m_nnz), m_val(src.m_val), m_indx(src.m_indx), m_pntr(src.m_pntr)
{
}

template <typename DataType> StorageSmvBsr<DataType>::~StorageSmvBsr()
{
}

template <typename DataType> IndexType StorageSmvBsr<DataType>::GetRows() const
{
    return m_blkRows * m_blkDim;
}

template <typename DataType>
IndexType StorageSmvBsr<DataType>::GetColumns() const
{
    return m_blkCols * m_blkDim;
}

template <typename DataType>
IndexType StorageSmvBsr<DataType>::GetNumNonZeroEntries() const
{
    return m_nnz;
}

template <typename DataType>
IndexType StorageSmvBsr<DataType>::GetBlkSize() const
{
    return m_blkDim;
}

template <typename DataType>
IndexType StorageSmvBsr<DataType>::GetNumStoredDoubles() const
{
    return m_bnnz * m_blkDim * m_blkDim;
}

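// Ratio of doubles actually stored (every entry of every dense block,
// explicit zeros included) to the number of genuine nonzeros; a value
// close to 1 means the dense blocks are well filled.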
template <typename DataType>
DataType StorageSmvBsr<DataType>::GetFillInRatio() const
{
    return (DataType)(m_bnnz * m_blkDim * m_blkDim) / (DataType)m_nnz;
}

template <typename DataType>
size_t StorageSmvBsr<DataType>::GetMemoryUsage() const
{
    return sizeof(DataType) * m_val.capacity() +
           sizeof(IndexType) * m_indx.capacity() +
           sizeof(IndexType) * m_pntr.capacity() +
           sizeof(IndexType) * 5 + //< blkRows + blkCols + blkDim + nnz + bnnz
           sizeof(MatrixStorage);
}

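// Returns the entry at global coordinates (grow, gcolumn). The block row is
// scanned for a block with the requested block column; within a block,
// values are addressed column-major. If no matching block is stored, a
// reference to a static zero-initialised value is returned.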
template <typename DataType>
const DataType &StorageSmvBsr<DataType>::GetValue(IndexType grow,
                                                  IndexType gcolumn) const
{
    IndexType brow = grow / m_blkDim;
    IndexType bcol = gcolumn / m_blkDim;
    IndexType lrow = grow % m_blkDim;
    IndexType lcol = gcolumn % m_blkDim;

    // rewind to the first entry of the first
    // block in the current block row
    IndexType offset = m_pntr[brow] * m_blkDim * m_blkDim;

    IndexType i;
    static DataType defaultReturnValue;
    for (i = m_pntr[brow]; i < m_pntr[brow + 1]; i++)
    {
        if (bcol == m_indx[i])
        {
            return m_val[offset + lrow + lcol * m_blkDim];
        }
        offset += m_blkDim * m_blkDim;
    }

    return defaultReturnValue;
}

template <typename DataType>
typename StorageSmvBsr<DataType>::const_iterator StorageSmvBsr<
    DataType>::begin() const
{
    return const_iterator(m_matType, 0, m_nnz, m_blkDim, m_val, m_indx, m_pntr);
}

template <typename DataType>
typename StorageSmvBsr<DataType>::const_iterator StorageSmvBsr<DataType>::end()
    const
{
    return const_iterator(m_matType, m_nnz, m_nnz, m_blkDim, m_val, m_indx,
                          m_pntr);
}

// General non-symmetric zero-based BSR multiply
// C = A*B where A is matrix and B is vector.
// No scaling. Previous content of C is discarded.
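// Note: the switch below dispatches only the 1x1 and 2x2 unrolled kernels;
// block sizes of 3, 4 and above all fall through to Multiply_generic, even
// though Multiply_3x3 and Multiply_4x4 are provided further down.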
template <typename DataType>
void StorageSmvBsr<DataType>::Multiply(const DataVectorType &in,
                                       DataVectorType &out)
{
    const double *b = &in[0];
    double *c = &out[0];
    const double *val = &m_val[0];
    const int *bindx = (int *)&m_indx[0];
    const int *bpntrb = (int *)&m_pntr[0];
    const int *bpntre = (int *)&m_pntr[0] + 1;
    const int mb = m_blkRows;

    switch (m_blkDim)
    {
        case 1:
            Multiply_1x1(mb, val, bindx, bpntrb, bpntre, b, c);
            return;
        case 2:
            Multiply_2x2(mb, val, bindx, bpntrb, bpntre, b, c);
            return;
        default:
        {
            Multiply_generic(mb, val, bindx, bpntrb, bpntre, b, c);
            return;
        }
    }
}

template <typename DataType>
void StorageSmvBsr<DataType>::Multiply(const DataType *in, DataType *out)
{
    const double *b = &in[0];
    double *c = &out[0];
    const double *val = &m_val[0];
    const int *bindx = (int *)&m_indx[0];
    const int *bpntrb = (int *)&m_pntr[0];
    const int *bpntre = (int *)&m_pntr[0] + 1;
    const int mb = m_blkRows;

    switch (m_blkDim)
    {
        case 1:
            Multiply_1x1(mb, val, bindx, bpntrb, bpntre, b, c);
            return;
        case 2:
            Multiply_2x2(mb, val, bindx, bpntrb, bpntre, b, c);
            return;
        default:
        {
            Multiply_generic(mb, val, bindx, bpntrb, bpntre, b, c);
            return;
        }
    }
}

// This function is defined for backward compatibility with
// NIST storage classes
template <typename DataType>
void StorageSmvBsr<DataType>::MultiplyLight(const DataVectorType &in,
                                            DataVectorType &out)
{
    const double *b = &in[0];
    double *c = &out[0];
    const double *val = &m_val[0];
    const int *bindx = (int *)&m_indx[0];
    const int *bpntrb = (int *)&m_pntr[0];
    const int *bpntre = (int *)&m_pntr[0] + 1;
    const int mb = m_blkRows;

    switch (m_blkDim)
    {
        case 1:
            Multiply_1x1(mb, val, bindx, bpntrb, bpntre, b, c);
            return;
        case 2:
            Multiply_2x2(mb, val, bindx, bpntrb, bpntre, b, c);
            return;
        default:
        {
            Multiply_generic(mb, val, bindx, bpntrb, bpntre, b, c);
            return;
        }
    }
}

/// Zero-based CSR multiply.
/// Essentially this is slightly modified copy-paste from
/// NIST Sparse Blas 0.9b routine CSR_VecMult_CAB_double()
template <typename DataType>
void StorageSmvBsr<DataType>::Multiply_1x1(const int mb, const double *val,
                                           const int *bindx, const int *bpntrb,
                                           const int *bpntre, const double *b,
                                           double *c)
{
    for (int i = 0; i != mb; i++)
    {
        double t = 0;
        int jb = bpntrb[i];
        int je = bpntre[i];
        for (int j = jb; j != je; j++)
        {
            t += b[bindx[j]] * (*val++);
        }
        c[i] = t;
    }
}

/// Zero-based BSR multiply unrolled for 2x2 blocks.
/// Essentially this is slightly optimised copy-paste from
/// NIST Sparse Blas 0.9b routine BSR_VecMult_BAB_double()
template <typename DataType>
void StorageSmvBsr<DataType>::Multiply_2x2(const int mb, const double *val,
                                           const int *bindx, const int *bpntrb,
                                           const int *bpntre, const double *b,
                                           double *c)
{
    const int lb = 2;

    const double *pval = val;
    double *pc = c;

    for (int i = 0; i != mb; i++)
    {
        int jb = bpntrb[i];
        int je = bpntre[i];
        pc[0] = 0;
        pc[1] = 0;
        for (int j = jb; j != je; j++)
        {
            int bs = bindx[j] * lb;
            const double *pb = &b[bs];

            pc[0] += pb[0] * pval[0] + pb[1] * pval[2];
            pc[1] += pb[0] * pval[1] + pb[1] * pval[3];
            pval += 4;
        }
        pc += 2;
    }
}

/// Zero-based BSR multiply unrolled for 3x3 blocks
template <typename DataType>
void StorageSmvBsr<DataType>::Multiply_3x3(const int mb, const double *val,
                                           const int *bindx, const int *bpntrb,
                                           const int *bpntre, const double *b,
                                           double *c)
{
    const int lb = 3;

    const double *pval = val;
    double *pc = c;

    for (int i = 0; i != mb; i++)
    {
        int jb = bpntrb[i];
        int je = bpntre[i];
        pc[0] = 0;
        pc[1] = 0;
        pc[2] = 0;
        for (int j = jb; j != je; j++)
        {
            int bs = bindx[j] * lb;
            const double *pb = &b[bs];

            pc[0] += pb[0] * pval[0] + pb[1] * pval[3] + pb[2] * pval[6];
            pc[1] += pb[0] * pval[1] + pb[1] * pval[4] + pb[2] * pval[7];
            pc[2] += pb[0] * pval[2] + pb[1] * pval[5] + pb[2] * pval[8];
            pval += 9;
        }
        pc += 3;
    }
}

/// Zero-based BSR multiply unrolled for 4x4 blocks
template <typename DataType>
void StorageSmvBsr<DataType>::Multiply_4x4(const int mb, const double *val,
                                           const int *bindx, const int *bpntrb,
                                           const int *bpntre, const double *b,
                                           double *c)
{
    const int lb = 4;

    const double *pval = val;
    double *pc = c;

    for (int i = 0; i != mb; i++)
    {
        int jb = bpntrb[i];
        int je = bpntre[i];
        pc[0] = 0;
        pc[1] = 0;
        pc[2] = 0;
        pc[3] = 0;
        for (int j = jb; j != je; j++)
        {
            int bs = bindx[j] * lb;
            const double *pb = &b[bs];

            pc[0] += pb[0] * pval[0] + pb[1] * pval[4] + pb[2] * pval[8] +
                     pb[3] * pval[12];
            pc[1] += pb[0] * pval[1] + pb[1] * pval[5] + pb[2] * pval[9] +
                     pb[3] * pval[13];
            pc[2] += pb[0] * pval[2] + pb[1] * pval[6] + pb[2] * pval[10] +
                     pb[3] * pval[14];
            pc[3] += pb[0] * pval[3] + pb[1] * pval[7] + pb[2] * pval[11] +
                     pb[3] * pval[15];
            pval += 16;
        }
        pc += 4;
    }
}

/// Generic zero-based BSR multiply for higher matrix ranks
template <typename DataType>
void StorageSmvBsr<DataType>::Multiply_generic(const int mb, const double *val,
                                               const int *bindx,
                                               const int *bpntrb,
                                               const int *bpntre,
                                               const double *b, double *c)
{
    const int lb = m_blkDim;
    const double *pval = val;
    const int mm = lb * lb;
    double *pc = c;
    for (int i = 0; i != mb * lb; i++)
    {
        *pc++ = 0;
    }

    pc = c;
    for (int i = 0; i != mb; i++)
    {
        int jb = bpntrb[i];
        int je = bpntre[i];
        for (int j = jb; j != je; j++)
        {
            Blas::Dgemv('N', lb, lb, 1.0, pval, lb, &b[bindx[j] * lb], 1, 1.0,
                        pc, 1);
            pval += mm;
        }
        pc += lb;
    }
}

// converts input vector of BCO blocks
// to the internal BSR representation
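// The BCO input maps block coordinates (block_row, block_col) to dense
// blocks of blkDim*blkDim doubles; block entries are copied verbatim into
// m_val, so they are expected in the same column-major order used by the
// multiply kernels above. Entries below kNekSparseNonZeroTol are still
// stored but are not counted towards m_nnz.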
template <typename DataType>
void StorageSmvBsr<DataType>::processBcoInput(const IndexType blkRows,
                                              const IndexType blkDim,
                                              const BCOMatType &bcoMat)
{
    IndexType i;
    BCOMatTypeConstIt entry;
    IndexType rowcoord;
    IndexType colcoord;
    BCOEntryType value;

    std::vector<IndexType> tmp(blkRows + 1, 0);

    // calculate the number of entries on each row
    // and store the result in tmp
    for (entry = bcoMat.begin(); entry != bcoMat.end(); entry++)
    {
        rowcoord = (entry->first).first;
        tmp[rowcoord]++;
    }
    // Based upon this information, fill the array m_pntr
    // which basically contains the offset of each row's
    // first entry in the other arrays m_val and m_indx
    m_pntr[0] = 0;
    for (i = 0; i < blkRows; i++)
    {
        m_pntr[i + 1] = m_pntr[i] + tmp[i];
    }

    // Copy the values of m_pntr into tmp as this will be needed
    // in the following step
    std::copy(&m_pntr[0], &m_pntr[0] + blkRows + 1, &tmp[0]);

    // Now, fill in index and value entries.
    for (entry = bcoMat.begin(); entry != bcoMat.end(); entry++)
    {
        rowcoord = (entry->first).first;
        colcoord = (entry->first).second;
        value = entry->second;
        int blkSize = blkDim * blkDim;

        for (i = 0; i < blkSize; i++)
        {
            m_val[blkSize * (tmp[rowcoord]) + i] = value[i];
            if (std::abs(value[i]) > NekConstants::kNekSparseNonZeroTol)
            {
                m_nnz++;
            }
        }

        m_indx[tmp[rowcoord]] = colcoord;
        tmp[rowcoord]++;
    }
}

// explicit instantiation
template class StorageSmvBsr<NekDouble>;
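
// A minimal usage sketch (illustrative only, assuming BCOEntryType behaves
// like Nektar's Array<OneD, NekDouble> with a (size, initial value)
// constructor; the variable names are hypothetical):
//
//     BCOMatType bcoMat;
//     BCOEntryType blk00(4, 0.0), blk11(4, 0.0); // 2x2 blocks, column-major
//     blk00[0] = 1.0; blk00[3] = 2.0;            // block (0,0) = diag(1,2)
//     blk11[0] = 3.0; blk11[3] = 4.0;            // block (1,1) = diag(3,4)
//     bcoMat[CoordType(0, 0)] = blk00;
//     bcoMat[CoordType(1, 1)] = blk11;
//
//     StorageSmvBsr<NekDouble> A(2, 2, 2, bcoMat); // 2x2 grid of 2x2 blocks
//     NekDouble in[4] = {1.0, 1.0, 1.0, 1.0}, out[4];
//     A.Multiply(in, out);                         // out = {1, 2, 3, 4}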

} // namespace Nektar