Nektar++
StorageSmvBsr.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: StorageSmvBsr.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// Permission is hereby granted, free of charge, to any person obtaining a
14// copy of this software and associated documentation files (the "Software"),
15// to deal in the Software without restriction, including without limitation
16// the rights to use, copy, modify, merge, publish, distribute, sublicense,
17// and/or sell copies of the Software, and to permit persons to whom the
18// Software is furnished to do so, subject to the following conditions:
19//
20// The above copyright notice and this permission notice shall be included
21// in all copies or substantial portions of the Software.
22//
23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29// DEALINGS IN THE SOFTWARE.
30//
31// Description: 0-based sparse BSR storage class with own unrolled multiply
32// kernels.
33//
34///////////////////////////////////////////////////////////////////////////////
35
36#include <cmath>
37#include <fstream>
38#include <map>
39#include <utility>
40#include <vector>
41
46
47namespace Nektar
48{
49
50template <typename DataType>
52 MatrixStorage matType, IndexType begin, IndexType end, IndexType blkDim,
53 const DataVectorType &val, const IndexVectorType &indx,
54 const IndexVectorType &pntr)
55 : m_matType(matType), m_iter(), m_begin(begin), m_end(end),
56 m_blkDim(blkDim), m_val(val), m_indx(indx), m_pntr(pntr)
57{
59
60 // Here we rewind the iterator to the 'begin'-th
61 // nonzero double value. Consecutive nonzeros are
62 // to lie within dense block and when it's done,
63 // jump to the next dense block.
64 // NB: m_val stores some explicit zeros that need to be skept.
65 if (begin < end)
66 {
67 // determine offset of 'begin'-th nonzero value
68
70 m_iter.nnzindex = 0;
71 while (m_iter.nnzindex < begin)
72 {
73 // explicit zero?
75 {
77 }
80 {
81 std::cout << "const_iterator: 'begin' out stored values bounds"
82 << std::endl;
83 throw 1;
84 }
85 }
87
89 m_iter.first.first = c.first;
90 m_iter.first.second = c.second;
91 }
92}
93
94template <typename DataType>
96 IndexType storageIndex)
97{
98 // find local row and column indices within this block
99
100 const IndexType elms = m_blkDim * m_blkDim;
101 const IndexType block = storageIndex / elms;
102 const IndexType loc_offset = storageIndex % elms;
103 const IndexType loc_row = loc_offset % m_blkDim;
104 const IndexType loc_col = loc_offset / m_blkDim;
105
106 // find block row and block column
107
108 IndexType block_col = m_indx[block];
109 IndexType block_row = 0;
110 while (block >= m_pntr[block_row + 1])
111 {
112 block_row++;
113 }
114
115 // full matrix coordinates of this nonzero entry
116
117 CoordType coord;
118 coord.first = block_row * m_blkDim + loc_row;
119 coord.second = block_col * m_blkDim + loc_col;
120 return coord;
121}
122
123template <typename DataType>
125 const const_iterator &src)
126 : m_matType(src.m_matType), m_iter(src.m_iter), m_begin(src.m_begin),
127 m_end(src.m_end), m_val(src.m_val), m_indx(src.m_indx), m_pntr(src.m_pntr)
128{
129}
130
131template <typename DataType>
133{
134}
135
136template <typename DataType>
138 DataType>::const_iterator::operator++(int)
139{
140 const_iterator out = *this;
141 forward();
142 return out;
143}
144
145template <typename DataType>
147 DataType>::const_iterator::operator++()
148{
149 forward();
150 return *this;
151}
152
153template <typename DataType>
155 DataType>::const_iterator::operator*()
156{
157 return m_iter;
158}
159
160template <typename DataType>
162 DataType>::const_iterator::operator->()
163{
164 return &m_iter;
165}
166
167template <typename DataType>
169 const const_iterator &rhs)
170{
171 return m_iter.nnzindex == rhs.m_iter.nnzindex;
172}
173
174template <typename DataType>
176 const const_iterator &rhs)
177{
178 return !(m_iter.nnzindex == rhs.m_iter.nnzindex);
179}
180
181template <typename DataType>
183{
184 while ((m_iter.storageindex + 1 < m_val.size()) &&
185 (m_val[++m_iter.storageindex] <= NekConstants::kNekSparseNonZeroTol))
186 ;
187
188 m_iter.nnzindex++;
189
190 if (m_iter.storageindex >= m_val.size())
191 {
192 m_iter.nnzindex = m_end;
193 return;
194 }
195
196 m_iter.second = m_val[m_iter.storageindex];
197
198 CoordType c = storageIndexToFullCoord(m_iter.storageindex);
199 m_iter.first.first = c.first;
200 m_iter.first.second = c.second;
201}
202
203template <typename DataType>
205 const IndexType blkCols,
206 const IndexType blkDim,
207 const BCOMatType &bcoMat,
208 const MatrixStorage matType)
209 : m_matType(matType), m_blkRows(blkRows), m_blkCols(blkCols),
210 m_blkDim(blkDim), m_bnnz(bcoMat.size()), m_nnz(0),
211 m_val(m_bnnz * blkDim * blkDim), m_indx(m_bnnz + 1), m_pntr(blkRows + 1)
212{
213 if (matType != Nektar::eFULL)
214 {
215 /// \todo: - iterators over strictly lower-triangular part
216 /// - number off-diagonal elements calculation
217 /// - clear distinction between stored and factual nonzeros
218 /// (row density)
219 std::cout << "matrix type not implemented" << std::endl;
220 throw 1;
221 }
222
223 processBcoInput(blkRows, blkDim, bcoMat);
224}
225
226template <typename DataType>
228 : m_matType(src.m_matType), m_blkRows(src.m_blkRows),
229 m_blkCols(src.m_blkCols), m_blkDim(src.m_blkDim), m_bnnz(src.m_bnnz),
230 m_nnz(src.m_nnz), m_val(src.m_val), m_indx(src.m_indx), m_pntr(src.m_pntr)
231{
232}
233
234template <typename DataType> StorageSmvBsr<DataType>::~StorageSmvBsr()
235{
236}
237
238template <typename DataType> IndexType StorageSmvBsr<DataType>::GetRows() const
239{
240 return m_blkRows * m_blkDim;
241}
242
243template <typename DataType>
245{
246 return m_blkCols * m_blkDim;
247}
248
249template <typename DataType>
251{
252 return m_nnz;
253}
254
255template <typename DataType>
257{
258 return m_blkDim;
259}
260
261template <typename DataType>
263{
264 return m_bnnz * m_blkDim * m_blkDim;
265}
266
267template <typename DataType>
269{
270 return (DataType)(m_bnnz * m_blkDim * m_blkDim) / (DataType)m_nnz;
271}
272
273template <typename DataType>
275{
276 return sizeof(DataType) * m_val.capacity() +
277 sizeof(IndexType) * m_indx.capacity() +
278 sizeof(IndexType) * m_pntr.capacity() +
279 sizeof(IndexType) * 5 + //< blkRows + blkCols + blkDim + nnz + bnnz
280 sizeof(MatrixStorage);
281}
282
283template <typename DataType>
285 IndexType gcolumn) const
286{
287 IndexType brow = grow / m_blkDim;
288 IndexType bcol = gcolumn / m_blkDim;
289 IndexType lrow = grow % m_blkDim;
290 IndexType lcol = gcolumn % m_blkDim;
291
292 // rewind to the first entry of the first
293 // block in the current block row
294 IndexType offset = m_pntr[brow] * m_blkDim * m_blkDim;
295
296 IndexType i;
297 static DataType defaultReturnValue;
298 for (i = m_pntr[brow]; i < m_pntr[brow + 1]; i++)
299 {
300 if (bcol == m_indx[i])
301 {
302 return m_val[offset + lrow + lcol * m_blkDim];
303 }
304 offset += m_blkDim * m_blkDim;
305 }
306
307 return defaultReturnValue;
308}
309
310template <typename DataType>
312 DataType>::begin() const
313{
314 return const_iterator(m_matType, 0, m_nnz, m_blkDim, m_val, m_indx, m_pntr);
315}
316
317template <typename DataType>
319 const
320{
321 return const_iterator(m_matType, m_nnz, m_nnz, m_blkDim, m_val, m_indx,
322 m_pntr);
323}
324
325// General non-symmetric zero-based BSR multiply
326// C = A*B where A is matrix and B is vector.
327// No scaling. Previous content of C is discarded.
328template <typename DataType>
330 DataVectorType &out)
331{
332 const double *b = &in[0];
333 double *c = &out[0];
334 const double *val = &m_val[0];
335 const int *bindx = (int *)&m_indx[0];
336 const int *bpntrb = (int *)&m_pntr[0];
337 const int *bpntre = (int *)&m_pntr[0] + 1;
338 const int mb = m_blkRows;
339
340 switch (m_blkDim)
341 {
342 case 1:
343 Multiply_1x1(mb, val, bindx, bpntrb, bpntre, b, c);
344 return;
345 case 2:
346 Multiply_2x2(mb, val, bindx, bpntrb, bpntre, b, c);
347 return;
348 default:
349 {
350 Multiply_generic(mb, val, bindx, bpntrb, bpntre, b, c);
351 return;
352 }
353 }
354}
355
356template <typename DataType>
358{
359 const double *b = &in[0];
360 double *c = &out[0];
361 const double *val = &m_val[0];
362 const int *bindx = (int *)&m_indx[0];
363 const int *bpntrb = (int *)&m_pntr[0];
364 const int *bpntre = (int *)&m_pntr[0] + 1;
365 const int mb = m_blkRows;
366
367 switch (m_blkDim)
368 {
369 case 1:
370 Multiply_1x1(mb, val, bindx, bpntrb, bpntre, b, c);
371 return;
372 case 2:
373 Multiply_2x2(mb, val, bindx, bpntrb, bpntre, b, c);
374 return;
375 default:
376 {
377 Multiply_generic(mb, val, bindx, bpntrb, bpntre, b, c);
378 return;
379 }
380 }
381}
382
383// This function is defined for backward compatibility with
384// NIST storage classes
385template <typename DataType>
387 DataVectorType &out)
388{
389 const double *b = &in[0];
390 double *c = &out[0];
391 const double *val = &m_val[0];
392 const int *bindx = (int *)&m_indx[0];
393 const int *bpntrb = (int *)&m_pntr[0];
394 const int *bpntre = (int *)&m_pntr[0] + 1;
395 const int mb = m_blkRows;
396
397 switch (m_blkDim)
398 {
399 case 1:
400 Multiply_1x1(mb, val, bindx, bpntrb, bpntre, b, c);
401 return;
402 case 2:
403 Multiply_2x2(mb, val, bindx, bpntrb, bpntre, b, c);
404 return;
405 default:
406 {
407 Multiply_generic(mb, val, bindx, bpntrb, bpntre, b, c);
408 return;
409 }
410 }
411}
412
413/// Zero-based CSR multiply.
414/// Essentially this is slightly modified copy-paste from
415/// NIST Sparse Blas 0.9b routine CSR_VecMult_CAB_double()
416template <typename DataType>
417void StorageSmvBsr<DataType>::Multiply_1x1(const int mb, const double *val,
418 const int *bindx, const int *bpntrb,
419 const int *bpntre, const double *b,
420 double *c)
421{
422 for (int i = 0; i != mb; i++)
423 {
424 double t = 0;
425 int jb = bpntrb[i];
426 int je = bpntre[i];
427 for (int j = jb; j != je; j++)
428 {
429 t += b[bindx[j]] * (*val++);
430 }
431 c[i] = t;
432 }
433}
434
435/// Zero-based BSR multiply unrolled for 2x2 blocks.
436/// Essentially this is slightly optimised copy-paste from
437/// NIST Sparse Blas 0.9b routine BSR_VecMult_BAB_double()
438template <typename DataType>
439void StorageSmvBsr<DataType>::Multiply_2x2(const int mb, const double *val,
440 const int *bindx, const int *bpntrb,
441 const int *bpntre, const double *b,
442 double *c)
443{
444 const int lb = 2;
445
446 const double *pval = val;
447 double *pc = c;
448
449 for (int i = 0; i != mb; i++)
450 {
451 int jb = bpntrb[i];
452 int je = bpntre[i];
453 pc[0] = 0;
454 pc[1] = 0;
455 for (int j = jb; j != je; j++)
456 {
457 int bs = bindx[j] * lb;
458 const double *pb = &b[bs];
459
460 pc[0] += pb[0] * pval[0] + pb[1] * pval[2];
461 pc[1] += pb[0] * pval[1] + pb[1] * pval[3];
462 pval += 4;
463 }
464 pc += 2;
465 }
466}
467
468/// Zero-based BSR multiply unrolled for 3x3 blocks
469template <typename DataType>
470void StorageSmvBsr<DataType>::Multiply_3x3(const int mb, const double *val,
471 const int *bindx, const int *bpntrb,
472 const int *bpntre, const double *b,
473 double *c)
474{
475 const int lb = 3;
476
477 const double *pval = val;
478 double *pc = c;
479
480 for (int i = 0; i != mb; i++)
481 {
482 int jb = bpntrb[i];
483 int je = bpntre[i];
484 pc[0] = 0;
485 pc[1] = 0;
486 pc[2] = 0;
487 for (int j = jb; j != je; j++)
488 {
489 int bs = bindx[j] * lb;
490 const double *pb = &b[bs];
491
492 pc[0] += pb[0] * pval[0] + pb[1] * pval[3] + pb[2] * pval[6];
493 pc[1] += pb[0] * pval[1] + pb[1] * pval[4] + pb[2] * pval[7];
494 pc[2] += pb[0] * pval[2] + pb[1] * pval[5] + pb[2] * pval[8];
495 pval += 9;
496 }
497 pc += 3;
498 }
499}
500
501/// Zero-based BSR multiply unrolled for 4x4 blocks
502template <typename DataType>
503void StorageSmvBsr<DataType>::Multiply_4x4(const int mb, const double *val,
504 const int *bindx, const int *bpntrb,
505 const int *bpntre, const double *b,
506 double *c)
507{
508 const int lb = 4;
509
510 const double *pval = val;
511 double *pc = c;
512
513 for (int i = 0; i != mb; i++)
514 {
515 int jb = bpntrb[i];
516 int je = bpntre[i];
517 pc[0] = 0;
518 pc[1] = 0;
519 pc[2] = 0;
520 pc[3] = 0;
521 for (int j = jb; j != je; j++)
522 {
523 int bs = bindx[j] * lb;
524 const double *pb = &b[bs];
525
526 pc[0] += pb[0] * pval[0] + pb[1] * pval[4] + pb[2] * pval[8] +
527 pb[3] * pval[12];
528 pc[1] += pb[0] * pval[1] + pb[1] * pval[5] + pb[2] * pval[9] +
529 pb[3] * pval[13];
530 pc[2] += pb[0] * pval[2] + pb[1] * pval[6] + pb[2] * pval[10] +
531 pb[3] * pval[14];
532 pc[3] += pb[0] * pval[3] + pb[1] * pval[7] + pb[2] * pval[11] +
533 pb[3] * pval[15];
534 pval += 16;
535 }
536 pc += 4;
537 }
538}
539
540/// Generic zero-based BSR multiply for higher matrix ranks
541template <typename DataType>
542void StorageSmvBsr<DataType>::Multiply_generic(const int mb, const double *val,
543 const int *bindx,
544 const int *bpntrb,
545 const int *bpntre,
546 const double *b, double *c)
547{
548 const int lb = m_blkDim;
549 const double *pval = val;
550 const int mm = lb * lb;
551 double *pc = c;
552 for (int i = 0; i != mb * lb; i++)
553 *pc++ = 0;
554
555 pc = c;
556 for (int i = 0; i != mb; i++)
557 {
558 int jb = bpntrb[i];
559 int je = bpntre[i];
560 for (int j = jb; j != je; j++)
561 {
562 Blas::Dgemv('N', lb, lb, 1.0, pval, lb, &b[bindx[j] * lb], 1, 1.0,
563 pc, 1);
564 pval += mm;
565 }
566 pc += lb;
567 }
568}
569
570// converts input vector of BCO blocks
571// to the internal BSR representation
572template <typename DataType>
574 const IndexType blkDim,
575 const BCOMatType &bcoMat)
576{
577 IndexType i;
578 BCOMatTypeConstIt entry;
579 IndexType rowcoord;
580 IndexType colcoord;
581 BCOEntryType value;
582
583 std::vector<IndexType> tmp(blkRows + 1, 0);
584
585 // calculate the number of entries on each row
586 // and store the result in tmp
587 for (entry = bcoMat.begin(); entry != bcoMat.end(); entry++)
588 {
589 rowcoord = (entry->first).first;
590 tmp[rowcoord]++;
591 }
592 // Based upon this information, fill the array m_pntr
593 // which basically contains the offset of each row's
594 // first entry in the other arrays m_val and m_indx
595 m_pntr[0] = 0;
596 for (i = 0; i < blkRows; i++)
597 {
598 m_pntr[i + 1] = m_pntr[i] + tmp[i];
599 }
600
601 // Copy the values of m_pntr into tmp as this will be needed
602 // in the following step
603 std::copy(&m_pntr[0], &m_pntr[0] + blkRows + 1, &tmp[0]);
604
605 // Now, fill in index and value entries.
606 for (entry = bcoMat.begin(); entry != bcoMat.end(); entry++)
607 {
608 rowcoord = (entry->first).first;
609 colcoord = (entry->first).second;
610 value = entry->second;
611 int blkSize = blkDim * blkDim;
612
613 for (i = 0; i < blkSize; i++)
614 {
615 m_val[blkSize * (tmp[rowcoord]) + i] = value[i];
617 m_nnz++;
618 }
619
620 m_indx[tmp[rowcoord]] = colcoord;
621 tmp[rowcoord]++;
622 }
623}
624
625// explicit instantiation
626template class StorageSmvBsr<NekDouble>;
627
628} // namespace Nektar
size_type size() const
Returns the array's size.
bool operator!=(const const_iterator &rhs)
CoordType storageIndexToFullCoord(IndexType storageIndex)
bool operator==(const const_iterator &rhs)
const_iterator(MatrixStorage matType, IndexType begin, IndexType end, IndexType blkDim, const DataVectorType &val, const IndexVectorType &indx, const IndexVectorType &pntr)
StorageSmvBsr(const IndexType blkRows, const IndexType blkCols, const IndexType blkDim, const BCOMatType &bcoMat, const MatrixStorage matType=eFULL)
const_iterator end() const
IndexType GetNumNonZeroEntries() const
void MultiplyLight(const DataVectorType &in, DataVectorType &out)
IndexVectorType m_indx
void processBcoInput(const IndexType blkRows, const IndexType blkDim, const BCOMatType &bcoMat)
DataType GetFillInRatio() const
size_t GetMemoryUsage() const
void Multiply(const DataType *in, DataType *out)
IndexType GetColumns() const
void Multiply_generic(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Generic zero-based BSR multiply for higher matrix ranks.
void Multiply_1x1(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based CSR multiply. Essentially this is slightly modified copy-paste from NIST Sparse Blas 0....
IndexType GetBlkSize() const
void Multiply_2x2(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 2x2 blocks. Essentially this is slightly optimised copy-paste fr...
void Multiply_4x4(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 4x4 blocks.
void Multiply_3x3(const int mb, const double *val, const int *bindx, const int *bpntrb, const int *bpntre, const double *b, double *c)
Zero-based BSR multiply unrolled for 3x3 blocks.
IndexType GetRows() const
const DataType & GetValue(IndexType row, IndexType column) const
const_iterator begin() const
IndexVectorType m_pntr
IndexType GetNumStoredDoubles() const
static void Dgemv(const char &trans, const int &m, const int &n, const double &alpha, const double *a, const int &lda, const double *x, const int &incx, const double &beta, double *y, const int &incy)
BLAS level 2: Matrix vector multiply y = alpha A x plus beta y where A[m x n].
Definition: Blas.hpp:213
def copy(self)
Definition: pycml.py:2663
static const NekDouble kNekSparseNonZeroTol
The above copyright notice and this permission notice shall be included.
Definition: CoupledSolver.h:2
unsigned int IndexType
std::map< CoordType, BCOEntryType > BCOMatType
std::pair< IndexType, IndexType > CoordType
BCOMatType::const_iterator BCOMatTypeConstIt
scalarT< T > abs(scalarT< T > in)
Definition: scalar.hpp:298