Nektar++
TestDgemmOptimizations.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: TestDgemmOptimizations.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// Permission is hereby granted, free of charge, to any person obtaining a
14// copy of this software and associated documentation files (the "Software"),
15// to deal in the Software without restriction, including without limitation
16// the rights to use, copy, modify, merge, publish, distribute, sublicense,
17// and/or sell copies of the Software, and to permit persons to whom the
18// Software is furnished to do so, subject to the following conditions:
19//
20// The above copyright notice and this permission notice shall be included
21// in all copies or substantial portions of the Software.
22//
23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29// DEALINGS IN THE SOFTWARE.
30//
31// Description: Unit tests for alpha*A*B + beta*C expressions on scaled and
31//              block matrices.
32//
33///////////////////////////////////////////////////////////////////////////////
34
35#include <boost/test/unit_test.hpp>
36
38
39#include <boost/test/tools/floating_point_comparison.hpp>
40#include <boost/test/unit_test.hpp>
41
42namespace Nektar
43{
44
45BOOST_AUTO_TEST_CASE(TestABPlusbCScaled)
46{
47 typedef NekMatrix<NekMatrix<double>, ScaledMatrixTag> ScaledMatrix;
48
49 double abuf[] = {1, 2, 3, 4};
50 double bbuf[] = {5, 6, 7, 8};
51 double cbuf[] = {9, 10, 11, 12};
52 double beta = 7.0;
53 double alpha = 2.0;
54
55 std::shared_ptr<NekMatrix<double>> innerA(
56 new NekMatrix<double>(2, 2, abuf));
57 std::shared_ptr<NekMatrix<double>> innerB(
58 new NekMatrix<double>(2, 2, bbuf));
59 std::shared_ptr<NekMatrix<double>> innerC(
60 new NekMatrix<double>(2, 2, cbuf));
61
62 ScaledMatrix A(2.0, innerA);
63 ScaledMatrix B(3.0, innerB);
64 ScaledMatrix C(4.0, innerC);
65 double epsilon = 1e-11;
66
67 NekMatrix<double> result1 = A * B + C;
68 BOOST_CHECK_CLOSE(174.0, *result1(0, 0), epsilon);
69 BOOST_CHECK_CLOSE(244.0, *result1(1, 0), epsilon);
70 BOOST_CHECK_CLOSE(230.0, *result1(0, 1), epsilon);
71 BOOST_CHECK_CLOSE(324.0, *result1(1, 1), epsilon);
72
73 NekMatrix<double> result2 = A * B + beta * C;
74 BOOST_CHECK_CLOSE(390.0, *result2(0, 0), epsilon);
75 BOOST_CHECK_CLOSE(484.0, *result2(1, 0), epsilon);
76 BOOST_CHECK_CLOSE(494.0, *result2(0, 1), epsilon);
77 BOOST_CHECK_CLOSE(612.0, *result2(1, 1), epsilon);
78
79 NekMatrix<double> result3 = alpha * A * B + C;
80 BOOST_CHECK_CLOSE(312.0, *result3(0, 0), epsilon);
81 BOOST_CHECK_CLOSE(448.0, *result3(1, 0), epsilon);
82 BOOST_CHECK_CLOSE(416.0, *result3(0, 1), epsilon);
83 BOOST_CHECK_CLOSE(600.0, *result3(1, 1), epsilon);
84
85 NekMatrix<double> result4 = alpha * A * B + beta * C;
86 BOOST_CHECK_CLOSE(528.0, *result4(0, 0), epsilon);
87 BOOST_CHECK_CLOSE(688.0, *result4(1, 0), epsilon);
88 BOOST_CHECK_CLOSE(680.0, *result4(0, 1), epsilon);
89 BOOST_CHECK_CLOSE(888.0, *result4(1, 1), epsilon);
90
91 NekMatrix<double> result5 = alpha * (A * B) + C;
92 BOOST_CHECK_CLOSE(312.0, *result5(0, 0), epsilon);
93 BOOST_CHECK_CLOSE(448.0, *result5(1, 0), epsilon);
94 BOOST_CHECK_CLOSE(416.0, *result5(0, 1), epsilon);
95 BOOST_CHECK_CLOSE(600.0, *result5(1, 1), epsilon);
96
97 NekMatrix<double> result6 = alpha * (A * B) + beta * C;
98 BOOST_CHECK_CLOSE(528.0, *result6(0, 0), epsilon);
99 BOOST_CHECK_CLOSE(688.0, *result6(1, 0), epsilon);
100 BOOST_CHECK_CLOSE(680.0, *result6(0, 1), epsilon);
101 BOOST_CHECK_CLOSE(888.0, *result6(1, 1), epsilon);
102}
103
104BOOST_AUTO_TEST_CASE(TestABPlusbCBlock)
105{
106 typedef NekMatrix<NekMatrix<double>, BlockMatrixTag> BlockMatrix;
107
108 double abuf[] = {1, 2, 3, 4};
109 double bbuf[] = {5, 6, 7, 8};
110 double cbuf[] = {9, 10, 11, 12};
111 double dbuf[] = {13, 14, 15, 16};
112 double beta = 7.0;
113 double alpha = 2.0;
114
115 std::shared_ptr<NekMatrix<double>> innerA(
116 new NekMatrix<double>(2, 2, abuf));
117 std::shared_ptr<NekMatrix<double>> innerB(
118 new NekMatrix<double>(2, 2, bbuf));
119 std::shared_ptr<NekMatrix<double>> innerC(
120 new NekMatrix<double>(2, 2, cbuf));
121 std::shared_ptr<NekMatrix<double>> innerD(
122 new NekMatrix<double>(2, 2, dbuf));
123
124 BlockMatrix A(2, 2, 2, 2);
125 BlockMatrix B(2, 2, 2, 2);
126 BlockMatrix C(2, 2, 2, 2);
127
128 A.SetBlock(0, 0, innerA);
129 A.SetBlock(0, 1, innerB);
130 A.SetBlock(1, 0, innerC);
131 A.SetBlock(1, 1, innerD);
132
133 B.SetBlock(0, 0, innerA);
134 B.SetBlock(0, 1, innerB);
135 B.SetBlock(1, 0, innerC);
136 B.SetBlock(1, 1, innerD);
137
138 C.SetBlock(0, 0, innerA);
139 C.SetBlock(0, 1, innerB);
140 C.SetBlock(1, 0, innerC);
141 C.SetBlock(1, 1, innerD);
142 double epsilon = 1e-11;
143
144 NekMatrix<double> result1 = A * B + C;
145 double expected_result_buf1[] = {123, 146, 307, 330, 157, 188, 405, 436,
146 191, 230, 503, 542, 225, 272, 601, 648};
147 NekMatrix<double> expected_result1(4, 4, expected_result_buf1);
148 BOOST_CHECK_EQUAL(expected_result1, result1);
149
150 NekMatrix<double> result2 = A * B + beta * C;
151 BOOST_CHECK_CLOSE(129.0, *result2(0, 0), epsilon);
152 BOOST_CHECK_CLOSE(158.0, *result2(1, 0), epsilon);
153 BOOST_CHECK_CLOSE(361.0, *result2(2, 0), epsilon);
154 BOOST_CHECK_CLOSE(390.0, *result2(3, 0), epsilon);
155 BOOST_CHECK_CLOSE(175.0, *result2(0, 1), epsilon);
156 BOOST_CHECK_CLOSE(212.0, *result2(1, 1), epsilon);
157 BOOST_CHECK_CLOSE(471.0, *result2(2, 1), epsilon);
158 BOOST_CHECK_CLOSE(508.0, *result2(3, 1), epsilon);
159 BOOST_CHECK_CLOSE(221.0, *result2(0, 2), epsilon);
160 BOOST_CHECK_CLOSE(266.0, *result2(1, 2), epsilon);
161 BOOST_CHECK_CLOSE(581.0, *result2(2, 2), epsilon);
162 BOOST_CHECK_CLOSE(626.0, *result2(3, 2), epsilon);
163 BOOST_CHECK_CLOSE(267.0, *result2(0, 3), epsilon);
164 BOOST_CHECK_CLOSE(320.0, *result2(1, 3), epsilon);
165 BOOST_CHECK_CLOSE(691.0, *result2(2, 3), epsilon);
166 BOOST_CHECK_CLOSE(744.0, *result2(3, 3), epsilon);
167
168 NekMatrix<double> result3 = alpha * A * B + C;
169 BOOST_CHECK_CLOSE(245.0, *result3(0, 0), epsilon);
170 BOOST_CHECK_CLOSE(290.0, *result3(1, 0), epsilon);
171 BOOST_CHECK_CLOSE(605.0, *result3(2, 0), epsilon);
172 BOOST_CHECK_CLOSE(650.0, *result3(3, 0), epsilon);
173 BOOST_CHECK_CLOSE(311.0, *result3(0, 1), epsilon);
174 BOOST_CHECK_CLOSE(372.0, *result3(1, 1), epsilon);
175 BOOST_CHECK_CLOSE(799.0, *result3(2, 1), epsilon);
176 BOOST_CHECK_CLOSE(860.0, *result3(3, 1), epsilon);
177 BOOST_CHECK_CLOSE(377.0, *result3(0, 2), epsilon);
178 BOOST_CHECK_CLOSE(454.0, *result3(1, 2), epsilon);
179 BOOST_CHECK_CLOSE(993.0, *result3(2, 2), epsilon);
180 BOOST_CHECK_CLOSE(1070.0, *result3(3, 2), epsilon);
181 BOOST_CHECK_CLOSE(443.0, *result3(0, 3), epsilon);
182 BOOST_CHECK_CLOSE(536.0, *result3(1, 3), epsilon);
183 BOOST_CHECK_CLOSE(1187.0, *result3(2, 3), epsilon);
184 BOOST_CHECK_CLOSE(1280.0, *result3(3, 3), epsilon);
185
186 NekMatrix<double> result4 = alpha * A * B + beta * C;
187 BOOST_CHECK_CLOSE(251.0, *result4(0, 0), epsilon);
188 BOOST_CHECK_CLOSE(302.0, *result4(1, 0), epsilon);
189 BOOST_CHECK_CLOSE(659.0, *result4(2, 0), epsilon);
190 BOOST_CHECK_CLOSE(710.0, *result4(3, 0), epsilon);
191 BOOST_CHECK_CLOSE(329.0, *result4(0, 1), epsilon);
192 BOOST_CHECK_CLOSE(396.0, *result4(1, 1), epsilon);
193 BOOST_CHECK_CLOSE(865.0, *result4(2, 1), epsilon);
194 BOOST_CHECK_CLOSE(932.0, *result4(3, 1), epsilon);
195 BOOST_CHECK_CLOSE(407.0, *result4(0, 2), epsilon);
196 BOOST_CHECK_CLOSE(490.0, *result4(1, 2), epsilon);
197 BOOST_CHECK_CLOSE(1071.0, *result4(2, 2), epsilon);
198 BOOST_CHECK_CLOSE(1154.0, *result4(3, 2), epsilon);
199 BOOST_CHECK_CLOSE(485.0, *result4(0, 3), epsilon);
200 BOOST_CHECK_CLOSE(584.0, *result4(1, 3), epsilon);
201 BOOST_CHECK_CLOSE(1277.0, *result4(2, 3), epsilon);
202 BOOST_CHECK_CLOSE(1376.0, *result4(3, 3), epsilon);
203
204 NekMatrix<double> result5 = alpha * (A * B) + C;
205 BOOST_CHECK_CLOSE(245.0, *result5(0, 0), epsilon);
206 BOOST_CHECK_CLOSE(290.0, *result5(1, 0), epsilon);
207 BOOST_CHECK_CLOSE(605.0, *result5(2, 0), epsilon);
208 BOOST_CHECK_CLOSE(650.0, *result5(3, 0), epsilon);
209 BOOST_CHECK_CLOSE(311.0, *result5(0, 1), epsilon);
210 BOOST_CHECK_CLOSE(372.0, *result5(1, 1), epsilon);
211 BOOST_CHECK_CLOSE(799.0, *result5(2, 1), epsilon);
212 BOOST_CHECK_CLOSE(860.0, *result5(3, 1), epsilon);
213 BOOST_CHECK_CLOSE(377.0, *result5(0, 2), epsilon);
214 BOOST_CHECK_CLOSE(454.0, *result5(1, 2), epsilon);
215 BOOST_CHECK_CLOSE(993.0, *result5(2, 2), epsilon);
216 BOOST_CHECK_CLOSE(1070.0, *result5(3, 2), epsilon);
217 BOOST_CHECK_CLOSE(443.0, *result5(0, 3), epsilon);
218 BOOST_CHECK_CLOSE(536.0, *result5(1, 3), epsilon);
219 BOOST_CHECK_CLOSE(1187.0, *result5(2, 3), epsilon);
220 BOOST_CHECK_CLOSE(1280.0, *result5(3, 3), epsilon);
221
222 NekMatrix<double> result6 = alpha * (A * B) + beta * C;
223 BOOST_CHECK_CLOSE(251.0, *result6(0, 0), epsilon);
224 BOOST_CHECK_CLOSE(302.0, *result6(1, 0), epsilon);
225 BOOST_CHECK_CLOSE(659.0, *result6(2, 0), epsilon);
226 BOOST_CHECK_CLOSE(710.0, *result6(3, 0), epsilon);
227 BOOST_CHECK_CLOSE(329.0, *result6(0, 1), epsilon);
228 BOOST_CHECK_CLOSE(396.0, *result6(1, 1), epsilon);
229 BOOST_CHECK_CLOSE(865.0, *result6(2, 1), epsilon);
230 BOOST_CHECK_CLOSE(932.0, *result6(3, 1), epsilon);
231 BOOST_CHECK_CLOSE(407.0, *result6(0, 2), epsilon);
232 BOOST_CHECK_CLOSE(490.0, *result6(1, 2), epsilon);
233 BOOST_CHECK_CLOSE(1071.0, *result6(2, 2), epsilon);
234 BOOST_CHECK_CLOSE(1154.0, *result6(3, 2), epsilon);
235 BOOST_CHECK_CLOSE(485.0, *result6(0, 3), epsilon);
236 BOOST_CHECK_CLOSE(584.0, *result6(1, 3), epsilon);
237 BOOST_CHECK_CLOSE(1277.0, *result6(2, 3), epsilon);
238 BOOST_CHECK_CLOSE(1376.0, *result6(3, 3), epsilon);
239}
240} // namespace Nektar
@ beta
Gauss Radau pinned at x=-1,.
Definition: PointsType.h:59
NekMatrix< DenseMatrix, BlockMatrixTag > BlockMatrix
Definition: NekTypeDefs.hpp:59
NekMatrix< DenseMatrix, ScaledMatrixTag > ScaledMatrix
Definition: NekTypeDefs.hpp:55
BOOST_AUTO_TEST_CASE(TestCanGetRawPtr)