Nektar++
TimeRoeKernel.cpp
Go to the documentation of this file.
1///////////////////////////////////////////////////////////////////////////////
2//
3// File: TimeRoeKernel.cpp
4//
5// For more information, please see: http://www.nektar.info
6//
7// The MIT License
8//
9// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
10// Department of Aeronautics, Imperial College London (UK), and Scientific
11// Computing and Imaging Institute, University of Utah (USA).
12//
13// Permission is hereby granted, free of charge, to any person obtaining a
14// copy of this software and associated documentation files (the "Software"),
15// to deal in the Software without restriction, including without limitation
16// the rights to use, copy, modify, merge, publish, distribute, sublicense,
17// and/or sell copies of the Software, and to permit persons to whom the
18// Software is furnished to do so, subject to the following conditions:
19//
20// The above copyright notice and this permission notice shall be included
21// in all copies or substantial portions of the Software.
22//
23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29// DEALINGS IN THE SOFTWARE.
30//
31// Description:
32//
33///////////////////////////////////////////////////////////////////////////////
34
35#include "../RiemannSolvers/RoeSolver.h"
36
38
40
43
44using namespace Nektar;
45
46int main(int argc, char const *argv[])
47{
48
51 LIKWID_MARKER_REGISTER("scalar");
52 LIKWID_MARKER_REGISTER("vector");
53 LIKWID_MARKER_REGISTER("vectorOfVector");
54
55 using namespace tinysimd;
56 using vec_t = simd<NekDouble>;
57
58 size_t nEle;
59 if (argc < 2)
60 {
61 nEle = 10;
62 }
63 else
64 {
65 nEle = std::stoi(argv[1]);
66 }
67
68 std::cout << "number of faces\t" << nEle
69 << "\t(assuming 4*4 nodes per face)\n";
70
71 size_t sizeScalar = 4 * 4 * nEle;
72 size_t nVars = 5;
73 size_t spaceDim = nVars - 2;
74 size_t sizeVec = sizeScalar / vec_t::width;
75
76 double gamma = 1.4;
77
78 std::vector<std::vector<double>> Fwd(nVars), Bwd(nVars), Flux(nVars);
79
80 for (size_t i = 0; i < nVars; ++i)
81 {
82 Fwd[i] = std::vector<double>(sizeScalar);
83 Bwd[i] = std::vector<double>(sizeScalar);
84 Flux[i] = std::vector<double>(sizeScalar);
85
86 for (size_t j = 0; j < sizeScalar; ++j)
87 {
88 Fwd[i][j] = 1.;
89 Bwd[i][j] = 1.;
90 // fix energy to avoid negative pressure
91 if (i == nVars - 1)
92 {
93 Fwd[i][j] = 10.;
94 Bwd[i][j] = 10.;
95 }
96 Flux[i][j] = 0.;
97 }
98 }
99
100 std::vector<std::vector<vec_t, allocator<vec_t>>> alignedFwd(nVars),
101 alignedBwd(nVars), alignedFlux(nVars);
102
103 for (size_t i = 0; i < nVars; ++i)
104 {
105 alignedFwd[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
106 alignedBwd[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
107 alignedFlux[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
108
109 for (size_t j = 0; j < sizeVec; ++j)
110 {
111 alignedFwd[i][j] = 1.;
112 alignedBwd[i][j] = 1.;
113 // fix energy to avoid negative pressure
114 if (i == nVars - 1)
115 {
116 alignedFwd[i][j] = 10.;
117 alignedBwd[i][j] = 10.;
118 }
119 alignedFlux[i][j] = 0.;
120 }
121 }
122
123 // number of experiments
124 constexpr size_t experiments = 1 << 18;
125
126 LIKWID_MARKER_START("scalar");
127 for (size_t j = 0; j < experiments; ++j)
128 {
129 // time scalar
130 // loop
131 for (size_t i = 0; i < sizeScalar; ++i)
132 {
133 double rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
134 double rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
135
136 // load
137 rhoL = Fwd[0][i];
138 rhouL = Fwd[1][i];
139 EL = Fwd[spaceDim + 1][i];
140 rhoR = Bwd[0][i];
141 rhouR = Bwd[1][i];
142 ER = Bwd[spaceDim + 1][i];
143
144 if (spaceDim == 2)
145 {
146 rhovL = Fwd[2][i];
147 rhovR = Bwd[2][i];
148 }
149 else if (spaceDim == 3)
150 {
151 rhovL = Fwd[2][i];
152 rhowL = Fwd[3][i];
153 rhovR = Bwd[2][i];
154 rhowR = Bwd[3][i];
155 }
156
157 double rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
158
159 RoeKernel(rhoL, rhouL, rhovL, rhowL, EL, rhoR, rhouR, rhovR, rhowR,
160 ER, rhof, rhouf, rhovf, rhowf, Ef, gamma);
161
162 // store
163 Flux[0][i] = rhof;
164 Flux[1][i] = rhouf;
165 Flux[nVars - 1][i] = Ef;
166 if (spaceDim == 2)
167 {
168 Flux[2][i] = rhovf;
169 }
170 else if (spaceDim == 3)
171 {
172 Flux[2][i] = rhovf;
173 Flux[3][i] = rhowf;
174 }
175
176 } // loop
177 }
178 LIKWID_MARKER_STOP("scalar");
179 // get likwid events
180 constexpr short CPU_CLK_UNHALTED_REF_id = 2;
181 int nevents{20};
182 std::vector<double> events(nevents);
183 [[maybe_unused]] double time;
184 [[maybe_unused]] int count;
185
186 LIKWID_MARKER_GET("scalar", &nevents, events.data(), &time, &count);
187 // print out CPE
188 double cpeScalar =
189 events[CPU_CLK_UNHALTED_REF_id] / sizeScalar / experiments;
190 std::cout << "scalar likwid CPE\t" << cpeScalar << '\n';
191 // avoid opt out
192 std::cout << Flux[0][0] << std::endl;
193
194 LIKWID_MARKER_START("vector");
195 // time SIMD
196 for (size_t j = 0; j < experiments; ++j)
197 {
198 // loop
199 for (size_t i = 0; i < sizeScalar; i += vec_t::width)
200 {
201 vec_t rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
202 vec_t rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
203
204 // load
205 rhoL.load(&(Fwd[0][i]), is_not_aligned);
206 rhouL.load(&(Fwd[1][i]), is_not_aligned);
207 EL.load(&(Fwd[spaceDim + 1][i]), is_not_aligned);
208 rhoR.load(&(Bwd[0][i]), is_not_aligned);
209 rhouR.load(&(Bwd[1][i]), is_not_aligned);
210 ER.load(&(Bwd[spaceDim + 1][i]), is_not_aligned);
211
212 if (spaceDim == 2)
213 {
214 rhovL.load(&(Fwd[2][i]), is_not_aligned);
215 rhovR.load(&(Bwd[2][i]), is_not_aligned);
216 }
217 else if (spaceDim == 3)
218 {
219 rhovL.load(&(Fwd[2][i]), is_not_aligned);
220 rhowL.load(&(Fwd[3][i]), is_not_aligned);
221 rhovR.load(&(Bwd[2][i]), is_not_aligned);
222 rhowR.load(&(Bwd[3][i]), is_not_aligned);
223 }
224
225 vec_t rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
226
227 RoeKernel(rhoL, rhouL, rhovL, rhowL, EL, rhoR, rhouR, rhovR, rhowR,
228 ER, rhof, rhouf, rhovf, rhowf, Ef, gamma);
229
230 // store
231 rhof.store(&(Flux[0][i]), is_not_aligned);
232 rhouf.store(&(Flux[1][i]), is_not_aligned);
233 Ef.store(&(Flux[nVars - 1][i]), is_not_aligned);
234 if (spaceDim == 2)
235 {
236 rhovf.store(&(Flux[2][i]), is_not_aligned);
237 }
238 else if (spaceDim == 3)
239 {
240 rhovf.store(&(Flux[2][i]), is_not_aligned);
241 rhowf.store(&(Flux[3][i]), is_not_aligned);
242 }
243
244 } // loop
245 }
246 LIKWID_MARKER_STOP("vector");
247 // get likwid events
248 LIKWID_MARKER_GET("vector", &nevents, events.data(), &time, &count);
249 // print out CPE
250 double cpeVector =
251 events[CPU_CLK_UNHALTED_REF_id] / sizeScalar / experiments;
252 std::cout << "vector likwid CPE\t" << cpeVector << '\t'
253 << cpeScalar / cpeVector << " %\n";
254 // avoid opt out
255 std::cout << Flux[0][0] << std::endl;
256
257 LIKWID_MARKER_START("vectorOfVector");
258 // time SIMD
259 for (size_t j = 0; j < experiments; ++j)
260 {
261 // loop
262 for (size_t i = 0; i < sizeVec; ++i)
263 {
264 vec_t rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
265 vec_t rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
266
267 // load
268 rhoL = alignedFwd[0][i];
269 rhouL = alignedFwd[1][i];
270 EL = alignedFwd[spaceDim + 1][i];
271 rhoR = alignedBwd[0][i];
272 rhouR = alignedBwd[1][i];
273 ER = alignedBwd[spaceDim + 1][i];
274
275 if (spaceDim == 2)
276 {
277 rhovL = alignedFwd[2][i];
278 rhovR = alignedBwd[2][i];
279 }
280 else if (spaceDim == 3)
281 {
282 rhovL = alignedFwd[2][i];
283 rhowL = alignedFwd[3][i];
284 rhovR = alignedBwd[2][i];
285 rhowR = alignedBwd[3][i];
286 }
287
288 vec_t rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
289
290 RoeKernel(rhoL, rhouL, rhovL, rhowL, EL, rhoR, rhouR, rhovR, rhowR,
291 ER, rhof, rhouf, rhovf, rhowf, Ef, gamma);
292
293 // store
294 alignedFlux[0][i] = rhof;
295 alignedFlux[1][i] = rhouf;
296 alignedFlux[nVars - 1][i] = Ef;
297 if (spaceDim == 2)
298 {
299 alignedFlux[2][i] = rhovf;
300 }
301 else if (spaceDim == 3)
302 {
303 alignedFlux[2][i] = rhovf;
304 alignedFlux[3][i] = rhowf;
305 }
306
307 } // loop
308 }
309 LIKWID_MARKER_STOP("vectorOfVector");
310 // get likwid events
311 LIKWID_MARKER_GET("vectorOfVector", &nevents, events.data(), &time, &count);
312 // print out CPE
313 double cpevectorOfVector =
314 events[CPU_CLK_UNHALTED_REF_id] / sizeScalar / experiments;
315 std::cout << "vectorOfVector likwid CPE\t" << cpevectorOfVector << '\t'
316 << cpeScalar / cpevectorOfVector << " %\n";
317 // avoid opt out
318 std::cout << alignedFlux[0][0][0] << std::endl;
319
321}
#define LIKWID_MARKER_THREADINIT
Definition: Likwid.hpp:43
#define LIKWID_MARKER_START(regionTag)
Definition: Likwid.hpp:46
#define LIKWID_MARKER_CLOSE
Definition: Likwid.hpp:48
#define LIKWID_MARKER_INIT
Definition: Likwid.hpp:42
#define LIKWID_MARKER_REGISTER(regionTag)
Definition: Likwid.hpp:45
#define LIKWID_MARKER_STOP(regionTag)
Definition: Likwid.hpp:47
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
Definition: Likwid.hpp:49
int main(int argc, char const *argv[])
void RoeKernel(T &rhoL, T &rhouL, T &rhovL, T &rhowL, T &EL, T &rhoR, T &rhouR, T &rhovR, T &rhowR, T &ER, T &rhof, T &rhouf, T &rhovf, T &rhowf, T &Ef, NekDouble gamma)
Definition: RoeSolver.h:73
tinysimd::simd< NekDouble > vec_t
static constexpr struct tinysimd::is_not_aligned_t is_not_aligned
typename abi< ScalarType, width >::type simd
Definition: tinysimd.hpp:80