Nektar++
Functions
TimeRoeKernel.cpp File Reference
#include "../RiemannSolvers/RoeSolver.h"
#include <LibUtilities/SimdLib/tinysimd.hpp>
#include <LibUtilities/BasicUtils/SharedArray.hpp>
#include <LibUtilities/BasicUtils/Likwid.hpp>
#include <LibUtilities/BasicUtils/Timer.h>

Go to the source code of this file.

Functions

int main (int argc, char const *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char const *  argv[] 
)

Definition at line 46 of file TimeRoeKernel.cpp.

47{ // Benchmark driver: times RoeKernel as scalar code, as SIMD with unaligned load/store on std::vector<double>, and as SIMD on pre-packed vec_t arrays, then prints a LIKWID-derived cycles-per-element figure for each
48
49 LIKWID_MARKER_INIT; // start up the LIKWID marker API before any region is registered
50 LIKWID_MARKER_THREADINIT; // per-thread marker set-up
51 LIKWID_MARKER_REGISTER("scalar");
52 LIKWID_MARKER_REGISTER("vector");
53 LIKWID_MARKER_REGISTER("vectorOfVector");
54
55 using namespace tinysimd;
56 using vec_t = simd<NekDouble>;
57
58 size_t nEle; // number of faces: argv[1] if given, otherwise 10
59 if (argc < 2)
60 {
61 nEle = 10;
62 }
63 else
64 {
65 nEle = std::stoi(argv[1]); // NOTE(review): stoi throws on non-numeric input and a negative value wraps when converted to size_t; no validation is done here
66 }
67
68 std::cout << "number of faces\t" << nEle
69 << "\t(assuming 4*4 nodes per face)\n";
70
71 size_t sizeScalar = 4 * 4 * nEle; // total number of points (16 per face)
72 size_t nVars = 5;
73 size_t spaceDim = nVars - 2; // = 3 with nVars == 5, so only the spaceDim == 3 branches below are taken
74 size_t sizeVec = sizeScalar / vec_t::width; // number of vec_t packs; integer division, assumes vec_t::width divides sizeScalar - TODO confirm for every SIMD width
75
76 double gamma = 1.4;
77
78 std::vector<std::vector<double>> Fwd(nVars), Bwd(nVars), Flux(nVars); // plain double storage used by the "scalar" and "vector" runs
79
80 for (size_t i = 0; i < nVars; ++i)
81 {
82 Fwd[i] = std::vector<double>(sizeScalar);
83 Bwd[i] = std::vector<double>(sizeScalar);
84 Flux[i] = std::vector<double>(sizeScalar);
85
86 for (size_t j = 0; j < sizeScalar; ++j)
87 {
88 Fwd[i][j] = 1.;
89 Bwd[i][j] = 1.;
90 // fix energy to avoid negative pressure
91 if (i == nVars - 1)
92 {
93 Fwd[i][j] = 10.;
94 Bwd[i][j] = 10.;
95 }
96 Flux[i][j] = 0.;
97 }
98 }
99
100 std::vector<std::vector<vec_t, allocator<vec_t>>> alignedFwd(nVars), // vec_t storage used by the "vectorOfVector" run; same initial values as Fwd/Bwd/Flux
101 alignedBwd(nVars), alignedFlux(nVars);
102
103 for (size_t i = 0; i < nVars; ++i)
104 {
105 alignedFwd[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
106 alignedBwd[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
107 alignedFlux[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
108
109 for (size_t j = 0; j < sizeVec; ++j)
110 {
111 alignedFwd[i][j] = 1.;
112 alignedBwd[i][j] = 1.;
113 // fix energy to avoid negative pressure
114 if (i == nVars - 1)
115 {
116 alignedFwd[i][j] = 10.;
117 alignedBwd[i][j] = 10.;
118 }
119 alignedFlux[i][j] = 0.;
120 }
121 }
122
123 // number of experiments
124 constexpr size_t experiments = 1 << 18; // 262144 repetitions of each timed loop
125
126 LIKWID_MARKER_START("scalar");
127 for (size_t j = 0; j < experiments; ++j)
128 {
129 // time scalar
130 // loop
131 for (size_t i = 0; i < sizeScalar; ++i)
132 {
133 double rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{}; // zero-initialised so unused momentum components stay 0 when spaceDim < 3
134 double rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
135
136 // load
137 rhoL = Fwd[0][i];
138 rhouL = Fwd[1][i];
139 EL = Fwd[spaceDim + 1][i]; // spaceDim + 1 == nVars - 1, the last variable
140 rhoR = Bwd[0][i];
141 rhouR = Bwd[1][i];
142 ER = Bwd[spaceDim + 1][i];
143
144 if (spaceDim == 2)
145 {
146 rhovL = Fwd[2][i];
147 rhovR = Bwd[2][i];
148 }
149 else if (spaceDim == 3)
150 {
151 rhovL = Fwd[2][i];
152 rhowL = Fwd[3][i];
153 rhovR = Bwd[2][i];
154 rhowR = Bwd[3][i];
155 }
156
157 double rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
158
159 RoeKernel(rhoL, rhouL, rhovL, rhowL, EL, rhoR, rhouR, rhovR, rhowR,
160 ER, rhof, rhouf, rhovf, rhowf, Ef, gamma);
161
162 // store
163 Flux[0][i] = rhof;
164 Flux[1][i] = rhouf;
165 Flux[nVars - 1][i] = Ef;
166 if (spaceDim == 2)
167 {
168 Flux[2][i] = rhovf;
169 }
170 else if (spaceDim == 3)
171 {
172 Flux[2][i] = rhovf;
173 Flux[3][i] = rhowf;
174 }
175
176 } // loop
177 }
178 LIKWID_MARKER_STOP("scalar");
179 // get likwid events
180 constexpr short CPU_CLK_UNHALTED_REF_id = 2; // index into events[] used as the cycle counter; presumably tied to the LIKWID event group in use - verify
181 int nevents{20}; // capacity of events[], passed by address to LIKWID_MARKER_GET
182 std::vector<double> events(nevents);
183 double time;
184 int count;
185 boost::ignore_unused(time, count); // time and count are only ever passed to LIKWID_MARKER_GET; presumably this silences warnings when the macro expands to nothing - verify
186
187 LIKWID_MARKER_GET("scalar", &nevents, events.data(), &time, &count);
188 // print out CPE
189 double cpeScalar =
190 events[CPU_CLK_UNHALTED_REF_id] / sizeScalar / experiments; // counter value per point per repetition
191 std::cout << "scalar likwid CPE\t" << cpeScalar << '\n';
192 // avoid opt out
193 std::cout << Flux[0][0] << std::endl;
194
195 LIKWID_MARKER_START("vector");
196 // time SIMD
197 for (size_t j = 0; j < experiments; ++j)
198 {
199 // loop
200 for (size_t i = 0; i < sizeScalar; i += vec_t::width) // one vec_t worth of consecutive doubles per iteration
201 {
202 vec_t rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
203 vec_t rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
204
205 // load
206 rhoL.load(&(Fwd[0][i]), is_not_aligned); // is_not_aligned: the std::vector<double> buffers are not assumed to meet SIMD alignment
207 rhouL.load(&(Fwd[1][i]), is_not_aligned);
208 EL.load(&(Fwd[spaceDim + 1][i]), is_not_aligned);
209 rhoR.load(&(Bwd[0][i]), is_not_aligned);
210 rhouR.load(&(Bwd[1][i]), is_not_aligned);
211 ER.load(&(Bwd[spaceDim + 1][i]), is_not_aligned);
212
213 if (spaceDim == 2)
214 {
215 rhovL.load(&(Fwd[2][i]), is_not_aligned);
216 rhovR.load(&(Bwd[2][i]), is_not_aligned);
217 }
218 else if (spaceDim == 3)
219 {
220 rhovL.load(&(Fwd[2][i]), is_not_aligned);
221 rhowL.load(&(Fwd[3][i]), is_not_aligned);
222 rhovR.load(&(Bwd[2][i]), is_not_aligned);
223 rhowR.load(&(Bwd[3][i]), is_not_aligned);
224 }
225
226 vec_t rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
227
228 RoeKernel(rhoL, rhouL, rhovL, rhowL, EL, rhoR, rhouR, rhovR, rhowR,
229 ER, rhof, rhouf, rhovf, rhowf, Ef, gamma);
230
231 // store
232 rhof.store(&(Flux[0][i]), is_not_aligned);
233 rhouf.store(&(Flux[1][i]), is_not_aligned);
234 Ef.store(&(Flux[nVars - 1][i]), is_not_aligned);
235 if (spaceDim == 2)
236 {
237 rhovf.store(&(Flux[2][i]), is_not_aligned);
238 }
239 else if (spaceDim == 3)
240 {
241 rhovf.store(&(Flux[2][i]), is_not_aligned);
242 rhowf.store(&(Flux[3][i]), is_not_aligned);
243 }
244
245 } // loop
246 }
247 LIKWID_MARKER_STOP("vector");
248 // get likwid events
249 LIKWID_MARKER_GET("vector", &nevents, events.data(), &time, &count);
250 // print out CPE
251 double cpeVector =
252 events[CPU_CLK_UNHALTED_REF_id] / sizeScalar / experiments; // still divided by sizeScalar, so the figure is per scalar point like cpeScalar
253 std::cout << "vector likwid CPE\t" << cpeVector << '\t'
254 << cpeScalar / cpeVector << " %\n"; // NOTE(review): the value is the ratio scalar CPE / vector CPE, not a percentage, despite the " %" label
255 // avoid opt out
256 std::cout << Flux[0][0] << std::endl;
257
258 LIKWID_MARKER_START("vectorOfVector");
259 // time SIMD
260 for (size_t j = 0; j < experiments; ++j)
261 {
262 // loop
263 for (size_t i = 0; i < sizeVec; ++i) // iterates over vec_t packs, so no explicit load/store calls are needed
264 {
265 vec_t rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
266 vec_t rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
267
268 // load
269 rhoL = alignedFwd[0][i];
270 rhouL = alignedFwd[1][i];
271 EL = alignedFwd[spaceDim + 1][i];
272 rhoR = alignedBwd[0][i];
273 rhouR = alignedBwd[1][i];
274 ER = alignedBwd[spaceDim + 1][i];
275
276 if (spaceDim == 2)
277 {
278 rhovL = alignedFwd[2][i];
279 rhovR = alignedBwd[2][i];
280 }
281 else if (spaceDim == 3)
282 {
283 rhovL = alignedFwd[2][i];
284 rhowL = alignedFwd[3][i];
285 rhovR = alignedBwd[2][i];
286 rhowR = alignedBwd[3][i];
287 }
288
289 vec_t rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
290
291 RoeKernel(rhoL, rhouL, rhovL, rhowL, EL, rhoR, rhouR, rhovR, rhowR,
292 ER, rhof, rhouf, rhovf, rhowf, Ef, gamma);
293
294 // store
295 alignedFlux[0][i] = rhof;
296 alignedFlux[1][i] = rhouf;
297 alignedFlux[nVars - 1][i] = Ef;
298 if (spaceDim == 2)
299 {
300 alignedFlux[2][i] = rhovf;
301 }
302 else if (spaceDim == 3)
303 {
304 alignedFlux[2][i] = rhovf;
305 alignedFlux[3][i] = rhowf;
306 }
307
308 } // loop
309 }
310 LIKWID_MARKER_STOP("vectorOfVector");
311 // get likwid events
312 LIKWID_MARKER_GET("vectorOfVector", &nevents, events.data(), &time, &count);
313 // print out CPE
314 double cpevectorOfVector =
315 events[CPU_CLK_UNHALTED_REF_id] / sizeScalar / experiments; // divided by sizeScalar (not sizeVec) so the figure stays per scalar point
316 std::cout << "vectorOfVector likwid CPE\t" << cpevectorOfVector << '\t'
317 << cpeScalar / cpevectorOfVector << " %\n"; // NOTE(review): ratio scalar CPE / vectorOfVector CPE, not a percentage
318 // avoid opt out
319 std::cout << alignedFlux[0][0][0] << std::endl; // first lane of the first pack of variable 0
320
321 LIKWID_MARKER_CLOSE; // shut down the LIKWID marker API once all regions have been read
322}
#define LIKWID_MARKER_THREADINIT
Definition: Likwid.hpp:43
#define LIKWID_MARKER_START(regionTag)
Definition: Likwid.hpp:46
#define LIKWID_MARKER_CLOSE
Definition: Likwid.hpp:48
#define LIKWID_MARKER_INIT
Definition: Likwid.hpp:42
#define LIKWID_MARKER_REGISTER(regionTag)
Definition: Likwid.hpp:45
#define LIKWID_MARKER_STOP(regionTag)
Definition: Likwid.hpp:47
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
Definition: Likwid.hpp:49
void RoeKernel(T &rhoL, T &rhouL, T &rhovL, T &rhowL, T &EL, T &rhoR, T &rhouR, T &rhovR, T &rhowR, T &ER, T &rhof, T &rhouf, T &rhovf, T &rhowf, T &Ef, NekDouble gamma)
Definition: RoeSolver.h:75
tinysimd::simd< NekDouble > vec_t
static constexpr struct tinysimd::is_not_aligned_t is_not_aligned
typename abi< ScalarType, width >::type simd
Definition: tinysimd.hpp:80

References tinysimd::is_not_aligned, LIKWID_MARKER_CLOSE, LIKWID_MARKER_GET, LIKWID_MARKER_INIT, LIKWID_MARKER_REGISTER, LIKWID_MARKER_START, LIKWID_MARKER_STOP, LIKWID_MARKER_THREADINIT, and Nektar::RoeKernel().