Nektar++
Functions
TimeRoeKernel.cpp File Reference
#include "../RiemannSolvers/RoeSolver.h"
#include <LibUtilities/SimdLib/tinysimd.hpp>
#include <LibUtilities/BasicUtils/SharedArray.hpp>
#include <LibUtilities/BasicUtils/Likwid.hpp>
#include <LibUtilities/BasicUtils/Timer.h>

Go to the source code of this file.

Functions

int main (int argc, char const *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char const *  argv[] 
)

Definition at line 12 of file TimeRoeKernel.cpp.

13 {
14 
17  LIKWID_MARKER_REGISTER("scalar");
18  LIKWID_MARKER_REGISTER("vector");
19  LIKWID_MARKER_REGISTER("vectorOfVector");
20 
21  using namespace tinysimd;
22  using vec_t = simd<NekDouble>;
23 
24  size_t nEle;
25  if (argc < 2)
26  {
27  nEle = 10;
28  }
29  else
30  {
31  nEle = std::stoi(argv[1]);
32  }
33 
34  std::cout << "number of faces\t" << nEle
35  << "\t(assuming 4*4 nodes per face)\n";
36 
37  size_t sizeScalar = 4 * 4 * nEle;
38  size_t nVars = 5;
39  size_t spaceDim = nVars - 2;
40  size_t sizeVec = sizeScalar / vec_t::width;
41 
42  double gamma = 1.4;
43 
44  std::vector<std::vector<double>> Fwd(nVars), Bwd(nVars), Flux(nVars);
45 
46  for (size_t i = 0; i < nVars; ++i)
47  {
48  Fwd[i] = std::vector<double>(sizeScalar);
49  Bwd[i] = std::vector<double>(sizeScalar);
50  Flux[i] = std::vector<double>(sizeScalar);
51 
52  for (size_t j = 0; j < sizeScalar; ++j)
53  {
54  Fwd[i][j] = 1.;
55  Bwd[i][j] = 1.;
56  // fix energy to avoid negative pressure
57  if (i == nVars - 1)
58  {
59  Fwd[i][j] = 10.;
60  Bwd[i][j] = 10.;
61  }
62  Flux[i][j] = 0.;
63  }
64  }
65 
66  std::vector<std::vector<vec_t, allocator<vec_t>>> alignedFwd(nVars),
67  alignedBwd(nVars), alignedFlux(nVars);
68 
69  for (size_t i = 0; i < nVars; ++i)
70  {
71  alignedFwd[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
72  alignedBwd[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
73  alignedFlux[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
74 
75  for (size_t j = 0; j < sizeVec; ++j)
76  {
77  alignedFwd[i][j] = 1.;
78  alignedBwd[i][j] = 1.;
79  // fix energy to avoid negative pressure
80  if (i == nVars - 1)
81  {
82  alignedFwd[i][j] = 10.;
83  alignedBwd[i][j] = 10.;
84  }
85  alignedFlux[i][j] = 0.;
86  }
87  }
88 
89  // number of experiments
90  constexpr size_t experiments = 1 << 18;
91 
92  LIKWID_MARKER_START("scalar");
93  for (size_t j = 0; j < experiments; ++j)
94  {
95  // time scalar
96  // loop
97  for (size_t i = 0; i < sizeScalar; ++i)
98  {
99  double rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
100  double rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
101 
102  // load
103  rhoL = Fwd[0][i];
104  rhouL = Fwd[1][i];
105  EL = Fwd[spaceDim + 1][i];
106  rhoR = Bwd[0][i];
107  rhouR = Bwd[1][i];
108  ER = Bwd[spaceDim + 1][i];
109 
110  if (spaceDim == 2)
111  {
112  rhovL = Fwd[2][i];
113  rhovR = Bwd[2][i];
114  }
115  else if (spaceDim == 3)
116  {
117  rhovL = Fwd[2][i];
118  rhowL = Fwd[3][i];
119  rhovR = Bwd[2][i];
120  rhowR = Bwd[3][i];
121  }
122 
123  double rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
124 
125  RoeKernel(rhoL, rhouL, rhovL, rhowL, EL, rhoR, rhouR, rhovR, rhowR,
126  ER, rhof, rhouf, rhovf, rhowf, Ef, gamma);
127 
128  // store
129  Flux[0][i] = rhof;
130  Flux[1][i] = rhouf;
131  Flux[nVars - 1][i] = Ef;
132  if (spaceDim == 2)
133  {
134  Flux[2][i] = rhovf;
135  }
136  else if (spaceDim == 3)
137  {
138  Flux[2][i] = rhovf;
139  Flux[3][i] = rhowf;
140  }
141 
142  } // loop
143  }
144  LIKWID_MARKER_STOP("scalar");
145  // get likwid events
146  constexpr short CPU_CLK_UNHALTED_REF_id = 2;
147  int nevents{20};
148  std::vector<double> events(nevents);
149  double time;
150  int count;
151  boost::ignore_unused(time, count);
152 
153  LIKWID_MARKER_GET("scalar", &nevents, events.data(), &time, &count);
154  // print out CPE
155  double cpeScalar =
156  events[CPU_CLK_UNHALTED_REF_id] / sizeScalar / experiments;
157  std::cout << "scalar likwid CPE\t" << cpeScalar << '\n';
158  // avoid opt out
159  std::cout << Flux[0][0] << std::endl;
160 
161  LIKWID_MARKER_START("vector");
162  // time SIMD
163  for (size_t j = 0; j < experiments; ++j)
164  {
165  // loop
166  for (size_t i = 0; i < sizeScalar; i += vec_t::width)
167  {
168  vec_t rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
169  vec_t rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
170 
171  // load
172  rhoL.load(&(Fwd[0][i]), is_not_aligned);
173  rhouL.load(&(Fwd[1][i]), is_not_aligned);
174  EL.load(&(Fwd[spaceDim + 1][i]), is_not_aligned);
175  rhoR.load(&(Bwd[0][i]), is_not_aligned);
176  rhouR.load(&(Bwd[1][i]), is_not_aligned);
177  ER.load(&(Bwd[spaceDim + 1][i]), is_not_aligned);
178 
179  if (spaceDim == 2)
180  {
181  rhovL.load(&(Fwd[2][i]), is_not_aligned);
182  rhovR.load(&(Bwd[2][i]), is_not_aligned);
183  }
184  else if (spaceDim == 3)
185  {
186  rhovL.load(&(Fwd[2][i]), is_not_aligned);
187  rhowL.load(&(Fwd[3][i]), is_not_aligned);
188  rhovR.load(&(Bwd[2][i]), is_not_aligned);
189  rhowR.load(&(Bwd[3][i]), is_not_aligned);
190  }
191 
192  vec_t rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
193 
194  RoeKernel(rhoL, rhouL, rhovL, rhowL, EL, rhoR, rhouR, rhovR, rhowR,
195  ER, rhof, rhouf, rhovf, rhowf, Ef, gamma);
196 
197  // store
198  rhof.store(&(Flux[0][i]), is_not_aligned);
199  rhouf.store(&(Flux[1][i]), is_not_aligned);
200  Ef.store(&(Flux[nVars - 1][i]), is_not_aligned);
201  if (spaceDim == 2)
202  {
203  rhovf.store(&(Flux[2][i]), is_not_aligned);
204  }
205  else if (spaceDim == 3)
206  {
207  rhovf.store(&(Flux[2][i]), is_not_aligned);
208  rhowf.store(&(Flux[3][i]), is_not_aligned);
209  }
210 
211  } // loop
212  }
213  LIKWID_MARKER_STOP("vector");
214  // get likwid events
215  LIKWID_MARKER_GET("vector", &nevents, events.data(), &time, &count);
216  // print out CPE
217  double cpeVector =
218  events[CPU_CLK_UNHALTED_REF_id] / sizeScalar / experiments;
219  std::cout << "vector likwid CPE\t" << cpeVector << '\t'
220  << cpeScalar / cpeVector << " %\n";
221  // avoid opt out
222  std::cout << Flux[0][0] << std::endl;
223 
224  LIKWID_MARKER_START("vectorOfVector");
225  // time SIMD
226  for (size_t j = 0; j < experiments; ++j)
227  {
228  // loop
229  for (size_t i = 0; i < sizeVec; ++i)
230  {
231  vec_t rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
232  vec_t rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
233 
234  // load
235  rhoL = alignedFwd[0][i];
236  rhouL = alignedFwd[1][i];
237  EL = alignedFwd[spaceDim + 1][i];
238  rhoR = alignedBwd[0][i];
239  rhouR = alignedBwd[1][i];
240  ER = alignedBwd[spaceDim + 1][i];
241 
242  if (spaceDim == 2)
243  {
244  rhovL = alignedFwd[2][i];
245  rhovR = alignedBwd[2][i];
246  }
247  else if (spaceDim == 3)
248  {
249  rhovL = alignedFwd[2][i];
250  rhowL = alignedFwd[3][i];
251  rhovR = alignedBwd[2][i];
252  rhowR = alignedBwd[3][i];
253  }
254 
255  vec_t rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
256 
257  RoeKernel(rhoL, rhouL, rhovL, rhowL, EL, rhoR, rhouR, rhovR, rhowR,
258  ER, rhof, rhouf, rhovf, rhowf, Ef, gamma);
259 
260  // store
261  alignedFlux[0][i] = rhof;
262  alignedFlux[1][i] = rhouf;
263  alignedFlux[nVars - 1][i] = Ef;
264  if (spaceDim == 2)
265  {
266  alignedFlux[2][i] = rhovf;
267  }
268  else if (spaceDim == 3)
269  {
270  alignedFlux[2][i] = rhovf;
271  alignedFlux[3][i] = rhowf;
272  }
273 
274  } // loop
275  }
276  LIKWID_MARKER_STOP("vectorOfVector");
277  // get likwid events
278  LIKWID_MARKER_GET("vectorOfVector", &nevents, events.data(), &time, &count);
279  // print out CPE
280  double cpevectorOfVector =
281  events[CPU_CLK_UNHALTED_REF_id] / sizeScalar / experiments;
282  std::cout << "vectorOfVector likwid CPE\t" << cpevectorOfVector << '\t'
283  << cpeScalar / cpevectorOfVector << " %\n";
284  // avoid opt out
285  std::cout << alignedFlux[0][0][0] << std::endl;
286 
288 }
#define LIKWID_MARKER_THREADINIT
Definition: Likwid.hpp:43
#define LIKWID_MARKER_START(regionTag)
Definition: Likwid.hpp:46
#define LIKWID_MARKER_CLOSE
Definition: Likwid.hpp:48
#define LIKWID_MARKER_INIT
Definition: Likwid.hpp:42
#define LIKWID_MARKER_REGISTER(regionTag)
Definition: Likwid.hpp:45
#define LIKWID_MARKER_STOP(regionTag)
Definition: Likwid.hpp:47
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
Definition: Likwid.hpp:49
void RoeKernel(T &rhoL, T &rhouL, T &rhovL, T &rhowL, T &EL, T &rhoR, T &rhouR, T &rhovR, T &rhowR, T &ER, T &rhof, T &rhouf, T &rhovf, T &rhowf, T &Ef, NekDouble gamma)
Definition: RoeSolver.h:73
tinysimd::simd< NekDouble > vec_t
static constexpr struct tinysimd::is_not_aligned_t is_not_aligned
typename abi< ScalarType, width >::type simd
Definition: tinysimd.hpp:80

References tinysimd::is_not_aligned, LIKWID_MARKER_CLOSE, LIKWID_MARKER_GET, LIKWID_MARKER_INIT, LIKWID_MARKER_REGISTER, LIKWID_MARKER_START, LIKWID_MARKER_STOP, LIKWID_MARKER_THREADINIT, and Nektar::RoeKernel().