Nektar++
Functions
TimeRoeKernel.cpp File Reference
#include "../RiemannSolvers/RoeSolver.h"
#include <LibUtilities/SimdLib/tinysimd.hpp>
#include <LibUtilities/BasicUtils/SharedArray.hpp>
#include <LibUtilities/BasicUtils/Likwid.hpp>
#include <LibUtilities/BasicUtils/Timer.h>

Go to the source code of this file.

Functions

int main (int argc, char const *argv[])
 

Function Documentation

◆ main()

int main(int argc, char const *argv[])

Definition at line 46 of file TimeRoeKernel.cpp.

47 {
48 
51  LIKWID_MARKER_REGISTER("scalar");
52  LIKWID_MARKER_REGISTER("vector");
53  LIKWID_MARKER_REGISTER("vectorOfVector");
54 
55  using namespace tinysimd;
56  using vec_t = simd<NekDouble>;
57 
58  size_t nEle;
59  if (argc < 2)
60  {
61  nEle = 10;
62  }
63  else
64  {
65  nEle = std::stoi(argv[1]);
66  }
67 
68  std::cout << "number of faces\t" << nEle
69  << "\t(assuming 4*4 nodes per face)\n";
70 
71  size_t sizeScalar = 4 * 4 * nEle;
72  size_t nVars = 5;
73  size_t spaceDim = nVars - 2;
74  size_t sizeVec = sizeScalar / vec_t::width;
75 
76  double gamma = 1.4;
77 
78  std::vector<std::vector<double>> Fwd(nVars), Bwd(nVars), Flux(nVars);
79 
80  for (size_t i = 0; i < nVars; ++i)
81  {
82  Fwd[i] = std::vector<double>(sizeScalar);
83  Bwd[i] = std::vector<double>(sizeScalar);
84  Flux[i] = std::vector<double>(sizeScalar);
85 
86  for (size_t j = 0; j < sizeScalar; ++j)
87  {
88  Fwd[i][j] = 1.;
89  Bwd[i][j] = 1.;
90  // fix energy to avoid negative pressure
91  if (i == nVars - 1)
92  {
93  Fwd[i][j] = 10.;
94  Bwd[i][j] = 10.;
95  }
96  Flux[i][j] = 0.;
97  }
98  }
99 
100  std::vector<std::vector<vec_t, allocator<vec_t>>> alignedFwd(nVars),
101  alignedBwd(nVars), alignedFlux(nVars);
102 
103  for (size_t i = 0; i < nVars; ++i)
104  {
105  alignedFwd[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
106  alignedBwd[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
107  alignedFlux[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
108 
109  for (size_t j = 0; j < sizeVec; ++j)
110  {
111  alignedFwd[i][j] = 1.;
112  alignedBwd[i][j] = 1.;
113  // fix energy to avoid negative pressure
114  if (i == nVars - 1)
115  {
116  alignedFwd[i][j] = 10.;
117  alignedBwd[i][j] = 10.;
118  }
119  alignedFlux[i][j] = 0.;
120  }
121  }
122 
123  // number of experiments
124  constexpr size_t experiments = 1 << 18;
125 
126  LIKWID_MARKER_START("scalar");
127  for (size_t j = 0; j < experiments; ++j)
128  {
129  // time scalar
130  // loop
131  for (size_t i = 0; i < sizeScalar; ++i)
132  {
133  double rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
134  double rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
135 
136  // load
137  rhoL = Fwd[0][i];
138  rhouL = Fwd[1][i];
139  EL = Fwd[spaceDim + 1][i];
140  rhoR = Bwd[0][i];
141  rhouR = Bwd[1][i];
142  ER = Bwd[spaceDim + 1][i];
143 
144  if (spaceDim == 2)
145  {
146  rhovL = Fwd[2][i];
147  rhovR = Bwd[2][i];
148  }
149  else if (spaceDim == 3)
150  {
151  rhovL = Fwd[2][i];
152  rhowL = Fwd[3][i];
153  rhovR = Bwd[2][i];
154  rhowR = Bwd[3][i];
155  }
156 
157  double rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
158 
159  RoeKernel(rhoL, rhouL, rhovL, rhowL, EL, rhoR, rhouR, rhovR, rhowR,
160  ER, rhof, rhouf, rhovf, rhowf, Ef, gamma);
161 
162  // store
163  Flux[0][i] = rhof;
164  Flux[1][i] = rhouf;
165  Flux[nVars - 1][i] = Ef;
166  if (spaceDim == 2)
167  {
168  Flux[2][i] = rhovf;
169  }
170  else if (spaceDim == 3)
171  {
172  Flux[2][i] = rhovf;
173  Flux[3][i] = rhowf;
174  }
175 
176  } // loop
177  }
178  LIKWID_MARKER_STOP("scalar");
179  // get likwid events
180  constexpr short CPU_CLK_UNHALTED_REF_id = 2;
181  int nevents{20};
182  std::vector<double> events(nevents);
183  double time;
184  int count;
185  boost::ignore_unused(time, count);
186 
187  LIKWID_MARKER_GET("scalar", &nevents, events.data(), &time, &count);
188  // print out CPE
189  double cpeScalar =
190  events[CPU_CLK_UNHALTED_REF_id] / sizeScalar / experiments;
191  std::cout << "scalar likwid CPE\t" << cpeScalar << '\n';
192  // avoid opt out
193  std::cout << Flux[0][0] << std::endl;
194 
195  LIKWID_MARKER_START("vector");
196  // time SIMD
197  for (size_t j = 0; j < experiments; ++j)
198  {
199  // loop
200  for (size_t i = 0; i < sizeScalar; i += vec_t::width)
201  {
202  vec_t rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
203  vec_t rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
204 
205  // load
206  rhoL.load(&(Fwd[0][i]), is_not_aligned);
207  rhouL.load(&(Fwd[1][i]), is_not_aligned);
208  EL.load(&(Fwd[spaceDim + 1][i]), is_not_aligned);
209  rhoR.load(&(Bwd[0][i]), is_not_aligned);
210  rhouR.load(&(Bwd[1][i]), is_not_aligned);
211  ER.load(&(Bwd[spaceDim + 1][i]), is_not_aligned);
212 
213  if (spaceDim == 2)
214  {
215  rhovL.load(&(Fwd[2][i]), is_not_aligned);
216  rhovR.load(&(Bwd[2][i]), is_not_aligned);
217  }
218  else if (spaceDim == 3)
219  {
220  rhovL.load(&(Fwd[2][i]), is_not_aligned);
221  rhowL.load(&(Fwd[3][i]), is_not_aligned);
222  rhovR.load(&(Bwd[2][i]), is_not_aligned);
223  rhowR.load(&(Bwd[3][i]), is_not_aligned);
224  }
225 
226  vec_t rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
227 
228  RoeKernel(rhoL, rhouL, rhovL, rhowL, EL, rhoR, rhouR, rhovR, rhowR,
229  ER, rhof, rhouf, rhovf, rhowf, Ef, gamma);
230 
231  // store
232  rhof.store(&(Flux[0][i]), is_not_aligned);
233  rhouf.store(&(Flux[1][i]), is_not_aligned);
234  Ef.store(&(Flux[nVars - 1][i]), is_not_aligned);
235  if (spaceDim == 2)
236  {
237  rhovf.store(&(Flux[2][i]), is_not_aligned);
238  }
239  else if (spaceDim == 3)
240  {
241  rhovf.store(&(Flux[2][i]), is_not_aligned);
242  rhowf.store(&(Flux[3][i]), is_not_aligned);
243  }
244 
245  } // loop
246  }
247  LIKWID_MARKER_STOP("vector");
248  // get likwid events
249  LIKWID_MARKER_GET("vector", &nevents, events.data(), &time, &count);
250  // print out CPE
251  double cpeVector =
252  events[CPU_CLK_UNHALTED_REF_id] / sizeScalar / experiments;
253  std::cout << "vector likwid CPE\t" << cpeVector << '\t'
254  << cpeScalar / cpeVector << " %\n";
255  // avoid opt out
256  std::cout << Flux[0][0] << std::endl;
257 
258  LIKWID_MARKER_START("vectorOfVector");
259  // time SIMD
260  for (size_t j = 0; j < experiments; ++j)
261  {
262  // loop
263  for (size_t i = 0; i < sizeVec; ++i)
264  {
265  vec_t rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
266  vec_t rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
267 
268  // load
269  rhoL = alignedFwd[0][i];
270  rhouL = alignedFwd[1][i];
271  EL = alignedFwd[spaceDim + 1][i];
272  rhoR = alignedBwd[0][i];
273  rhouR = alignedBwd[1][i];
274  ER = alignedBwd[spaceDim + 1][i];
275 
276  if (spaceDim == 2)
277  {
278  rhovL = alignedFwd[2][i];
279  rhovR = alignedBwd[2][i];
280  }
281  else if (spaceDim == 3)
282  {
283  rhovL = alignedFwd[2][i];
284  rhowL = alignedFwd[3][i];
285  rhovR = alignedBwd[2][i];
286  rhowR = alignedBwd[3][i];
287  }
288 
289  vec_t rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
290 
291  RoeKernel(rhoL, rhouL, rhovL, rhowL, EL, rhoR, rhouR, rhovR, rhowR,
292  ER, rhof, rhouf, rhovf, rhowf, Ef, gamma);
293 
294  // store
295  alignedFlux[0][i] = rhof;
296  alignedFlux[1][i] = rhouf;
297  alignedFlux[nVars - 1][i] = Ef;
298  if (spaceDim == 2)
299  {
300  alignedFlux[2][i] = rhovf;
301  }
302  else if (spaceDim == 3)
303  {
304  alignedFlux[2][i] = rhovf;
305  alignedFlux[3][i] = rhowf;
306  }
307 
308  } // loop
309  }
310  LIKWID_MARKER_STOP("vectorOfVector");
311  // get likwid events
312  LIKWID_MARKER_GET("vectorOfVector", &nevents, events.data(), &time, &count);
313  // print out CPE
314  double cpevectorOfVector =
315  events[CPU_CLK_UNHALTED_REF_id] / sizeScalar / experiments;
316  std::cout << "vectorOfVector likwid CPE\t" << cpevectorOfVector << '\t'
317  << cpeScalar / cpevectorOfVector << " %\n";
318  // avoid opt out
319  std::cout << alignedFlux[0][0][0] << std::endl;
320 
322 }
#define LIKWID_MARKER_THREADINIT
Definition: Likwid.hpp:43
#define LIKWID_MARKER_START(regionTag)
Definition: Likwid.hpp:46
#define LIKWID_MARKER_CLOSE
Definition: Likwid.hpp:48
#define LIKWID_MARKER_INIT
Definition: Likwid.hpp:42
#define LIKWID_MARKER_REGISTER(regionTag)
Definition: Likwid.hpp:45
#define LIKWID_MARKER_STOP(regionTag)
Definition: Likwid.hpp:47
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
Definition: Likwid.hpp:49
void RoeKernel(T &rhoL, T &rhouL, T &rhovL, T &rhowL, T &EL, T &rhoR, T &rhouR, T &rhovR, T &rhowR, T &ER, T &rhof, T &rhouf, T &rhovf, T &rhowf, T &Ef, NekDouble gamma)
Definition: RoeSolver.h:75
tinysimd::simd< NekDouble > vec_t
static constexpr struct tinysimd::is_not_aligned_t is_not_aligned
typename abi< ScalarType, width >::type simd
Definition: tinysimd.hpp:80

References tinysimd::is_not_aligned, LIKWID_MARKER_CLOSE, LIKWID_MARKER_GET, LIKWID_MARKER_INIT, LIKWID_MARKER_REGISTER, LIKWID_MARKER_START, LIKWID_MARKER_STOP, LIKWID_MARKER_THREADINIT, and Nektar::RoeKernel().