Nektar++
Functions
TimeRoeKernel.cpp File Reference
#include "../RiemannSolvers/RoeSolver.h"
#include <LibUtilities/SimdLib/tinysimd.hpp>
#include <LibUtilities/BasicUtils/SharedArray.hpp>
#include <LibUtilities/BasicUtils/Timer.h>
#include <LibUtilities/BasicUtils/Likwid.hpp>

Go to the source code of this file.

Functions

int main (int argc, char const *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
char const *  argv[] 
)

Definition at line 12 of file TimeRoeKernel.cpp.

13 {
14 
17  LIKWID_MARKER_REGISTER("scalar");
18  LIKWID_MARKER_REGISTER("vector");
19  LIKWID_MARKER_REGISTER("vectorOfVector");
20 
21  using namespace tinysimd;
22  using vec_t = simd<NekDouble>;
23 
24  size_t nEle;
25  if (argc < 2)
26  {
27  nEle = 10;
28  }
29  else
30  {
31  nEle = std::stoi(argv[1]);
32  }
33 
34  std::cout << "number of faces\t" << nEle
35  << "\t(assuming 4*4 nodes per face)\n";
36 
37  size_t sizeScalar = 4*4 * nEle;
38  size_t nVars = 5;
39  size_t spaceDim = nVars - 2;
40  size_t sizeVec = sizeScalar / vec_t::width;
41 
42  double gamma = 1.4;
43 
44  std::vector<std::vector<double>>
45  Fwd(nVars),
46  Bwd(nVars),
47  Flux(nVars);
48 
49  for (size_t i = 0; i < nVars; ++i)
50  {
51  Fwd[i] = std::vector<double>(sizeScalar);
52  Bwd[i] = std::vector<double>(sizeScalar);
53  Flux[i] = std::vector<double>(sizeScalar);
54 
55  for (size_t j = 0; j < sizeScalar; ++j)
56  {
57  Fwd[i][j] = 1.;
58  Bwd[i][j] = 1.;
59  // fix energy to avoid negative pressure
60  if (i == nVars - 1)
61  {
62  Fwd[i][j] = 10.;
63  Bwd[i][j] = 10.;
64  }
65  Flux[i][j] = 0.;
66  }
67  }
68 
69  std::vector<std::vector<vec_t, allocator<vec_t>>>
70  alignedFwd(nVars),
71  alignedBwd(nVars),
72  alignedFlux(nVars);
73 
74  for (size_t i = 0; i < nVars; ++i)
75  {
76  alignedFwd[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
77  alignedBwd[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
78  alignedFlux[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
79 
80  for (size_t j = 0; j < sizeVec; ++j)
81  {
82  alignedFwd[i][j] = 1.;
83  alignedBwd[i][j] = 1.;
84  // fix energy to avoid negative pressure
85  if (i == nVars - 1)
86  {
87  alignedFwd[i][j] = 10.;
88  alignedBwd[i][j] = 10.;
89  }
90  alignedFlux[i][j] = 0.;
91  }
92  }
93 
94  // number of experiments
95  constexpr size_t experiments = 1 << 18;
96 
97  LIKWID_MARKER_START("scalar");
98  for (size_t j = 0; j < experiments; ++j)
99  {
100  // time the scalar kernel: one RoeKernel call per point
101  // loop over all points (4*4 per face)
102  for (size_t i = 0; i < sizeScalar; ++i)
103  {
104  double rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
105  double rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
106 
107  // load
108  rhoL = Fwd[0][i];
109  rhouL = Fwd[1][i];
110  EL = Fwd[spaceDim+1][i];
111  rhoR = Bwd[0][i];
112  rhouR = Bwd[1][i];
113  ER = Bwd[spaceDim+1][i];
114 
115  if (spaceDim == 2)
116  {
117  rhovL = Fwd[2][i];
118  rhovR = Bwd[2][i];
119  }
120  else if (spaceDim == 3)
121  {
122  rhovL = Fwd[2][i];
123  rhowL = Fwd[3][i];
124  rhovR = Bwd[2][i];
125  rhowR = Bwd[3][i];
126  }
127 
128  double rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
129 
130  RoeKernel(
131  rhoL, rhouL, rhovL, rhowL, EL,
132  rhoR, rhouR, rhovR, rhowR, ER,
133  rhof, rhouf, rhovf, rhowf, Ef,
134  gamma);
135 
136  // store
137  Flux[0][i] = rhof;
138  Flux[1][i] = rhouf;
139  Flux[nVars-1][i] = Ef;
140  if (spaceDim == 2)
141  {
142  Flux[2][i] = rhovf;
143  }
144  else if (spaceDim == 3)
145  {
146  Flux[2][i] = rhovf;
147  Flux[3][i] = rhowf;
148  }
149 
150  } // loop
151  }
152  LIKWID_MARKER_STOP("scalar");
153  // get likwid events
154  constexpr short CPU_CLK_UNHALTED_REF_id = 2;
155  int nevents{20};
156  std::vector<double> events(nevents);
157  double time;
158  int count;
159  boost::ignore_unused(time,count);
160 
161  LIKWID_MARKER_GET("scalar", &nevents, events.data(), &time, &count);
162  // print out CPE: CPU_CLK_UNHALTED_REF count per point per experiment
163  double cpeScalar = events[CPU_CLK_UNHALTED_REF_id]/sizeScalar/experiments;
164  std::cout << "scalar likwid CPE\t" << cpeScalar << '\n';
165  // print one result so the compiler cannot optimise the timed loop away
166  std::cout << Flux[0][0] << std::endl;
167 
168 
169  LIKWID_MARKER_START("vector");
170  // time SIMD: vec_t kernel with is_not_aligned loads/stores on the scalar arrays
171  for (size_t j = 0; j < experiments; ++j)
172  {
173  // loop
174  for (size_t i = 0; i < sizeScalar; i+=vec_t::width)
175  {
176  vec_t rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
177  vec_t rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
178 
179  // load
180  rhoL .load(&(Fwd[0][i]), is_not_aligned);
181  rhouL .load(&(Fwd[1][i]), is_not_aligned);
182  EL .load(&(Fwd[spaceDim+1][i]), is_not_aligned);
183  rhoR .load(&(Bwd[0][i]), is_not_aligned);
184  rhouR .load(&(Bwd[1][i]), is_not_aligned);
185  ER .load(&(Bwd[spaceDim+1][i]), is_not_aligned);
186 
187  if (spaceDim == 2)
188  {
189  rhovL.load(&(Fwd[2][i]), is_not_aligned);
190  rhovR.load(&(Bwd[2][i]), is_not_aligned);
191  }
192  else if (spaceDim == 3)
193  {
194  rhovL.load(&(Fwd[2][i]), is_not_aligned);
195  rhowL.load(&(Fwd[3][i]), is_not_aligned);
196  rhovR.load(&(Bwd[2][i]), is_not_aligned);
197  rhowR.load(&(Bwd[3][i]), is_not_aligned);
198  }
199 
200  vec_t rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
201 
202  RoeKernel(
203  rhoL, rhouL, rhovL, rhowL, EL,
204  rhoR, rhouR, rhovR, rhowR, ER,
205  rhof, rhouf, rhovf, rhowf, Ef,
206  gamma);
207 
208  // store
209  rhof.store(&(Flux[0][i]), is_not_aligned);
210  rhouf.store(&(Flux[1][i]), is_not_aligned);
211  Ef.store(&(Flux[nVars-1][i]), is_not_aligned);
212  if (spaceDim == 2)
213  {
214  rhovf.store(&(Flux[2][i]), is_not_aligned);
215  }
216  else if (spaceDim == 3)
217  {
218  rhovf.store(&(Flux[2][i]), is_not_aligned);
219  rhowf.store(&(Flux[3][i]), is_not_aligned);
220  }
221 
222  } // loop
223  }
224  LIKWID_MARKER_STOP("vector");
225  // get likwid events
226  LIKWID_MARKER_GET("vector", &nevents, events.data(), &time, &count);
227  // print out CPE: CPU_CLK_UNHALTED_REF count per point per experiment
228  double cpeVector = events[CPU_CLK_UNHALTED_REF_id]/sizeScalar/experiments;
229  std::cout << "vector likwid CPE\t" << cpeVector << '\t'
230  << cpeScalar/cpeVector << " %\n";
231  // print one result so the compiler cannot optimise the timed loop away
232  std::cout << Flux[0][0] << std::endl;
233 
234  LIKWID_MARKER_START("vectorOfVector");
235  // time SIMD: vec_t kernel on data already stored as vectors of vec_t
236  for (size_t j = 0; j < experiments; ++j)
237  {
238  // loop
239  for (size_t i = 0; i < sizeVec; ++i)
240  {
241  vec_t rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
242  vec_t rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
243 
244  // load
245  rhoL = alignedFwd[0][i];
246  rhouL = alignedFwd[1][i];
247  EL = alignedFwd[spaceDim+1][i];
248  rhoR = alignedBwd[0][i];
249  rhouR = alignedBwd[1][i];
250  ER = alignedBwd[spaceDim+1][i];
251 
252  if (spaceDim == 2)
253  {
254  rhovL = alignedFwd[2][i];
255  rhovR = alignedBwd[2][i];
256  }
257  else if (spaceDim == 3)
258  {
259  rhovL = alignedFwd[2][i];
260  rhowL = alignedFwd[3][i];
261  rhovR = alignedBwd[2][i];
262  rhowR = alignedBwd[3][i];
263  }
264 
265  vec_t rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
266 
267  RoeKernel(
268  rhoL, rhouL, rhovL, rhowL, EL,
269  rhoR, rhouR, rhovR, rhowR, ER,
270  rhof, rhouf, rhovf, rhowf, Ef,
271  gamma);
272 
273  // store
274  alignedFlux[0][i] = rhof;
275  alignedFlux[1][i] = rhouf;
276  alignedFlux[nVars-1][i] = Ef;
277  if (spaceDim == 2)
278  {
279  alignedFlux[2][i] = rhovf;
280  }
281  else if (spaceDim == 3)
282  {
283  alignedFlux[2][i] = rhovf;
284  alignedFlux[3][i] = rhowf;
285  }
286 
287  } // loop
288  }
289  LIKWID_MARKER_STOP("vectorOfVector");
290  // get likwid events
291  LIKWID_MARKER_GET("vectorOfVector", &nevents, events.data(), &time, &count);
292  // print out CPE: CPU_CLK_UNHALTED_REF count per point per experiment
293  double cpevectorOfVector = events[CPU_CLK_UNHALTED_REF_id]/sizeScalar/experiments;
294  std::cout << "vectorOfVector likwid CPE\t" << cpevectorOfVector << '\t'
295  << cpeScalar/cpevectorOfVector << " %\n";
296  // print one result so the compiler cannot optimise the timed loop away
297  std::cout << alignedFlux[0][0][0] << std::endl;
298 
300 
301 }
#define LIKWID_MARKER_THREADINIT
Definition: Likwid.hpp:43
#define LIKWID_MARKER_START(regionTag)
Definition: Likwid.hpp:46
#define LIKWID_MARKER_CLOSE
Definition: Likwid.hpp:48
#define LIKWID_MARKER_INIT
Definition: Likwid.hpp:42
#define LIKWID_MARKER_REGISTER(regionTag)
Definition: Likwid.hpp:45
#define LIKWID_MARKER_STOP(regionTag)
Definition: Likwid.hpp:47
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
Definition: Likwid.hpp:49
void RoeKernel(T &rhoL, T &rhouL, T &rhovL, T &rhowL, T &EL, T &rhoR, T &rhouR, T &rhovR, T &rhowR, T &ER, T &rhof, T &rhouf, T &rhovf, T &rhowf, T &Ef, NekDouble gamma)
Definition: RoeSolver.h:79
tinysimd::simd< NekDouble > vec_t
static constexpr struct tinysimd::is_not_aligned_t is_not_aligned
typename abi< ScalarType >::type simd
Definition: tinysimd.hpp:83

References tinysimd::is_not_aligned, LIKWID_MARKER_CLOSE, LIKWID_MARKER_GET, LIKWID_MARKER_INIT, LIKWID_MARKER_REGISTER, LIKWID_MARKER_START, LIKWID_MARKER_STOP, LIKWID_MARKER_THREADINIT, and Nektar::RoeKernel().