31 nEle = std::stoi(argv[1]);
34 std::cout <<
"number of faces\t" << nEle
35 <<
"\t(assuming 4*4 nodes per face)\n";
37 size_t sizeScalar = 4 * 4 * nEle;
39 size_t spaceDim = nVars - 2;
40 size_t sizeVec = sizeScalar / vec_t::width;
44 std::vector<std::vector<double>> Fwd(nVars), Bwd(nVars), Flux(nVars);
46 for (
size_t i = 0; i < nVars; ++i)
48 Fwd[i] = std::vector<double>(sizeScalar);
49 Bwd[i] = std::vector<double>(sizeScalar);
50 Flux[i] = std::vector<double>(sizeScalar);
52 for (
size_t j = 0; j < sizeScalar; ++j)
66 std::vector<std::vector<vec_t, allocator<vec_t>>> alignedFwd(nVars),
67 alignedBwd(nVars), alignedFlux(nVars);
69 for (
size_t i = 0; i < nVars; ++i)
71 alignedFwd[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
72 alignedBwd[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
73 alignedFlux[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
75 for (
size_t j = 0; j < sizeVec; ++j)
77 alignedFwd[i][j] = 1.;
78 alignedBwd[i][j] = 1.;
82 alignedFwd[i][j] = 10.;
83 alignedBwd[i][j] = 10.;
85 alignedFlux[i][j] = 0.;
90 constexpr
size_t experiments = 1 << 18;
93 for (
size_t j = 0; j < experiments; ++j)
97 for (
size_t i = 0; i < sizeScalar; ++i)
99 double rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
100 double rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
105 EL = Fwd[spaceDim + 1][i];
108 ER = Bwd[spaceDim + 1][i];
115 else if (spaceDim == 3)
123 double rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
125 RoeKernel(rhoL, rhouL, rhovL, rhowL, EL, rhoR, rhouR, rhovR, rhowR,
126 ER, rhof, rhouf, rhovf, rhowf, Ef, gamma);
131 Flux[nVars - 1][i] = Ef;
136 else if (spaceDim == 3)
146 constexpr
short CPU_CLK_UNHALTED_REF_id = 2;
148 std::vector<double> events(nevents);
151 boost::ignore_unused(time, count);
156 events[CPU_CLK_UNHALTED_REF_id] / sizeScalar / experiments;
157 std::cout <<
"scalar likwid CPE\t" << cpeScalar <<
'\n';
159 std::cout << Flux[0][0] << std::endl;
163 for (
size_t j = 0; j < experiments; ++j)
166 for (
size_t i = 0; i < sizeScalar; i += vec_t::width)
168 vec_t rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
169 vec_t rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
184 else if (spaceDim == 3)
192 vec_t rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
194 RoeKernel(rhoL, rhouL, rhovL, rhowL, EL, rhoR, rhouR, rhovR, rhowR,
195 ER, rhof, rhouf, rhovf, rhowf, Ef, gamma);
205 else if (spaceDim == 3)
218 events[CPU_CLK_UNHALTED_REF_id] / sizeScalar / experiments;
219 std::cout <<
"vector likwid CPE\t" << cpeVector <<
'\t'
220 << cpeScalar / cpeVector <<
" %\n";
222 std::cout << Flux[0][0] << std::endl;
226 for (
size_t j = 0; j < experiments; ++j)
229 for (
size_t i = 0; i < sizeVec; ++i)
231 vec_t rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
232 vec_t rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
235 rhoL = alignedFwd[0][i];
236 rhouL = alignedFwd[1][i];
237 EL = alignedFwd[spaceDim + 1][i];
238 rhoR = alignedBwd[0][i];
239 rhouR = alignedBwd[1][i];
240 ER = alignedBwd[spaceDim + 1][i];
244 rhovL = alignedFwd[2][i];
245 rhovR = alignedBwd[2][i];
247 else if (spaceDim == 3)
249 rhovL = alignedFwd[2][i];
250 rhowL = alignedFwd[3][i];
251 rhovR = alignedBwd[2][i];
252 rhowR = alignedBwd[3][i];
255 vec_t rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
257 RoeKernel(rhoL, rhouL, rhovL, rhowL, EL, rhoR, rhouR, rhovR, rhowR,
258 ER, rhof, rhouf, rhovf, rhowf, Ef, gamma);
261 alignedFlux[0][i] = rhof;
262 alignedFlux[1][i] = rhouf;
263 alignedFlux[nVars - 1][i] = Ef;
266 alignedFlux[2][i] = rhovf;
268 else if (spaceDim == 3)
270 alignedFlux[2][i] = rhovf;
271 alignedFlux[3][i] = rhowf;
280 double cpevectorOfVector =
281 events[CPU_CLK_UNHALTED_REF_id] / sizeScalar / experiments;
282 std::cout <<
"vectorOfVector likwid CPE\t" << cpevectorOfVector <<
'\t'
283 << cpeScalar / cpevectorOfVector <<
" %\n";
285 std::cout << alignedFlux[0][0][0] << std::endl;
// No-op definitions of the LIKWID marker macros. Every macro below has an
// empty replacement list, so each use expands to nothing and none of the
// macro arguments are evaluated.
// NOTE(review): presumably the fallback for builds without LIKWID support;
// the guarding preprocessor conditional is not visible here -- confirm.

// Expands to nothing.
#define LIKWID_MARKER_THREADINIT
// Expands to nothing; regionTag is discarded.
#define LIKWID_MARKER_START(regionTag)
// Expands to nothing.
#define LIKWID_MARKER_CLOSE
// Expands to nothing.
#define LIKWID_MARKER_INIT
// Expands to nothing; regionTag is discarded.
#define LIKWID_MARKER_REGISTER(regionTag)
// Expands to nothing; regionTag is discarded.
#define LIKWID_MARKER_STOP(regionTag)
// Expands to nothing, so nevents, events, time and count are left untouched
// by this macro.
// NOTE(review): with this definition any value later read from `events`
// is whatever the caller initialised it to -- confirm callers expect that.
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
void RoeKernel(T &rhoL, T &rhouL, T &rhovL, T &rhowL, T &EL, T &rhoR, T &rhouR, T &rhovR, T &rhowR, T &ER, T &rhof, T &rhouf, T &rhovf, T &rhowf, T &Ef, NekDouble gamma)
tinysimd::simd< NekDouble > vec_t
static constexpr struct tinysimd::is_not_aligned_t is_not_aligned
typename abi< ScalarType, width >::type simd