31 nEle = std::stoi(argv[1]);
34 std::cout <<
"number of faces\t" << nEle
35 <<
"\t(assuming 4*4 nodes per face)\n";
37 size_t sizeScalar = 4*4 * nEle;
39 size_t spaceDim = nVars - 2;
40 size_t sizeVec = sizeScalar / vec_t::width;
44 std::vector<std::vector<double>>
49 for (
size_t i = 0; i < nVars; ++i)
51 Fwd[i] = std::vector<double>(sizeScalar);
52 Bwd[i] = std::vector<double>(sizeScalar);
53 Flux[i] = std::vector<double>(sizeScalar);
55 for (
size_t j = 0; j < sizeScalar; ++j)
69 std::vector<std::vector<vec_t, allocator<vec_t>>>
74 for (
size_t i = 0; i < nVars; ++i)
76 alignedFwd[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
77 alignedBwd[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
78 alignedFlux[i] = std::vector<vec_t, allocator<vec_t>>(sizeVec);
80 for (
size_t j = 0; j < sizeVec; ++j)
82 alignedFwd[i][j] = 1.;
83 alignedBwd[i][j] = 1.;
87 alignedFwd[i][j] = 10.;
88 alignedBwd[i][j] = 10.;
90 alignedFlux[i][j] = 0.;
95 constexpr
size_t experiments = 1 << 18;
98 for (
size_t j = 0; j < experiments; ++j)
102 for (
size_t i = 0; i < sizeScalar; ++i)
104 double rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
105 double rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
110 EL = Fwd[spaceDim+1][i];
113 ER = Bwd[spaceDim+1][i];
120 else if (spaceDim == 3)
128 double rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
131 rhoL, rhouL, rhovL, rhowL, EL,
132 rhoR, rhouR, rhovR, rhowR, ER,
133 rhof, rhouf, rhovf, rhowf, Ef,
139 Flux[nVars-1][i] = Ef;
144 else if (spaceDim == 3)
154 constexpr
short CPU_CLK_UNHALTED_REF_id = 2;
156 std::vector<double> events(nevents);
159 boost::ignore_unused(time,count);
163 double cpeScalar = events[CPU_CLK_UNHALTED_REF_id]/sizeScalar/experiments;
164 std::cout <<
"scalar likwid CPE\t" << cpeScalar <<
'\n';
166 std::cout << Flux[0][0] << std::endl;
171 for (
size_t j = 0; j < experiments; ++j)
174 for (
size_t i = 0; i < sizeScalar; i+=vec_t::width)
176 vec_t rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
177 vec_t rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
192 else if (spaceDim == 3)
200 vec_t rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
203 rhoL, rhouL, rhovL, rhowL, EL,
204 rhoR, rhouR, rhovR, rhowR, ER,
205 rhof, rhouf, rhovf, rhowf, Ef,
216 else if (spaceDim == 3)
228 double cpeVector = events[CPU_CLK_UNHALTED_REF_id]/sizeScalar/experiments;
229 std::cout <<
"vector likwid CPE\t" << cpeVector <<
'\t'
230 << cpeScalar/cpeVector <<
" %\n";
232 std::cout << Flux[0][0] << std::endl;
236 for (
size_t j = 0; j < experiments; ++j)
239 for (
size_t i = 0; i < sizeVec; ++i)
241 vec_t rhoL{}, rhouL{}, rhovL{}, rhowL{}, EL{};
242 vec_t rhoR{}, rhouR{}, rhovR{}, rhowR{}, ER{};
245 rhoL = alignedFwd[0][i];
246 rhouL = alignedFwd[1][i];
247 EL = alignedFwd[spaceDim+1][i];
248 rhoR = alignedBwd[0][i];
249 rhouR = alignedBwd[1][i];
250 ER = alignedBwd[spaceDim+1][i];
254 rhovL = alignedFwd[2][i];
255 rhovR = alignedBwd[2][i];
257 else if (spaceDim == 3)
259 rhovL = alignedFwd[2][i];
260 rhowL = alignedFwd[3][i];
261 rhovR = alignedBwd[2][i];
262 rhowR = alignedBwd[3][i];
265 vec_t rhof{}, rhouf{}, rhovf{}, rhowf{}, Ef{};
268 rhoL, rhouL, rhovL, rhowL, EL,
269 rhoR, rhouR, rhovR, rhowR, ER,
270 rhof, rhouf, rhovf, rhowf, Ef,
274 alignedFlux[0][i] = rhof;
275 alignedFlux[1][i] = rhouf;
276 alignedFlux[nVars-1][i] = Ef;
279 alignedFlux[2][i] = rhovf;
281 else if (spaceDim == 3)
283 alignedFlux[2][i] = rhovf;
284 alignedFlux[3][i] = rhowf;
293 double cpevectorOfVector = events[CPU_CLK_UNHALTED_REF_id]/sizeScalar/experiments;
294 std::cout <<
"vectorOfVector likwid CPE\t" << cpevectorOfVector <<
'\t'
295 << cpeScalar/cpevectorOfVector <<
" %\n";
297 std::cout << alignedFlux[0][0][0] << std::endl;
// No-op definitions of the LIKWID marker macros: every macro below has an
// empty replacement list, so each use in the code expands to nothing.
// NOTE(review): presumably this is the fallback branch used when LIKWID
// instrumentation is not enabled at build time -- the enclosing preprocessor
// guard is not visible here, confirm against the full file.

// Per-thread marker initialisation (expands to nothing).
#define LIKWID_MARKER_THREADINIT
// Start of the measurement region named by regionTag (expands to nothing).
#define LIKWID_MARKER_START(regionTag)
// Marker API shutdown (expands to nothing).
#define LIKWID_MARKER_CLOSE
// Marker API initialisation (expands to nothing).
#define LIKWID_MARKER_INIT
// Registration of the region named by regionTag (expands to nothing).
#define LIKWID_MARKER_REGISTER(regionTag)
// End of the measurement region named by regionTag (expands to nothing).
#define LIKWID_MARKER_STOP(regionTag)
// Retrieval of results for regionTag. With this empty definition none of the
// arguments (nevents, events, time, count) is read or written, so callers see
// whatever values those variables held before the macro was used.
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
void RoeKernel(T &rhoL, T &rhouL, T &rhovL, T &rhowL, T &EL, T &rhoR, T &rhouR, T &rhovR, T &rhowR, T &ER, T &rhof, T &rhouf, T &rhovf, T &rhowf, T &Ef, NekDouble gamma)
tinysimd::simd< NekDouble > vec_t
static constexpr struct tinysimd::is_not_aligned_t is_not_aligned
typename abi< ScalarType >::type simd