404 bool IsLinearNSEquation,
const int HomogeneousMode,
414 bool AddAdvectionTerms =
455 for (n = 0; n < nel; ++n)
457 nsize_bndry[n] = nvel *
460 nsize_bndry_p1[n] = nsize_bndry[n] + nz_loc;
464 nsize_p[n] =
m_pressure->GetExp(n)->GetNcoeffs() * nz_loc;
465 nsize_p_m1[n] = nsize_p[n] - nz_loc;
471 nsize_bndry_p1, nsize_bndry_p1, blkmatStorage);
473 nsize_bndry, nsize_int, blkmatStorage);
475 nsize_bndry, nsize_int, blkmatStorage);
477 nsize_int, nsize_int, blkmatStorage);
480 nsize_p, nsize_bndry, blkmatStorage);
483 nsize_p, nsize_int, blkmatStorage);
488 nsize_bndry_p1, nsize_p_m1, blkmatStorage);
491 nsize_p_m1, nsize_bndry_p1, blkmatStorage);
499 for (n = 0; n < nel; ++n)
501 nbndry = nsize_bndry[n];
503 k = nsize_bndry_p1[n];
519 nsize_p[n], nsize_bndry[n], zero);
521 nsize_p[n], nsize_int[n], zero);
524 locExp->GetBoundaryMap(bmap);
525 locExp->GetInteriorMap(imap);
533 size_t nbmap = bmap.size();
534 size_t nimap = imap.size();
538 size_t psize =
m_pressure->GetExp(n)->GetNcoeffs();
539 size_t pqsize =
m_pressure->GetExp(n)->GetTotPoints();
544 if (AddAdvectionTerms ==
false)
550 CondMat = locExp->GetLocStaticCondMatrix(helmkey);
552 for (k = 0; k < nvel * nz_loc; ++k)
555 rows = SubBlock.GetRows();
556 cols = SubBlock.GetColumns();
557 for (i = 0; i < rows; ++i)
559 for (j = 0; j < cols; ++j)
561 (*Ah)(i + k * rows, j + k * cols) =
567 for (k = 0; k < nvel * nz_loc; ++k)
571 rows = SubBlock.GetRows();
572 cols = SubBlock.GetColumns();
573 for (i = 0; i < rows; ++i)
575 for (j = 0; j < cols; ++j)
577 (*B)(i + k * rows, j + k * cols) = SubBlock(i, j);
578 (*C)(i + k * rows, j + k * cols) =
584 for (k = 0; k < nvel * nz_loc; ++k)
588 rows = SubBlock.GetRows();
589 cols = SubBlock.GetColumns();
590 for (i = 0; i < rows; ++i)
592 for (j = 0; j < cols; ++j)
594 (*D)(i + k * rows, j + k * cols) =
595 inv_kinvis * SubBlock(i, j);
601 for (i = 0; i < bmap.size(); ++i)
605 coeffs[bmap[i]] = 1.0;
609 for (j = 0; j < nvel; ++j)
611 if ((nz_loc == 2) && (j == 2))
617 beta, phys, 1, deriv, 1);
619 m_pressure->GetExp(n)->IProductWRTBase(deriv, pcoeffs);
623 ((nz_loc * j + 1) * bmap.size() + i) *
630 ((nz_loc * j) * bmap.size() + i) *
645 for (k = 0; k < nz_loc; ++k)
648 psize, &(pcoeffs)[0], 1,
650 ((nz_loc * j + k) * bmap.size() + i) *
660 for (i = 0; i < imap.size(); ++i)
664 coeffs[imap[i]] = 1.0;
668 for (j = 0; j < nvel; ++j)
670 if ((nz_loc == 2) && (j == 2))
676 beta, phys, 1, deriv, 1);
678 m_pressure->GetExp(n)->IProductWRTBase(deriv, pcoeffs);
682 ((nz_loc * j + 1) * imap.size() + i) *
689 ((nz_loc * j) * imap.size() + i) *
708 for (k = 0; k < nz_loc; ++k)
711 psize, &(pcoeffs)[0], 1,
713 ((nz_loc * j + k) * imap.size() + i) *
734 HelmMat.GetOwnedMatrix()->GetPtr();
735 NekDouble HelmMatScale = HelmMat.Scale();
736 int HelmMatRows = HelmMat.GetRows();
752 int npoints = locExp->GetTotPoints();
755 if (IsLinearNSEquation)
760 for (nv = 0; nv < nvel; ++nv)
762 for (nv1 = 0; nv1 < nvel; ++nv1)
764 if (cnt < nvel * nvel - 1)
766 AdvDeriv[cnt + 1] = AdvDeriv[cnt] + npoints;
779 Advfield[nv] + phys_offset,
780 AdvDeriv[nv * nvel + nv1]);
786 for (i = 0; i < nbmap; ++i)
791 coeffs[bmap[i]] = 1.0;
792 locExp->BwdTrans(coeffs, phys);
794 for (k = 0; k < nvel * nz_loc; ++k)
796 for (j = 0; j < nbmap; ++j)
801 Ah_data[i + k * nbmap + (j + k * nbmap) * AhRows] +=
803 HelmMat_data[bmap[i] + HelmMatRows * bmap[j]];
806 for (j = 0; j < nimap; ++j)
808 B_data[i + k * nbmap + (j + k * nimap) * nbndry] +=
810 HelmMat_data[bmap[i] + HelmMatRows * imap[j]];
817 for (k = 0; k < nvel; ++k)
819 for (j = 0; j < nbmap; ++j)
821 Ah_data[i + 2 * k * nbmap +
822 (j + (2 * k + 1) * nbmap) * AhRows] -=
823 lambda_imag * (*MassMat)(bmap[i], bmap[j]);
826 for (j = 0; j < nbmap; ++j)
828 Ah_data[i + (2 * k + 1) * nbmap +
829 (j + 2 * k * nbmap) * AhRows] +=
830 lambda_imag * (*MassMat)(bmap[i], bmap[j]);
833 for (j = 0; j < nimap; ++j)
835 B_data[i + 2 * k * nbmap +
836 (j + (2 * k + 1) * nimap) * nbndry] -=
837 lambda_imag * (*MassMat)(bmap[i], imap[j]);
840 for (j = 0; j < nimap; ++j)
842 B_data[i + (2 * k + 1) * nbmap +
843 (j + 2 * k * nimap) * nbndry] +=
844 lambda_imag * (*MassMat)(bmap[i], imap[j]);
849 for (k = 0; k < nvel; ++k)
851 if ((nz_loc == 2) && (k == 2))
858 m_pressure->GetExp(n)->IProductWRTBase(deriv, pcoeffs);
861 ((nz_loc * k + 1) * bmap.size() + i) *
869 ((nz_loc * k) * bmap.size() + i) *
875 Vmath::Vmul(npoints, Advtmp = Advfield[k] + phys_offset,
876 1, deriv, 1, tmpphys, 1);
878 locExp->IProductWRTBase(tmpphys, coeffs);
881 for (nv = 0; nv < nvel; ++nv)
883 for (j = 0; j < nbmap; ++j)
885 Ah_data[j + 2 * nv * nbmap +
886 (i + (2 * nv + 1) * nbmap) * AhRows] +=
890 for (j = 0; j < nimap; ++j)
892 C_data[i + (2 * nv + 1) * nbmap +
893 (j + 2 * nv * nimap) * nbndry] +=
900 for (nv = 0; nv < nvel; ++nv)
902 for (j = 0; j < nbmap; ++j)
904 Ah_data[j + (2 * nv + 1) * nbmap +
905 (i + 2 * nv * nbmap) * AhRows] +=
909 for (j = 0; j < nimap; ++j)
911 C_data[i + 2 * nv * nbmap +
912 (j + (2 * nv + 1) * nimap) * nbndry] +=
924 Advtmp = Advfield[k] + phys_offset, 1,
925 deriv, 1, tmpphys, 1);
926 locExp->IProductWRTBase(tmpphys, coeffs);
928 for (nv = 0; nv < nvel * nz_loc; ++nv)
930 for (j = 0; j < nbmap; ++j)
932 Ah_data[j + nv * nbmap +
933 (i + nv * nbmap) * AhRows] +=
937 for (j = 0; j < nimap; ++j)
939 C_data[i + nv * nbmap +
940 (j + nv * nimap) * nbndry] +=
948 for (j = 0; j < nz_loc; ++j)
951 psize, &(pcoeffs)[0], 1,
953 ((nz_loc * k + j) * bmap.size() + i) *
961 if (IsLinearNSEquation)
963 for (nv = 0; nv < nvel; ++nv)
967 AdvDeriv[k * nvel + nv], 1, tmpphys, 1);
968 locExp->IProductWRTBase(tmpphys, coeffs);
970 for (
size_t n1 = 0; n1 < nz_loc; ++n1)
972 for (j = 0; j < nbmap; ++j)
974 Ah_data[j + (k * nz_loc + n1) * nbmap +
975 (i + (nv * nz_loc + n1) * nbmap) *
976 AhRows] += coeffs[bmap[j]];
979 for (j = 0; j < nimap; ++j)
981 C_data[i + (nv * nz_loc + n1) * nbmap +
982 (j + (k * nz_loc + n1) * nimap) *
983 nbndry] += coeffs[imap[j]];
991 for (i = 0; i < nimap; ++i)
995 coeffs[imap[i]] = 1.0;
996 locExp->BwdTrans(coeffs, phys);
998 for (k = 0; k < nvel * nz_loc; ++k)
1000 for (j = 0; j < nbmap; ++j)
1002 C_data[j + k * nbmap + (i + k * nimap) * nbndry] +=
1004 HelmMat_data[imap[i] + HelmMatRows * bmap[j]];
1007 for (j = 0; j < nimap; ++j)
1009 D_data[i + k * nimap + (j + k * nimap) * nint] +=
1011 HelmMat_data[imap[i] + HelmMatRows * imap[j]];
1018 for (k = 0; k < nvel; ++k)
1020 for (j = 0; j < nbmap; ++j)
1022 C_data[j + 2 * k * nbmap +
1023 (i + (2 * k + 1) * nimap) * nbndry] +=
1024 lambda_imag * (*MassMat)(bmap[j], imap[i]);
1027 for (j = 0; j < nbmap; ++j)
1029 C_data[j + (2 * k + 1) * nbmap +
1030 (i + 2 * k * nimap) * nbndry] -=
1031 lambda_imag * (*MassMat)(bmap[j], imap[i]);
1034 for (j = 0; j < nimap; ++j)
1036 D_data[i + 2 * k * nimap +
1037 (j + (2 * k + 1) * nimap) * nint] -=
1038 lambda_imag * (*MassMat)(imap[i], imap[j]);
1041 for (j = 0; j < nimap; ++j)
1043 D_data[i + (2 * k + 1) * nimap +
1044 (j + 2 * k * nimap) * nint] +=
1045 lambda_imag * (*MassMat)(imap[i], imap[j]);
1050 for (k = 0; k < nvel; ++k)
1052 if ((nz_loc == 2) && (k == 2))
1058 m_pressure->GetExp(n)->IProductWRTBase(deriv, pcoeffs);
1061 ((nz_loc * k + 1) * imap.size() + i) *
1068 ((nz_loc * k) * imap.size() + i) *
1075 Vmath::Vmul(npoints, Advtmp = Advfield[k] + phys_offset,
1076 1, deriv, 1, tmpphys, 1);
1077 locExp->IProductWRTBase(tmpphys, coeffs);
1080 for (nv = 0; nv < nvel; ++nv)
1082 for (j = 0; j < nbmap; ++j)
1084 B_data[j + 2 * nv * nbmap +
1085 (i + (2 * nv + 1) * nimap) * nbndry] +=
1089 for (j = 0; j < nimap; ++j)
1091 D_data[j + 2 * nv * nimap +
1092 (i + (2 * nv + 1) * nimap) * nint] +=
1098 for (nv = 0; nv < nvel; ++nv)
1100 for (j = 0; j < nbmap; ++j)
1102 B_data[j + (2 * nv + 1) * nbmap +
1103 (i + 2 * nv * nimap) * nbndry] +=
1107 for (j = 0; j < nimap; ++j)
1109 D_data[j + (2 * nv + 1) * nimap +
1110 (i + 2 * nv * nimap) * nint] +=
1124 Advtmp = Advfield[k] + phys_offset, 1,
1125 deriv, 1, tmpphys, 1);
1126 locExp->IProductWRTBase(tmpphys, coeffs);
1128 for (nv = 0; nv < nvel * nz_loc; ++nv)
1130 for (j = 0; j < nbmap; ++j)
1132 B_data[j + nv * nbmap +
1133 (i + nv * nimap) * nbndry] +=
1137 for (j = 0; j < nimap; ++j)
1139 D_data[j + nv * nimap +
1140 (i + nv * nimap) * nint] +=
1145 m_pressure->GetExp(n)->IProductWRTBase(deriv,
1147 for (j = 0; j < nz_loc; ++j)
1150 psize, &(pcoeffs)[0], 1,
1152 ((nz_loc * k + j) * imap.size() + i) *
1160 if (IsLinearNSEquation)
1163 for (nv = 0; nv < nvel; ++nv)
1167 AdvDeriv[k * nvel + nv], 1, tmpphys, 1);
1168 locExp->IProductWRTBase(tmpphys, coeffs);
1170 for (n1 = 0; n1 < nz_loc; ++n1)
1172 for (j = 0; j < nbmap; ++j)
1174 B_data[j + (k * nz_loc + n1) * nbmap +
1175 (i + (nv * nz_loc + n1) * nimap) *
1176 nbndry] += coeffs[bmap[j]];
1179 for (j = 0; j < nimap; ++j)
1181 D_data[j + (k * nz_loc + n1) * nimap +
1182 (i + (nv * nz_loc + n1) * nimap) *
1183 nint] += coeffs[imap[j]];
1197 Blas::Dgemm(
'N',
'T', B->GetRows(), C->GetRows(), B->GetColumns(),
1198 -1.0, B->GetRawPtr(), B->GetRows(), C->GetRawPtr(),
1199 C->GetRows(), 1.0, Ah->GetRawPtr(), Ah->GetRows());
1222 DNekMat DintCinvDTint, BCinvDTint_m_DTbnd, DintCinvBTtilde_m_Dbnd;
1228 DintCinvDTint = (*Dint) * (*Cinv) *
Transpose(*Dint);
1232 DintCinvBTtilde_m_Dbnd =
1233 (*Dint) * (*Cinv) *
Transpose(*Btilde) - (*Dbnd);
1237 nsize_bndry_p1[n], nsize_p_m1[n], zero);
1239 nsize_p_m1[n], nsize_bndry_p1[n], zero);
1241 nsize_p_m1[n], nsize_p_m1[n], zero);
1246 for (n1 = 0; n1 < nz_loc; ++n1)
1248 for (i = 0; i < psize - 1; ++i)
1250 for (n2 = 0; n2 < nz_loc; ++n2)
1252 for (j = 0; j < psize - 1; ++j)
1256 Dh_data[(i + n1 * (psize - 1)) +
1257 (j + n2 * (psize - 1)) * Dh->GetRows()] =
1258 -DintCinvDTint(i + 1 + n1 * psize,
1259 j + 1 + n2 * psize);
1265 for (n1 = 0; n1 < nz_loc; ++n1)
1267 for (i = 0; i < nsize_bndry_p1[n] - nz_loc; ++i)
1269 (*Ah)(i, nsize_bndry_p1[n] - nz_loc + n1) =
1270 BCinvDTint_m_DTbnd(i, n1 * psize);
1271 (*Ah)(nsize_bndry_p1[n] - nz_loc + n1, i) =
1272 DintCinvBTtilde_m_Dbnd(n1 * psize, i);
1276 for (n1 = 0; n1 < nz_loc; ++n1)
1278 for (n2 = 0; n2 < nz_loc; ++n2)
1280 (*Ah)(nsize_bndry_p1[n] - nz_loc + n1,
1281 nsize_bndry_p1[n] - nz_loc + n2) =
1282 -DintCinvDTint(n1 * psize, n2 * psize);
1286 for (n1 = 0; n1 < nz_loc; ++n1)
1288 for (j = 0; j < psize - 1; ++j)
1290 for (i = 0; i < nsize_bndry_p1[n] - nz_loc; ++i)
1292 (*Bh)(i, j + n1 * (psize - 1)) =
1293 BCinvDTint_m_DTbnd(i, j + 1 + n1 * psize);
1294 (*Ch)(j + n1 * (psize - 1), i) =
1295 DintCinvBTtilde_m_Dbnd(j + 1 + n1 * psize, i);
1300 for (n1 = 0; n1 < nz_loc; ++n1)
1302 for (n2 = 0; n2 < nz_loc; ++n2)
1304 for (j = 0; j < psize - 1; ++j)
1306 (*Bh)(nsize_bndry_p1[n] - nz_loc + n1,
1307 j + n2 * (psize - 1)) =
1308 -DintCinvDTint(n1 * psize, j + 1 + n2 * psize);
1309 (*Ch)(j + n2 * (psize - 1),
1310 nsize_bndry_p1[n] - nz_loc + n1) =
1311 -DintCinvDTint(j + 1 + n2 * psize, n1 * psize);
1318 (*Bh) = (*Bh) * (*Dh);
1320 Blas::Dgemm(
'N',
'N', Bh->GetRows(), Ch->GetColumns(), Bh->GetColumns(),
1321 -1.0, Bh->GetRawPtr(), Bh->GetRows(), Ch->GetRawPtr(),
1322 Ch->GetRows(), 1.0, Ah->GetRawPtr(), Ah->GetRows());
1340 std::cout <<
"Matrix Setup Costs: " << timer.
TimePerTest(1) << std::endl;
2121 size_t i, j, k, n, cnt, cnt1;
2122 size_t nbnd, nint, offset;
2124 size_t nel = fields[0]->GetNumElmts();
2154 for (i = 0; i < fields.size(); ++i)
2157 tmp = fields[i]->UpdateCoeffs() + nplanecoeffs,
2161 pressure->UpdateCoeffs(), 1);
2166 for (k = 0; k < nvel; ++k)
2169 std::dynamic_pointer_cast<MultiRegions::ContField>(fields[k]);
2172 cfield->GetLocalToGlobalMap()->GetBndCondCoeffsToLocalCoeffsSign();
2174 cfield->GetLocalToGlobalMap()->GetBndCondCoeffsToLocalCoeffsMap();
2178 fields[k]->GetBndConditions();
2184 m_fields[k]->GetPlane(2 * mode)->GetBndCondExpansions();
2188 bndCondExp =
m_fields[k]->GetBndCondExpansions();
2191 for (n = 0; n < nz_loc; ++n)
2194 for (i = 0; i < bndCondExp.size(); ++i)
2197 bndCondExp[i]->GetCoeffs();
2198 size_t nCoeffs = (bndCondExp[i])->
GetNcoeffs();
2201 if (bndConds[i]->GetBoundaryConditionType() ==
2203 bndConds[i]->GetBoundaryConditionType() ==
2208 for (j = 0; j < nCoeffs; j++)
2210 forcing[k][n * nplanecoeffs + map[bndcnt + j]] +=
2211 sign[bndcnt + j] * bndcoeffs[j];
2216 for (j = 0; j < nCoeffs; j++)
2218 forcing[k][n * nplanecoeffs + map[bndcnt + j]] +=
2224 bndcnt += bndCondExp[i]->GetNcoeffs();
2235 for (i = 0; i < nel; ++i)
2237 fields[
m_velocity[0]]->GetExp(i)->GetBoundaryMap(bmap);
2238 fields[
m_velocity[0]]->GetExp(i)->GetInteriorMap(imap);
2241 offset = fields[
m_velocity[0]]->GetCoeff_Offset(i);
2243 for (j = 0; j < nvel; ++j)
2247 for (n = 0; n < nz_loc; ++n)
2249 for (k = 0; k < nbnd; ++k)
2252 forcing[j][n * nplanecoeffs + offset + bmap[k]];
2254 bnd[bndoffset + (n + j * nz_loc) * nbnd + k] =
2255 incoeffs[n * nplanecoeffs + offset + bmap[k]];
2257 for (k = 0; k < nint; ++k)
2260 forcing[j][n * nplanecoeffs + offset + imap[k]];
2268 nvel * nz_loc * nbnd + nz_loc * (pressure->GetExp(i)->GetNcoeffs());
2276 F_bnd = F_bnd - (*
m_mat[mode].m_BCinv) * F_int;
2277 F_p_tmp = (*
m_mat[mode].m_Cinv) * F_int;
2278 F_p = (*
m_mat[mode].m_D_int) * F_p_tmp;
2285 for (i = 0; i < nel; ++i)
2287 nbnd = nz_loc * fields[0]->GetExp(i)->NumBndryCoeffs();
2289 for (j = 0; j < nvel; ++j)
2291 for (k = 0; k < nbnd; ++k)
2293 fh_bnd[offset + j * nbnd + k] = f_bnd[cnt + k];
2298 nint = pressure->GetExp(i)->GetNcoeffs();
2299 offset += nvel * nbnd + nint * nz_loc;
2303 for (i = 0; i < nel; ++i)
2305 nbnd = nz_loc * fields[0]->GetExp(i)->NumBndryCoeffs();
2306 nint = pressure->GetExp(i)->GetNcoeffs();
2308 for (n = 0; n < nz_loc; ++n)
2310 for (j = 0; j < nint; ++j)
2312 fh_bnd[offset + nvel * nbnd + n * nint + j] = f_p[cnt1 + j];
2316 offset += nvel * nbnd + nz_loc * nint;
2322 int totpcoeffs = pressure->GetNcoeffs();
2324 for (i = 0; i < nel; ++i)
2326 nbnd = nz_loc * fields[0]->GetExp(i)->NumBndryCoeffs();
2327 nint = pressure->GetExp(i)->GetNcoeffs();
2328 for (j = 0; j < nvel; ++j)
2330 for (k = 0; k < nbnd; ++k)
2332 f_bnd[cnt + k] = bnd[offset + j * nbnd + k];
2336 offset += nvel * nbnd + nint * nz_loc;
2339 pressure->SetPhysState(
false);
2341 offset = cnt = cnt1 = 0;
2342 for (i = 0; i < nel; ++i)
2344 nint = pressure->GetExp(i)->GetNcoeffs();
2345 nbnd = fields[0]->GetExp(i)->NumBndryCoeffs();
2346 cnt1 = pressure->GetCoeff_Offset(i);
2348 for (n = 0; n < nz_loc; ++n)
2350 for (j = 0; j < nint; ++j)
2352 p_coeffs[n * totpcoeffs + cnt1 + j] = f_p[cnt + j] =
2353 bnd[offset + nvel * nz_loc * nbnd + n * nint + j];
2357 offset += (nvel * nbnd + nint) * nz_loc;
2364 F_int = (*
m_mat[mode].m_Cinv) * F_int;
2368 for (i = 0; i < nel; ++i)
2370 fields[0]->GetExp(i)->GetBoundaryMap(bmap);
2371 fields[0]->GetExp(i)->GetInteriorMap(imap);
2374 offset = fields[0]->GetCoeff_Offset(i);
2376 for (j = 0; j < nvel; ++j)
2378 for (n = 0; n < nz_loc; ++n)
2380 for (k = 0; k < nbnd; ++k)
2382 fields[j]->SetCoeff(n * nplanecoeffs + offset + bmap[k],
2386 for (k = 0; k < nint; ++k)
2388 fields[j]->SetCoeff(n * nplanecoeffs + offset + imap[k],
2397 for (j = 0; j < nvel; ++j)
2399 fields[j]->SetPhysState(
false);