Reference documentation for deal.II version 9.5.1
\(\newcommand{\dealvcentcolon}{\mathrel{\mathop{:}}}\) \(\newcommand{\dealcoloneq}{\dealvcentcolon\mathrel{\mkern-1.2mu}=}\) \(\newcommand{\jump}[1]{\left[\!\left[ #1 \right]\!\right]}\) \(\newcommand{\average}[1]{\left\{\!\left\{ #1 \right\}\!\right\}}\)
Loading...
Searching...
No Matches
step-67.h
Go to the documentation of this file.
1
898 *   stage_5_order_4, /* Kennedy, Carpenter, Lewis, 2000 */
899 *   stage_7_order_4, /* Tselios, Simos, 2007 */
900 *   stage_9_order_5, /* Kennedy, Carpenter, Lewis, 2000 */
901 *   };
903 *  
904 * @endcode
905 *
906 * Eventually, we select a detail of the spatial discretization, namely the
907 * numerical flux (Riemann solver) at the faces between cells. For this
910 *
911 * @code
913 *   {
916 *   };
918 *  
919 *  
920 *  
921 * @endcode
922 *
923 *
924 * <a name="Equationdata"></a>
925 * <h3>Equation data</h3>
926 *
927
928 *
929 * We now define a class with the exact solution for the test case 0 and one
930 * with a background flow field for test case 1 of the channel. Given that
931 * the Euler equations are a problem with @f$d+2@f$ equations in @f$d@f$ dimensions,
932 * we need to tell the Function base class about the correct number of
933 * components.
934 *
935 * @code
936 *   template <int dim>
937 *   class ExactSolution : public Function<dim>
938 *   {
939 *   public:
940 *   ExactSolution(const double time)
941 *   : Function<dim>(dim + 2, time)
942 *   {}
943 *  
944 *   virtual double value(const Point<dim> & p,
945 *   const unsigned int component = 0) const override;
946 *   };
947 *  
948 *  
949 *  
950 * @endcode
951 *
953 * test case is an isentropic vortex case (see e.g. the book by Hesthaven
954 * and Warburton, Example 6.1 in Section 6.6 on page 209) which fulfills the
955 * Euler equations with zero force term on the right hand side. Given that
956 * definition, we return either the density, the momentum, or the energy
962 * optimized. This formula might lose accuracy in the last digits
964 * it anyway, since small numbers map to data close to 1.
965 *
966
967 *
968 * For the channel test case, we simply select a density of 1, a velocity of
969 * 0.4 in @f$x@f$ direction and zero in the other directions, and an energy that
970 * corresponds to a speed of sound of 1.3 measured against the background
971 * velocity field, computed from the relation @f$E = \frac{c^2}{\gamma (\gamma
972 * -1)} + \frac 12 \rho \|u\|^2@f$.
973 *
974 * @code
975 *   template <int dim>
976 *   double ExactSolution<dim>::value(const Point<dim> & x,
977 *   const unsigned int component) const
978 *   {
979 *   const double t = this->get_time();
980 *  
981 *   switch (testcase)
982 *   {
983 *   case 0:
984 *   {
985 *   Assert(dim == 2, ExcNotImplemented());
986 *   const double beta = 5;
987 *  
988 *   Point<dim> x0;
989 *   x0[0] = 5.;
990 *   const double radius_sqr =
991 *   (x - x0).norm_square() - 2. * (x[0] - x0[0]) * t + t * t;
992 *   const double factor =
993 *   beta / (numbers::PI * 2) * std::exp(1. - radius_sqr);
994 *   const double density_log = std::log2(
995 *   std::abs(1. - (gamma - 1.) / gamma * 0.25 * factor * factor));
996 *   const double density = std::exp2(density_log * (1. / (gamma - 1.)));
997 *   const double u = 1. - factor * (x[1] - x0[1]);
998 *   const double v = factor * (x[0] - t - x0[0]);
999 *  
1000 *   if (component == 0)
1001 *   return density;
1002 *   else if (component == 1)
1003 *   return density * u;
1004 *   else if (component == 2)
1005 *   return density * v;
1006 *   else
1007 *   {
1008 *   const double pressure =
1009 *   std::exp2(density_log * (gamma / (gamma - 1.)));
1010 *   return pressure / (gamma - 1.) +
1011 *   0.5 * (density * u * u + density * v * v);
1012 *   }
1013 *   }
1014 *  
1015 *   case 1:
1016 *   {
1017 *   if (component == 0)
1018 *   return 1.;
1019 *   else if (component == 1)
1020 *   return 0.4;
1021 *   else if (component == dim + 1)
1022 *   return 3.097857142857143;
1023 *   else
1024 *   return 0.;
1025 *   }
1026 *  
1027 *   default:
1028 *   Assert(false, ExcNotImplemented());
1029 *   return 0.;
1030 *   }
1031 *   }
1032 *  
1033 *  
1034 *  
1035 * @endcode
1036 *
1037 *
1038 * <a name="LowstorageexplicitRungeKuttatimeintegrators"></a>
1039 * <h3>Low-storage explicit Runge--Kutta time integrators</h3>
1040 *
1041
1042 *
1043 * The next few lines implement a few low-storage variants of Runge--Kutta
1046 * method, we can deduce time steps, @f$c_i = \sum_{j=1}^{i-2} b_i + a_{i-1}@f$
1047 * from those coefficients. The main advantage of this kind of scheme is the
1049 * part of the solution @f$\mathbf{w}@f$ (that will hold the solution
1050 * @f$\mathbf{w}^{n+1}@f$ at the new time @f$t^{n+1}@f$ after the last stage), the
1051 * update vector @f$\mathbf{r}_i@f$ that gets evaluated during the stages, plus
1052 * one vector @f$\mathbf{k}_i@f$ to hold the evaluation of the operator. Such a
1053 * Runge--Kutta setup reduces the memory storage and memory access. As the
1054 * memory bandwidth is often the performance-limiting factor on modern
1056 * well-optimized, performance can be improved over standard time
1058 * conventional Runge--Kutta scheme might allow for slightly larger time
1059 * steps as more free parameters allow for better stability properties.
1060 *
1061
1062 *
1064 * low-storage schemes defined in the article by Kennedy, Carpenter, and
1065 * Lewis (2000), as well as one variant described by Tselios and Simos
1066 * (2007). There is a large series of other schemes available, which could
1068 * update formulas.
1069 *
1070
1071 *
1073 * enum described above. To each scheme, we then fill the vectors for the
1074 * @f$b_i@f$ and @f$a_i@f$ to the given variables in the class.
1075 *
1076 * @code
1078 *   {
1079 *   public:
1081 *   {
1083 * @endcode
1084 *
1085 * First comes the three-stage scheme of order three by Kennedy et al.
1088 * competitive in terms of the work per stage.
1089 *
1090 * @code
1091 *   switch (scheme)
1092 *   {
1093 *   case stage_3_order_3:
1094 *   {
1096 *   break;
1097 *   }
1098 *  
1099 * @endcode
1100 *
1101 * The next scheme is a five-stage scheme of order four, again
1102 * defined in the paper by Kennedy et al. (2000).
1103 *
1104 * @code
1105 *   case stage_5_order_4:
1106 *   {
1108 *   break;
1109 *   }
1110 *  
1111 * @endcode
1112 *
1113 * The following scheme of seven stages and order four has been
1114 * explicitly derived for acoustics problems. It is a balance of
1115 * accuracy for imaginary eigenvalues among fourth order schemes,
1118 * necessarily translate to the highest possible time step per
1121 * also the maximal stable time step size. For the modified
1122 * Lax--Friedrichs flux, this scheme is similar to the
1123 * `stage_5_order_4` scheme in terms of step size per stage if only
1125 * flux.
1126 *
1127 * @code
1128 *   case stage_7_order_4:
1129 *   {
1131 *   break;
1132 *   }
1133 *  
1134 * @endcode
1135 *
1136 * The last scheme included here is the nine-stage scheme of order
1138 * the schemes used here, but the higher order of accuracy
1140 * stage is less than for the fourth order schemes.
1141 *
1142 * @code
1143 *   case stage_9_order_5:
1144 *   {
1146 *   break;
1147 *   }
1148 *  
1149 *   default:
1150 *   AssertThrow(false, ExcNotImplemented());
1151 *   }
1155 *   rk_integrator.get_coefficients(ai, bi, ci);
1156 *   }
1157 *  
1158 *   unsigned int n_stages() const
1159 *   {
1160 *   return bi.size();
1161 *   }
1162 *  
1163 * @endcode
1164 *
1165 * The main function of the time integrator is to go through the stages,
1166 * evaluate the operator, prepare the @f$\mathbf{r}_i@f$ vector for the next
1167 * evaluation, and update the solution vector @f$\mathbf{w}@f$. We hand off
1168 * the work to the `pde_operator` involved in order to be able to merge
1170 * the differential operator for better performance, so all we do here is
1171 * to delegate the vectors and coefficients.
1172 *
1173
1174 *
1175 * We separately call the operator for the first stage because we need
1176 * slightly modified arguments there: We evaluate the solution from
1177 * the old solution @f$\mathbf{w}^n@f$ rather than a @f$\mathbf r_i@f$ vector, so
1178 * the first argument is `solution`. We here let the stage vector
1180 * is not used otherwise. For all subsequent stages, we use the vector
1181 * `vec_ki` as the second vector argument to store the result of the
1182 * operator evaluation. Finally, when we are at the last stage, we must
1183 * skip the computation of the vector @f$\mathbf{r}_{s+1}@f$ as there is no
1185 *
1186 * @code
1187 *   template <typename VectorType, typename Operator>
1189 *   const double current_time,
1190 *   const double time_step,
1191 *   VectorType & solution,
1192 *   VectorType & vec_ri,
1193 *   VectorType & vec_ki) const
1194 *   {
1195 *   AssertDimension(ai.size() + 1, bi.size());
1196 *  
1197 *   pde_operator.perform_stage(current_time,
1198 *   bi[0] * time_step,
1199 *   ai[0] * time_step,
1200 *   solution,
1201 *   vec_ri,
1202 *   solution,
1203 *   vec_ri);
1204 *  
1205 *   for (unsigned int stage = 1; stage < bi.size(); ++stage)
1206 *   {
1207 *   const double c_i = ci[stage];
1208 *   pde_operator.perform_stage(current_time + c_i * time_step,
1209 *   bi[stage] * time_step,
1210 *   (stage == bi.size() - 1 ?
1211 *   0 :
1212 *   ai[stage] * time_step),
1213 *   vec_ri,
1214 *   vec_ki,
1215 *   solution,
1216 *   vec_ri);
1217 *   }
1218 *   }
1219 *  
1220 *   private:
1221 *   std::vector<double> bi;
1222 *   std::vector<double> ai;
1223 *   std::vector<double> ci;
1224 *   };
1225 *  
1226 *  
1227 *  
1228 * @endcode
1229 *
1230 *
1231 * <a name="ImplementationofpointwiseoperationsoftheEulerequations"></a>
1232 * <h3>Implementation of point-wise operations of the Euler equations</h3>
1233 *
1234
1235 *
1237 * operators pertaining to the Euler equations. Each function acts on the
1238 * vector of conserved variables @f$[\rho, \rho\mathbf{u}, E]@f$ that we hold in
1239 * the solution vectors, and computes various derived quantities.
1240 *
1241
1242 *
1247 * compiler-specific keyword that tells the compiler to never create a
1248 * function call for any of those functions, and instead move the
1249 * implementation <a
1250 * href="https://en.wikipedia.org/wiki/Inline_function">inline</a> to where
1255 * every quadrature point of every cell. Making sure these functions are
1257 * instruction into the function (and the corresponding return jump), but also
1259 * context in code that comes after the place where the function was called.
1260 * (We note that compilers are generally quite good at figuring out which
1261 * functions to inline by themselves. Here is a place where compilers may or
1262 * may not have figured it out by themselves but where we know for sure that
1263 * inlining is a win.)
1264 *
1265
1266 *
1267 * Another trick we apply is a separate variable for the inverse density
1268 * @f$\frac{1}{\rho}@f$. This enables the compiler to only perform a single
1269 * division for the flux, despite the division being used at several
1270 * places. As divisions are around ten to twenty times as expensive as
1271 * multiplications or additions, avoiding redundant divisions is crucial for
1272 * performance. We note that taking the inverse first and later multiplying
1273 * with it is not equivalent to a division in floating point arithmetic due
1274 * to roundoff effects, so the compiler is not allowed to exchange one way by
1275 * the other with standard optimization flags. However, it is also not
1276 * particularly difficult to write the code in the right way.
1277 *
1278
1279 *
1280 * To summarize, the chosen strategy of always inlining and careful
1281 * definition of expensive arithmetic operations allows us to write compact
1282 * code without passing all intermediate results around, despite making sure
1283 * that the code maps to excellent machine code.
1284 *
1285 * @code
1286 *   template <int dim, typename Number>
1287 *   inline DEAL_II_ALWAYS_INLINE
1288 *   Tensor<1, dim, Number>
1289 *   euler_velocity(const Tensor<1, dim + 2, Number> &conserved_variables)
1290 *   {
1291 *   const Number inverse_density = Number(1.) / conserved_variables[0];
1292 *  
1293 *   Tensor<1, dim, Number> velocity;
1294 *   for (unsigned int d = 0; d < dim; ++d)
1295 *   velocity[d] = conserved_variables[1 + d] * inverse_density;
1296 *  
1297 *   return velocity;
1298 *   }
1299 *  
1300 * @endcode
1301 *
1302 * The next function computes the pressure from the vector of conserved
1303 * variables, using the formula @f$p = (\gamma - 1) \left(E - \frac 12 \rho
1304 * \mathbf{u}\cdot \mathbf{u}\right)@f$. As explained above, we use the
1305 * velocity from the `euler_velocity()` function. Note that we need to
1306 * specify the first template argument `dim` here because the compiler is
1307 * not able to deduce it from the arguments of the tensor, whereas the
1308 * second argument (number type) can be automatically deduced.
1309 *
1310 * @code
1311 *   template <int dim, typename Number>
1312 *   inline DEAL_II_ALWAYS_INLINE
1313 *   Number
1314 *   euler_pressure(const Tensor<1, dim + 2, Number> &conserved_variables)
1315 *   {
1316 *   const Tensor<1, dim, Number> velocity =
1317 *   euler_velocity<dim>(conserved_variables);
1318 *  
1319 *   Number rho_u_dot_u = conserved_variables[1] * velocity[0];
1320 *   for (unsigned int d = 1; d < dim; ++d)
1321 *   rho_u_dot_u += conserved_variables[1 + d] * velocity[d];
1322 *  
1323 *   return (gamma - 1.) * (conserved_variables[dim + 1] - 0.5 * rho_u_dot_u);
1324 *   }
1325 *  
1326 * @endcode
1327 *
1328 * Here is the definition of the Euler flux function, i.e., the definition
1329 * of the actual equation. Given the velocity and pressure (that the
1330 * compiler optimization will make sure are done only once), this is
1331 * straight-forward given the equation stated in the introduction.
1332 *
1333 * @code
1334 *   template <int dim, typename Number>
1335 *   inline DEAL_II_ALWAYS_INLINE
1336 *   Tensor<1, dim + 2, Tensor<1, dim, Number>>
1337 *   euler_flux(const Tensor<1, dim + 2, Number> &conserved_variables)
1338 *   {
1339 *   const Tensor<1, dim, Number> velocity =
1340 *   euler_velocity<dim>(conserved_variables);
1341 *   const Number pressure = euler_pressure<dim>(conserved_variables);
1342 *  
1343 *   Tensor<1, dim + 2, Tensor<1, dim, Number>> flux;
1344 *   for (unsigned int d = 0; d < dim; ++d)
1345 *   {
1346 *   flux[0][d] = conserved_variables[1 + d];
1347 *   for (unsigned int e = 0; e < dim; ++e)
1348 *   flux[e + 1][d] = conserved_variables[e + 1] * velocity[d];
1349 *   flux[d + 1][d] += pressure;
1350 *   flux[dim + 1][d] =
1351 *   velocity[d] * (conserved_variables[dim + 1] + pressure);
1352 *   }
1353 *  
1354 *   return flux;
1355 *   }
1356 *  
1357 * @endcode
1358 *
1359 * This next function is a helper to simplify the implementation of the
1360 * numerical flux, implementing the action of a tensor of tensors (with
1361 * non-standard outer dimension of size `dim + 2`, so the standard overloads
1362 * provided by deal.II's tensor classes do not apply here) with another
1363 * tensor of the same inner dimension, i.e., a matrix-vector product.
1364 *
1365 * @code
1367 *   inline DEAL_II_ALWAYS_INLINE
1369 *   operator*(const Tensor<1, n_components, Tensor<1, dim, Number>> &matrix,
1370 *   const Tensor<1, dim, Number> & vector)
1371 *   {
1373 *   for (unsigned int d = 0; d < n_components; ++d)
1374 *   result[d] = matrix[d] * vector;
1375 *   return result;
1376 *   }
1377 *  
1378 * @endcode
1379 *
1380 * This function implements the numerical flux (Riemann solver). It gets the
1381 * state from the two sides of an interface and the normal vector, oriented
1382 * from the side of the solution @f$\mathbf{w}^-@f$ towards the solution
1386 * numerical flux is less central due to the polynomials within the elements
1387 * and the physical flux used there. As a result of higher-degree
1389 * continuous solution, the numerical flux can be seen as a control of the
1390 * jump of the solution from both sides to weakly impose continuity. It is
1392 * high-order DG method in the presence of shocks, and thus any DG method
1394 * cases. In this tutorial, we focus on wave-like solutions of the Euler
1396 * basic scheme is sufficient.
1397 *
1398
1399 *
1402 * size with explicit Runge--Kutta methods. We consider two choices, a
1406 * Euler flux.
1407 *
1408
1409 *
1411 * =\frac{\mathbf{F}(\mathbf{w}^-)+\mathbf{F}(\mathbf{w}^+)}{2} +
1412 * \frac{\lambda}{2}\left[\mathbf{w}^--\mathbf{w}^+\right]\otimes
1413 * \mathbf{n^-}@f$, where the factor @f$\lambda =
1414 * \max\left(\|\mathbf{u}^-\|+c^-, \|\mathbf{u}^+\|+c^+\right)@f$ gives the
1415 * maximal wave speed and @f$c = \sqrt{\gamma p / \rho}@f$ is the speed of
1416 * sound. Here, we choose two modifications of that expression for reasons
1418 * solution. For the above definition of the factor @f$\lambda@f$, we would need
1419 * to take four square roots, two for the two velocity norms and two for the
1421 * use @f$\sqrt{\|\mathbf{u}\|^2+c^2}@f$ as an estimate of the maximal speed
1422 * (which is at most a factor of 2 away from the actual maximum, as shown in
1423 * the introduction). This allows us to pull the square root out of the
1425 * modification is to further relax on the parameter @f$\lambda@f$---the smaller
1427 * jump in @f$\mathbf{w}@f$, which might result in a smaller or bigger
1432 * formulation is not energy-stable in the limit of @f$\lambda\to 0@f$ as it is
1433 * not skew-symmetric, and would need additional measures such as split-form
1434 * DG schemes in that case.
1435 *
1436
1437 *
1441 * the Euler equations in terms of the current direction of velocity and
1444 * parameters.
1445 *
1446
1447 *
1448 * Since the numerical flux is multiplied by the normal vector in the weak
1449 * form, we multiply by the result by the normal vector for all terms in the
1452 *
1453
1454 *
1455 * In this and the following functions, we use variable suffixes `_m` and
1457 * i.e., values "here" and "there" relative to the current cell when looking
1458 * at a neighbor cell.
1459 *
1460 * @code
1461 *   template <int dim, typename Number>
1462 *   inline DEAL_II_ALWAYS_INLINE
1466 *   const Tensor<1, dim, Number> & normal)
1467 *   {
1468 *   const auto velocity_m = euler_velocity<dim>(u_m);
1469 *   const auto velocity_p = euler_velocity<dim>(u_p);
1470 *  
1471 *   const auto pressure_m = euler_pressure<dim>(u_m);
1472 *   const auto pressure_p = euler_pressure<dim>(u_p);
1473 *  
1474 *   const auto flux_m = euler_flux<dim>(u_m);
1475 *   const auto flux_p = euler_flux<dim>(u_p);
1476 *  
1477 *   switch (numerical_flux_type)
1478 *   {
1480 *   {
1481 *   const auto lambda =
1482 *   0.5 * std::sqrt(std::max(velocity_p.norm_square() +
1483 *   gamma * pressure_p * (1. / u_p[0]),
1484 *   velocity_m.norm_square() +
1485 *   gamma * pressure_m * (1. / u_m[0])));
1486 *  
1487 *   return 0.5 * (flux_m * normal + flux_p * normal) +
1488 *   0.5 * lambda * (u_m - u_p);
1489 *   }
1490 *  
1491 *   case harten_lax_vanleer:
1492 *   {
1493 *   const auto avg_velocity_normal =
1494 *   0.5 * ((velocity_m + velocity_p) * normal);
1495 *   const auto avg_c = std::sqrt(std::abs(
1496 *   0.5 * gamma *
1497 *   (pressure_p * (1. / u_p[0]) + pressure_m * (1. / u_m[0]))));
1498 *   const Number s_pos =
1499 *   std::max(Number(), avg_velocity_normal + avg_c);
1500 *   const Number s_neg =
1501 *   std::min(Number(), avg_velocity_normal - avg_c);
1502 *   const Number inverse_s = Number(1.) / (s_pos - s_neg);
1503 *  
1504 *   return inverse_s *
1505 *   ((s_pos * (flux_m * normal) - s_neg * (flux_p * normal)) -
1506 *   s_pos * s_neg * (u_m - u_p));
1507 *   }
1508 *  
1509 *   default:
1510 *   {
1511 *   Assert(false, ExcNotImplemented());
1512 *   return {};
1513 *   }
1514 *   }
1515 *   }
1516 *  
1517 *  
1518 *  
1519 * @endcode
1520 *
1521 * This and the next function are helper functions to provide compact
1523 * VectorizedArray argument (see the @ref step_37 "step-37" tutorial for details). This
1524 * function is used for the subsonic outflow boundary conditions where we
1525 * need to set the energy component to a prescribed value. The next one
1526 * requests the solution on all components and is used for inflow boundaries
1527 * where all components of the solution are set.
1528 *
1529 * @code
1530 *   template <int dim, typename Number>
1532 *   evaluate_function(const Function<dim> & function,
1534 *   const unsigned int component)
1535 *   {
1537 *   for (unsigned int v = 0; v < VectorizedArray<Number>::size(); ++v)
1538 *   {
1539 *   Point<dim> p;
1540 *   for (unsigned int d = 0; d < dim; ++d)
1541 *   p[d] = p_vectorized[d][v];
1542 *   result[v] = function.value(p, component);
1543 *   }
1544 *   return result;
1545 *   }
1546 *  
1547 *  
1548 *   template <int dim, typename Number, int n_components = dim + 2>
1550 *   evaluate_function(const Function<dim> & function,
1552 *   {
1553 *   AssertDimension(function.n_components, n_components);
1555 *   for (unsigned int v = 0; v < VectorizedArray<Number>::size(); ++v)
1556 *   {
1557 *   Point<dim> p;
1558 *   for (unsigned int d = 0; d < dim; ++d)
1559 *   p[d] = p_vectorized[d][v];
1560 *   for (unsigned int d = 0; d < n_components; ++d)
1561 *   result[d][v] = function.value(p, d);
1562 *   }
1563 *   return result;
1564 *   }
1565 *  
1566 *  
1567 *  
1568 * @endcode
1569 *
1570 *
1571 * <a name="TheEulerOperationclass"></a>
1572 * <h3>The EulerOperation class</h3>
1573 *
1574
1575 *
1577 * the `LaplaceOperator` class of @ref step_37 "step-37" or @ref step_59 "step-59". Since the present
1578 * operator is non-linear and does not require a matrix interface (to be
1581 * function as well as the combination of `apply` with the required vector
1582 * updates for the low-storage Runge--Kutta time integrator mentioned above
1584 * functions involving matrix-free routines, namely one to compute an
1585 * estimate of the time step scaling (that is combined with the Courant
1586 * number for the actual time step size) based on the velocity and speed of
1587 * sound in the elements, one for the projection of solutions (specializing
1588 * VectorTools::project() for the DG case), and one to compute the errors
1589 * against a possible analytical solution or norms against some background
1590 * state.
1591 *
1592
1593 *
1599 *
1600 * @code
1601 *   template <int dim, int degree, int n_points_1d>
1602 *   class EulerOperator
1603 *   {
1604 *   public:
1605 *   static constexpr unsigned int n_quadrature_points_1d = n_points_1d;
1606 *  
1608 *  
1609 *   void reinit(const Mapping<dim> & mapping,
1610 *   const DoFHandler<dim> &dof_handler);
1611 *  
1612 *   void set_inflow_boundary(const types::boundary_id boundary_id,
1613 *   std::unique_ptr<Function<dim>> inflow_function);
1614 *  
1616 *   const types::boundary_id boundary_id,
1617 *   std::unique_ptr<Function<dim>> outflow_energy);
1618 *  
1619 *   void set_wall_boundary(const types::boundary_id boundary_id);
1620 *  
1621 *   void set_body_force(std::unique_ptr<Function<dim>> body_force);
1622 *  
1623 *   void apply(const double current_time,
1626 *  
1627 *   void
1628 *   perform_stage(const Number cur_time,
1629 *   const Number factor_solution,
1630 *   const Number factor_ai,
1635 *  
1636 *   void project(const Function<dim> & function,
1638 *  
1639 *   std::array<double, 3> compute_errors(
1640 *   const Function<dim> & function,
1641 *   const LinearAlgebra::distributed::Vector<Number> &solution) const;
1642 *  
1644 *   const LinearAlgebra::distributed::Vector<Number> &solution) const;
1645 *  
1646 *   void
1648 *  
1649 *   private:
1651 *  
1652 *   TimerOutput &timer;
1653 *  
1654 *   std::map<types::boundary_id, std::unique_ptr<Function<dim>>>
1656 *   std::map<types::boundary_id, std::unique_ptr<Function<dim>>>
1658 *   std::set<types::boundary_id> wall_boundaries;
1659 *   std::unique_ptr<Function<dim>> body_force;
1660 *  
1665 *   const std::pair<unsigned int, unsigned int> & cell_range) const;
1666 *  
1667 *   void local_apply_cell(
1671 *   const std::pair<unsigned int, unsigned int> & cell_range) const;
1672 *  
1673 *   void local_apply_face(
1677 *   const std::pair<unsigned int, unsigned int> & face_range) const;
1678 *  
1683 *   const std::pair<unsigned int, unsigned int> & face_range) const;
1684 *   };
1685 *  
1686 *  
1687 *  
1688 *   template <int dim, int degree, int n_points_1d>
1690 *   : timer(timer)
1691 *   {}
1692 *  
1693 *  
1694 *  
1695 * @endcode
1696 *
1697 * For the initialization of the Euler operator, we set up the MatrixFree
1698 * variable contained in the class. This can be done given a mapping to
1700 * describing the degrees of freedom. Since we use a discontinuous Galerkin
1701 * discretization in this tutorial program where no constraints are imposed
1702 * strongly on the solution field, we do not need to pass in an
1703 * AffineConstraints object and rather use a dummy for the
1704 * construction. With respect to quadrature, we want to select two different
1706 * based on a template parameter `n_points_1d` (that will be assigned the
1707 * `n_q_points_1d` value specified at the top of this file). More accurate
1709 * variable coefficients in the Euler operator. The second less accurate
1710 * quadrature formula is a tight one based on `fe_degree+1` and needed for
1711 * the inverse mass matrix. While that formula provides an exact inverse
1712 * only on affine element shapes and not on deformed elements, it enables
1713 * the fast inversion of the mass matrix by tensor product techniques,
1715 *
1716 * @code
1719 *   const Mapping<dim> & mapping,
1720 *   const DoFHandler<dim> &dof_handler)
1721 *   {
1722 *   const std::vector<const DoFHandler<dim> *> dof_handlers = {&dof_handler};
1724 *   const std::vector<const AffineConstraints<double> *> constraints = {&dummy};
1725 *   const std::vector<Quadrature<1>> quadratures = {QGauss<1>(n_q_points_1d),
1726 *   QGauss<1>(fe_degree + 1)};
1727 *  
1728 *   typename MatrixFree<dim, Number>::AdditionalData additional_data;
1729 *   additional_data.mapping_update_flags =
1731 *   update_values);
1732 *   additional_data.mapping_update_flags_inner_faces =
1734 *   update_values);
1735 *   additional_data.mapping_update_flags_boundary_faces =
1737 *   update_values);
1738 *   additional_data.tasks_parallel_scheme =
1740 *  
1741 *   data.reinit(
1742 *   mapping, dof_handlers, constraints, quadratures, additional_data);
1743 *   }
1744 *  
1745 *  
1746 *  
1747 *   template <int dim, int degree, int n_points_1d>
1750 *   {
1751 *   data.initialize_dof_vector(vector);
1752 *   }
1753 *  
1754 *  
1755 *  
1756 * @endcode
1757 *
1760 * we must specify all components in terms of density @f$\rho@f$, momentum @f$\rho
1761 * \mathbf{u}@f$ and energy @f$E@f$. Given this information, we then store the
1762 * function alongside the respective boundary id in a map member variable of
1764 * we request a function as well, which we use to retrieve the energy) and for
1766 * function necessary, so we only request the boundary id). For the present
1768 * form (during time integration), the call to set the boundary conditions
1769 * can appear both before or after the `reinit()` call to this class. This
1770 * is different from continuous finite element codes where the boundary
1774 *
1775
1776 *
1777 * The checks added in each of the four function are used to
1779 * parts of the boundary, i.e., that a user does not accidentally designate a
1780 * boundary as both an inflow and say a subsonic outflow boundary.
1781 *
1782 * @code
1783 *   template <int dim, int degree, int n_points_1d>
1785 *   const types::boundary_id boundary_id,
1786 *   std::unique_ptr<Function<dim>> inflow_function)
1787 *   {
1788 *   AssertThrow(subsonic_outflow_boundaries.find(boundary_id) ==
1790 *   wall_boundaries.find(boundary_id) == wall_boundaries.end(),
1791 *   ExcMessage("You already set the boundary with id " +
1792 *   std::to_string(static_cast<int>(boundary_id)) +
1793 *   " to another type of boundary before now setting " +
1794 *   "it as inflow"));
1795 *   AssertThrow(inflow_function->n_components == dim + 2,
1796 *   ExcMessage("Expected function with dim+2 components"));
1797 *  
1799 *   }
1800 *  
1801 *  
1802 *   template <int dim, int degree, int n_points_1d>
1804 *   const types::boundary_id boundary_id,
1805 *   std::unique_ptr<Function<dim>> outflow_function)
1806 *   {
1807 *   AssertThrow(inflow_boundaries.find(boundary_id) ==
1808 *   inflow_boundaries.end() &&
1809 *   wall_boundaries.find(boundary_id) == wall_boundaries.end(),
1810 *   ExcMessage("You already set the boundary with id " +
1811 *   std::to_string(static_cast<int>(boundary_id)) +
1812 *   " to another type of boundary before now setting " +
1813 *   "it as subsonic outflow"));
1814 *   AssertThrow(outflow_function->n_components == dim + 2,
1815 *   ExcMessage("Expected function with dim+2 components"));
1816 *  
1818 *   }
1819 *  
1820 *  
1821 *   template <int dim, int degree, int n_points_1d>
1823 *   const types::boundary_id boundary_id)
1824 *   {
1825 *   AssertThrow(inflow_boundaries.find(boundary_id) ==
1826 *   inflow_boundaries.end() &&
1827 *   subsonic_outflow_boundaries.find(boundary_id) ==
1829 *   ExcMessage("You already set the boundary with id " +
1830 *   std::to_string(static_cast<int>(boundary_id)) +
1831 *   " to another type of boundary before now setting " +
1832 *   "it as wall boundary"));
1833 *  
1834 *   wall_boundaries.insert(boundary_id);
1835 *   }
1836 *  
1837 *  
1838 *   template <int dim, int degree, int n_points_1d>
1840 *   std::unique_ptr<Function<dim>> body_force)
1841 *   {
1842 *   AssertDimension(body_force->n_components, dim);
1843 *  
1844 *   this->body_force = std::move(body_force);
1845 *   }
1846 *  
1847 *  
1848 *  
1849 * @endcode
1850 *
1851 *
1852 * <a name="Localevaluators"></a>
1853 * <h4>Local evaluators</h4>
1854 *
1855
1856 *
1857 * Now we proceed to the local evaluators for the Euler problem. The
1859 * @ref step_37 "step-37", @ref step_48 "step-48", or @ref step_59 "step-59". The first notable difference is the fact
1860 * that we use an FEEvaluation with a non-standard number of quadrature
1861 * points. Whereas we previously always set the number of quadrature points
1862 * to equal the polynomial degree plus one (ensuring exact integration on
1863 * affine element shapes), we now set the number quadrature points as a
1864 * separate variable (e.g. the polynomial degree plus two or three halves of
1865 * the polynomial degree) to more accurately handle nonlinear terms. Since
1867 * argument and keeps the number of quadrature points in the whole cell in
1870 *
1871
1872 *
1873 * The second difference is due to the fact that we are now evaluating a
1876 * multi-component case. The variant shown here utilizes an FEEvaluation
1877 * object with multiple components embedded into it, specified by the fourth
1878 * template argument `dim + 2` for the components in the Euler system. As a
1879 * consequence, the return type of FEEvaluation::get_value() is not a scalar
1881 * several elements), but a Tensor of `dim+2` components. The functionality
1884 * variant would have been to use several FEEvaluation objects, a scalar one
1885 * for the density, a vector-valued one with `dim` components for the
1886 * momentum, and another scalar evaluator for the energy. To ensure that
1887 * those components point to the correct part of the solution, the
1888 * constructor of FEEvaluation takes three optional integer arguments after
1891 * quadrature point in case there are multiple Quadrature objects (see more
1892 * below), and as a third argument the component within a vector system. As
1893 * we have a single vector for all components, we would go with the third
1894 * argument, and set it to `0` for the density, `1` for the vector-valued
1895 * momentum, and `dim+1` for the energy slot. FEEvaluation then picks the
1896 * appropriate subrange of the solution vector during
1897 * FEEvaluationBase::read_dof_values() and
1899 * FEEvaluation::gather_evaluate() and FEEvaluation::integrate_scatter()
1900 * calls.
1901 *
1902
1903 *
1906 * function (derived from Functions::ConstantFunction), we can precompute
1907 * the value outside the loop over quadrature points and simply use the
1908 * value everywhere. For a more general function, we instead need to call
1910 * expensive because we need to access the memory associated with the
1911 * quadrature point data.
1912 *
1913
1914 *
1916 * all physics for the Euler equations in the separate `euler_flux()`
1917 * function, all we have to do here is to call this function
1918 * given the current solution evaluated at quadrature points, returned by
1919 * `phi.get_value(q)`, and tell the FEEvaluation object to queue the flux
1920 * for testing it by the gradients of the shape functions (which is a Tensor
1921 * of outer `dim+2` components, each holding a tensor of `dim` components
1922 * for the @f$x,y,z@f$ component of the Euler flux). One final thing worth
1923 * mentioning is the order in which we queue the data for testing by the
1924 * value of the test function, `phi.submit_value()`, in case we are given an
1925 * external function: We must do this after calling `phi.get_value(q)`,
1926 * because `get_value()` (reading the solution) and `submit_value()`
1927 * (queuing the value for multiplication by the test function and summation
1928 * over quadrature points) access the same underlying data field. Here it
1930 * there is no mixing between values and gradients. For more complicated
1931 * setups, one has to first copy out e.g. both the value and gradient at a
1932 * quadrature point and then queue results again by
1933 * FEEvaluationBase::submit_value() and FEEvaluationBase::submit_gradient().
1934 *
1935
1936 *
1938 * argument of this function, which is a call-back from MatrixFree::loop().
1939 * The interfaces imposes the present list of arguments, but since we are in
1940 * a member function where the MatrixFree object is already available as the
1941 * `data` variable, we stick with that to avoid confusion.
1942 *
1943 * @code
1944 *   template <int dim, int degree, int n_points_1d>
1945 *   void EulerOperator<dim, degree, n_points_1d>::local_apply_cell(
1946 *   const MatrixFree<dim, Number> &,
1947 *   LinearAlgebra::distributed::Vector<Number> & dst,
1948 *   const LinearAlgebra::distributed::Vector<Number> &src,
1949 *   const std::pair<unsigned int, unsigned int> & cell_range) const
1950 *   {
1952 *  
1955 *   dynamic_cast<Functions::ConstantFunction<dim> *>(body_force.get());
1956 *  
1957 *   if (constant_function)
1960 *  
1961 *   for (unsigned int cell = cell_range.first; cell < cell_range.second; ++cell)
1962 *   {
1963 *   phi.reinit(cell);
1964 *   phi.gather_evaluate(src, EvaluationFlags::values);
1965 *  
1966 *   for (unsigned int q = 0; q < phi.n_q_points; ++q)
1967 *   {
1968 *   const auto w_q = phi.get_value(q);
1969 *   phi.submit_gradient(euler_flux<dim>(w_q), q);
1970 *   if (body_force.get() != nullptr)
1971 *   {
1975 *   *body_force, phi.quadrature_point(q));
1976 *  
1978 *   for (unsigned int d = 0; d < dim; ++d)
1979 *   forcing[d + 1] = w_q[0] * force[d];
1980 *   for (unsigned int d = 0; d < dim; ++d)
1981 *   forcing[dim + 1] += force[d] * w_q[d + 1];
1982 *  
1983 *   phi.submit_value(forcing, q);
1984 *   }
1985 *   }
1986 *  
1987 *   phi.integrate_scatter(((body_force.get() != nullptr) ?
1991 *   dst);
1992 *   }
1993 *   }
1994 *  
1995 *  
1996 *  
1997 * @endcode
1998 *
1999 * The next function concerns the computation of integrals on interior
2000 * faces, where we need evaluators from both cells adjacent to the face. We
2001 * associate the variable `phi_m` with the solution component @f$\mathbf{w}^-@f$
2002 * and the variable `phi_p` with the solution component @f$\mathbf{w}^+@f$. We
2004 * second argument, with `true` for the interior side and `false` for the
2005 * exterior side, with interior and exterior denoting the orientation with
2006 * respect to the normal vector.
2007 *
2008
2009 *
2011 * FEFaceEvaluation::integrate_scatter() combine the access to the vectors
2012 * and the sum factorization parts. This combined operation not only saves a
2014 * use a nodal basis in terms of the Lagrange polynomials in the points of
2015 * the Gauss-Lobatto quadrature formula, only @f$(p+1)^{d-1}@f$ out of the
2016 * @f$(p+1)^d@f$ basis functions evaluate to non-zero on each face. Thus, the
2018 * parts which are multiplied by zero. If we had first read the vector, we
2019 * would have needed to load all data from the vector, as the call in
2022 * values and derivatives, indeed all @f$(p+1)^d@f$ vector entries for each
2023 * component are needed, as the normal derivative is nonzero for all basis
2024 * functions.
2025 *
2026
2027 *
2031 * argument in the list. At the quadrature points, we then go to our
2032 * free-standing function for the numerical flux. It receives the solution
2033 * evaluated at quadrature points from both sides (i.e., @f$\mathbf{w}^-@f$ and
2034 * @f$\mathbf{w}^+@f$), as well as the normal vector onto the minus side. As
2036 * vector from the minus side. We need to switch the sign because the
2037 * boundary term comes with a minus sign in the weak form derived in the
2038 * introduction. The flux is then queued for testing both on the minus sign
2039 * and on the plus sign, with switched sign as the normal vector from the
2040 * plus side is exactly opposed to the one from the minus side.
2041 *
2042 * @code
2045 *   const MatrixFree<dim, Number> &,
2048 *   const std::pair<unsigned int, unsigned int> & face_range) const
2049 *   {
2051 *   true);
2053 *   false);
2054 *  
2055 *   for (unsigned int face = face_range.first; face < face_range.second; ++face)
2056 *   {
2057 *   phi_p.reinit(face);
2058 *   phi_p.gather_evaluate(src, EvaluationFlags::values);
2059 *  
2060 *   phi_m.reinit(face);
2061 *   phi_m.gather_evaluate(src, EvaluationFlags::values);
2062 *  
2063 *   for (unsigned int q = 0; q < phi_m.n_q_points; ++q)
2064 *   {
2065 *   const auto numerical_flux =
2066 *   euler_numerical_flux<dim>(phi_m.get_value(q),
2067 *   phi_p.get_value(q),
2068 *   phi_m.normal_vector(q));
2069 *   phi_m.submit_value(-numerical_flux, q);
2070 *   phi_p.submit_value(numerical_flux, q);
2071 *   }
2072 *  
2073 *   phi_p.integrate_scatter(EvaluationFlags::values, dst);
2074 *   phi_m.integrate_scatter(EvaluationFlags::values, dst);
2075 *   }
2076 *   }
2077 *  
2078 *  
2079 *  
2080 * @endcode
2081 *
2082 * For faces located at the boundary, we need to impose the appropriate
2085 * discussed in the "Results" section below.) The discontinuous Galerkin
2086 * method imposes boundary conditions not as constraints, but only
2088 * <i>exterior</i> quantity @f$\mathbf{w}^+@f$ that is then handed to the
2089 * numerical flux function also used for the interior faces. In essence,
2090 * we "pretend" a state on the outside of the domain in such a way that
2091 * if that were reality, the solution of the PDE would satisfy the boundary
2092 * conditions we want.
2093 *
2094
2095 *
2098 * energy with @f$\rho^+ = \rho^-@f$ and @f$E^+ = E^-@f$. To achieve the no-normal
2099 * flux condition, we set the exterior values to the interior values and
2100 * subtract two times the velocity in wall-normal direction, i.e., in the
2101 * direction of the normal vector.
2102 *
2103
2104 *
2107 * to use @f$\mathbf{w}^+ = -\mathbf{w}^- + 2 \mathbf{w}_\mathrm{D}@f$, the
2109 *
2110
2111 *
2113 * setting @f$\mathbf{w}^+ = \mathbf{w}^-@f$. For the case of subsonic outflow,
2114 * we still need to impose a value for the energy, which we derive from the
2115 * respective function. A special step is needed for the case of
2116 * <i>backflow</i>, i.e., the case where there is a momentum flux into the
2118 * be derived by appropriate energy arguments), we must switch to another
2120 * Yoshihara, Ismail, Wall, "A novel formulation for Neumann inflow
2121 * conditions in biomechanics", Int. J. Numer. Meth. Biomed. Eng., vol. 28
2124 * variables. We do this in a post-processing step, and only for the case
2125 * when we both are at an outflow boundary and the dot product between the
2126 * normal vector and the momentum (or, equivalently, velocity) is
2127 * negative. As we work on data of several quadrature points at once for
2129 * entries of the SIMD array.
2130 *
2131
2132 *
2134 * of boundaries at the level of quadrature points. Of course, we could also
2135 * have moved the decision out of the quadrature point loop and treat entire
2136 * faces as of the same kind, which avoids some map/set lookups in the inner
2137 * loop over quadrature points. However, the loss of efficiency is hardly
2139 * `else` clause will catch the case when some part of the boundary was not
2140 * assigned any boundary condition via `EulerOperator::set_..._boundary(...)`.
2141 *
2142 * @code
2143 *   template <int dim, int degree, int n_points_1d>
2145 *   const MatrixFree<dim, Number> &,
2148 *   const std::pair<unsigned int, unsigned int> & face_range) const
2149 *   {
2151 *  
2152 *   for (unsigned int face = face_range.first; face < face_range.second; ++face)
2153 *   {
2154 *   phi.reinit(face);
2155 *   phi.gather_evaluate(src, EvaluationFlags::values);
2156 *  
2157 *   for (unsigned int q = 0; q < phi.n_q_points; ++q)
2158 *   {
2159 *   const auto w_m = phi.get_value(q);
2160 *   const auto normal = phi.normal_vector(q);
2161 *  
2162 *   auto rho_u_dot_n = w_m[1] * normal[0];
2163 *   for (unsigned int d = 1; d < dim; ++d)
2164 *   rho_u_dot_n += w_m[1 + d] * normal[d];
2165 *  
2166 *   bool at_outflow = false;
2167 *  
2169 *   const auto boundary_id = data.get_boundary_id(face);
2170 *   if (wall_boundaries.find(boundary_id) != wall_boundaries.end())
2171 *   {
2172 *   w_p[0] = w_m[0];
2173 *   for (unsigned int d = 0; d < dim; ++d)
2174 *   w_p[d + 1] = w_m[d + 1] - 2. * rho_u_dot_n * normal[d];
2175 *   w_p[dim + 1] = w_m[dim + 1];
2176 *   }
2177 *   else if (inflow_boundaries.find(boundary_id) !=
2179 *   w_p =
2180 *   evaluate_function(*inflow_boundaries.find(boundary_id)->second,
2181 *   phi.quadrature_point(q));
2182 *   else if (subsonic_outflow_boundaries.find(boundary_id) !=
2184 *   {
2185 *   w_p = w_m;
2186 *   w_p[dim + 1] = evaluate_function(
2187 *   *subsonic_outflow_boundaries.find(boundary_id)->second,
2188 *   phi.quadrature_point(q),
2189 *   dim + 1);
2190 *   at_outflow = true;
2191 *   }
2192 *   else
2193 *   AssertThrow(false,
2194 *   ExcMessage("Unknown boundary id, did "
2195 *   "you set a boundary condition for "
2196 *   "this part of the domain boundary?"));
2197 *  
2198 *   auto flux = euler_numerical_flux<dim>(w_m, w_p, normal);
2199 *  
2200 *   if (at_outflow)
2201 *   for (unsigned int v = 0; v < VectorizedArray<Number>::size(); ++v)
2202 *   {
2203 *   if (rho_u_dot_n[v] < -1e-12)
2204 *   for (unsigned int d = 0; d < dim; ++d)
2205 *   flux[d + 1][v] = 0.;
2206 *   }
2207 *  
2208 *   phi.submit_value(-flux, q);
2209 *   }
2210 *  
2211 *   phi.integrate_scatter(EvaluationFlags::values, dst);
2212 *   }
2213 *   }
2214 *  
2215 *  
2216 *  
2217 * @endcode
2218 *
2219 * The next function implements the inverse mass matrix operation. The
2225 * factors. These represent a change of basis from the specified basis (in
2226 * this case, the Lagrange basis in the points of the Gauss--Lobatto
2227 * quadrature formula) to the Lagrange basis in the points of the Gauss
2228 * quadrature formula. In the latter basis, we can apply the inverse of the
2229 * point-wise `JxW` factor, i.e., the quadrature weight times the
2230 * determinant of the Jacobian of the mapping from reference to real
2231 * coordinates. Once this is done, the basis is changed back to the nodal
2232 * Gauss-Lobatto basis again. All of these operations are done by the
2233 * `apply()` function below. What we need to provide is the local fields to
2234 * operate on (which we extract from the global vector by an FEEvaluation
2235 * object) and write the results back to the destination vector of the mass
2236 * matrix operation.
2237 *
2238
2239 *
2240 * One thing to note is that we added two integer arguments (that are
2243 * only have one) and the second being 1 to make the quadrature formula
2244 * selection. As we use the quadrature formula 0 for the over-integration of
2245 * nonlinear terms, we use the formula 1 with the default @f$p+1@f$ (or
2246 * `fe_degree+1` in terms of the variable name) points for the mass
2249 *
2250 * @code
2251 *   template <int dim, int degree, int n_points_1d>
2253 *   const MatrixFree<dim, Number> &,
2256 *   const std::pair<unsigned int, unsigned int> & cell_range) const
2257 *   {
2260 *   inverse(phi);
2261 *  
2262 *   for (unsigned int cell = cell_range.first; cell < cell_range.second; ++cell)
2263 *   {
2264 *   phi.reinit(cell);
2265 *   phi.read_dof_values(src);
2266 *  
2267 *   inverse.apply(phi.begin_dof_values(), phi.begin_dof_values());
2268 *  
2269 *   phi.set_dof_values(dst);
2270 *   }
2271 *   }
2272 *  
2273 *  
2274 *  
2275 * @endcode
2276 *
2277 *
2278 * <a name="Theapplyandrelatedfunctions"></a>
2279 * <h4>The apply() and related functions</h4>
2280 *
2281
2282 *
2283 * We now come to the function which implements the evaluation of the Euler
2284 * operator as a whole, i.e., @f$\mathcal M^{-1} \mathcal L(t, \mathbf{w})@f$,
2287 * the time in the functions we have associated with the various parts of
2288 * the boundary, in order to be consistent with the equation in case the
2289 * boundary data is time-dependent. Then, we call MatrixFree::loop() to
2290 * perform the cell and face integrals, including the necessary ghost data
2291 * exchange in the `src` vector. The seventh argument to the function,
2292 * `true`, specifies that we want to zero the `dst` vector as part of the
2293 * loop, before we start accumulating integrals into it. This variant is
2295 * zeroing operation is done on a subrange of the vector in parts that are
2297 * for caching, saving one roundtrip of vector data to main memory and
2300 * one faces, typical of first-order hyperbolic problems, and since we have
2301 * a nodal basis with nodes at the reference element surface, we only need
2302 * to exchange those parts. This again saves precious memory bandwidth.
2303 *
2304
2305 *
2306 * Once the spatial operator @f$\mathcal L@f$ is applied, we need to make a
2309 * is cheaper than the full loop as access only goes to the degrees of
2311 * locally owned degrees of freedom for DG discretizations. Thus, no ghost
2313 *
2314
2315 *
2316 * Around all these functions, we put timer scopes to record the
2318 * parts.
2319 *
2320 * @code
2321 *   template <int dim, int degree, int n_points_1d>
2322 *   void EulerOperator<dim, degree, n_points_1d>::apply(
2323 *   const double current_time,
2324 *   const LinearAlgebra::distributed::Vector<Number> &src,
2325 *   LinearAlgebra::distributed::Vector<Number> & dst) const
2326 *   {
2327 *   {
2328 *   TimerOutput::Scope t(timer, "apply - integrals");
2329 *  
2330 *   for (auto &i : inflow_boundaries)
2331 *   i.second->set_time(current_time);
2332 *   for (auto &i : subsonic_outflow_boundaries)
2333 *   i.second->set_time(current_time);
2334 *  
2335 *   data.loop(&EulerOperator::local_apply_cell,
2338 *   this,
2339 *   dst,
2340 *   src,
2341 *   true,
2344 *   }
2345 *  
2346 *   {
2347 *   TimerOutput::Scope t(timer, "apply - inverse mass");
2348 *  
2350 *   this,
2351 *   dst,
2352 *   dst);
2353 *   }
2354 *   }
2355 *  
2356 *  
2357 *  
2358 * @endcode
2359 *
2360 * Let us move to the function that does an entire stage of a Runge--Kutta
2361 * update. It calls EulerOperator::apply() followed by some updates
2362 * to the vectors, namely `next_ri = solution + factor_ai * k_i` and
2363 * `solution += factor_solution * k_i`. Rather than performing these
2365 * strategy that is faster on cache-based architectures. As the memory
2366 * consumed by the vectors is often much larger than what fits into caches,
2368 * can be improved by loop fusion, i.e., performing both the updates to
2369 * `next_ki` and `solution` within a single sweep. In that case, we would
2370 * read the two vectors `rhs` and `solution` and write into `next_ki` and
2371 * `solution`, compared to at least 4 reads and two writes in the baseline
2372 * case. Here, we go one step further and perform the loop immediately when
2373 * the mass matrix inversion has finished on a part of the
2374 * vector. MatrixFree::cell_loop() provides a mechanism to attach an
2375 * `std::function` both before the loop over cells first touches a vector
2376 * entry (which we do not use here, but is e.g. used for zeroing the vector)
2377 * and a second `std::function` to be called after the loop last touches
2378 * an entry. The callback is in form of a range over the given vector (in
2379 * terms of the local index numbering in the MPI universe) that can be
2380 * addressed by `local_element()` functions.
2381 *
2382
2383 *
2384 * For this second callback, we create a lambda that works on a range and
2388 * ensure that there is no overlap, also called aliasing, between the index
2389 * ranges of the pointers we use inside the loops). It turns out that at the
2390 * time of this writing, GCC 7.2 fails to compile an OpenMP pragma inside a
2391 * lambda function, so we comment this pragma out below. If your compiler is
2393 *
2394
2395 *
2396 * Note that we select a different code path for the last
2398 * vector. This strategy gives a considerable speedup. Whereas the inverse
2399 * mass matrix and vector updates take more than 60% of the computational
2400 * time with default vector updates on a 40-core machine, the percentage is
2401 * around 35% with the more optimized variant. In other words, this is a
2402 * speedup of around a third.
2403 *
2404 * @code
2405 *   template <int dim, int degree, int n_points_1d>
2406 *   void EulerOperator<dim, degree, n_points_1d>::perform_stage(
2407 *   const Number current_time,
2408 *   const Number factor_solution,
2409 *   const Number factor_ai,
2410 *   const LinearAlgebra::distributed::Vector<Number> &current_ri,
2411 *   LinearAlgebra::distributed::Vector<Number> & vec_ki,
2412 *   LinearAlgebra::distributed::Vector<Number> & solution,
2413 *   LinearAlgebra::distributed::Vector<Number> & next_ri) const
2414 *   {
2415 *   {
2416 *   TimerOutput::Scope t(timer, "rk_stage - integrals L_h");
2417 *  
2418 *   for (auto &i : inflow_boundaries)
2419 *   i.second->set_time(current_time);
2420 *   for (auto &i : subsonic_outflow_boundaries)
2421 *   i.second->set_time(current_time);
2422 *  
2423 *   data.loop(&EulerOperator::local_apply_cell,
2426 *   this,
2427 *   vec_ki,
2428 *   current_ri,
2429 *   true,
2432 *   }
2433 *  
2434 *  
2435 *   {
2436 *   TimerOutput::Scope t(timer, "rk_stage - inv mass + vec upd");
2437 *   data.cell_loop(
2439 *   this,
2440 *   next_ri,
2441 *   vec_ki,
2442 *   std::function<void(const unsigned int, const unsigned int)>(),
2443 *   [&](const unsigned int start_range, const unsigned int end_range) {
2444 *   const Number ai = factor_ai;
2445 *   const Number bi = factor_solution;
2446 *   if (ai == Number())
2447 *   {
2448 *   /* DEAL_II_OPENMP_SIMD_PRAGMA */
2449 *   for (unsigned int i = start_range; i < end_range; ++i)
2450 *   {
2451 *   const Number k_i = next_ri.local_element(i);
2452 *   const Number sol_i = solution.local_element(i);
2453 *   solution.local_element(i) = sol_i + bi * k_i;
2454 *   }
2455 *   }
2456 *   else
2457 *   {
2458 *   /* DEAL_II_OPENMP_SIMD_PRAGMA */
2459 *   for (unsigned int i = start_range; i < end_range; ++i)
2460 *   {
2461 *   const Number k_i = next_ri.local_element(i);
2462 *   const Number sol_i = solution.local_element(i);
2463 *   solution.local_element(i) = sol_i + bi * k_i;
2464 *   next_ri.local_element(i) = sol_i + ai * k_i;
2465 *   }
2466 *   }
2467 *   });
2468 *   }
2469 *   }
2470 *  
2471 *  
2472 *  
2473 * @endcode
2474 *
2476 * advancing the solution by one time step, let us now move to functions
2478 * functions that compute projections, evaluate errors, and compute the speed
2479 * of information transport on a cell.
2480 *
2481
2482 *
2485 * elements where there is no need to set up and solve a linear system, as
2486 * each element has independent basis functions. The reason why we show the
2487 * code here, besides a small speedup of this non-critical operation, is that
2490 *
2491
2492 *
2494 * shape functions evaluated at quadrature points by @f$S@f$, the projection on
2495 * cell @f$K@f$ is an operation of the form @f$\underbrace{S J^K S^\mathrm
2496 * T}_{\mathcal M^K} \mathbf{w}^K = S J^K
2499 * weight (JxW), @f$\mathcal M^K@f$ is the cell-wise mass matrix, and
2501 * field to be projected onto quadrature points. (In reality the matrix @f$S@f$
2504 * @f$\mathbf{w}^K = \left(S J^K S^\mathrm T\right)^{-1} S J^K
2505 * \tilde{\mathbf{w}}(\mathbf{x}_q)_{q=1:n_q} = S^{-\mathrm T}
2506 * \left(J^K\right)^{-1} S^{-1} S J^K
2507 * \tilde{\mathbf{w}}(\mathbf{x}_q)_{q=1:n_q}@f$. Now, the term @f$S^{-1} S@f$ and
2508 * then @f$\left(J^K\right)^{-1} J^K@f$ cancel, resulting in the final
2509 * expression @f$\mathbf{w}^K = S^{-\mathrm T}
2510 * \tilde{\mathbf{w}}(\mathbf{x}_q)_{q=1:n_q}@f$. This operation is
2511 * implemented by
2514 * the multiplication by @f$S^{-\mathrm T}@f$, a basis change from the
2515 * nodal basis in the points of the Gaussian quadrature to the given finite
2516 * element basis. Note that we call FEEvaluation::set_dof_values() to write
2517 * the result into the vector, overwriting previous content, rather than
2518 * accumulating the results as typical in integration tasks -- we can do
2520 * cell for discontinuous Galerkin discretizations.
2521 *
2522 * @code
2523 *   template <int dim, int degree, int n_points_1d>
2524 *   void EulerOperator<dim, degree, n_points_1d>::project(
2525 *   const Function<dim> & function,
2526 *   LinearAlgebra::distributed::Vector<Number> &solution) const
2527 *   {
2530 *   inverse(phi);
2531 *   solution.zero_out_ghost_values();
2532 *   for (unsigned int cell = 0; cell < data.n_cell_batches(); ++cell)
2533 *   {
2534 *   phi.reinit(cell);
2535 *   for (unsigned int q = 0; q < phi.n_q_points; ++q)
2536 *   phi.submit_dof_value(evaluate_function(function,
2537 *   phi.quadrature_point(q)),
2538 *   q);
2539 *   inverse.transform_from_q_points_to_basis(dim + 2,
2540 *   phi.begin_dof_values(),
2541 *   phi.begin_dof_values());
2542 *   phi.set_dof_values(solution);
2543 *   }
2544 *   }
2545 *  
2546 *  
2547 *  
2548 * @endcode
2549 *
2554 * <i>lane</i> of the vectorized array holds data from a different cell. By
2555 * the loop over all cell batches that are owned by the current MPI process,
2556 * we could then fill a VectorizedArray of results; to obtain a global sum,
2557 * we would need to further go on and sum across the entries in the SIMD
2558 * array. However, such a procedure is not stable as the SIMD array could in
2559 * fact not hold valid data for all its lanes. This happens when the number
2560 * of locally owned cells is not a multiple of the SIMD width. To avoid
2563 * setting the empty lanes to zero (and thus, not contribute to a sum), the
2564 * situation is more complicated than that: What if we were to compute a
2570 * number of lanes with valid data. It equals VectorizedArray::size() on
2571 * most cells, but can be less on the last cell batch if the number of cells
2572 * has a remainder compared to the SIMD width.
2573 *
2574 * @code
2575 *   template <int dim, int degree, int n_points_1d>
2576 *   std::array<double, 3> EulerOperator<dim, degree, n_points_1d>::compute_errors(
2577 *   const Function<dim> & function,
2578 *   const LinearAlgebra::distributed::Vector<Number> &solution) const
2579 *   {
2580 *   TimerOutput::Scope t(timer, "compute errors");
2581 *   double errors_squared[3] = {};
2583 *  
2584 *   for (unsigned int cell = 0; cell < data.n_cell_batches(); ++cell)
2585 *   {
2586 *   phi.reinit(cell);
2587 *   phi.gather_evaluate(solution, EvaluationFlags::values);
2589 *   for (unsigned int q = 0; q < phi.n_q_points; ++q)
2590 *   {
2591 *   const auto error =
2592 *   evaluate_function(function, phi.quadrature_point(q)) -
2593 *   phi.get_value(q);
2594 *   const auto JxW = phi.JxW(q);
2595 *  
2596 *   local_errors_squared[0] += error[0] * error[0] * JxW;
2597 *   for (unsigned int d = 0; d < dim; ++d)
2598 *   local_errors_squared[1] += (error[d + 1] * error[d + 1]) * JxW;
2599 *   local_errors_squared[2] += (error[dim + 1] * error[dim + 1]) * JxW;
2600 *   }
2601 *   for (unsigned int v = 0; v < data.n_active_entries_per_cell_batch(cell);
2602 *   ++v)
2603 *   for (unsigned int d = 0; d < 3; ++d)
2605 *   }
2606 *  
2608 *  
2609 *   std::array<double, 3> errors;
2610 *   for (unsigned int d = 0; d < 3; ++d)
2611 *   errors[d] = std::sqrt(errors_squared[d]);
2612 *  
2613 *   return errors;
2614 *   }
2615 *  
2616 *  
2617 *  
2618 * @endcode
2619 *
2620 * This final function of the EulerOperator class is used to estimate the
2622 * the time step size in the explicit time integrator. In the Euler
2627 *
2628
2629 *
2630 * In the formula for the time step size, we are interested not by
2635 * @f$\|J^{-\mathrm T} \mathbf{u}\|_\infty@f$, where @f$J@f$ is the Jacobian of the
2640 * in the variable `convective_limit` in the code below.
2641 *
2642
2643 *
2644 * The sound propagation is isotropic, so we need to take mesh sizes in any
2645 * direction into account. The appropriate mesh size scaling is then given
2647 * singular value of @f$J^{-1}@f$. Note that one could approximate this quantity
2648 * by the minimal distance between vertices of a cell when ignoring curved
2650 * strategy would be some LAPACK function. Since all we need here is an
2651 * estimate, we can avoid the hassle of decomposing a tensor of
2653 * eigenvalue function without vectorization, and instead use a few
2655 * @f$J^{-1}J^{-\mathrm T}@f$. The speed of convergence of this method depends
2656 * on the ratio of the largest to the next largest eigenvalue and the
2658 * we get slow convergence on cells close to a cube shape where all
2664 *
2665 * @code
2666 *   template <int dim, int degree, int n_points_1d>
2668 *   const LinearAlgebra::distributed::Vector<Number> &solution) const
2669 *   {
2670 *   TimerOutput::Scope t(timer, "compute transport speed");
2671 *   Number max_transport = 0;
2673 *  
2674 *   for (unsigned int cell = 0; cell < data.n_cell_batches(); ++cell)
2675 *   {
2676 *   phi.reinit(cell);
2677 *   phi.gather_evaluate(solution, EvaluationFlags::values);
2679 *   for (unsigned int q = 0; q < phi.n_q_points; ++q)
2680 *   {
2681 *   const auto solution = phi.get_value(q);
2682 *   const auto velocity = euler_velocity<dim>(solution);
2683 *   const auto pressure = euler_pressure<dim>(solution);
2684 *  
2685 *   const auto inverse_jacobian = phi.inverse_jacobian(q);
2686 *   const auto convective_speed = inverse_jacobian * velocity;
2688 *   for (unsigned int d = 0; d < dim; ++d)
2691 *  
2692 *   const auto speed_of_sound =
2693 *   std::sqrt(gamma * pressure * (1. / solution[0]));
2694 *  
2696 *   for (unsigned int d = 0; d < dim; ++d)
2697 *   eigenvector[d] = 1.;
2698 *   for (unsigned int i = 0; i < 5; ++i)
2699 *   {
2700 *   eigenvector = transpose(inverse_jacobian) *
2701 *   (inverse_jacobian * eigenvector);
2703 *   for (unsigned int d = 0; d < dim; ++d)
2707 *   }
2708 *   const auto jac_times_ev = inverse_jacobian * eigenvector;
2709 *   const auto max_eigenvalue = std::sqrt(
2711 *   local_max =
2713 *   max_eigenvalue * speed_of_sound + convective_limit);
2714 *   }
2715 *  
2716 * @endcode
2717 *
2719 * speed only on the valid cells of a cell batch.
2720 *
2721 * @code
2722 *   for (unsigned int v = 0; v < data.n_active_entries_per_cell_batch(cell);
2723 *   ++v)
2724 *   for (unsigned int d = 0; d < 3; ++d)
2726 *   }
2727 *  
2729 *  
2730 *   return max_transport;
2731 *   }
2732 *  
2733 *  
2734 *  
2735 * @endcode
2736 *
2737 *
2738 * <a name="TheEulerProblemclass"></a>
2739 * <h3>The EulerProblem class</h3>
2740 *
2741
2742 *
2746 *
2747
2748 *
2749 * The member variables are a triangulation, a finite element, a mapping (to
2750 * create high-order curved surfaces, see e.g. @ref step_10 "step-10"), and a DoFHandler to
2753 * terms of integrals, and some parameters for time integration like the
2754 * current time or the time step size.
2755 *
2756
2757 *
2759 * information to the output file, in similarity to what was done in
2760 * @ref step_33 "step-33". The interface of the DataPostprocessor class is intuitive,
2763 * that we only enable in 2d where it makes sense), and the names of what
2767 * the output.
2768 *
2769 * @code
2771 *   class EulerProblem
2772 *   {
2773 *   public:
2774 *   EulerProblem();
2775 *  
2776 *   void run();
2777 *  
2778 *   private:
2779 *   void make_grid_and_dofs();
2780 *  
2781 *   void output_results(const unsigned int result_number);
2782 *  
2784 *  
2786 *  
2789 *   #else
2791 *   #endif
2792 *  
2793 *   FESystem<dim> fe;
2794 *   MappingQ<dim> mapping;
2795 *   DoFHandler<dim> dof_handler;
2796 *  
2797 *   TimerOutput timer;
2798 *  
2800 *  
2801 *   double time, time_step;
2802 *  
2803 *   class Postprocessor : public DataPostprocessor<dim>
2804 *   {
2805 *   public:
2806 *   Postprocessor();
2807 *  
2808 *   virtual void evaluate_vector_field(
2810 *   std::vector<Vector<double>> &computed_quantities) const override;
2811 *  
2812 *   virtual std::vector<std::string> get_names() const override;
2813 *  
2814 *   virtual std::vector<
2816 *   get_data_component_interpretation() const override;
2817 *  
2818 *   virtual UpdateFlags get_needed_update_flags() const override;
2819 *  
2820 *   private:
2821 *   const bool do_schlieren_plot;
2822 *   };
2823 *   };
2824 *  
2825 *  
2826 *  
2827 *   template <int dim>
2829 *   : do_schlieren_plot(dim == 2)
2830 *   {}
2831 *  
2832 *  
2833 *  
2834 * @endcode
2835 *
2838 * `2*dim+5` are derived from the sizes of the names we specify in the
2839 * get_names() function below). Then we loop over all evaluation points and
2840 * fill the respective information: First we fill the primal solution
2844 * showing @f$s = |\nabla \rho|^2@f$ in case it is enabled. (See @ref step_69 "step-69" for
2845 * another example where we create a Schlieren plot.)
2846 *
2847 * @code
2848 *   template <int dim>
2851 *   std::vector<Vector<double>> & computed_quantities) const
2852 *   {
2853 *   const unsigned int n_evaluation_points = inputs.solution_values.size();
2854 *  
2855 *   if (do_schlieren_plot == true)
2856 *   Assert(inputs.solution_gradients.size() == n_evaluation_points,
2857 *   ExcInternalError());
2858 *  
2860 *   ExcInternalError());
2861 *   Assert(inputs.solution_values[0].size() == dim + 2, ExcInternalError());
2862 *   Assert(computed_quantities[0].size() ==
2863 *   dim + 2 + (do_schlieren_plot == true ? 1 : 0),
2864 *   ExcInternalError());
2865 *  
2866 *   for (unsigned int p = 0; p < n_evaluation_points; ++p)
2867 *   {
2868 *   Tensor<1, dim + 2> solution;
2869 *   for (unsigned int d = 0; d < dim + 2; ++d)
2870 *   solution[d] = inputs.solution_values[p](d);
2871 *  
2872 *   const double density = solution[0];
2873 *   const Tensor<1, dim> velocity = euler_velocity<dim>(solution);
2874 *   const double pressure = euler_pressure<dim>(solution);
2875 *  
2876 *   for (unsigned int d = 0; d < dim; ++d)
2877 *   computed_quantities[p](d) = velocity[d];
2878 *   computed_quantities[p](dim) = pressure;
2879 *   computed_quantities[p](dim + 1) = std::sqrt(gamma * pressure / density);
2880 *  
2881 *   if (do_schlieren_plot == true)
2882 *   computed_quantities[p](dim + 2) =
2883 *   inputs.solution_gradients[p][0] * inputs.solution_gradients[p][0];
2884 *   }
2885 *   }
2886 *  
2887 *  
2888 *  
2889 *   template <int dim>
2890 *   std::vector<std::string> EulerProblem<dim>::Postprocessor::get_names() const
2891 *   {
2892 *   std::vector<std::string> names;
2893 *   for (unsigned int d = 0; d < dim; ++d)
2894 *   names.emplace_back("velocity");
2895 *   names.emplace_back("pressure");
2896 *   names.emplace_back("speed_of_sound");
2897 *  
2898 *   if (do_schlieren_plot == true)
2899 *   names.emplace_back("schlieren_plot");
2900 *  
2901 *   return names;
2902 *   }
2903 *  
2904 *  
2905 *  
2906 * @endcode
2907 *
2909 * pressure, speed of sound, and the Schlieren plot, and vectors for the
2911 *
2912 * @code
2913 *   template <int dim>
2914 *   std::vector<DataComponentInterpretation::DataComponentInterpretation>
2916 *   {
2917 *   std::vector<DataComponentInterpretation::DataComponentInterpretation>
2919 *   for (unsigned int d = 0; d < dim; ++d)
2920 *   interpretation.push_back(
2924 *  
2925 *   if (do_schlieren_plot == true)
2926 *   interpretation.push_back(
2928 *  
2929 *   return interpretation;
2930 *   }
2931 *  
2932 *  
2933 *  
2934 * @endcode
2935 *
2938 * gradient.
2939 *
2940 * @code
2941 *   template <int dim>
2943 *   {
2944 *   if (do_schlieren_plot == true)
2946 *   else
2947 *   return update_values;
2948 *   }
2949 *  
2950 *  
2951 *  
2952 * @endcode
2953 *
2954 * The constructor for this class is unsurprising: We set up a parallel
2955 * triangulation based on the `MPI_COMM_WORLD` communicator, a vector finite
2956 * element with `dim+2` components for density, momentum, and energy, a
2957 * high-order mapping of the same degree as the underlying finite element,
2958 * and initialize the time and time step to zero.
2959 *
2960 * @code
2961 *   template <int dim>
2963 *   : pcout(std::cout, Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) == 0)
2965 *   , triangulation(MPI_COMM_WORLD)
2966 *   #endif
2967 *   , fe(FE_DGQ<dim>(fe_degree), dim + 2)
2968 *   , mapping(fe_degree)
2969 *   , dof_handler(triangulation)
2970 *   , timer(pcout, TimerOutput::never, TimerOutput::wall_times)
2971 *   , euler_operator(timer)
2972 *   , time(0)
2973 *   , time_step(0)
2974 *   {}
2975 *  
2976 *  
2977 *  
2978 * @endcode
2979 *
2981 * global variable `testcase`: For the analytical variant (`testcase==0`),
2982 * the domain is @f$(0, 10) \times (-5, 5)@f$, with Dirichlet boundary
2987 * part at the left of the channel is given the inflow type, for which we
2989 * the right. For the boundary around the cylinder (boundary id equal to 2)
2990 * as well as the channel walls (boundary id equal to 3) we use the wall
2991 * boundary type, which is no-normal flow. Furthermore, for the 3d cylinder
2992 * we also add a gravity force in vertical direction. Having the base mesh
2993 * in place (including the manifolds set by
2995 * specified number of global refinements, create the unknown numbering from
2998 *
2999 * @code
3000 *   template <int dim>
3002 *   {
3003 *   switch (testcase)
3004 *   {
3005 *   case 0:
3006 *   {
3008 *   for (unsigned int d = 1; d < dim; ++d)
3009 *   lower_left[d] = -5;
3010 *  
3012 *   upper_right[0] = 10;
3013 *   for (unsigned int d = 1; d < dim; ++d)
3014 *   upper_right[d] = 5;
3015 *  
3017 *   lower_left,
3018 *   upper_right);
3019 *   triangulation.refine_global(2);
3020 *  
3021 *   euler_operator.set_inflow_boundary(
3022 *   0, std::make_unique<ExactSolution<dim>>(0));
3023 *  
3024 *   break;
3025 *   }
3026 *  
3027 *   case 1:
3028 *   {
3030 *   triangulation, 0.03, 1, 0, true);
3031 *  
3032 *   euler_operator.set_inflow_boundary(
3033 *   0, std::make_unique<ExactSolution<dim>>(0));
3034 *   euler_operator.set_subsonic_outflow_boundary(
3035 *   1, std::make_unique<ExactSolution<dim>>(0));
3036 *  
3037 *   euler_operator.set_wall_boundary(2);
3038 *   euler_operator.set_wall_boundary(3);
3039 *  
3040 *   if (dim == 3)
3041 *   euler_operator.set_body_force(
3042 *   std::make_unique<Functions::ConstantFunction<dim>>(
3043 *   std::vector<double>({0., 0., -0.2})));
3044 *  
3045 *   break;
3046 *   }
3047 *  
3048 *   default:
3049 *   Assert(false, ExcNotImplemented());
3050 *   }
3051 *  
3052 *   triangulation.refine_global(n_global_refinements);
3053 *  
3054 *   dof_handler.distribute_dofs(fe);
3055 *  
3056 *   euler_operator.reinit(mapping, dof_handler);
3057 *   euler_operator.initialize_vector(solution);
3058 *  
3059 * @endcode
3060 *
3062 * often end up with quite large numbers of cells or degrees of freedom, we
3064 * digits. This can be done via "locales", although the way this works is
3065 * not particularly intuitive. @ref step_32 "step-32" explains this in slightly more
3066 * detail.
3067 *
3068 * @code
3069 *   std::locale s = pcout.get_stream().getloc();
3070 *   pcout.get_stream().imbue(std::locale(""));
3071 *   pcout << "Number of degrees of freedom: " << dof_handler.n_dofs()
3072 *   << " ( = " << (dim + 2) << " [vars] x "
3073 *   << triangulation.n_global_active_cells() << " [cells] x "
3074 *   << Utilities::pow(fe_degree + 1, dim) << " [dofs/cell/var] )"
3075 *   << std::endl;
3076 *   pcout.get_stream().imbue(s);
3077 *   }
3078 *  
3079 *  
3080 *  
3081 * @endcode
3082 *
3083 * For output, we first let the Euler operator compute the errors of the
3084 * numerical results. More precisely, we compute the error against the
3085 * analytical result for the analytical solution case, whereas we compute
3087 * energy and constant velocity in @f$x@f$ direction for the second test case.
3088 *
3089
3090 *
3091 * The next step is to create output. This is similar to what is done in
3092 * @ref step_33 "step-33": We let the postprocessor defined above control most of the
3093 * output, except for the primal field that we write directly. For the
3094 * analytical solution test case, we also perform another projection of the
3095 * analytical solution and print the difference between that field and the
3097 * build the patches for output. Similarly to @ref step_65 "step-65", we create a
3098 * high-order VTK output by setting the appropriate flag, which enables us
3099 * to visualize fields of high polynomial degrees. Finally, we call the
3101 * to the given file name. This function uses special MPI parallel write
3102 * facilities, which are typically more optimized for parallel file systems
3103 * than the standard library's `std::ofstream` variants used in most other
3104 * tutorial programs. A particularly nice feature of the
3105 * `write_vtu_in_parallel()` function is the fact that it can combine output
3106 * from all MPI ranks into a single file, making it unnecessary to have a
3107 * central record of all such files (namely, the "pvtu" file).
3108 *
3109
3110 *
3111 * For parallel programs, it is often instructive to look at the partitioning
3112 * of cells among processors. To this end, one can pass a vector of numbers
3113 * to DataOut::add_data_vector() that contains as many entries as the
3114 * current processor has active cells; these numbers should then be the
3115 * rank of the processor that owns each of these cells. Such a vector
3116 * could, for example, be obtained from
3117 * GridTools::get_subdomain_association(). On the other hand, on each MPI
3118 * process, DataOut will only read those entries that correspond to locally
3119 * owned cells, and these of course all have the same value: namely, the rank
3120 * of the current process. What is in the remaining entries of the vector
3121 * doesn't actually matter, and so we can just get away with a cheap trick: We
3122 * just fill *all* values of the vector we give to DataOut::add_data_vector()
3123 * with the rank of the current MPI process. The key is that on each process,
3124 * only the entries corresponding to the locally owned cells will be read,
3125 * ignoring the (wrong) values in other entries. The fact that every process
3126 * submits a vector in which the correct subset of entries is correct is all
3127 * that is necessary.
3128 *
3129
3130 *
3131 * @note As of 2023, Visit 3.3.3 can still not deal with higher-order cells.
3132 * Rather, it simply reports that there is no data to show. To view the
3133 * results of this program with Visit, you will want to comment out the
3134 * line that sets `flags.write_higher_order_cells = true;`. On the other
3136 * just fine.
3137 *
3138 * @code
3139 *   template <int dim>
3140 *   void EulerProblem<dim>::output_results(const unsigned int result_number)
3141 *   {
3142 *   const std::array<double, 3> errors =
3143 *   euler_operator.compute_errors(ExactSolution<dim>(time), solution);
3144 *   const std::string quantity_name = testcase == 0 ? "error" : "norm";
3145 *  
3146 *   pcout << "Time:" << std::setw(8) << std::setprecision(3) << time
3147 *   << ", dt: " << std::setw(8) << std::setprecision(2) << time_step
3148 *   << ", " << quantity_name << " rho: " << std::setprecision(4)
3149 *   << std::setw(10) << errors[0] << ", rho * u: " << std::setprecision(4)
3150 *   << std::setw(10) << errors[1] << ", energy:" << std::setprecision(4)
3151 *   << std::setw(10) << errors[2] << std::endl;
3152 *  
3153 *   {
3154 *   TimerOutput::Scope t(timer, "output");
3155 *  
3156 *   Postprocessor postprocessor;
3157 *   DataOut<dim> data_out;
3158 *  
3159 *   DataOutBase::VtkFlags flags;
3160 *   flags.write_higher_order_cells = true;
3161 *   data_out.set_flags(flags);
3162 *  
3163 *   data_out.attach_dof_handler(dof_handler);
3164 *   {
3165 *   std::vector<std::string> names;
3166 *   names.emplace_back("density");
3167 *   for (unsigned int d = 0; d < dim; ++d)
3168 *   names.emplace_back("momentum");
3169 *   names.emplace_back("energy");
3170 *  
3171 *   std::vector<DataComponentInterpretation::DataComponentInterpretation>
3173 *   interpretation.push_back(
3175 *   for (unsigned int d = 0; d < dim; ++d)
3176 *   interpretation.push_back(
3178 *   interpretation.push_back(
3180 *  
3181 *   data_out.add_data_vector(dof_handler, solution, names, interpretation);
3182 *   }
3183 *   data_out.add_data_vector(solution, postprocessor);
3184 *  
3186 *   if (testcase == 0 && dim == 2)
3187 *   {
3188 *   reference.reinit(solution);
3189 *   euler_operator.project(ExactSolution<dim>(time), reference);
3190 *   reference.sadd(-1., 1, solution);
3191 *   std::vector<std::string> names;
3192 *   names.emplace_back("error_density");
3193 *   for (unsigned int d = 0; d < dim; ++d)
3194 *   names.emplace_back("error_momentum");
3195 *   names.emplace_back("error_energy");
3196 *  
3197 *   std::vector<DataComponentInterpretation::DataComponentInterpretation>
3199 *   interpretation.push_back(
3201 *   for (unsigned int d = 0; d < dim; ++d)
3202 *   interpretation.push_back(
3204 *   interpretation.push_back(
3206 *  
3207 *   data_out.add_data_vector(dof_handler,
3208 *   reference,
3209 *   names,
3210 *   interpretation);
3211 *   }
3212 *  
3213 *   Vector<double> mpi_owner(triangulation.n_active_cells());
3215 *   data_out.add_data_vector(mpi_owner, "owner");
3216 *  
3217 *   data_out.build_patches(mapping,
3218 *   fe.degree,
3220 *  
3221 *   const std::string filename =
3222 *   "solution_" + Utilities::int_to_string(result_number, 3) + ".vtu";
3223 *   data_out.write_vtu_in_parallel(filename, MPI_COMM_WORLD);
3224 *   }
3225 *   }
3226 *  
3227 *  
3228 *  
3229 * @endcode
3230 *
3231 * The EulerProblem::run() function puts all pieces together. It starts off
3232 * by calling the function that creates the mesh and sets up data structures,
3234 * the low-storage integrator. We call these vectors `rk_register_1` and
3236 * @f$\mathbf{r}_i@f$ and the second one for @f$\mathbf{k}_i@f$ in the formulas for
3237 * the Runge--Kutta scheme outlined in the introduction. Before we start the
3238 * time loop, we compute the time step size by the
3241 * size and print them to screen. For velocities and speeds of sound close
3243 * will be close, but they could vary if scaling were different.
3244 *
3245 * @code
3246 *   template <int dim>
3247 *   void EulerProblem<dim>::run()
3248 *   {
3249 *   {
3250 *   const unsigned int n_vect_number = VectorizedArray<Number>::size();
3251 *   const unsigned int n_vect_bits = 8 * sizeof(Number) * n_vect_number;
3252 *  
3253 *   pcout << "Running with "
3255 *   << " MPI processes" << std::endl;
3256 *   pcout << "Vectorization over " << n_vect_number << ' '
3257 *   << (std::is_same<Number, double>::value ? "doubles" : "floats")
3258 *   << " = " << n_vect_bits << " bits ("
3260 *   << std::endl;
3261 *   }
3262 *  
3264 *  
3266 *  
3269 *   rk_register_1.reinit(solution);
3270 *   rk_register_2.reinit(solution);
3271 *  
3272 *   euler_operator.project(ExactSolution<dim>(time), solution);
3273 *  
3274 *   double min_vertex_distance = std::numeric_limits<double>::max();
3275 *   for (const auto &cell : triangulation.active_cell_iterators())
3276 *   if (cell->is_locally_owned())
3278 *   std::min(min_vertex_distance, cell->minimum_vertex_distance());
3281 *  
3282 *   time_step = courant_number * integrator.n_stages() /
3283 *   euler_operator.compute_cell_transport_speed(solution);
3284 *   pcout << "Time step size: " << time_step
3285 *   << ", minimal h: " << min_vertex_distance
3286 *   << ", initial transport scaling: "
3287 *   << 1. / euler_operator.compute_cell_transport_speed(solution)
3288 *   << std::endl
3289 *   << std::endl;
3290 *  
3291 *   output_results(0);
3292 *  
3293 * @endcode
3294 *
3295 * Now we are ready to start the time loop, which we run until the time
3296 * has reached the desired end time. Every 5 time steps, we compute a new
3297 * estimate for the time step -- since the solution is nonlinear, it is
3305 * or truncate the time step size to a few digits, e.g. 3 in this case. In
3306 * case the current time is near the prescribed 'tick' value for output
3307 * (e.g. 0.02), we also write the output. After the end of the time loop,
3310 *
3311 * @code
3312 *   unsigned int timestep_number = 0;
3313 *  
3314 *   while (time < final_time - 1e-12)
3315 *   {
3316 *   ++timestep_number;
3317 *   if (timestep_number % 5 == 0)
3318 *   time_step =
3319 *   courant_number * integrator.n_stages() /
3321 *   euler_operator.compute_cell_transport_speed(solution), 3);
3322 *  
3323 *   {
3324 *   TimerOutput::Scope t(timer, "rk time stepping total");
3325 *   integrator.perform_time_step(euler_operator,
3326 *   time,
3327 *   time_step,
3328 *   solution,
3329 *   rk_register_1,
3330 *   rk_register_2);
3331 *   }
3332 *  
3333 *   time += time_step;
3334 *  
3335 *   if (static_cast<int>(time / output_tick) !=
3336 *   static_cast<int>((time - time_step) / output_tick) ||
3337 *   time >= final_time - 1e-12)
3339 *   static_cast<unsigned int>(std::round(time / output_tick)));
3340 *   }
3341 *  
3342 *   timer.print_wall_time_statistics(MPI_COMM_WORLD);
3343 *   pcout << std::endl;
3344 *   }
3345 *  
3346 *   } // namespace Euler_DG
3347 *  
3348 *  
3349 *  
3350 * @endcode
3351 *
3352 * The main() function is not surprising and follows what was done in all
3353 * previous MPI programs: As we run an MPI program, we need to call `MPI_Init()`
3355 * Utilities::MPI::MPI_InitFinalize data structure. Note that we run the program
3356 * only with MPI, and set the thread count to 1.
3357 *
3358 * @code
3359 *   int main(int argc, char **argv)
3360 *   {
3361 *   using namespace Euler_DG;
3362 *   using namespace dealii;
3363 *  
3365 *  
3366 *   try
3367 *   {
3369 *  
3371 *   euler_problem.run();
3372 *   }
3373 *   catch (std::exception &exc)
3374 *   {
3375 *   std::cerr << std::endl
3376 *   << std::endl
3377 *   << "----------------------------------------------------"
3378 *   << std::endl;
3379 *   std::cerr << "Exception on processing: " << std::endl
3380 *   << exc.what() << std::endl
3381 *   << "Aborting!" << std::endl
3382 *   << "----------------------------------------------------"
3383 *   << std::endl;
3384 *  
3385 *   return 1;
3386 *   }
3387 *   catch (...)
3388 *   {
3389 *   std::cerr << std::endl
3390 *   << std::endl
3391 *   << "----------------------------------------------------"
3392 *   << std::endl;
3393 *   std::cerr << "Unknown exception!" << std::endl
3394 *   << "Aborting!" << std::endl
3395 *   << "----------------------------------------------------"
3396 *   << std::endl;
3397 *   return 1;
3398 *   }
3399 *  
3400 *   return 0;
3401 *   }
3402 * @endcode
3403<a name="Results"></a><h1>Results</h1>
3404
3405
3406<a name="Programoutput"></a><h3>Program output</h3>
3407
3408
3410produces the following output:
3411@code
3412Running with 40 MPI processes
3414Number of degrees of freedom: 147,456 ( = 4 [vars] x 1,024 [cells] x 36 [dofs/cell/var] )
3415Time step size: 0.00689325, minimal h: 0.3125, initial transport scaling: 0.102759
3416
3417Time: 0, dt: 0.0069, error rho: 2.76e-07, rho * u: 1.259e-06, energy: 2.987e-06
3418Time: 1.01, dt: 0.0069, error rho: 1.37e-06, rho * u: 2.252e-06, energy: 4.153e-06
3419Time: 2.01, dt: 0.0069, error rho: 1.561e-06, rho * u: 2.43e-06, energy: 4.493e-06
3420Time: 3.01, dt: 0.0069, error rho: 1.714e-06, rho * u: 2.591e-06, energy: 4.762e-06
3421Time: 4.01, dt: 0.0069, error rho: 1.843e-06, rho * u: 2.625e-06, energy: 4.985e-06
3422Time: 5.01, dt: 0.0069, error rho: 1.496e-06, rho * u: 1.961e-06, energy: 4.142e-06
3423Time: 6, dt: 0.0083, error rho: 1.007e-06, rho * u: 7.119e-07, energy: 2.972e-06
3424Time: 7, dt: 0.0095, error rho: 9.096e-07, rho * u: 3.786e-07, energy: 2.626e-06
3425Time: 8, dt: 0.0096, error rho: 8.439e-07, rho * u: 3.338e-07, energy: 2.43e-06
3426Time: 9, dt: 0.0096, error rho: 7.822e-07, rho * u: 2.984e-07, energy: 2.248e-06
3427Time: 10, dt: 0.0096, error rho: 7.231e-07, rho * u: 2.666e-07, energy: 2.074e-06
3428
3429+-------------------------------------------+------------------+------------+------------------+
3430| Total wallclock time elapsed | 2.249s 30 | 2.249s | 2.249s 8 |
3431| | | |
3432| Section | no. calls | min time rank | avg time | max time rank |
3433+-------------------------------------------+------------------+------------+------------------+
3434| compute errors | 11 | 0.008066s 13 | 0.00952s | 0.01041s 20 |
3435| compute transport speed | 258 | 0.01012s 13 | 0.05392s | 0.08574s 25 |
3436| output | 11 | 0.9597s 13 | 0.9613s | 0.9623s 6 |
3437| rk time stepping total | 1283 | 0.9827s 25 | 1.015s | 1.06s 13 |
3438| rk_stage - integrals L_h | 6415 | 0.8803s 26 | 0.9198s | 0.9619s 14 |
3439| rk_stage - inv mass + vec upd | 6415 | 0.05677s 15 | 0.06487s | 0.07597s 13 |
3440+-------------------------------------------+------------------+------------+------------------+
3441@endcode
3442
3444that we use a relatively fine mesh of @f$32^2@f$ cells with polynomials of degree
34455 for a solution that is smooth. An interesting pattern shows for the time
3446step size: whereas it is 0.0069 up to time 5, it increases to 0.0096 for later
3452by the vortex. Our time step formula recognizes this effect.
3453
3454The final block of output shows detailed information about the timing
3457average time -- this is often useful in very large computations to
3460slow for other reasons.
3461The summary shows that 1283 time steps have been performed
3462in 1.02 seconds (looking at the average time among all MPI processes), while
3463the output of 11 files has taken additional 0.96 seconds. Broken down per time
3464step and into the five Runge--Kutta stages, the compute time per evaluation is
3466and a reason why explicit time integration is very competitive against
3471estimation of the transport speed for the time step size computation
3472contributes with another 0.05 seconds of compute time.
3473
3474If we use three more levels of global refinement and 9.4 million DoFs in total,
3477@code
3478+-------------------------------------------+------------------+------------+------------------+
3479| Total wallclock time elapsed | 244.9s 12 | 244.9s | 244.9s 34 |
3480| | | |
3481| Section | no. calls | min time rank | avg time | max time rank |
3482+-------------------------------------------+------------------+------------+------------------+
3483| compute errors | 11 | 0.4239s 12 | 0.4318s | 0.4408s 9 |
3484| compute transport speed | 2053 | 3.962s 12 | 6.727s | 10.12s 7 |
3485| output | 11 | 30.35s 12 | 30.36s | 30.37s 9 |
3486| rk time stepping total | 10258 | 201.7s 7 | 205.1s | 207.8s 12 |
3487| rk_stage - integrals L_h | 51290 | 121.3s 6 | 126.6s | 136.3s 16 |
3488| rk_stage - inv mass + vec upd | 51290 | 66.19s 16 | 77.52s | 81.84s 10 |
3489+-------------------------------------------+------------------+------------+------------------+
3490@endcode
3491
3492Per time step, the solver now takes 0.02 seconds, about 25 times as long as
3495surprising. Since we also do 8 times as many time steps, the compute time
3496should in theory increase by a factor of 512. The actual increase is 205 s /
34971.02 s = 202. This is because the small problem size cannot fully utilize the
3501from 0.92 seconds to 127 seconds, i.e., it increases with a factor of 138. On
3504all, has increased by a factor of 1195. The increase is more than the
3505theoretical factor of 512 because the operation is limited by the bandwidth
3506from RAM memory for the larger size while for the smaller size, all vectors
3513part can also reach 70%. If we compute a throughput number in terms of DoFs
3515\frac{n_\mathrm{time steps} n_\mathrm{stages}
3516n_\mathrm{dofs}}{t_\mathrm{compute}} = \frac{10258 \cdot 5 \cdot
35179.4\,\text{MDoFs}}{205s} = 2360\, \text{MDoFs/s} @f] This throughput number is
3518very high, given that simply copying one vector to another one runs at
3519only around 10,000 MDoFs/s.
3520
3521If we go to the next-larger size with 37.7 million DoFs, the overall
3522simulation time is 2196 seconds, with 1978 seconds spent in the time
3523stepping. The increase in run time is a factor of 9.3 for the L_h operator
3524(1179 versus 127 seconds) and a factor of 10.3 for the inverse mass matrix and
3525vector updates (797 vs 77.5 seconds). The reason for this non-optimal increase
3526in run time can be traced back to cache effects on the given hardware (with 40
3527MB of L2 cache and 55 MB of L3 cache): While not all of the relevant data fits
3528into caches for 9.4 million DoFs (one vector takes 75 MB and we have three
3529vectors plus some additional data in MatrixFree), there is capacity for one and
3532the data is used in a streaming-like fashion), we can assume that a sizeable
3533fraction of data can indeed be delivered from caches for the 9.4 million DoFs
3536
3537
3538<a name="Convergenceratesfortheanalyticaltestcase"></a><h3>Convergence rates for the analytical test case</h3>
3539
3540
3544
3545<table align="center" class="doxtable">
3546 <tr>
3547 <th>&nbsp;</th>
3548 <th colspan="3"><i>p</i>=2</th>
3549 <th colspan="3"><i>p</i>=3</th>
3550 <th colspan="3"><i>p</i>=5</th>
3551 </tr>
3552 <tr>
3553 <th>n_cells</th>
3554 <th>n_dofs</th>
3555 <th>error mom</th>
3556 <th>rate</th>
3557 <th>n_dofs</th>
3558 <th>error mom</th>
3559 <th>rate</th>
3560 <th>n_dofs</th>
3561 <th>error mom</th>
3562 <th>rate</th>
3563 </tr>
3564 <tr>
3565 <td align="right">16</td>
3566 <td>&nbsp;</td>
3567 <td>&nbsp;</td>
3568 <td>&nbsp;</td>
3569 <td>&nbsp;</td>
3570 <td>&nbsp;</td>
3571 <td>&nbsp;</td>
3572 <td align="right">2,304</td>
3573 <td align="center">1.373e-01</td>
3574 <td>&nbsp;</td>
3575 </tr>
3576 <tr>
3577 <td align="right">64</td>
3578 <td>&nbsp;</td>
3579 <td>&nbsp;</td>
3580 <td>&nbsp;</td>
3581 <td align="right">4,096</td>
3582 <td align="center">9.130e-02</td>
3583 <td>&nbsp;</td>
3584 <td align="right">9,216</td>
3585 <td align="center">8.899e-03</td>
3586 <td>3.94</td>
3587 </tr>
3588 <tr>
3589 <td align="right">256</td>
3590 <td align="right">9,216</td>
3591 <td align="center">5.577e-02</td>
3592 <td>&nbsp;</td>
3593 <td align="right">16,384</td>
3594 <td align="center">7.381e-03</td>
3595 <td>3.64</td>
3596 <td align="right">36,864</td>
3597 <td align="center">2.082e-04</td>
3598 <td>5.42</td>
3599 </tr>
3600 <tr>
3601 <td align="right">1024</td>
3602 <td align="right">36,864</td>
3603 <td align="center">4.724e-03</td>
3604 <td>3.56</td>
3605 <td align="right">65,536</td>
3606 <td align="center">3.072e-04</td>
3607 <td>4.59</td>
3608 <td align="right">147,456</td>
3609 <td align="center">2.625e-06</td>
3610 <td>6.31</td>
3611 </tr>
3612 <tr>
3613 <td align="right">4096</td>
3614 <td align="right">147,456</td>
3615 <td align="center">6.205e-04</td>
3616 <td>2.92</td>
3617 <td align="right">262,144</td>
3618 <td align="center">1.880e-05</td>
3619 <td>4.03</td>
3620 <td align="right">589,824</td>
3621 <td align="center">3.268e-08</td>
3622 <td>6.33</td>
3623 </tr>
3624 <tr>
3625 <td align="right">16,384</td>
3626 <td align="right">589,824</td>
3627 <td align="center">8.279e-05</td>
3628 <td>2.91</td>
3629 <td align="right">1,048,576</td>
3630 <td align="center">1.224e-06</td>
3631 <td>3.94</td>
3632 <td align="right">2,359,296</td>
3633 <td align="center">9.252e-10</td>
3634 <td>5.14</td>
3635 </tr>
3636 <tr>
3637 <td align="right">65,536</td>
3638 <td align="right">2,359,296</td>
3639 <td align="center">1.105e-05</td>
3640 <td>2.91</td>
3641 <td align="right">4,194,304</td>
3642 <td align="center">7.871e-08</td>
3643 <td>3.96</td>
3644 <td align="right">9,437,184</td>
3645 <td align="center">1.369e-10</td>
3646 <td>2.77</td>
3647 </tr>
3648 <tr>
3649 <td align="right">262,144</td>
3650 <td align="right">9,437,184</td>
3651 <td align="center">1.615e-06</td>
3652 <td>2.77</td>
3653 <td align="right">16,777,216</td>
3654 <td align="center">4.961e-09</td>
3655 <td>3.99</td>
3656 <td align="right">37,748,736</td>
3657 <td align="center">7.091e-11</td>
3658 <td>0.95</td>
3659 </tr>
3660</table>
3661
3662If we switch to the Harten-Lax-van Leer flux, the results are as follows:
3663<table align="center" class="doxtable">
3664 <tr>
3665 <th>&nbsp;</th>
3666 <th colspan="3"><i>p</i>=2</th>
3667 <th colspan="3"><i>p</i>=3</th>
3668 <th colspan="3"><i>p</i>=5</th>
3669 </tr>
3670 <tr>
3671 <th>n_cells</th>
3672 <th>n_dofs</th>
3673 <th>error mom</th>
3674 <th>rate</th>
3675 <th>n_dofs</th>
3676 <th>error mom</th>
3677 <th>rate</th>
3678 <th>n_dofs</th>
3679 <th>error mom</th>
3680 <th>rate</th>
3681 </tr>
3682 <tr>
3683 <td align="right">16</td>
3684 <td>&nbsp;</td>
3685 <td>&nbsp;</td>
3686 <td>&nbsp;</td>
3687 <td>&nbsp;</td>
3688 <td>&nbsp;</td>
3689 <td>&nbsp;</td>
3690 <td align="right">2,304</td>
3691 <td align="center">1.339e-01</td>
3692 <td>&nbsp;</td>
3693 </tr>
3694 <tr>
3695 <td align="right">64</td>
3696 <td>&nbsp;</td>
3697 <td>&nbsp;</td>
3698 <td>&nbsp;</td>
3699 <td align="right">4,096</td>
3700 <td align="center">9.037e-02</td>
3701 <td>&nbsp;</td>
3702 <td align="right">9,216</td>
3703 <td align="center">8.849e-03</td>
3704 <td>3.92</td>
3705 </tr>
3706 <tr>
3707 <td align="right">256</td>
3708 <td align="right">9,216</td>
3709 <td align="center">4.204e-02</td>
3710 <td>&nbsp;</td>
3711 <td align="right">16,384</td>
3712 <td align="center">9.143e-03</td>
3713 <td>3.31</td>
3714 <td align="right">36,864</td>
3715 <td align="center">2.501e-04</td>
3716 <td>5.14</td>
3717 </tr>
3718 <tr>
3719 <td align="right">1024</td>
3720 <td align="right">36,864</td>
3721 <td align="center">4.913e-03</td>
3722 <td>3.09</td>
3723 <td align="right">65,536</td>
3724 <td align="center">3.257e-04</td>
3725 <td>4.81</td>
3726 <td align="right">147,456</td>
3727 <td align="center">3.260e-06</td>
3728 <td>6.26</td>
3729 </tr>
3730 <tr>
3731 <td align="right">4096</td>
3732 <td align="right">147,456</td>
3733 <td align="center">7.862e-04</td>
3734 <td>2.64</td>
3735 <td align="right">262,144</td>
3736 <td align="center">1.588e-05</td>
3737 <td>4.36</td>
3738 <td align="right">589,824</td>
3739 <td align="center">2.953e-08</td>
3740 <td>6.79</td>
3741 </tr>
3742 <tr>
3743 <td align="right">16,384</td>
3744 <td align="right">589,824</td>
3745 <td align="center">1.137e-04</td>
3746 <td>2.79</td>
3747 <td align="right">1,048,576</td>
3748 <td align="center">9.400e-07</td>
3749 <td>4.08</td>
3750 <td align="right">2,359,296</td>
3751 <td align="center">4.286e-10</td>
3752 <td>6.11</td>
3753 </tr>
3754 <tr>
3755 <td align="right">65,536</td>
3756 <td align="right">2,359,296</td>
3757 <td align="center">1.476e-05</td>
3758 <td>2.95</td>
3759 <td align="right">4,194,304</td>
3760 <td align="center">5.799e-08</td>
3761 <td>4.02</td>
3762 <td align="right">9,437,184</td>
3763 <td align="center">2.789e-11</td>
3764 <td>3.94</td>
3765 </tr>
3766 <tr>
3767 <td align="right">262,144</td>
3768 <td align="right">9,437,184</td>
3769 <td align="center">2.038e-06</td>
3770 <td>2.86</td>
3771 <td align="right">16,777,216</td>
3772 <td align="center">3.609e-09</td>
3773 <td>4.01</td>
3774 <td align="right">37,748,736</td>
3775 <td align="center">5.730e-11</td>
3776 <td>-1.04</td>
3777 </tr>
3778</table>
3779
3780The tables show that we get optimal @f$\mathcal O\left(h^{p+1}\right)@f$
3782for the Lax--Friedrichs flux for @f$p=2@f$, but the picture is reversed for
3783@f$p=3@f$; in any case, the differences on this testcase are relatively
3784small.
3785
3786For @f$p=5@f$, we reach the roundoff accuracy of @f$10^{-11}@f$ with both
3788domain length of @f$10^2@f$, so relative errors are below @f$10^{-12}@f$. The HLL flux
3791condition on the solution that leaves the domain, which results in a small
3794minor, as the polynomial part inside elements is the main driver of the
3797example the parameters and grid of @ref step_33 "step-33", we get oscillations (which in turn
3798make density negative and make the solution explode) with both fluxes once the
3799high-mass part comes near the boundary, as opposed to the low-order finite
3800volume case (@f$p=0@f$). Thus, any case that leads to shocks in the solution
3802alternative, see the @ref step_69 "step-69" tutorial program.
3803
3804
3805<a name="Resultsforflowinchannelaroundcylinderin2D"></a><h3>Results for flow in channel around cylinder in 2D</h3>
3806
3807
3808For the test case of the flow around a cylinder in a channel, we need to
3809change the first code line to
3810@code
3811 constexpr unsigned int testcase = 1;
3812@endcode
3813This test case starts with a background field of a constant velocity
3819times 0.1, 0.25, 0.5, and 1.0 (top left to bottom right) for the 2D case with
38205 levels of global refinement, using 102,400 cells with polynomial degree of
38215 and 14.7 million degrees of freedom over all 4 solution variables.
3823propagates slowly in the upstream direction and more quickly in downstream
3824direction in the first snapshot at time 0.1. At time 0.25, the sound wave has
3831
3832<table align="center" class="doxtable" style="width:85%">
3833 <tr>
3834 <td>
3835 <img src="https://www.dealii.org/images/steps/developer/step-67.pressure_010.png" alt="" width="100%">
3836 </td>
3837 <td>
3838 <img src="https://www.dealii.org/images/steps/developer/step-67.pressure_025.png" alt="" width="100%">
3839 </td>
3840 </tr>
3841 <tr>
3842 <td>
3843 <img src="https://www.dealii.org/images/steps/developer/step-67.pressure_050.png" alt="" width="100%">
3844 </td>
3845 <td>
3846 <img src="https://www.dealii.org/images/steps/developer/step-67.pressure_100.png" alt="" width="100%">
3847 </td>
3848 </tr>
3849</table>
3850
3853we can see the large number
3859travels through elements with high-order polynomials. This effect can be cured
3861the result of the transport accuracy of the high-order DG method.
3862
3863<img src="https://www.dealii.org/images/steps/developer/step-67.pressure_elevated.jpg" alt="" width="40%">
3864
3865If we run the program with degree 2 and 6 levels of global refinements (410k
3866cells, 14.7M unknowns), we get the following evolution of the pressure
3868
3869@htmlonly
3870<p align="center">
3871 <iframe width="560" height="315" src="https://www.youtube.com/embed/ivxCLiSdQpc"
3872 frameborder="0"
3875 </p>
3877
3878
3879With 2 levels of global refinement with 1,600 cells, the mesh and its
3881
3882<img src="https://www.dealii.org/images/steps/developer/step-67.grid-owner.png" alt="" width="70%">
3883
3884When we run the code with 4 levels of global refinements on 40 cores, we get
3885the following output:
3886@code
3887Running with 40 MPI processes
3889Number of degrees of freedom: 3,686,400 ( = 4 [vars] x 25,600 [cells] x 36 [dofs/cell/var] )
3890Time step size: 7.39876e-05, minimal h: 0.001875, initial transport scaling: 0.00110294
3891
3892Time: 0, dt: 7.4e-05, norm rho: 4.17e-16, rho * u: 1.629e-16, energy: 1.381e-15
3893Time: 0.05, dt: 6.3e-05, norm rho: 0.02075, rho * u: 0.03801, energy: 0.08772
3894Time: 0.1, dt: 5.9e-05, norm rho: 0.02211, rho * u: 0.04515, energy: 0.08953
3895Time: 0.15, dt: 5.7e-05, norm rho: 0.02261, rho * u: 0.04592, energy: 0.08967
3896Time: 0.2, dt: 5.8e-05, norm rho: 0.02058, rho * u: 0.04361, energy: 0.08222
3897Time: 0.25, dt: 5.9e-05, norm rho: 0.01695, rho * u: 0.04203, energy: 0.06873
3898Time: 0.3, dt: 5.9e-05, norm rho: 0.01653, rho * u: 0.0401, energy: 0.06604
3899Time: 0.35, dt: 5.7e-05, norm rho: 0.01774, rho * u: 0.04264, energy: 0.0706
3900
3901...
3902
3903Time: 1.95, dt: 5.8e-05, norm rho: 0.01488, rho * u: 0.03923, energy: 0.05185
3904Time: 2, dt: 5.7e-05, norm rho: 0.01432, rho * u: 0.03969, energy: 0.04889
3905
3906+-------------------------------------------+------------------+------------+------------------+
3907| Total wallclock time elapsed | 273.6s 13 | 273.6s | 273.6s 0 |
3908| | | |
3909| Section | no. calls | min time rank | avg time | max time rank |
3910+-------------------------------------------+------------------+------------+------------------+
3911| compute errors | 41 | 0.01112s 35 | 0.0672s | 0.1337s 0 |
3912| compute transport speed | 6914 | 5.422s 35 | 15.96s | 29.99s 1 |
3913| output | 41 | 37.24s 35 | 37.3s | 37.37s 0 |
3914| rk time stepping total | 34564 | 205.4s 1 | 219.5s | 230.1s 35 |
3915| rk_stage - integrals L_h | 172820 | 153.6s 1 | 164.9s | 175.6s 27 |
3916| rk_stage - inv mass + vec upd | 172820 | 47.13s 13 | 53.09s | 64.05s 33 |
3917+-------------------------------------------+------------------+------------+------------------+
3918@endcode
3919
3921@f$\rho'@f$, @f$(\rho u)'@f$, and @f$E'@f$ against the background field (namely, the
3922initial condition). The distribution of run time is overall similar as in the
3924time spent in @f$\mathcal L_h@f$ as compared to the inverse mass matrix and vector
3925updates. This is because the geometry is deformed and the matrix-free
3926framework needs to load additional arrays for the geometry from memory that
3927are compressed in the affine mesh case.
3928
3929Increasing the number of global refinements to 5, the output becomes:
3930@code
3931Running with 40 MPI processes
3933Number of degrees of freedom: 14,745,600 ( = 4 [vars] x 102,400 [cells] x 36 [dofs/cell/var] )
3934
3935...
3936
3937+-------------------------------------------+------------------+------------+------------------+
3938| Total wallclock time elapsed | 2693s 32 | 2693s | 2693s 23 |
3939| | | |
3940| Section | no. calls | min time rank | avg time | max time rank |
3941+-------------------------------------------+------------------+------------+------------------+
3942| compute errors | 41 | 0.04537s 32 | 0.173s | 0.3489s 0 |
3943| compute transport speed | 13858 | 40.75s 32 | 85.99s | 149.8s 0 |
3944| output | 41 | 153.8s 32 | 153.9s | 154.1s 0 |
3945| rk time stepping total | 69284 | 2386s 0 | 2450s | 2496s 32 |
3946| rk_stage - integrals L_h | 346420 | 1365s 32 | 1574s | 1718s 19 |
3947| rk_stage - inv mass + vec upd | 346420 | 722.5s 10 | 870.7s | 1125s 32 |
3948+-------------------------------------------+------------------+------------+------------------+
3949@endcode
3950
3953see an increase by a factor of 11 for the time steps (219.5 seconds versus
3960total" part. At the same time, it appears to be slowest for the "compute
3962average and almost a factor of 4 compared to the faster rank.
3966catch up. At this point, one can wonder about the reason for this imbalance:
3967The number of cells is almost the same on all MPI processes.
3968However, the matrix-free framework is faster on affine and Cartesian
3970are assigned. On the other hand, rank 32, which reports the highest run time
3971for the Runga--Kutta stages, owns the curved cells near the cylinder, for
3972which no data compression is possible. To improve throughput, we could assign
3974parallel::distributed::Triangulation object, or even measure the run time for a
3975few time steps and try to rebalance then.
3976
3980
3982@code
3983Running with 40 MPI processes
3985Number of degrees of freedom: 58,982,400 ( = 4 [vars] x 409,600 [cells] x 36 [dofs/cell/var] )
3986
3987...
3988
3989Time: 1.95, dt: 1.4e-05, norm rho: 0.01488, rho * u: 0.03923, energy: 0.05183
3990Time: 2, dt: 1.4e-05, norm rho: 0.01431, rho * u: 0.03969, energy: 0.04887
3991
3992+-------------------------------------------+------------------+------------+------------------+
3993| Total wallclock time elapsed | 2.166e+04s 26 | 2.166e+04s | 2.166e+04s 24 |
3994| | | |
3995| Section | no. calls | min time rank | avg time | max time rank |
3996+-------------------------------------------+------------------+------------+------------------+
3997| compute errors | 41 | 0.1758s 30 | 0.672s | 1.376s 1 |
3998| compute transport speed | 27748 | 321.3s 34 | 678.8s | 1202s 1 |
3999| output | 41 | 616.3s 32 | 616.4s | 616.4s 34 |
4000| rk time stepping total | 138733 | 1.983e+04s 1 | 2.036e+04s | 2.072e+04s 34 |
4001| rk_stage - integrals L_h | 693665 | 1.052e+04s 32 | 1.248e+04s | 1.387e+04s 19 |
4002| rk_stage - inv mass + vec upd | 693665 | 6404s 10 | 7868s | 1.018e+04s 32 |
4003+-------------------------------------------+------------------+------------+------------------+
4004@endcode
4005
4007overall run time to perform 139k time steps is 20k seconds (5.7 hours) or 7
4008time steps per second -- not so bad for having nearly 60 million
4011
4012
4014
4015
4016Switching the channel test case to 3D with 3 global refinements, the output is
4017@code
4018Running with 40 MPI processes
4020Number of degrees of freedom: 221,184,000 ( = 5 [vars] x 204,800 [cells] x 216 [dofs/cell/var] )
4021
4022...
4023
4024Time: 1.95, dt: 0.00011, norm rho: 0.01131, rho * u: 0.03056, energy: 0.04091
4025Time: 2, dt: 0.00011, norm rho: 0.0119, rho * u: 0.03142, energy: 0.04425
4026
4027+-------------------------------------------+------------------+------------+------------------+
4028| Total wallclock time elapsed | 1.734e+04s 4 | 1.734e+04s | 1.734e+04s 38 |
4029| | | |
4030| Section | no. calls | min time rank | avg time | max time rank |
4031+-------------------------------------------+------------------+------------+------------------+
4032| compute errors | 41 | 0.6551s 34 | 3.216s | 7.281s 0 |
4033| compute transport speed | 3546 | 160s 34 | 393.2s | 776.9s 0 |
4034| output | 41 | 1350s 34 | 1353s | 1357s 0 |
4035| rk time stepping total | 17723 | 1.519e+04s 0 | 1.558e+04s | 1.582e+04s 34 |
4036| rk_stage - integrals L_h | 88615 | 1.005e+04s 32 | 1.126e+04s | 1.23e+04s 11 |
4037| rk_stage - inv mass + vec upd | 88615 | 3056s 11 | 4322s | 5759s 32 |
4038+-------------------------------------------+------------------+------------+------------------+
4039@endcode
4040
4041The physics are similar to the 2D case, with a slight motion in the z
4043stage in this case is
4044@f[
4045\text{throughput} = \frac{n_\mathrm{time steps} n_\mathrm{stages}
4046n_\mathrm{dofs}}{t_\mathrm{compute}} =
4047\frac{17723 \cdot 5 \cdot 221.2\,\text{M}}{15580s} = 1258\, \text{MDoFs/s}.
4048@f]
4049
4051is more expensive. This is due to over-integration with `degree+2` points and
4052the larger fraction of face integrals (worse volume-to-surface ratio) with
4054and vector update part, we record a throughput of 4857 MDoFs/s for the 2D case
4057fact limited by the memory bandwidth.
4058
4059If we go to four levels of global refinement, we need to increase the number
4061GB of RAM memory in this case. Also, the time it takes to complete 35k time
4064@code
4065Running with 240 MPI processes
4067Number of degrees of freedom: 1,769,472,000 ( = 5 [vars] x 1,638,400 [cells] x 216 [dofs/cell/var] )
4068
4069...
4070
4071Time: 1.95, dt: 5.6e-05, norm rho: 0.01129, rho * u: 0.0306, energy: 0.04086
4072Time: 2, dt: 5.6e-05, norm rho: 0.01189, rho * u: 0.03145, energy: 0.04417
4073
4074+-------------------------------------------+------------------+------------+------------------+
4075| Total wallclock time elapsed | 5.396e+04s 151 | 5.396e+04s | 5.396e+04s 0 |
4076| | | |
4077| Section | no. calls | min time rank | avg time | max time rank |
4078+-------------------------------------------+------------------+------------+------------------+
4079| compute errors | 41 | 2.632s 178 | 7.221s | 16.56s 0 |
4080| compute transport speed | 7072 | 714s 193 | 1553s | 3351s 0 |
4081| output | 41 | 8065s 176 | 8070s | 8079s 0 |
4082| rk time stepping total | 35350 | 4.25e+04s 0 | 4.43e+04s | 4.515e+04s 193 |
4083| rk_stage - integrals L_h | 176750 | 2.936e+04s 134 | 3.222e+04s | 3.67e+04s 99 |
4084| rk_stage - inv mass + vec upd | 176750 | 7004s 99 | 1.207e+04s | 1.55e+04s 132 |
4085+-------------------------------------------+------------------+------------+------------------+
4086@endcode
4089step.
4090
4091
4092<a name="Possibilitiesforextensions"></a><h3>Possibilities for extensions</h3>
4093
4094
4098has been achieved by the <a href="https://github.com/kronbichler/exwave">exwave
4104
4113operations that are run separate from the mass matrix operation and compare
4115
4116
4117<a name="Moreadvancednumericalfluxfunctionsandskewsymmetricformulations"></a><h4>More advanced numerical flux functions and skew-symmetric formulations</h4>
4118
4119
4127
4136nonlinearities and higher-degree content from curved cells. A way out of this
4138simple variant. Skew symmetry means that switching the role of the solution
4149by special skew-symmetric finite difference schemes.
4150
4152https://github.com/kronbichler/advection_miniapp, where a
4154equation.
4155
4156<a name="Equippingthecodeforsupersoniccalculations"></a><h4>Equipping the code for supersonic calculations</h4>
4157
4158
4167
4172data,
4173@f[
4174\mathbf{w}^+ = \mathbf{w}^- = \begin{pmatrix} \rho^-\\
4175(\rho \mathbf u)^- \\ E^-\end{pmatrix} \quad
4176 \text{(Neumann)}.
4177@f]
4178
4180@code
4183 {
4184 w_p = w_m;
4185 at_outflow = true;
4186 }
4187@endcode
4188in the `local_apply_boundary_face()` function.
4189
4190<a name="ExtensiontothelinearizedEulerequations"></a><h4>Extension to the linearized Euler equations</h4>
4191
4192
4195background state, i.e., a given density, velocity and energy (or pressure)
4203quadrature points) or by a vector similar to the solution. Based on that
4204vector, we would create an additional FEEvaluation object to read from it and
4205provide the values of the field at quadrature points. If the background
4209
4217<a
4218href="https://en.wikipedia.org/wiki/Perfectly_matched_layer">perfectly
4220-- are common.
4221
4222
4223<a name="ExtensiontothecompressibleNavierStokesequations"></a><h4>Extension to the compressible Navier-Stokes equations</h4>
4224
4225
4229here despite the additional cost of elliptic terms, e.g. via an interior
4230penalty method, one can switch the basis from FE_DGQ to FE_DGQHermite like in
4231the @ref step_59 "step-59" tutorial program.
4232
4233
4234<a name="Usingcellcentricloopsandsharedmemory"></a><h4>Using cell-centric loops and shared memory</h4>
4235
4236
4237In this tutorial, we used face-centric loops. Here, cell and face integrals
4242processing all its 2d faces. Although this kind of loop implies that fluxes have
4243to be computed twice (for each side of an interior face), the fact that the
4246shared memory - already give a performance boost. If you are interested in these
4247advanced topics, you can take a look at @ref step_76 "step-76" where we take the present
4249 *
4250 *
4251<a name="PlainProg"></a>
4252<h1> The plain program</h1>
4253@include "step-67.cc"
4254*/
bool empty() const
Definition array_view.h:585
iterator end() const
Definition array_view.h:603
value_type * data() const noexcept
Definition array_view.h:553
void reinit(value_type *starting_element, const std::size_t n_elements)
Definition array_view.h:413
std::size_t size() const
Definition array_view.h:576
void write_vtu_in_parallel(const std::string &filename, const MPI_Comm comm) const
void add_data_vector(const VectorType &data, const std::vector< std::string > &names, const DataVectorType type=type_automatic, const std::vector< DataComponentInterpretation::DataComponentInterpretation > &data_component_interpretation={})
void set_dof_values(VectorType &dst, const unsigned int first_index=0, const std::bitset< VectorizedArrayType::size()> &mask=std::bitset< VectorizedArrayType::size()>().flip()) const
value_type get_value(const unsigned int q_point) const
Tensor< 2, dim, VectorizedArrayType > inverse_jacobian(const unsigned int q_point) const
const unsigned int n_q_points
void gather_evaluate(const VectorType &input_vector, const EvaluationFlags::EvaluationFlags evaluation_flag)
void evaluate(const EvaluationFlags::EvaluationFlags evaluation_flag)
unsigned int depth_console(const unsigned int n)
Definition logstream.cc:350
Abstract base class for mapping classes.
Definition mapping.h:317
void transform_from_q_points_to_basis(const unsigned int n_actual_components, const VectorizedArrayType *in_array, VectorizedArrayType *out_array) const
Definition operators.h:1192
void loop(const std::function< void(const MatrixFree< dim, Number, VectorizedArrayType > &, OutVector &, const InVector &, const std::pair< unsigned int, unsigned int > &)> &cell_operation, const std::function< void(const MatrixFree< dim, Number, VectorizedArrayType > &, OutVector &, const InVector &, const std::pair< unsigned int, unsigned int > &)> &face_operation, const std::function< void(const MatrixFree< dim, Number, VectorizedArrayType > &, OutVector &, const InVector &, const std::pair< unsigned int, unsigned int > &)> &boundary_operation, OutVector &dst, const InVector &src, const bool zero_dst_vector=false, const DataAccessOnFaces dst_vector_face_access=DataAccessOnFaces::unspecified, const DataAccessOnFaces src_vector_face_access=DataAccessOnFaces::unspecified) const
unsigned int n_active_entries_per_cell_batch(const unsigned int cell_batch_index) const
void cell_loop(const std::function< void(const MatrixFree< dim, Number, VectorizedArrayType > &, OutVector &, const InVector &, const std::pair< unsigned int, unsigned int > &)> &cell_operation, OutVector &dst, const InVector &src, const bool zero_dst_vector=false) const
Definition point.h:112
void print_wall_time_statistics(const MPI_Comm mpi_comm, const double print_quantile=0.) const
Definition timer.cc:842
#define DEAL_II_ALWAYS_INLINE
Definition config.h:106
#define DEAL_II_OPENMP_SIMD_PRAGMA
Definition config.h:138
#define DEAL_II_WITH_P4EST
Definition config.h:60
Point< 3 > center
Point< 3 > vertices[4]
DerivativeForm< 1, spacedim, dim, Number > transpose(const DerivativeForm< 1, dim, spacedim, Number > &DF)
Point< 2 > second
Definition grid_out.cc:4616
Point< 2 > first
Definition grid_out.cc:4615
unsigned int level
Definition grid_out.cc:4618
__global__ void set(Number *val, const Number s, const size_type N)
static ::ExceptionBase & ExcNotImplemented()
#define Assert(cond, exc)
#define AssertDimension(dim1, dim2)
static ::ExceptionBase & ExcMessage(std::string arg1)
#define AssertThrow(cond, exc)
void loop(ITERATOR begin, std_cxx20::type_identity_t< ITERATOR > end, DOFINFO &dinfo, INFOBOX &info, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &cell_worker, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &boundary_worker, const std::function< void(DOFINFO &, DOFINFO &, typename INFOBOX::CellInfo &, typename INFOBOX::CellInfo &)> &face_worker, ASSEMBLER &assembler, const LoopControl &lctrl=LoopControl())
Definition loop.h:439
UpdateFlags
@ update_values
Shape function values.
@ update_normal_vectors
Normal vectors.
@ update_JxW_values
Transformed quadrature weights.
@ update_gradients
Shape function gradients.
@ update_quadrature_points
Transformed quadrature points.
LogStream deallog
Definition logstream.cc:37
const Event initial
Definition event.cc:65
CGAL::Exact_predicates_exact_constructions_kernel_with_sqrt K
void apply(const Kokkos::TeamPolicy< MemorySpace::Default::kokkos_space::execution_space >::member_type &team_member, const Kokkos::View< Number *, MemorySpace::Default::kokkos_space > shape_data, const ViewTypeIn in, ViewTypeOut out)
void approximate(SynchronousIterators< std::tuple< typename DoFHandler< dim, spacedim >::active_cell_iterator, Vector< float >::iterator > > const &cell, const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof_handler, const InputVector &solution, const unsigned int component)
The namespace for the EvaluationFlags enum.
void hyper_rectangle(Triangulation< dim, spacedim > &tria, const Point< dim > &p1, const Point< dim > &p2, const bool colorize=false)
void cylinder(Triangulation< dim > &tria, const double radius=1., const double half_length=1.)
void channel_with_cylinder(Triangulation< dim > &tria, const double shell_region_width=0.03, const unsigned int n_shells=2, const double skewness=2.0, const bool colorize=false)
void scale(const double scaling_factor, Triangulation< dim, spacedim > &triangulation)
double volume(const Triangulation< dim, spacedim > &tria)
@ valid
Iterator points to a valid object.
@ matrix
Contents is actually a matrix.
@ symmetric
Matrix is symmetric.
@ diagonal
Matrix is diagonal.
@ general
No special properties.
double norm(const FEValuesBase< dim > &fe, const ArrayView< const std::vector< Tensor< 1, dim > > > &Du)
Definition divergence.h:472
Point< spacedim > point(const gp_Pnt &p, const double tolerance=1e-10)
Definition utilities.cc:189
SymmetricTensor< 2, dim, Number > E(const Tensor< 2, dim, Number > &F)
Tensor< 2, dim, Number > w(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
SymmetricTensor< 2, dim, Number > e(const Tensor< 2, dim, Number > &F)
SymmetricTensor< 2, dim, Number > d(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
Tensor< 2, dim, Number > F(const Tensor< 2, dim, Number > &Grad_u)
void call(const std::function< RT()> &function, internal::return_value< RT > &ret_val)
@ LOW_STORAGE_RK_STAGE9_ORDER5
@ LOW_STORAGE_RK_STAGE3_ORDER3
@ LOW_STORAGE_RK_STAGE7_ORDER4
@ LOW_STORAGE_RK_STAGE5_ORDER4
VectorType::value_type * end(VectorType &V)
VectorType::value_type * begin(VectorType &V)
void free(T *&pointer)
Definition cuda.h:97
T sum(const T &t, const MPI_Comm mpi_communicator)
unsigned int n_mpi_processes(const MPI_Comm mpi_communicator)
Definition mpi.cc:150
T max(const T &t, const MPI_Comm mpi_communicator)
T min(const T &t, const MPI_Comm mpi_communicator)
unsigned int this_mpi_process(const MPI_Comm mpi_communicator)
Definition mpi.cc:161
std::string get_time()
const std::string get_current_vectorization_level()
Definition utilities.cc:939
constexpr T pow(const T base, const int iexp)
Definition utilities.h:447
Number truncate_to_n_digits(const Number number, const unsigned int n_digits)
Definition utilities.cc:579
std::string int_to_string(const unsigned int value, const unsigned int digits=numbers::invalid_unsigned_int)
Definition utilities.cc:471
void integrate_difference(const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof, const InVector &fe_function, const Function< spacedim, typename InVector::value_type > &exact_solution, OutVector &difference, const Quadrature< dim > &q, const NormType &norm, const Function< spacedim, double > *weight=nullptr, const double exponent=2.)
void project(const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof, const AffineConstraints< typename VectorType::value_type > &constraints, const Quadrature< dim > &quadrature, const Function< spacedim, typename VectorType::value_type > &function, VectorType &vec, const bool enforce_zero_boundary=false, const Quadrature< dim - 1 > &q_boundary=(dim > 1 ? QGauss< dim - 1 >(2) :Quadrature< dim - 1 >(0)), const bool project_to_boundary_first=false)
void run(const Iterator &begin, const std_cxx20::type_identity_t< Iterator > &end, Worker worker, Copier copier, const ScratchData &sample_scratch_data, const CopyData &sample_copy_data, const unsigned int queue_length, const unsigned int chunk_size)
bool check(const ConstraintKinds kind_in, const unsigned int dim)
long double gamma(const unsigned int n)
DEAL_II_HOST constexpr TableIndices< 2 > merge(const TableIndices< 2 > &previous_indices, const unsigned int new_index, const unsigned int position)
unsigned int n_cells(const internal::TriangulationImplementation::NumberCache< 1 > &c)
Definition tria.cc:13826
int(&) functions(const void *v1, const void *v2)
static constexpr double PI
Definition numbers.h:259
STL namespace.
::VectorizedArray< Number, width > exp(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > min(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > max(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > sqrt(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > pow(const ::VectorizedArray< Number, width > &, const Number p)
::VectorizedArray< Number, width > abs(const ::VectorizedArray< Number, width > &)
Definition types.h:33
unsigned int boundary_id
Definition types.h:141
const ::parallel::distributed::Triangulation< dim, spacedim > * triangulation
DEAL_II_HOST constexpr Number determinant(const SymmetricTensor< 2, dim, Number > &)
std::array< Number, 1 > eigenvalues(const SymmetricTensor< 2, 1, Number > &T)
const TriangulationDescription::Settings settings