1348 *
template <
int dim>
1352 *
for (
unsigned int c = 0; c <
vp.
size(); ++c)
1362 * <a name=
"Linearsolversandpreconditioners"></a>
1373 * <a name=
"ThecodeInverseMatrixcodeclasstemplate"></a>
1392 *
const PreconditionerType &preconditioner);
1402 *
template <
class MatrixType,
class PreconditionerType>
1404 *
const MatrixType & m,
1405 *
const PreconditionerType &preconditioner)
1407 *
, preconditioner(&preconditioner)
1424 * tolerance, either.
1427 * template <class MatrixType, class PreconditionerType>
1428 * void InverseMatrix<MatrixType, PreconditionerType>::vmult(
1429 * Vector<double> & dst,
1430 * const Vector<double> &src) const
1432 * SolverControl solver_control(src.size(), 1e-6 * src.l2_norm());
1433 * SolverCG<Vector<double>> cg(solver_control);
1437 * cg.solve(*matrix, dst, src, *preconditioner);
1444 * <a name="ThecodeSchurComplementcodeclasstemplate"></a>
1445 * <h4>The <code>SchurComplement</code> class template</h4>
1449 * This class implements the Schur complement discussed in the introduction.
1450 * It is in analogy to @ref step_20 "step-20". Though, we now call it with a template
1451 * parameter <code>PreconditionerType</code> in order to access that when
1452 * specifying the respective type of the inverse matrix class. As a
1453 * consequence of the definition above, the declaration
1454 * <code>InverseMatrix</code> now contains the second template parameter for
1455 * a preconditioner class as above, which affects the
1456 * <code>SmartPointer</code> object <code>m_inverse</code> as well.
1459 * template <class PreconditionerType>
1460 * class SchurComplement : public Subscriptor
1464 * const BlockSparseMatrix<double> &system_matrix,
1465 * const InverseMatrix<SparseMatrix<double>, PreconditionerType> &A_inverse);
1467 * void vmult(Vector<double> &dst, const Vector<double> &src) const;
1470 * const SmartPointer<const BlockSparseMatrix<double>> system_matrix;
1471 * const SmartPointer<
1472 * const InverseMatrix<SparseMatrix<double>, PreconditionerType>>
1475 * mutable Vector<double> tmp1, tmp2;
1480 * template <class PreconditionerType>
1481 * SchurComplement<PreconditionerType>::SchurComplement(
1482 * const BlockSparseMatrix<double> &system_matrix,
1483 * const InverseMatrix<SparseMatrix<double>, PreconditionerType> &A_inverse)
1484 * : system_matrix(&system_matrix)
1485 * , A_inverse(&A_inverse)
1486 * , tmp1(system_matrix.block(0, 0).m())
1487 * , tmp2(system_matrix.block(0, 0).m())
1491 * template <class PreconditionerType>
1493 * SchurComplement<PreconditionerType>::vmult(Vector<double> & dst,
1494 * const Vector<double> &src) const
1496 * system_matrix->block(0, 1).vmult(tmp1, src);
1497 * A_inverse->vmult(tmp2, tmp1);
1498 * system_matrix->block(1, 0).vmult(dst, tmp2);
1505 * <a name="StokesProblemclassimplementation"></a>
1506 * <h3>StokesProblem class implementation</h3>
1511 * <a name="StokesProblemStokesProblem"></a>
1512 * <h4>StokesProblem::StokesProblem</h4>
1516 * The constructor of this class looks very similar to the one of
1517 * @ref step_20 "step-20". The constructor initializes the variables for the polynomial
1518 * degree, triangulation, finite element system and the dof handler. The
1519 * underlying polynomial functions are of order <code>degree+1</code> for
1520 * the vector-valued velocity components and of order <code>degree</code>
1521 * for the pressure. This gives the LBB-stable element pair
1522 * @f$Q_{degree+1}^d\times Q_{degree}@f$, often referred to as the Taylor-Hood
1527 * Note that we initialize the triangulation with a MeshSmoothing argument,
1528 * which ensures that the refinement of cells is done in a way that the
1529 * approximation of the PDE solution remains well-behaved (problems arise if
1530 * grids are too unstructured), see the documentation of
1531 * <code>Triangulation::MeshSmoothing</code> for details.
1534 * template <int dim>
1535 * StokesProblem<dim>::StokesProblem(const unsigned int degree)
1537 * , triangulation(Triangulation<dim>::maximum_smoothing)
1538 * , fe(FE_Q<dim>(degree + 1), dim, FE_Q<dim>(degree), 1)
1539 * , dof_handler(triangulation)
1546 * <a name="StokesProblemsetup_dofs"></a>
1547 * <h4>StokesProblem::setup_dofs</h4>
1551 * Given a mesh, this function associates the degrees of freedom with it and
1552 * creates the corresponding matrices and vectors. At the beginning it also
1553 * releases the pointer to the preconditioner object (if the shared pointer
1554 * pointed at anything at all at this point) since it will definitely not be
1555 * needed any more after this point and will have to be re-computed after
1556 * assembling the matrix, and unties the sparse matrices from their sparsity
1561 * We then proceed with distributing degrees of freedom and renumbering
1562 * them: In order to make the ILU preconditioner (in 3d) work efficiently,
1563 * it is important to enumerate the degrees of freedom in such a way that it
1564 * reduces the bandwidth of the matrix, or maybe more importantly: in such a
1565 * way that the ILU is as close as possible to a real LU decomposition. On
1566 * the other hand, we need to preserve the block structure of velocity and
1567 * pressure already seen in @ref step_20 "step-20" and @ref step_21 "step-21". This is done in two
1568 * steps: First, all dofs are renumbered to improve the ILU and then we
1569 * renumber once again by components. Since
1570 * <code>DoFRenumbering::component_wise</code> does not touch the
1571 * renumbering within the individual blocks, the basic renumbering from the
1572 * first step remains. As for how the renumber degrees of freedom to improve
1573 * the ILU: deal.II has a number of algorithms that attempt to find
1574 * orderings to improve ILUs, or reduce the bandwidth of matrices, or
1575 * optimize some other aspect. The DoFRenumbering namespace shows a
1576 * comparison of the results we obtain with several of these algorithms
1577 * based on the testcase discussed here in this tutorial program. Here, we
1578 * will use the traditional Cuthill-McKee algorithm already used in some of
1579 * the previous tutorial programs. In the <a href="#improved-ilu">section
1598 *
template <
int dim>
1602 *
system_matrix.clear();
1605 *
dof_handler.distribute_dofs(fe);
1638 *
constraints.clear();
1649 *
constraints.close();
1666 *
std::cout <<
" Number of active cells: " <<
triangulation.n_active_cells()
1668 *
<<
" Number of degrees of freedom: " << dof_handler.n_dofs()
1669 *
<<
" (" <<
n_u <<
'+' <<
n_p <<
')' << std::endl;
1678 *
In 3
d,
the function DoFTools::max_couplings_between_dofs
yields a
1683 * of most systems already for moderately-sized 3d problems, see also the
1684 * discussion in @ref step_18 "step-18". Instead, we first build temporary objects that use
1687 * BlockSparseMatrix objects; in a second step we then copy these objects
1688 * into objects of type BlockSparsityPattern. This is entirely analogous to
1689 * what we already did in @ref step_11 "step-11" and @ref step_18 "step-18". In particular, we make use of
1690 * the fact that we will never write into the @f$(1,1)@f$ block of the system
1691 * matrix and that this is the only block to be filled for the
1692 * preconditioner matrix.
1696 * All this is done inside new scopes, which means that the memory of
1697 * <code>dsp</code> will be released once the information has been copied to
1698 * <code>sparsity_pattern</code>.
1702 * BlockDynamicSparsityPattern dsp(dofs_per_block, dofs_per_block);
1704 * Table<2, DoFTools::Coupling> coupling(dim + 1, dim + 1);
1705 * for (unsigned int c = 0; c < dim + 1; ++c)
1706 * for (unsigned int d = 0; d < dim + 1; ++d)
1707 * if (!((c == dim) && (d == dim)))
1708 * coupling[c][d] = DoFTools::always;
1710 * coupling[c][d] = DoFTools::none;
1712 * DoFTools::make_sparsity_pattern(
1713 * dof_handler, coupling, dsp, constraints, false);
1715 * sparsity_pattern.copy_from(dsp);
1719 * BlockDynamicSparsityPattern preconditioner_dsp(dofs_per_block,
1722 * Table<2, DoFTools::Coupling> preconditioner_coupling(dim + 1, dim + 1);
1723 * for (unsigned int c = 0; c < dim + 1; ++c)
1724 * for (unsigned int d = 0; d < dim + 1; ++d)
1725 * if (((c == dim) && (d == dim)))
1726 * preconditioner_coupling[c][d] = DoFTools::always;
1728 * preconditioner_coupling[c][d] = DoFTools::none;
1730 * DoFTools::make_sparsity_pattern(dof_handler,
1731 * preconditioner_coupling,
1732 * preconditioner_dsp,
1736 * preconditioner_sparsity_pattern.copy_from(preconditioner_dsp);
1741 * Finally, the system matrix, the preconsitioner matrix, the solution and
1742 * the right hand side vector are created from the block structure similar
1743 * to the approach in @ref step_20 "step-20":
1746 * system_matrix.reinit(sparsity_pattern);
1747 * preconditioner_matrix.reinit(preconditioner_sparsity_pattern);
1749 * solution.reinit(dofs_per_block);
1750 * system_rhs.reinit(dofs_per_block);
1757 * <a name="StokesProblemassemble_system"></a>
1758 * <h4>StokesProblem::assemble_system</h4>
1762 * The assembly process follows the discussion in @ref step_20 "step-20" and in the
1763 * introduction. We use the well-known abbreviations for the data structures
1764 * that hold the local matrices, right hand side, and global numbering of the
1765 * degrees of freedom for the present cell.
1768 * template <int dim>
1769 * void StokesProblem<dim>::assemble_system()
1771 * system_matrix = 0;
1773 * preconditioner_matrix = 0;
1775 * QGauss<dim> quadrature_formula(degree + 2);
1777 * FEValues<dim> fe_values(fe,
1778 * quadrature_formula,
1779 * update_values | update_quadrature_points |
1780 * update_JxW_values | update_gradients);
1782 * const unsigned int dofs_per_cell = fe.n_dofs_per_cell();
1784 * const unsigned int n_q_points = quadrature_formula.size();
1786 * FullMatrix<double> local_matrix(dofs_per_cell, dofs_per_cell);
1787 * FullMatrix<double> local_preconditioner_matrix(dofs_per_cell,
1789 * Vector<double> local_rhs(dofs_per_cell);
1791 * std::vector<types::global_dof_index> local_dof_indices(dofs_per_cell);
1793 * const RightHandSide<dim> right_hand_side;
1794 * std::vector<Tensor<1, dim>> rhs_values(n_q_points, Tensor<1, dim>());
1798 * Next, we need two objects that work as extractors for the FEValues
1799 * object. Their use is explained in detail in the report on @ref
1803 * const FEValuesExtractors::Vector velocities(0);
1804 * const FEValuesExtractors::Scalar pressure(dim);
1808 * As an extension over @ref step_20 "step-20" and @ref step_21 "step-21", we include a few optimizations
1809 * that make assembly much faster for this particular problem. The
1810 * improvements are based on the observation that we do a few calculations
1811 * too many times when we do as in @ref step_20 "step-20": The symmetric gradient actually
1812 * has <code>dofs_per_cell</code> different values per quadrature point, but
1813 * we extract it <code>dofs_per_cell*dofs_per_cell</code> times from the
1814 * FEValues object - for both the loop over <code>i</code> and the inner
1815 * loop over <code>j</code>. In 3d, that means evaluating it @f$89^2=7921@f$
1816 * instead of @f$89@f$ times, a not insignificant difference.
1834 *
std::vector<SymmetricTensor<2, dim>>
symgrad_phi_u(dofs_per_cell);
1835 *
std::vector<double>
div_phi_u(dofs_per_cell);
1836 *
std::vector<Tensor<1, dim>>
phi_u(dofs_per_cell);
1837 *
std::vector<double>
phi_p(dofs_per_cell);
1839 *
for (
const auto &cell : dof_handler.active_cell_iterators())
1841 *
fe_values.
reinit(cell);
1849 *
for (
unsigned int q = 0;
q < n_q_points; ++
q)
1851 *
for (
unsigned int k = 0;
k < dofs_per_cell; ++
k)
1892 *
for (
unsigned int i = 0; i < dofs_per_cell; ++i)
1894 *
for (
unsigned int j = 0;
j <= i; ++
j)
1900 *
* fe_values.JxW(
q);
1904 *
* fe_values.JxW(
q);
1926 *
* fe_values.JxW(
q);
1940 *
of the local matrices.
1943 *
for (
unsigned int i = 0; i < dofs_per_cell; ++i)
1944 *
for (
unsigned int j = i + 1;
j < dofs_per_cell; ++
j)
1951 *
cell->get_dof_indices(local_dof_indices);
1954 *
local_dof_indices,
1958 *
local_dof_indices,
1964 *
Before we're going to solve this linear system, we generate a
1965 * preconditioner for the velocity-velocity matrix, i.e.,
1966 * <code>block(0,0)</code> in the system matrix. As mentioned above, this
1967 * depends on the spatial dimension. Since the two classes described by
1968 * the <code>InnerPreconditioner::type</code> alias have the same
1969 * interface, we do not have to do anything different whether we want to
1970 * use a sparse direct solver or an ILU:
1973 * std::cout << " Computing preconditioner..." << std::endl << std::flush;
1975 * A_preconditioner =
1976 * std::make_shared<typename InnerPreconditioner<dim>::type>();
1977 * A_preconditioner->initialize(
1978 * system_matrix.block(0, 0),
1979 * typename InnerPreconditioner<dim>::type::AdditionalData());
1987 * <a name="StokesProblemsolve"></a>
1988 * <h4>StokesProblem::solve</h4>
1992 * After the discussion in the introduction and the definition of the
1993 * respective classes above, the implementation of the <code>solve</code>
1994 * function is rather straight-forward and done in a similar way as in
1995 * @ref step_20 "step-20". To start with, we need an object of the
1996 * <code>InverseMatrix</code> class that represents the inverse of the
1997 * matrix A. As described in the introduction, the inverse is generated with
1998 * the help of an inner preconditioner of type
1999 * <code>InnerPreconditioner::type</code>.
2002 * template <int dim>
2003 * void StokesProblem<dim>::solve()
2005 * const InverseMatrix<SparseMatrix<double>,
2006 * typename InnerPreconditioner<dim>::type>
2007 * A_inverse(system_matrix.block(0, 0), *A_preconditioner);
2008 * Vector<double> tmp(solution.block(0).size());
2012 * This is as in @ref step_20 "step-20". We generate the right hand side @f$B A^{-1} F - G@f$
2013 * for the Schur complement and an object that represents the respective
2014 * linear operation @f$B A^{-1} B^T@f$, now with a template parameter
2015 * indicating the preconditioner - in accordance with the definition of
2020 * Vector<double> schur_rhs(solution.block(1).size());
2021 * A_inverse.vmult(tmp, system_rhs.block(0));
2022 * system_matrix.block(1, 0).vmult(schur_rhs, tmp);
2023 * schur_rhs -= system_rhs.block(1);
2025 * SchurComplement<typename InnerPreconditioner<dim>::type> schur_complement(
2026 * system_matrix, A_inverse);
2030 * The usual control structures for the solver call are created...
2033 * SolverControl solver_control(solution.block(1).size(),
2034 * 1e-6 * schur_rhs.l2_norm());
2035 * SolverCG<Vector<double>> cg(solver_control);
2039 * Now to the preconditioner to the Schur complement. As explained in
2040 * the introduction, the preconditioning is done by a @ref GlossMassMatrix "mass matrix" in the
2041 * pressure variable.
2045 * Actually, the solver needs to have the preconditioner in the form
2046 * @f$P^{-1}@f$, so we need to create an inverse operation. Once again, we
2047 * use an object of the class <code>InverseMatrix</code>, which
2048 * implements the <code>vmult</code> operation that is needed by the
2049 * solver. In this case, we have to invert the pressure mass matrix. As
2050 * it already turned out in earlier tutorial programs, the inversion of
2051 * a mass matrix is a rather cheap and straight-forward operation
2052 * (compared to, e.g., a Laplace matrix). The CG method with ILU
2053 * preconditioning converges in 5-10 steps, independently on the mesh
2054 * size. This is precisely what we do here: We choose another ILU
2055 * preconditioner and take it along to the InverseMatrix object via the
2056 * corresponding template parameter. A CG solver is then called within
2057 * the vmult operation of the inverse matrix.
2061 * An alternative that is cheaper to build, but needs more iterations
2062 * afterwards, would be to choose a SSOR preconditioner with factor
2063 * 1.2. It needs about twice the number of iterations, but the costs for
2064 * its generation are almost negligible.
2067 * SparseILU<double> preconditioner;
2068 * preconditioner.initialize(preconditioner_matrix.block(1, 1),
2069 * SparseILU<double>::AdditionalData());
2071 * InverseMatrix<SparseMatrix<double>, SparseILU<double>> m_inverse(
2072 * preconditioner_matrix.block(1, 1), preconditioner);
2076 * With the Schur complement and an efficient preconditioner at hand, we
2077 * can solve the respective equation for the pressure (i.e. block 0 in
2078 * the solution vector) in the usual way:
2081 * cg.solve(schur_complement, solution.block(1), schur_rhs, m_inverse);
2085 * After this first solution step, the hanging node constraints have to
2086 * be distributed to the solution in order to achieve a consistent
2090 * constraints.distribute(solution);
2092 * std::cout << " " << solver_control.last_step()
2093 * << " outer CG Schur complement iterations for pressure"
2099 * As in @ref step_20 "step-20", we finally need to solve for the velocity equation where
2100 * we plug in the solution to the pressure equation. This involves only
2101 * objects we already know - so we simply multiply @f$p@f$ by @f$B^T@f$, subtract
2102 * the right hand side and multiply by the inverse of @f$A@f$. At the end, we
2103 * need to distribute the constraints from hanging nodes in order to
2104 * obtain a consistent flow field:
2108 * system_matrix.block(0, 1).vmult(tmp, solution.block(1));
2110 * tmp += system_rhs.block(0);
2112 * A_inverse.vmult(solution.block(0), tmp);
2114 * constraints.distribute(solution);
2122 * <a name="StokesProblemoutput_results"></a>
2123 * <h4>StokesProblem::output_results</h4>
2127 * The next function generates graphical output. In this example, we are
2128 * going to use the VTK file format. We attach names to the individual
2129 * variables in the problem: <code>velocity</code> to the <code>dim</code>
2130 * components of velocity and <code>pressure</code> to the pressure.
2134 * Not all visualization programs have the ability to group individual
2135 * vector components into a vector to provide vector plots; in particular,
2136 * this holds for some VTK-based visualization programs. In this case, the
2137 * logical grouping of components into vectors should already be described
2138 * in the file containing the data. In other words, what we need to do is
2139 * provide our output writers with a way to know which of the components of
2140 * the finite element logically form a vector (with @f$d@f$ components in @f$d@f$
2141 * space dimensions) rather than letting them assume that we simply have a
2142 * bunch of scalar fields. This is achieved using the members of the
2143 * <code>DataComponentInterpretation</code> namespace: as with the filename,
2144 * we create a vector in which the first <code>dim</code> components refer
2145 * to the velocities and are given the tag
2146 * DataComponentInterpretation::component_is_part_of_vector; we
2147 * finally push one tag
2148 * DataComponentInterpretation::component_is_scalar to describe
2149 * the grouping of the pressure variable.
2153 * The rest of the function is then the same as in @ref step_20 "step-20".
2156 * template <int dim>
2158 * StokesProblem<dim>::output_results(const unsigned int refinement_cycle) const
2160 * std::vector<std::string> solution_names(dim, "velocity");
2161 * solution_names.emplace_back("pressure");
2163 * std::vector<DataComponentInterpretation::DataComponentInterpretation>
2164 * data_component_interpretation(
2165 * dim, DataComponentInterpretation::component_is_part_of_vector);
2166 * data_component_interpretation.push_back(
2167 * DataComponentInterpretation::component_is_scalar);
2169 * DataOut<dim> data_out;
2170 * data_out.attach_dof_handler(dof_handler);
2171 * data_out.add_data_vector(solution,
2173 * DataOut<dim>::type_dof_data,
2174 * data_component_interpretation);
2175 * data_out.build_patches();
2177 * std::ofstream output(
2178 * "solution-" + Utilities::int_to_string(refinement_cycle, 2) + ".vtk");
2179 * data_out.write_vtk(output);
2186 * <a name="StokesProblemrefine_mesh"></a>
2187 * <h4>StokesProblem::refine_mesh</h4>
2191 * This is the last interesting function of the <code>StokesProblem</code>
2192 * class. As indicated by its name, it takes the solution to the problem
2193 * and refines the mesh where this is needed. The procedure is the same as
2194 * in the respective step in @ref step_6 "step-6", with the exception that we base the
2195 * refinement only on the change in pressure, i.e., we call the Kelly error
2196 * estimator with a mask object of type ComponentMask that selects the
2197 * single scalar component for the pressure that we are interested in (we
2198 * get such a mask from the finite element class by specifying the component
2199 * we want). Additionally, we do not coarsen the grid again:
2202 * template <int dim>
2203 * void StokesProblem<dim>::refine_mesh()
2205 * Vector<float> estimated_error_per_cell(triangulation.n_active_cells());
2207 * const FEValuesExtractors::Scalar pressure(dim);
2208 * KellyErrorEstimator<dim>::estimate(
2210 * QGauss<dim - 1>(degree + 1),
2211 * std::map<types::boundary_id, const Function<dim> *>(),
2213 * estimated_error_per_cell,
2214 * fe.component_mask(pressure));
2216 * GridRefinement::refine_and_coarsen_fixed_number(triangulation,
2217 * estimated_error_per_cell,
2220 * triangulation.execute_coarsening_and_refinement();
2227 * <a name="StokesProblemrun"></a>
2228 * <h4>StokesProblem::run</h4>
2232 * The last step in the Stokes class is, as usual, the function that
2233 * generates the initial grid and calls the other functions in the
2238 * We start off with a rectangle of size @f$4 \times 1@f$ (in 2d) or @f$4 \times 1
2239 * \times 1@f$ (in 3d), placed in @f$R^2/R^3@f$ as @f$(-2,2)\times(-1,0)@f$ or
2240 * @f$(-2,2)\times(0,1)\times(-1,0)@f$, respectively. It is natural to start
2241 * with equal mesh size in each direction, so we subdivide the initial
2242 * rectangle four times in the first coordinate direction. To limit the
2243 * scope of the variables involved in the creation of the mesh to the range
2244 * where we actually need them, we put the entire block between a pair of
2248 * template <int dim>
2249 * void StokesProblem<dim>::run()
2252 * std::vector<unsigned int> subdivisions(dim, 1);
2253 * subdivisions[0] = 4;
2255 * const Point<dim> bottom_left = (dim == 2 ?
2256 * Point<dim>(-2, -1) : // 2d case
2257 * Point<dim>(-2, 0, -1)); // 3d case
2259 * const Point<dim> top_right = (dim == 2 ?
2260 * Point<dim>(2, 0) : // 2d case
2261 * Point<dim>(2, 1, 0)); // 3d case
2263 * GridGenerator::subdivided_hyper_rectangle(triangulation,
2271 * A boundary indicator of 1 is set to all boundaries that are subject to
2272 * Dirichlet boundary conditions, i.e. to faces that are located at 0 in
2273 * the last coordinate direction. See the example description above for
2277 * for (const auto &cell : triangulation.active_cell_iterators())
2278 * for (const auto &face : cell->face_iterators())
2279 * if (face->center()[dim - 1] == 0)
2280 * face->set_all_boundary_ids(1);
2285 * We then apply an initial refinement before solving for the first
2286 * time. In 3d, there are going to be more degrees of freedom, so we
2287 * refine less there:
2290 * triangulation.refine_global(4 - dim);
2294 * As first seen in @ref step_6 "step-6", we cycle over the different refinement levels
2295 * and refine (except for the first cycle), setup the degrees of freedom
2296 * and matrices, assemble, solve and create output:
2299 * for (unsigned int refinement_cycle = 0; refinement_cycle < 6;
2300 * ++refinement_cycle)
2302 * std::cout << "Refinement cycle " << refinement_cycle << std::endl;
2304 * if (refinement_cycle > 0)
2309 * std::cout << " Assembling..." << std::endl << std::flush;
2310 * assemble_system();
2312 * std::cout << " Solving..." << std::flush;
2315 * output_results(refinement_cycle);
2317 * std::cout << std::endl;
2320 * } // namespace Step22
2326 * <a name="Thecodemaincodefunction"></a>
2327 * <h3>The <code>main</code> function</h3>
2331 * The main function is the same as in @ref step_20 "step-20". We pass the element degree as
2332 * a parameter and choose the space dimension at the well-known template slot.
2339 * using namespace Step22;
2341 * StokesProblem<2> flow_problem(1);
2342 * flow_problem.run();
2344 * catch (std::exception &exc)
2346 * std::cerr << std::endl
2348 * << "----------------------------------------------------"
2350 * std::cerr << "Exception on processing: " << std::endl
2351 * << exc.what() << std::endl
2352 * << "Aborting!" << std::endl
2353 * << "----------------------------------------------------"
2360 * std::cerr << std::endl
2362 * << "----------------------------------------------------"
2364 * std::cerr << "Unknown exception!" << std::endl
2365 * << "Aborting!" << std::endl
2366 * << "----------------------------------------------------"
2374<a name="Results"></a>
2375<a name="Results"></a><h1>Results</h1>
2378<a name="Outputoftheprogramandgraphicalvisualization"></a><h3>Output of the program and graphical visualization</h3>
2381<a name="2Dcalculations"></a><h4>2D calculations</h4>
2384Running the program with the space dimension set to 2 in the <code>main</code>
2385function yields the following output (in "release mode",
2386See also <a href="http://www.math.colostate.edu/~bangerth/videos.676.18.html">video lecture 18</a>.):
2388examples/step-22> make run
2390 Number of active cells: 64
2391 Number of degrees of freedom: 679 (594+85)
2393 Computing preconditioner...
2394 Solving... 11 outer CG Schur complement iterations for pressure
2397 Number of active cells: 160
2398 Number of degrees of freedom: 1683 (1482+201)
2400 Computing preconditioner...
2401 Solving... 11 outer CG Schur complement iterations for pressure
2404 Number of active cells: 376
2405 Number of degrees of freedom: 3813 (3370+443)
2407 Computing preconditioner...
2408 Solving... 11 outer CG Schur complement iterations for pressure
2411 Number of active cells: 880
2412 Number of degrees of freedom: 8723 (7722+1001)
2414 Computing preconditioner...
2415 Solving... 11 outer CG Schur complement iterations for pressure
2418 Number of active cells: 2008
2419 Number of degrees of freedom: 19383 (17186+2197)
2421 Computing preconditioner...
2422 Solving... 11 outer CG Schur complement iterations for pressure
2425 Number of active cells: 4288
2426 Number of degrees of freedom: 40855 (36250+4605)
2428 Computing preconditioner...
2429 Solving... 11 outer CG Schur complement iterations for pressure
2432The entire computation above takes about 2 seconds on a reasonably
2433quick (for 2015 standards) machine.
2435What we see immediately from this is that the number of (outer)
2436iterations does not increase as we refine the mesh. This confirms the
2437statement in the introduction that preconditioning the Schur
2438complement with the mass matrix indeed yields a matrix spectrally
2439equivalent to the identity matrix (i.e. with eigenvalues bounded above
2440and below independently of the mesh size or the relative sizes of
2441cells). In other words, the mass matrix and the Schur complement are
2442spectrally equivalent.
2444In the images below, we show the grids for the first six refinement
2445steps in the program. Observe how the grid is refined in regions
2446where the solution rapidly changes: On the upper boundary, we have
2447Dirichlet boundary conditions that are -1 in the left half of the line
2448and 1 in the right one, so there is an abrupt change at @f$x=0@f$. Likewise,
2449there are changes from Dirichlet to Neumann data in the two upper
2450corners, so there is need for refinement there as well:
2452<table width="60%" align="center">
2455 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-0.png" alt="">
2458 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-1.png" alt="">
2463 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-2.png" alt="">
2466 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-3.png" alt="">
2471 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-4.png" alt="">
2474 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-5.png" alt="">
2479Finally, following is a plot of the flow field. It shows fluid
2480transported along with the moving upper boundary and being replaced by
2481material coming from below:
2483<img src="https://www.dealii.org/images/steps/developer/step-22.2d.solution.png" alt="">
2485This plot uses the capability of VTK-based visualization programs (in
2486this case of VisIt) to show vector data; this is the result of us
2487declaring the velocity components of the finite element in use to be a
2488set of vector components, rather than independent scalar components in
2489the <code>StokesProblem@<dim@>::%output_results</code> function of this
2494<a name="3Dcalculations"></a><h4>3D calculations</h4>
2497In 3d, the screen output of the program looks like this:
2501 Number of active cells: 32
2502 Number of degrees of freedom: 1356 (1275+81)
2504 Computing preconditioner...
2505 Solving... 13 outer CG Schur complement iterations for pressure.
2508 Number of active cells: 144
2509 Number of degrees of freedom: 5088 (4827+261)
2511 Computing preconditioner...
2512 Solving... 14 outer CG Schur complement iterations for pressure.
2515 Number of active cells: 704
2516 Number of degrees of freedom: 22406 (21351+1055)
2518 Computing preconditioner...
2519 Solving... 14 outer CG Schur complement iterations for pressure.
2522 Number of active cells: 3168
2523 Number of degrees of freedom: 93176 (89043+4133)
2525 Computing preconditioner...
2526 Solving... 15 outer CG Schur complement iterations for pressure.
2529 Number of active cells: 11456
2530 Number of degrees of freedom: 327808 (313659+14149)
2532 Computing preconditioner...
2533 Solving... 15 outer CG Schur complement iterations for pressure.
2536 Number of active cells: 45056
2537 Number of degrees of freedom: 1254464 (1201371+53093)
2539 Computing preconditioner...
2540 Solving... 14 outer CG Schur complement iterations for pressure.
2543Again, we see that the number of outer iterations does not increase as
2544we refine the mesh. Nevertheless, the compute time increases
2545significantly: for each of the iterations above separately, it takes about
25460.14 seconds, 0.63 seconds, 4.8 seconds, 35 seconds, 2 minutes and 33 seconds,
2547and 13 minutes and 12 seconds. This overall superlinear (in the number of
2548unknowns) increase in runtime is due to the fact that our inner solver is not
2549@f${\cal O}(N)@f$: a simple experiment shows that as we keep refining the mesh, the
2550average number of ILU-preconditioned CG iterations to invert the
2551velocity-velocity block @f$A@f$ increases.
2553We will address the question of how possibly to improve our solver <a
2554href="#improved-solver">below</a>.
2556As for the graphical output, the grids generated during the solution
2559<table width="60%" align="center">
2562 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-0.png" alt="">
2565 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-1.png" alt="">
2570 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-2.png" alt="">
2573 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-3.png" alt="">
2578 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-4.png" alt="">
2581 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-5.png" alt="">
2586Again, they show essentially the location of singularities introduced
2587by boundary conditions. The vector field computed makes for an
2590<img src="https://www.dealii.org/images/steps/developer/step-22.3d.solution.png" alt="">
2592The isocontours shown here as well are those of the pressure
2593variable, showing the singularity at the point of discontinuous
2594velocity boundary conditions.
2598<a name="Sparsitypattern"></a><h3>Sparsity pattern</h3>
2601As explained during the generation of the sparsity pattern, it is
2602important to have the numbering of degrees of freedom in mind when
2603using preconditioners like incomplete LU decompositions. This is most
2604conveniently visualized using the distribution of nonzero elements in
2605the @ref GlossStiffnessMatrix "stiffness matrix".
2614<
img src=
"https://www.dealii.org/images/steps/developer/step-22.2d.sparsity-nor.png" alt=
"">
2620 std::ofstream out (
"sparsity_pattern.gpl");
2621 sparsity_pattern.print_gnuplot(out);
2635<
img src=
"https://www.dealii.org/images/steps/developer/step-22.2d.sparsity-ren.png" alt=
"">
2732<a name="block-
schur"></a>
2754 A^{-1} & 0 \\ S^{-1}
B A^{-1} & -S^{-1}
2764 A^{-1} & 0 \\ S^{-1}
B A^{-1} & -S^{-1}
2770 I & A^{-1}
B^T \\ 0 &
I
2789complement preconditioner.
The <code>vmult</code> operation
for block vectors
2793template <
class PreconditionerA,
class PreconditionerMp>
2814template <
class PreconditionerA,
class PreconditionerMp>
2824 tmp (S.block(1,1).m())
2829template <
class PreconditionerA,
class PreconditionerMp>
2839 system_matrix->block(1,0).residual(tmp, dst.block(0), src.block(1));
2863<a
href=
"http://en.wikipedia.org/wiki/GMRES#Comparison_with_other_solvers">
Wikipedia</a>'s
2864article on the GMRES method gives a comparative presentation.
2865A more comprehensive and well-founded comparison can be read e.g. in the book by
2866J.W. Demmel (Applied Numerical Linear Algebra, SIAM, 1997, section 6.6.6).
2868For our specific problem with the ILU preconditioner for @f$A@f$, we certainly need
2869to perform hundreds of iterations on the block system for large problem sizes
2877basis
to 30 vectors
by default.
2885(one iteration uses only results from one preceding step and
2886not all the steps as GMRES). Besides the fact the BiCGStab is more expensive per
2887step since two matrix-vector products are needed (compared to one for
2888CG or GMRES), there is one main reason which makes BiCGStab not appropriate for
2889this problem: The preconditioner applies the inverse of the pressure
2890mass matrix by using the InverseMatrix class. Since the application of the
2891inverse matrix to a vector is done only in approximative way (an exact inverse
2892is too expensive), this will also affect the solver. In the case of BiCGStab,
2893the Krylov vectors will not be orthogonal due to that perturbation. While
2894this is uncritical for a small number of steps (up to about 50), it ruins the
2895performance of the solver when these perturbations have grown to a significant
2896magnitude in the coarse of iterations.
2898We did some experiments with BiCGStab and found it to
2899be faster than GMRES up to refinement cycle 3 (in 3D), but it became very slow
2900for cycles 4 and 5 (even slower than the original Schur complement), so the
2901solver is useless in this situation. Choosing a sharper tolerance for the
2902inverse matrix class (<code>1e-10*src.l2_norm()</code> instead of
2903<code>1e-6*src.l2_norm()</code>) made BiCGStab perform well also for cycle 4,
2904but did not change the failure on the very large problems.
2906GMRES is of course also effected by the approximate inverses, but it is not as
2907sensitive to orthogonality and retains a relatively good performance also for
2908large sizes, see the results below.
2910With this said, we turn to the realization of the solver call with GMRES with
2911@f$k=100@f$ temporary vectors:
2914 const SparseMatrix<double> &pressure_mass_matrix
2915 = preconditioner_matrix.block(1,1);
2916 SparseILU<double> pmass_preconditioner;
2917 pmass_preconditioner.initialize (pressure_mass_matrix,
2918 SparseILU<double>::AdditionalData());
2920 InverseMatrix<SparseMatrix<double>,SparseILU<double> >
2921 m_inverse (pressure_mass_matrix, pmass_preconditioner);
2923 BlockSchurPreconditioner<typename InnerPreconditioner<dim>::type,
2925 preconditioner (system_matrix, m_inverse, *A_preconditioner);
2927 SolverControl solver_control (system_matrix.m(),
2928 1e-6*system_rhs.l2_norm());
2929 GrowingVectorMemory<BlockVector<double> > vector_memory;
2930 SolverGMRES<BlockVector<double> >::AdditionalData gmres_data;
2931 gmres_data.max_n_tmp_vectors = 100;
2933 SolverGMRES<BlockVector<double> > gmres(solver_control, vector_memory,
2936 gmres.solve(system_matrix, solution, system_rhs,
2939 constraints.distribute (solution);
2942 << solver_control.last_step()
2943 << " block GMRES iterations";
2946Obviously, one needs to add the include file @ref SolverGMRES
2947"<lac/solver_gmres.h>" in order to make this run.
2948We call the solver with a BlockVector template in order to enable
2949GMRES to operate on block vectors and matrices.
2950Note also that we need to set the (1,1) block in the system
2951matrix to zero (we saved the pressure mass matrix there which is not part of the
2952problem) after we copied the information to another matrix.
2954Using the Timer class, we collect some statistics that compare the runtime
2955of the block solver with the one from the problem implementation above.
2956Besides the solution with the two options we also check if the solutions
2957of the two variants are close to each other (i.e. this solver gives indeed the
2958same solution as we had before) and calculate the infinity
2959norm of the vector difference.
2965 Number
of degrees
of freedom: 679 (594+85) [0.00162792 s]
2967 Computing preconditioner... [0.0025959 s]
2975 Number
of degrees
of freedom: 1683 (1482+201) [0.00345707 s]
2977 Computing preconditioner... [0.00605702 s]
2985 Number
of degrees
of freedom: 3813 (3370+443) [0.00729299 s]
2987 Computing preconditioner... [0.0167508 s]
2995 Number
of degrees
of freedom: 8723 (7722+1001) [0.017709 s]
2997 Computing preconditioner... [0.0435679 s]
3005 Number
of degrees
of freedom: 19383 (17186+2197) [0.039988 s]
3007 Computing preconditioner... [0.118314 s]
3015 Number
of degrees
of freedom: 40855 (36250+4605) [0.0880702 s]
3017 Computing preconditioner... [0.278339 s]
3036 Number
of degrees
of freedom: 1356 (1275+81) [0.00845218 s]
3038 Computing preconditioner... [0.00712395 s]
3046 Number
of degrees
of freedom: 5088 (4827+261) [0.0346942 s]
3048 Computing preconditioner... [0.0465031 s]
3056 Number
of degrees
of freedom: 22406 (21351+1055) [0.175669 s]
3058 Computing preconditioner... [0.286435 s]
3066 Number
of degrees
of freedom: 93176 (89043+4133) [0.790985 s]
3076 Number
of degrees
of freedom: 327808 (313659+14149) [3.44995 s]
3086 Number
of degrees
of freedom: 1254464 (1201371+53093) [19.6795 s]
3102<a name=
"Combiningtheblockpreconditionerandmultigrid"></a><
h5>
Combining the block preconditioner
and multigrid</
h5>
3112<a name=
"Noblockmatricesandvectors"></a><
h5>
No block matrices
and vectors</
h5>
3141 ExcIndexRange (component, 0, this->n_components));
3143 const double x_offset = std::atan(p[1]*4)/3;
3160 Point<dim>(-2,-2,-1));
3171<table width=
"60%" align=
"center">
3174 <
img src=
"https://www.dealii.org/images/steps/developer/step-22.3d-extension.png" alt=
"">
3177 <
img src=
"https://www.dealii.org/images/steps/developer/step-22.3d-grid-extension.png" alt=
"">
3183<a name=
"PlainProg"></a>
value_type * data() const noexcept
void reinit(value_type *starting_element, const std::size_t n_elements)
__global__ void set(Number *val, const Number s, const size_type N)
#define Assert(cond, exc)
void loop(ITERATOR begin, std_cxx20::type_identity_t< ITERATOR > end, DOFINFO &dinfo, INFOBOX &info, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &cell_worker, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &boundary_worker, const std::function< void(DOFINFO &, DOFINFO &, typename INFOBOX::CellInfo &, typename INFOBOX::CellInfo &)> &face_worker, ASSEMBLER &assembler, const LoopControl &lctrl=LoopControl())
void make_hanging_node_constraints(const DoFHandler< dim, spacedim > &dof_handler, AffineConstraints< number > &constraints)
void make_sparsity_pattern(const DoFHandler< dim, spacedim > &dof_handler, SparsityPatternBase &sparsity_pattern, const AffineConstraints< number > &constraints=AffineConstraints< number >(), const bool keep_constrained_dofs=true, const types::subdomain_id subdomain_id=numbers::invalid_subdomain_id)
void component_wise(DoFHandler< dim, spacedim > &dof_handler, const std::vector< unsigned int > &target_component=std::vector< unsigned int >())
void Cuthill_McKee(DoFHandler< dim, spacedim > &dof_handler, const bool reversed_numbering=false, const bool use_constraints=false, const std::vector< types::global_dof_index > &starting_indices=std::vector< types::global_dof_index >())
void subdivided_hyper_rectangle(Triangulation< dim, spacedim > &tria, const std::vector< unsigned int > &repetitions, const Point< dim > &p1, const Point< dim > &p2, const bool colorize=false)
@ matrix
Contents is actually a matrix.
@ symmetric
Matrix is symmetric.
@ diagonal
Matrix is diagonal.
Point< spacedim > point(const gp_Pnt &p, const double tolerance=1e-10)
SymmetricTensor< 2, dim, Number > e(const Tensor< 2, dim, Number > &F)
SymmetricTensor< 2, dim, Number > d(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
constexpr ReturnType< rank, T >::value_type & extract(T &t, const ArrayType &indices)
int(&) functions(const void *v1, const void *v2)
const ::parallel::distributed::Triangulation< dim, spacedim > * triangulation
DEAL_II_HOST constexpr SymmetricTensor< 2, dim, Number > invert(const SymmetricTensor< 2, dim, Number > &)
std::array< Number, 1 > eigenvalues(const SymmetricTensor< 2, 1, Number > &T)