Reference documentation for deal.II version 9.5.1
\(\newcommand{\dealvcentcolon}{\mathrel{\mathop{:}}}\) \(\newcommand{\dealcoloneq}{\dealvcentcolon\mathrel{\mkern-1.2mu}=}\) \(\newcommand{\jump}[1]{\left[\!\left[ #1 \right]\!\right]}\) \(\newcommand{\average}[1]{\left\{\!\left\{ #1 \right\}\!\right\}}\)
Loading...
Searching...
No Matches
step-32.h
Go to the documentation of this file.
1
1510 *   constexpr double kappa = 1e-6; /* m^2 / s */
1511 *   constexpr double reference_density = 3300; /* kg / m^3 */
1512 *   constexpr double reference_temperature = 293; /* K */
1513 *   constexpr double expansion_coefficient = 2e-5; /* 1/K */
1514 *   constexpr double specific_heat = 1250; /* J / K / kg */
1515 *   constexpr double radiogenic_heating = 7.4e-12; /* W / kg */
1516 *  
1517 *  
1518 *   constexpr double R0 = 6371000. - 2890000.; /* m */
1519 *   constexpr double R1 = 6371000. - 35000.; /* m */
1520 *  
1521 *   constexpr double T0 = 4000 + 273; /* K */
1522 *   constexpr double T1 = 700 + 273; /* K */
1523 *  
1524 *  
1525 * @endcode
1526 *
1528 * as a function of temperature, the gravity vector, and the initial
1529 * values for the temperature. Again, all of these (along with the values
1530 * they compute) are discussed in the introduction:
1531 *
1532 * @code
1533 *   double density(const double temperature)
1534 *   {
1535 *   return (
1538 *   }
1539 *  
1540 *  
1541 *   template <int dim>
1543 *   {
1544 *   const double r = p.norm();
1545 *   return -(1.245e-6 * r + 7.714e13 / r / r) * p / r;
1546 *   }
1547 *  
1548 *  
1549 *  
1550 *   template <int dim>
1551 *   class TemperatureInitialValues : public Function<dim>
1552 *   {
1553 *   public:
1555 *   : Function<dim>(1)
1556 *   {}
1557 *  
1558 *   virtual double value(const Point<dim> & p,
1559 *   const unsigned int component = 0) const override;
1560 *  
1561 *   virtual void vector_value(const Point<dim> &p,
1562 *   Vector<double> & value) const override;
1563 *   };
1564 *  
1565 *  
1566 *  
1567 *   template <int dim>
1569 *   const unsigned int) const
1570 *   {
1571 *   const double r = p.norm();
1572 *   const double h = R1 - R0;
1573 *  
1574 *   const double s = (r - R0) / h;
1575 *   const double q =
1576 *   (dim == 3) ? std::max(0.0, cos(numbers::PI * abs(p(2) / R1))) : 1.0;
1577 *   const double phi = std::atan2(p(0), p(1));
1578 *   const double tau = s + 0.2 * s * (1 - s) * std::sin(6 * phi) * q;
1579 *  
1580 *   return T0 * (1.0 - tau) + T1 * tau;
1581 *   }
1582 *  
1583 *  
1584 *   template <int dim>
1585 *   void
1587 *   Vector<double> & values) const
1588 *   {
1589 *   for (unsigned int c = 0; c < this->n_components; ++c)
1591 *   }
1592 *  
1593 *  
1594 * @endcode
1595 *
1598 * conservation equations. The scaling factor is @f$\frac{\eta}{L}@f$ where
1599 * @f$L@f$ was a typical length scale. By experimenting it turns out that a
1600 * good length scale is the diameter of plumes, which is around 10 km:
1601 *
1602 * @code
1603 *   constexpr double pressure_scaling = eta / 10000;
1604 *  
1605 * @endcode
1606 *
1607 * The final number in this namespace is a constant that denotes the
1608 * number of seconds per (average, tropical) year. We use this only when
1611 * times in seconds yields numbers that one can't relate to reality, and
1612 * so we convert to years using the factor defined here:
1613 *
1614 * @code
1615 *   const double year_in_seconds = 60 * 60 * 24 * 365.2425;
1616 *  
1617 *   } // namespace EquationData
1618 *  
1619 *  
1620 *  
1621 * @endcode
1622 *
1623 *
1624 * <a name="PreconditioningtheStokessystem"></a>
1625 * <h3>Preconditioning the Stokes system</h3>
1626 *
1627
1628 *
1629 * This namespace implements the preconditioner. As discussed in the
1630 * introduction, this preconditioner differs in a number of key portions
1631 * from the one used in @ref step_31 "step-31". Specifically, it is a right preconditioner,
1632 * implementing the matrix
1633 * @f{align*}
1634 * \left(\begin{array}{cc}A^{-1} & B^T
1635 * \\0 & S^{-1}
1636 * \end{array}\right)
1637 * @f}
1638 * where the two inverse matrix operations
1639 * are approximated by linear solvers or, if the right flag is given to the
1640 * constructor of this class, by a single AMG V-cycle for the velocity
1641 * block. The three code blocks of the <code>vmult</code> function implement
1642 * the multiplications with the three blocks of this preconditioner matrix
1643 * and should be self explanatory if you have read through @ref step_31 "step-31" or the
1644 * discussion of composing solvers in @ref step_20 "step-20".
1645 *
1646 * @code
1647 *   namespace LinearSolvers
1648 *   {
1649 *   template <class PreconditionerTypeA, class PreconditionerTypeMp>
1650 *   class BlockSchurPreconditioner : public Subscriptor
1651 *   {
1652 *   public:
1653 *   BlockSchurPreconditioner(const TrilinosWrappers::BlockSparseMatrix &S,
1654 *   const TrilinosWrappers::BlockSparseMatrix &Spre,
1655 *   const PreconditionerTypeMp &Mppreconditioner,
1656 *   const PreconditionerTypeA & Apreconditioner,
1657 *   const bool do_solve_A)
1658 *   : stokes_matrix(&S)
1659 *   , stokes_preconditioner_matrix(&Spre)
1660 *   , mp_preconditioner(Mppreconditioner)
1661 *   , a_preconditioner(Apreconditioner)
1662 *   , do_solve_A(do_solve_A)
1663 *   {}
1664 *  
1665 *   void vmult(TrilinosWrappers::MPI::BlockVector & dst,
1666 *   const TrilinosWrappers::MPI::BlockVector &src) const
1667 *   {
1668 *   TrilinosWrappers::MPI::Vector utmp(src.block(0));
1669 *  
1670 *   {
1671 *   SolverControl solver_control(5000, 1e-6 * src.block(1).l2_norm());
1672 *  
1673 *   SolverCG<TrilinosWrappers::MPI::Vector> solver(solver_control);
1674 *  
1675 *   solver.solve(stokes_preconditioner_matrix->block(1, 1),
1676 *   dst.block(1),
1677 *   src.block(1),
1678 *   mp_preconditioner);
1679 *  
1680 *   dst.block(1) *= -1.0;
1681 *   }
1682 *  
1683 *   {
1684 *   stokes_matrix->block(0, 1).vmult(utmp, dst.block(1));
1685 *   utmp *= -1.0;
1686 *   utmp.add(src.block(0));
1687 *   }
1688 *  
1689 *   if (do_solve_A == true)
1690 *   {
1691 *   SolverControl solver_control(5000, utmp.l2_norm() * 1e-2);
1692 *   TrilinosWrappers::SolverCG solver(solver_control);
1693 *   solver.solve(stokes_matrix->block(0, 0),
1694 *   dst.block(0),
1695 *   utmp,
1696 *   a_preconditioner);
1697 *   }
1698 *   else
1699 *   a_preconditioner.vmult(dst.block(0), utmp);
1700 *   }
1701 *  
1702 *   private:
1703 *   const SmartPointer<const TrilinosWrappers::BlockSparseMatrix>
1704 *   stokes_matrix;
1705 *   const SmartPointer<const TrilinosWrappers::BlockSparseMatrix>
1706 *   stokes_preconditioner_matrix;
1707 *   const PreconditionerTypeMp &mp_preconditioner;
1708 *   const PreconditionerTypeA & a_preconditioner;
1709 *   const bool do_solve_A;
1710 *   };
1711 *   } // namespace LinearSolvers
1712 *  
1713 *  
1714 *  
1715 * @endcode
1716 *
1717 *
1718 * <a name="Definitionofassemblydatastructures"></a>
1719 * <h3>Definition of assembly data structures</h3>
1720 *
1721
1722 *
1723 * As described in the introduction, we will use the WorkStream mechanism
1724 * discussed in the @ref threads module to parallelize operations among the
1725 * processors of a single machine. The WorkStream class requires that data
1726 * is passed around in two kinds of data structures, one for scratch data
1727 * and one to pass data from the assembly function to the function that
1728 * copies local contributions into global objects.
1729 *
1730
1731 *
1732 * The following namespace (and the two sub-namespaces) contains a
1733 * collection of data structures that serve this purpose, one pair for each
1734 * of the four operations discussed in the introduction that we will want to
1735 * parallelize. Each assembly routine gets two sets of data: a Scratch array
1736 * that collects all the classes and arrays that are used for the
1737 * calculation of the cell contribution, and a CopyData array that keeps
1738 * local matrices and vectors which will be written into the global
1739 * matrix. Whereas CopyData is a container for the final data that is
1740 * written into the global matrices and vector (and, thus, absolutely
1741 * necessary), the Scratch arrays are merely there for performance reasons
1742 * &mdash; it would be much more expensive to set up a FEValues object on
1743 * each cell, than creating it only once and updating some derivative data.
1744 *
1745
1746 *
1747 * @ref step_31 "step-31" had four assembly routines: One for the preconditioner matrix of
1748 * the Stokes system, one for the Stokes matrix and right hand side, one for
1749 * the temperature matrices and one for the right hand side of the
1750 * temperature equation. We here organize the scratch arrays and CopyData
1751 * objects for each of those four assembly components using a
1752 * <code>struct</code> environment (since we consider these as temporary
1753 * objects we pass around, rather than classes that implement functionality
1754 * of their own, though this is a more subjective point of view to
1755 * distinguish between <code>struct</code>s and <code>class</code>es).
1756 *
1757
1758 *
1759 * Regarding the Scratch objects, each struct is equipped with a constructor
1760 * that creates an @ref FEValues object using the @ref FiniteElement,
1761 * Quadrature, @ref Mapping (which describes the interpolation of curved
1762 * boundaries), and @ref UpdateFlags instances. Moreover, we manually
1763 * implement a copy constructor (since the FEValues class is not copyable by
1764 * itself), and provide some additional vector fields that are used to hold
1765 * intermediate data during the computation of local contributions.
1766 *
1767
1768 *
1769 * Let us start with the scratch arrays and, specifically, the one used for
1770 * assembly of the Stokes preconditioner:
1771 *
1772 * @code
1773 *   namespace Assembly
1774 *   {
1775 *   namespace Scratch
1776 *   {
1777 *   template <int dim>
1778 *   struct StokesPreconditioner
1779 *   {
1780 *   StokesPreconditioner(const FiniteElement<dim> &stokes_fe,
1781 *   const Quadrature<dim> & stokes_quadrature,
1782 *   const Mapping<dim> & mapping,
1783 *   const UpdateFlags update_flags);
1784 *  
1785 *   StokesPreconditioner(const StokesPreconditioner &data);
1786 *  
1787 *  
1788 *   FEValues<dim> stokes_fe_values;
1789 *  
1790 *   std::vector<Tensor<2, dim>> grad_phi_u;
1791 *   std::vector<double> phi_p;
1792 *   };
1793 *  
1794 *   template <int dim>
1795 *   StokesPreconditioner<dim>::StokesPreconditioner(
1796 *   const FiniteElement<dim> &stokes_fe,
1797 *   const Quadrature<dim> & stokes_quadrature,
1798 *   const Mapping<dim> & mapping,
1799 *   const UpdateFlags update_flags)
1800 *   : stokes_fe_values(mapping, stokes_fe, stokes_quadrature, update_flags)
1801 *   , grad_phi_u(stokes_fe.n_dofs_per_cell())
1802 *   , phi_p(stokes_fe.n_dofs_per_cell())
1803 *   {}
1804 *  
1805 *  
1806 *  
1807 *   template <int dim>
1808 *   StokesPreconditioner<dim>::StokesPreconditioner(
1809 *   const StokesPreconditioner &scratch)
1810 *   : stokes_fe_values(scratch.stokes_fe_values.get_mapping(),
1811 *   scratch.stokes_fe_values.get_fe(),
1812 *   scratch.stokes_fe_values.get_quadrature(),
1813 *   scratch.stokes_fe_values.get_update_flags())
1814 *   , grad_phi_u(scratch.grad_phi_u)
1815 *   , phi_p(scratch.phi_p)
1816 *   {}
1817 *  
1818 *  
1819 *  
1820 * @endcode
1821 *
1822 * The next one is the scratch object used for the assembly of the full
1823 * Stokes system. Observe that we derive the StokesSystem scratch class
1824 * from the StokesPreconditioner class above. We do this because all the
1825 * objects that are necessary for the assembly of the preconditioner are
1826 * also needed for the actual matrix system and right hand side, plus
1827 * some extra data. This makes the program more compact. Note also that
1828 * the assembly of the Stokes system and the temperature right hand side
1829 * further down requires data from temperature and velocity,
1830 * respectively, so we actually need two FEValues objects for those two
1831 * cases.
1832 *
1833 * @code
1834 *   template <int dim>
1835 *   struct StokesSystem : public StokesPreconditioner<dim>
1836 *   {
1837 *   StokesSystem(const FiniteElement<dim> &stokes_fe,
1838 *   const Mapping<dim> & mapping,
1839 *   const Quadrature<dim> & stokes_quadrature,
1840 *   const UpdateFlags stokes_update_flags,
1841 *   const FiniteElement<dim> &temperature_fe,
1842 *   const UpdateFlags temperature_update_flags);
1843 *  
1844 *   StokesSystem(const StokesSystem<dim> &data);
1845 *  
1846 *  
1847 *   FEValues<dim> temperature_fe_values;
1848 *  
1849 *   std::vector<Tensor<1, dim>> phi_u;
1850 *   std::vector<SymmetricTensor<2, dim>> grads_phi_u;
1851 *   std::vector<double> div_phi_u;
1852 *  
1853 *   std::vector<double> old_temperature_values;
1854 *   };
1855 *  
1856 *  
1857 *   template <int dim>
1858 *   StokesSystem<dim>::StokesSystem(
1859 *   const FiniteElement<dim> &stokes_fe,
1860 *   const Mapping<dim> & mapping,
1861 *   const Quadrature<dim> & stokes_quadrature,
1862 *   const UpdateFlags stokes_update_flags,
1863 *   const FiniteElement<dim> &temperature_fe,
1864 *   const UpdateFlags temperature_update_flags)
1865 *   : StokesPreconditioner<dim>(stokes_fe,
1866 *   stokes_quadrature,
1867 *   mapping,
1868 *   stokes_update_flags)
1869 *   , temperature_fe_values(mapping,
1870 *   temperature_fe,
1871 *   stokes_quadrature,
1872 *   temperature_update_flags)
1873 *   , phi_u(stokes_fe.n_dofs_per_cell())
1874 *   , grads_phi_u(stokes_fe.n_dofs_per_cell())
1875 *   , div_phi_u(stokes_fe.n_dofs_per_cell())
1876 *   , old_temperature_values(stokes_quadrature.size())
1877 *   {}
1878 *  
1879 *  
1880 *   template <int dim>
1881 *   StokesSystem<dim>::StokesSystem(const StokesSystem<dim> &scratch)
1882 *   : StokesPreconditioner<dim>(scratch)
1883 *   , temperature_fe_values(
1884 *   scratch.temperature_fe_values.get_mapping(),
1885 *   scratch.temperature_fe_values.get_fe(),
1886 *   scratch.temperature_fe_values.get_quadrature(),
1887 *   scratch.temperature_fe_values.get_update_flags())
1888 *   , phi_u(scratch.phi_u)
1889 *   , grads_phi_u(scratch.grads_phi_u)
1890 *   , div_phi_u(scratch.div_phi_u)
1891 *   , old_temperature_values(scratch.old_temperature_values)
1892 *   {}
1893 *  
1894 *  
1895 * @endcode
1896 *
1897 * After defining the objects used in the assembly of the Stokes system,
1898 * we do the same for the assembly of the matrices necessary for the
1899 * temperature system. The general structure is very similar:
1900 *
1901 * @code
1902 *   template <int dim>
1903 *   struct TemperatureMatrix
1904 *   {
1905 *   TemperatureMatrix(const FiniteElement<dim> &temperature_fe,
1906 *   const Mapping<dim> & mapping,
1907 *   const Quadrature<dim> & temperature_quadrature);
1908 *  
1909 *   TemperatureMatrix(const TemperatureMatrix &data);
1910 *  
1911 *  
1912 *   FEValues<dim> temperature_fe_values;
1913 *  
1914 *   std::vector<double> phi_T;
1915 *   std::vector<Tensor<1, dim>> grad_phi_T;
1916 *   };
1917 *  
1918 *  
1919 *   template <int dim>
1920 *   TemperatureMatrix<dim>::TemperatureMatrix(
1921 *   const FiniteElement<dim> &temperature_fe,
1922 *   const Mapping<dim> & mapping,
1923 *   const Quadrature<dim> & temperature_quadrature)
1924 *   : temperature_fe_values(mapping,
1925 *   temperature_fe,
1926 *   temperature_quadrature,
1927 *   update_values | update_gradients |
1928 *   update_JxW_values)
1929 *   , phi_T(temperature_fe.n_dofs_per_cell())
1930 *   , grad_phi_T(temperature_fe.n_dofs_per_cell())
1931 *   {}
1932 *  
1933 *  
1934 *   template <int dim>
1935 *   TemperatureMatrix<dim>::TemperatureMatrix(
1936 *   const TemperatureMatrix &scratch)
1937 *   : temperature_fe_values(
1938 *   scratch.temperature_fe_values.get_mapping(),
1939 *   scratch.temperature_fe_values.get_fe(),
1940 *   scratch.temperature_fe_values.get_quadrature(),
1941 *   scratch.temperature_fe_values.get_update_flags())
1942 *   , phi_T(scratch.phi_T)
1943 *   , grad_phi_T(scratch.grad_phi_T)
1944 *   {}
1945 *  
1946 *  
1947 * @endcode
1948 *
1949 * The final scratch object is used in the assembly of the right hand
1950 * side of the temperature system. This object is significantly larger
1951 * than the ones above because a lot more quantities enter the
1952 * computation of the right hand side of the temperature equation. In
1953 * particular, the temperature values and gradients of the previous two
1954 * time steps need to be evaluated at the quadrature points, as well as
1955 * the velocities and the strain rates (i.e. the symmetric gradients of
1956 * the velocity) that enter the right hand side as friction heating
1957 * terms. Despite the number of terms, the following should be rather
1958 * self explanatory:
1959 *
1960 * @code
1961 *   template <int dim>
1962 *   struct TemperatureRHS
1963 *   {
1964 *   TemperatureRHS(const FiniteElement<dim> &temperature_fe,
1965 *   const FiniteElement<dim> &stokes_fe,
1966 *   const Mapping<dim> & mapping,
1967 *   const Quadrature<dim> & quadrature);
1968 *  
1969 *   TemperatureRHS(const TemperatureRHS &data);
1970 *  
1971 *  
1972 *   FEValues<dim> temperature_fe_values;
1973 *   FEValues<dim> stokes_fe_values;
1974 *  
1975 *   std::vector<double> phi_T;
1976 *   std::vector<Tensor<1, dim>> grad_phi_T;
1977 *  
1978 *   std::vector<Tensor<1, dim>> old_velocity_values;
1979 *   std::vector<Tensor<1, dim>> old_old_velocity_values;
1980 *  
1981 *   std::vector<SymmetricTensor<2, dim>> old_strain_rates;
1982 *   std::vector<SymmetricTensor<2, dim>> old_old_strain_rates;
1983 *  
1984 *   std::vector<double> old_temperature_values;
1985 *   std::vector<double> old_old_temperature_values;
1986 *   std::vector<Tensor<1, dim>> old_temperature_grads;
1987 *   std::vector<Tensor<1, dim>> old_old_temperature_grads;
1988 *   std::vector<double> old_temperature_laplacians;
1989 *   std::vector<double> old_old_temperature_laplacians;
1990 *   };
1991 *  
1992 *  
1993 *   template <int dim>
1994 *   TemperatureRHS<dim>::TemperatureRHS(
1995 *   const FiniteElement<dim> &temperature_fe,
1996 *   const FiniteElement<dim> &stokes_fe,
1997 *   const Mapping<dim> & mapping,
1998 *   const Quadrature<dim> & quadrature)
1999 *   : temperature_fe_values(mapping,
2000 *   temperature_fe,
2001 *   quadrature,
2002 *   update_values | update_gradients |
2003 *   update_hessians | update_quadrature_points |
2004 *   update_JxW_values)
2005 *   , stokes_fe_values(mapping,
2006 *   stokes_fe,
2007 *   quadrature,
2008 *   update_values | update_gradients)
2009 *   , phi_T(temperature_fe.n_dofs_per_cell())
2010 *   , grad_phi_T(temperature_fe.n_dofs_per_cell())
2011 *   ,
2012 *  
2013 *   old_velocity_values(quadrature.size())
2014 *   , old_old_velocity_values(quadrature.size())
2015 *   , old_strain_rates(quadrature.size())
2016 *   , old_old_strain_rates(quadrature.size())
2017 *   ,
2018 *  
2019 *   old_temperature_values(quadrature.size())
2020 *   , old_old_temperature_values(quadrature.size())
2021 *   , old_temperature_grads(quadrature.size())
2022 *   , old_old_temperature_grads(quadrature.size())
2023 *   , old_temperature_laplacians(quadrature.size())
2024 *   , old_old_temperature_laplacians(quadrature.size())
2025 *   {}
2026 *  
2027 *  
2028 *   template <int dim>
2029 *   TemperatureRHS<dim>::TemperatureRHS(const TemperatureRHS &scratch)
2030 *   : temperature_fe_values(
2031 *   scratch.temperature_fe_values.get_mapping(),
2032 *   scratch.temperature_fe_values.get_fe(),
2033 *   scratch.temperature_fe_values.get_quadrature(),
2034 *   scratch.temperature_fe_values.get_update_flags())
2035 *   , stokes_fe_values(scratch.stokes_fe_values.get_mapping(),
2036 *   scratch.stokes_fe_values.get_fe(),
2037 *   scratch.stokes_fe_values.get_quadrature(),
2038 *   scratch.stokes_fe_values.get_update_flags())
2039 *   , phi_T(scratch.phi_T)
2040 *   , grad_phi_T(scratch.grad_phi_T)
2041 *   ,
2042 *  
2043 *   old_velocity_values(scratch.old_velocity_values)
2044 *   , old_old_velocity_values(scratch.old_old_velocity_values)
2045 *   , old_strain_rates(scratch.old_strain_rates)
2046 *   , old_old_strain_rates(scratch.old_old_strain_rates)
2047 *   ,
2048 *  
2049 *   old_temperature_values(scratch.old_temperature_values)
2050 *   , old_old_temperature_values(scratch.old_old_temperature_values)
2051 *   , old_temperature_grads(scratch.old_temperature_grads)
2052 *   , old_old_temperature_grads(scratch.old_old_temperature_grads)
2053 *   , old_temperature_laplacians(scratch.old_temperature_laplacians)
2054 *   , old_old_temperature_laplacians(scratch.old_old_temperature_laplacians)
2055 *   {}
2056 *   } // namespace Scratch
2057 *  
2058 *  
2059 * @endcode
2060 *
2061 * The CopyData objects are even simpler than the Scratch objects as all
2062 * they have to do is to store the results of local computations until
2063 * they can be copied into the global matrix or vector objects. These
2064 * structures therefore only need to provide a constructor, a copy
2065 * operation, and some arrays for local matrix, local vectors and the
2066 * relation between local and global degrees of freedom (a.k.a.
2067 * <code>local_dof_indices</code>). Again, we have one such structure for
2068 * each of the four operations we will parallelize using the WorkStream
2069 * class:
2070 *
2071 * @code
2072 *   namespace CopyData
2073 *   {
2074 *   template <int dim>
2075 *   struct StokesPreconditioner
2076 *   {
2077 *   StokesPreconditioner(const FiniteElement<dim> &stokes_fe);
2078 *   StokesPreconditioner(const StokesPreconditioner &data);
2079 *   StokesPreconditioner &operator=(const StokesPreconditioner &) = default;
2080 *  
2081 *   FullMatrix<double> local_matrix;
2082 *   std::vector<types::global_dof_index> local_dof_indices;
2083 *   };
2084 *  
2085 *   template <int dim>
2086 *   StokesPreconditioner<dim>::StokesPreconditioner(
2087 *   const FiniteElement<dim> &stokes_fe)
2088 *   : local_matrix(stokes_fe.n_dofs_per_cell(), stokes_fe.n_dofs_per_cell())
2089 *   , local_dof_indices(stokes_fe.n_dofs_per_cell())
2090 *   {}
2091 *  
2092 *   template <int dim>
2093 *   StokesPreconditioner<dim>::StokesPreconditioner(
2094 *   const StokesPreconditioner &data)
2095 *   : local_matrix(data.local_matrix)
2096 *   , local_dof_indices(data.local_dof_indices)
2097 *   {}
2098 *  
2099 *  
2100 *  
2101 *   template <int dim>
2102 *   struct StokesSystem : public StokesPreconditioner<dim>
2103 *   {
2104 *   StokesSystem(const FiniteElement<dim> &stokes_fe);
2105 *  
2106 *   Vector<double> local_rhs;
2107 *   };
2108 *  
2109 *   template <int dim>
2110 *   StokesSystem<dim>::StokesSystem(const FiniteElement<dim> &stokes_fe)
2111 *   : StokesPreconditioner<dim>(stokes_fe)
2112 *   , local_rhs(stokes_fe.n_dofs_per_cell())
2113 *   {}
2114 *  
2115 *  
2116 *  
2117 *   template <int dim>
2118 *   struct TemperatureMatrix
2119 *   {
2120 *   TemperatureMatrix(const FiniteElement<dim> &temperature_fe);
2121 *  
2122 *   FullMatrix<double> local_mass_matrix;
2123 *   FullMatrix<double> local_stiffness_matrix;
2124 *   std::vector<types::global_dof_index> local_dof_indices;
2125 *   };
2126 *  
2127 *   template <int dim>
2128 *   TemperatureMatrix<dim>::TemperatureMatrix(
2129 *   const FiniteElement<dim> &temperature_fe)
2130 *   : local_mass_matrix(temperature_fe.n_dofs_per_cell(),
2131 *   temperature_fe.n_dofs_per_cell())
2132 *   , local_stiffness_matrix(temperature_fe.n_dofs_per_cell(),
2133 *   temperature_fe.n_dofs_per_cell())
2134 *   , local_dof_indices(temperature_fe.n_dofs_per_cell())
2135 *   {}
2136 *  
2137 *  
2138 *  
2139 *   template <int dim>
2140 *   struct TemperatureRHS
2141 *   {
2142 *   TemperatureRHS(const FiniteElement<dim> &temperature_fe);
2143 *  
2144 *   Vector<double> local_rhs;
2145 *   std::vector<types::global_dof_index> local_dof_indices;
2146 *   FullMatrix<double> matrix_for_bc;
2147 *   };
2148 *  
2149 *   template <int dim>
2150 *   TemperatureRHS<dim>::TemperatureRHS(
2151 *   const FiniteElement<dim> &temperature_fe)
2152 *   : local_rhs(temperature_fe.n_dofs_per_cell())
2153 *   , local_dof_indices(temperature_fe.n_dofs_per_cell())
2154 *   , matrix_for_bc(temperature_fe.n_dofs_per_cell(),
2155 *   temperature_fe.n_dofs_per_cell())
2156 *   {}
2157 *   } // namespace CopyData
2158 *   } // namespace Assembly
2159 *  
2160 *  
2161 *  
2162 * @endcode
2163 *
2164 *
2165 * <a name="ThecodeBoussinesqFlowProblemcodeclasstemplate"></a>
2166 * <h3>The <code>BoussinesqFlowProblem</code> class template</h3>
2167 *
2168
2169 *
2170 * This is the declaration of the main class. It is very similar to @ref step_31 "step-31"
2171 * but there are a number differences we will comment on below.
2172 *
2173
2174 *
2175 * The top of the class is essentially the same as in @ref step_31 "step-31", listing the
2176 * public methods and a set of private functions that do the heavy
2177 * lifting. Compared to @ref step_31 "step-31" there are only two additions to this
2178 * section: the function <code>get_cfl_number()</code> that computes the
2179 * maximum CFL number over all cells which we then compute the global time
2180 * step from, and the function <code>get_entropy_variation()</code> that is
2181 * used in the computation of the entropy stabilization. It is akin to the
2182 * <code>get_extrapolated_temperature_range()</code> we have used in @ref step_31 "step-31"
2183 * for this purpose, but works on the entropy instead of the temperature
2184 * instead.
2185 *
2186 * @code
2187 *   template <int dim>
2188 *   class BoussinesqFlowProblem
2189 *   {
2190 *   public:
2191 *   struct Parameters;
2192 *   BoussinesqFlowProblem(Parameters &parameters);
2193 *   void run();
2194 *  
2195 *   private:
2196 *   void setup_dofs();
2197 *   void assemble_stokes_preconditioner();
2198 *   void build_stokes_preconditioner();
2199 *   void assemble_stokes_system();
2200 *   void assemble_temperature_matrix();
2201 *   void assemble_temperature_system(const double maximal_velocity);
2202 *   double get_maximal_velocity() const;
2203 *   double get_cfl_number() const;
2204 *   double get_entropy_variation(const double average_temperature) const;
2205 *   std::pair<double, double> get_extrapolated_temperature_range() const;
2206 *   void solve();
2207 *   void output_results();
2208 *   void refine_mesh(const unsigned int max_grid_level);
2209 *  
2210 *   double compute_viscosity(
2211 *   const std::vector<double> & old_temperature,
2212 *   const std::vector<double> & old_old_temperature,
2213 *   const std::vector<Tensor<1, dim>> &old_temperature_grads,
2214 *   const std::vector<Tensor<1, dim>> &old_old_temperature_grads,
2215 *   const std::vector<double> & old_temperature_laplacians,
2216 *   const std::vector<double> & old_old_temperature_laplacians,
2217 *   const std::vector<Tensor<1, dim>> &old_velocity_values,
2218 *   const std::vector<Tensor<1, dim>> &old_old_velocity_values,
2219 *   const std::vector<SymmetricTensor<2, dim>> &old_strain_rates,
2220 *   const std::vector<SymmetricTensor<2, dim>> &old_old_strain_rates,
2221 *   const double global_u_infty,
2222 *   const double global_T_variation,
2223 *   const double average_temperature,
2224 *   const double global_entropy_variation,
2225 *   const double cell_diameter) const;
2226 *  
2227 *   public:
2228 * @endcode
2229 *
2230 * The first significant new component is the definition of a struct for
2231 * the parameters according to the discussion in the introduction. This
2232 * structure is initialized by reading from a parameter file during
2233 * construction of this object.
2234 *
2235 * @code
2236 *   struct Parameters
2237 *   {
2238 *   Parameters(const std::string &parameter_filename);
2239 *  
2240 *   static void declare_parameters(ParameterHandler &prm);
2241 *   void parse_parameters(ParameterHandler &prm);
2242 *  
2243 *   double end_time;
2244 *  
2245 *   unsigned int initial_global_refinement;
2246 *   unsigned int initial_adaptive_refinement;
2247 *  
2248 *   bool generate_graphical_output;
2249 *   unsigned int graphical_output_interval;
2250 *  
2251 *   unsigned int adaptive_refinement_interval;
2252 *  
2253 *   double stabilization_alpha;
2254 *   double stabilization_c_R;
2255 *   double stabilization_beta;
2256 *  
2257 *   unsigned int stokes_velocity_degree;
2258 *   bool use_locally_conservative_discretization;
2259 *  
2260 *   unsigned int temperature_degree;
2261 *   };
2262 *  
2263 *   private:
2264 *   Parameters &parameters;
2265 *  
2266 * @endcode
2267 *
2268 * The <code>pcout</code> (for <i>%parallel <code>std::cout</code></i>)
2269 * object is used to simplify writing output: each MPI process can use
2270 * this to generate output as usual, but since each of these processes
2271 * will (hopefully) produce the same output it will just be replicated
2272 * many times over; with the ConditionalOStream class, only the output
2273 * generated by one MPI process will actually be printed to screen,
2274 * whereas the output by all the other threads will simply be forgotten.
2275 *
2276 * @code
2277 *   ConditionalOStream pcout;
2278 *  
2279 * @endcode
2280 *
2281 * The following member variables will then again be similar to those in
2282 * @ref step_31 "step-31" (and to other tutorial programs). As mentioned in the
2283 * introduction, we fully distribute computations, so we will have to use
2284 * the parallel::distributed::Triangulation class (see @ref step_40 "step-40") but the
2285 * remainder of these variables is rather standard with two exceptions:
2286 *
2287
2288 *
2289 * - The <code>mapping</code> variable is used to denote a higher-order
2290 * polynomial mapping. As mentioned in the introduction, we use this
2291 * mapping when forming integrals through quadrature for all cells.
2292 *
2293
2294 *
2295 * - In a bit of naming confusion, you will notice below that some of the
2296 * variables from namespace TrilinosWrappers are taken from namespace
2297 * TrilinosWrappers::MPI (such as the right hand side vectors) whereas
2298 * others are not (such as the various matrices). This is due to legacy
2299 * reasons. We will frequently have to query velocities
2300 * and temperatures at arbitrary quadrature points; consequently, rather
2301 * than importing ghost information of a vector whenever we need access
2302 * to degrees of freedom that are relevant locally but owned by another
2303 * processor, we solve linear systems in %parallel but then immediately
2304 * initialize a vector including ghost entries of the solution for further
2305 * processing. The various <code>*_solution</code> vectors are therefore
2306 * filled immediately after solving their respective linear system in
2307 * %parallel and will always contain values for all
2308 * @ref GlossLocallyRelevantDof "locally relevant degrees of freedom";
2309 * the fully distributed vectors that we obtain from the solution process
2310 * and that only ever contain the
2311 * @ref GlossLocallyOwnedDof "locally owned degrees of freedom" are
2312 * destroyed immediately after the solution process and after we have
2313 * copied the relevant values into the member variable vectors.
2314 *
2315 * @code
2316 *   parallel::distributed::Triangulation<dim> triangulation;
2317 *   double global_Omega_diameter;
2318 *  
2319 *   const MappingQ<dim> mapping;
2320 *  
2321 *   const FESystem<dim> stokes_fe;
2322 *   DoFHandler<dim> stokes_dof_handler;
2323 *   AffineConstraints<double> stokes_constraints;
2324 *  
2325 *   TrilinosWrappers::BlockSparseMatrix stokes_matrix;
2326 *   TrilinosWrappers::BlockSparseMatrix stokes_preconditioner_matrix;
2327 *  
2328 *   TrilinosWrappers::MPI::BlockVector stokes_solution;
2329 *   TrilinosWrappers::MPI::BlockVector old_stokes_solution;
2330 *   TrilinosWrappers::MPI::BlockVector stokes_rhs;
2331 *  
2332 *  
2333 *   FE_Q<dim> temperature_fe;
2334 *   DoFHandler<dim> temperature_dof_handler;
2335 *   AffineConstraints<double> temperature_constraints;
2336 *  
2337 *   TrilinosWrappers::SparseMatrix temperature_mass_matrix;
2338 *   TrilinosWrappers::SparseMatrix temperature_stiffness_matrix;
2339 *   TrilinosWrappers::SparseMatrix temperature_matrix;
2340 *  
2341 *   TrilinosWrappers::MPI::Vector temperature_solution;
2342 *   TrilinosWrappers::MPI::Vector old_temperature_solution;
2343 *   TrilinosWrappers::MPI::Vector old_old_temperature_solution;
2344 *   TrilinosWrappers::MPI::Vector temperature_rhs;
2345 *  
2346 *  
2347 *   double time_step;
2348 *   double old_time_step;
2349 *   unsigned int timestep_number;
2350 *  
2351 *   std::shared_ptr<TrilinosWrappers::PreconditionAMG> Amg_preconditioner;
2352 *   std::shared_ptr<TrilinosWrappers::PreconditionJacobi> Mp_preconditioner;
2353 *   std::shared_ptr<TrilinosWrappers::PreconditionJacobi> T_preconditioner;
2354 *  
2355 *   bool rebuild_stokes_matrix;
2356 *   bool rebuild_stokes_preconditioner;
2357 *   bool rebuild_temperature_matrices;
2358 *   bool rebuild_temperature_preconditioner;
2359 *  
2360 * @endcode
2361 *
2362 * The next member variable, <code>computing_timer</code> is used to
2363 * conveniently account for compute time spent in certain "sections" of
2364 * the code that are repeatedly entered. For example, we will enter (and
2365 * leave) sections for Stokes matrix assembly and would like to accumulate
2366 * the run time spent in this section over all time steps. Every so many
2367 * time steps as well as at the end of the program (through the destructor
2368 * of the TimerOutput class) we will then produce a nice summary of the
2369 * times spent in the different sections into which we categorize the
2370 * run-time of this program.
2371 *
2372 * @code
2373 *   TimerOutput computing_timer;
2374 *  
2375 * @endcode
2376 *
2377 * After these member variables we have a number of auxiliary functions
2378 * that have been broken out of the ones listed above. Specifically, there
2379 * are first three functions that we call from <code>setup_dofs</code> and
2380 * then the ones that do the assembling of linear systems:
2381 *
2382 * @code
2383 *   void setup_stokes_matrix(
2384 *   const std::vector<IndexSet> &stokes_partitioning,
2385 *   const std::vector<IndexSet> &stokes_relevant_partitioning);
2386 *   void setup_stokes_preconditioner(
2387 *   const std::vector<IndexSet> &stokes_partitioning,
2388 *   const std::vector<IndexSet> &stokes_relevant_partitioning);
2389 *   void setup_temperature_matrices(
2390 *   const IndexSet &temperature_partitioning,
2391 *   const IndexSet &temperature_relevant_partitioning);
2392 *  
2393 *  
2394 * @endcode
2395 *
2396 * Following the @ref MTWorkStream "task-based parallelization" paradigm,
2397 * we split all the assembly routines into two parts: a first part that
2398 * can do all the calculations on a certain cell without taking care of
2399 * other threads, and a second part (which is writing the local data into
2400 * the global matrices and vectors) which can be entered by only one
2401 * thread at a time. In order to implement that, we provide functions for
2402 * each of those two steps for all the four assembly routines that we use
2403 * in this program. The following eight functions do exactly this:
2404 *
2405 * @code
2406 *   void local_assemble_stokes_preconditioner(
2407 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
2408 *   Assembly::Scratch::StokesPreconditioner<dim> & scratch,
2409 *   Assembly::CopyData::StokesPreconditioner<dim> & data);
2410 *  
2411 *   void copy_local_to_global_stokes_preconditioner(
2412 *   const Assembly::CopyData::StokesPreconditioner<dim> &data);
2413 *  
2414 *  
2415 *   void local_assemble_stokes_system(
2416 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
2417 *   Assembly::Scratch::StokesSystem<dim> & scratch,
2418 *   Assembly::CopyData::StokesSystem<dim> & data);
2419 *  
2420 *   void copy_local_to_global_stokes_system(
2421 *   const Assembly::CopyData::StokesSystem<dim> &data);
2422 *  
2423 *  
2424 *   void local_assemble_temperature_matrix(
2425 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
2426 *   Assembly::Scratch::TemperatureMatrix<dim> & scratch,
2427 *   Assembly::CopyData::TemperatureMatrix<dim> & data);
2428 *  
2429 *   void copy_local_to_global_temperature_matrix(
2430 *   const Assembly::CopyData::TemperatureMatrix<dim> &data);
2431 *  
2432 *  
2433 *  
2434 *   void local_assemble_temperature_rhs(
2435 *   const std::pair<double, double> global_T_range,
2436 *   const double global_max_velocity,
2437 *   const double global_entropy_variation,
2438 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
2439 *   Assembly::Scratch::TemperatureRHS<dim> & scratch,
2440 *   Assembly::CopyData::TemperatureRHS<dim> & data);
2441 *  
2442 *   void copy_local_to_global_temperature_rhs(
2443 *   const Assembly::CopyData::TemperatureRHS<dim> &data);
2444 *  
2445 * @endcode
2446 *
2447 * Finally, we forward declare a member class that we will define later on
2448 * and that will be used to compute a number of quantities from our
2449 * solution vectors that we'd like to put into the output files for
2450 * visualization.
2451 *
2452 * @code
2453 *   class Postprocessor;
2454 *   };
2455 *  
2456 *  
2457 * @endcode
2458 *
2459 *
2460 * <a name="BoussinesqFlowProblemclassimplementation"></a>
2462 *
2463
2464 *
2465 *
2466 * <a name="BoussinesqFlowProblemParameters"></a>
2468 *
2469
2470 *
2471 * Here comes the definition of the parameters for the Stokes problem. We
2472 * allow to set the end time for the simulation, the level of refinements
2473 * (both global and adaptive, which in the sum specify what maximum level
2474 * the cells are allowed to have), and the interval between refinements in
2475 * the time stepping.
2476 *
2477
2478 *
2479 * Then, we let the user specify constants for the stabilization parameters
2480 * (as discussed in the introduction), the polynomial degree for the Stokes
2482 * based on FE_DGP elements for the pressure or not (FE_Q elements for
2483 * pressure), and the polynomial degree for the temperature interpolation.
2484 *
2485
2486 *
2487 * The constructor checks for a valid input file (if not, a file with
2488 * default parameters for the quantities is written), and eventually parses
2489 * the parameters.
2490 *
2491 * @code
2492 *   template <int dim>
2494 *   const std::string &parameter_filename)
2495 *   : end_time(1e8)
2499 *   , stabilization_alpha(2)
2500 *   , stabilization_c_R(0.11)
2501 *   , stabilization_beta(0.078)
2504 *   , temperature_degree(2)
2505 *   {
2506 *   ParameterHandler prm;
2508 *  
2509 *   std::ifstream parameter_file(parameter_filename);
2510 *  
2511 *   if (!parameter_file)
2512 *   {
2513 *   parameter_file.close();
2514 *  
2515 *   std::ofstream parameter_out(parameter_filename);
2516 *   prm.print_parameters(parameter_out, ParameterHandler::Text);
2517 *  
2518 *   AssertThrow(
2519 *   false,
2520 *   ExcMessage(
2521 *   "Input parameter file <" + parameter_filename +
2522 *   "> not found. Creating a template file of the same name."));
2523 *   }
2524 *  
2525 *   prm.parse_input(parameter_file);
2526 *   parse_parameters(prm);
2527 *   }
2528 *  
2529 *  
2530 *  
2531 * @endcode
2532 *
2533 * Next we have a function that declares the parameters that we expect in
2534 * the input file, together with their data types, default values and a
2535 * description:
2536 *
2537 * @code
2538 *   template <int dim>
2540 *   ParameterHandler &prm)
2541 *   {
2542 *   prm.declare_entry("End time",
2543 *   "1e8",
2544 *   Patterns::Double(0),
2545 *   "The end time of the simulation in years.");
2546 *   prm.declare_entry("Initial global refinement",
2547 *   "2",
2548 *   Patterns::Integer(0),
2549 *   "The number of global refinement steps performed on "
2550 *   "the initial coarse mesh, before the problem is first "
2551 *   "solved there.");
2552 *   prm.declare_entry("Initial adaptive refinement",
2553 *   "2",
2554 *   Patterns::Integer(0),
2555 *   "The number of adaptive refinement steps performed after "
2556 *   "initial global refinement.");
2557 *   prm.declare_entry("Time steps between mesh refinement",
2558 *   "10",
2559 *   Patterns::Integer(1),
2560 *   "The number of time steps after which the mesh is to be "
2561 *   "adapted based on computed error indicators.");
2562 *   prm.declare_entry("Generate graphical output",
2563 *   "false",
2564 *   Patterns::Bool(),
2565 *   "Whether graphical output is to be generated or not. "
2566 *   "You may not want to get graphical output if the number "
2567 *   "of processors is large.");
2568 *   prm.declare_entry("Time steps between graphical output",
2569 *   "50",
2570 *   Patterns::Integer(1),
2571 *   "The number of time steps between each generation of "
2572 *   "graphical output files.");
2573 *  
2574 *   prm.enter_subsection("Stabilization parameters");
2575 *   {
2576 *   prm.declare_entry("alpha",
2577 *   "2",
2578 *   Patterns::Double(1, 2),
2579 *   "The exponent in the entropy viscosity stabilization.");
2580 *   prm.declare_entry("c_R",
2581 *   "0.11",
2582 *   Patterns::Double(0),
2583 *   "The c_R factor in the entropy viscosity "
2584 *   "stabilization.");
2585 *   prm.declare_entry("beta",
2586 *   "0.078",
2587 *   Patterns::Double(0),
2588 *   "The beta factor in the artificial viscosity "
2589 *   "stabilization. An appropriate value for 2d is 0.052 "
2590 *   "and 0.078 for 3d.");
2591 *   }
2592 *   prm.leave_subsection();
2593 *  
2594 *   prm.enter_subsection("Discretization");
2595 *   {
2596 *   prm.declare_entry(
2597 *   "Stokes velocity polynomial degree",
2598 *   "2",
2599 *   Patterns::Integer(1),
2600 *   "The polynomial degree to use for the velocity variables "
2601 *   "in the Stokes system.");
2602 *   prm.declare_entry(
2603 *   "Temperature polynomial degree",
2604 *   "2",
2605 *   Patterns::Integer(1),
2606 *   "The polynomial degree to use for the temperature variable.");
2607 *   prm.declare_entry(
2608 *   "Use locally conservative discretization",
2609 *   "true",
2610 *   Patterns::Bool(),
2611 *   "Whether to use a Stokes discretization that is locally "
2612 *   "conservative at the expense of a larger number of degrees "
2613 *   "of freedom, or to go with a cheaper discretization "
2614 *   "that does not locally conserve mass (although it is "
2615 *   "globally conservative.");
2616 *   }
2617 *   prm.leave_subsection();
2618 *   }
2619 *  
2620 *  
2621 *  
2622 * @endcode
2623 *
2624 * And then we need a function that reads the contents of the
2625 * ParameterHandler object we get by reading the input file and puts the
2626 * results into variables that store the values of the parameters we have
2628 *
2629 * @code
2630 *   template <int dim>
2632 *   ParameterHandler &prm)
2633 *   {
2634 *   end_time = prm.get_double("End time");
2635 *   initial_global_refinement = prm.get_integer("Initial global refinement");
2637 *   prm.get_integer("Initial adaptive refinement");
2638 *  
2640 *   prm.get_integer("Time steps between mesh refinement");
2641 *  
2642 *   generate_graphical_output = prm.get_bool("Generate graphical output");
2644 *   prm.get_integer("Time steps between graphical output");
2645 *  
2646 *   prm.enter_subsection("Stabilization parameters");
2647 *   {
2648 *   stabilization_alpha = prm.get_double("alpha");
2649 *   stabilization_c_R = prm.get_double("c_R");
2650 *   stabilization_beta = prm.get_double("beta");
2651 *   }
2652 *   prm.leave_subsection();
2653 *  
2654 *   prm.enter_subsection("Discretization");
2655 *   {
2657 *   prm.get_integer("Stokes velocity polynomial degree");
2658 *   temperature_degree = prm.get_integer("Temperature polynomial degree");
2660 *   prm.get_bool("Use locally conservative discretization");
2661 *   }
2662 *   prm.leave_subsection();
2663 *   }
2664 *  
2665 *  
2666 *  
2667 * @endcode
2668 *
2669 *
2670 * <a name="BoussinesqFlowProblemBoussinesqFlowProblem"></a>
2672 *
2673
2674 *
2676 * @ref step_31 "step-31". What is different is the %parallel communication: Trilinos uses
2679 * is to be done. We choose a rather simple strategy and let all processors
2680 * that are running the program work together, specified by the communicator
2681 * <code>MPI_COMM_WORLD</code>. Next, we create the output stream (as we
2682 * already did in @ref step_18 "step-18") that only generates output on the first MPI
2684 * this idea is to check the process number when <code>pcout</code> gets a
2685 * true argument, and it uses the <code>std::cout</code> stream for
2686 * output. If we are one processor five, for instance, then we will give a
2688 * output of that processor will not be printed. With the exception of the
2689 * mapping object (for which we use polynomials of degree 4) all but the
2690 * final member variable are exactly the same as in @ref step_31 "step-31".
2691 *
2692
2693 *
2694 * This final object, the TimerOutput object, is then told to restrict
2695 * output to the <code>pcout</code> stream (processor 0), and then we
2696 * specify that we want to get a summary table at the end of the program
2698 * manually also request intermediate summaries every so many time steps in
2699 * the <code>run()</code> function below.
2700 *
2701 * @code
2702 *   template <int dim>
2704 *   : parameters(parameters_)
2706 *   ,
2707 *  
2712 *   ,
2713 *  
2715 *   ,
2716 *  
2717 *   mapping(4)
2718 *   ,
2719 *  
2720 *   stokes_fe(FE_Q<dim>(parameters.stokes_velocity_degree),
2721 *   dim,
2722 *   (parameters.use_locally_conservative_discretization ?
2723 *   static_cast<const FiniteElement<dim> &>(
2724 *   FE_DGP<dim>(parameters.stokes_velocity_degree - 1)) :
2726 *   FE_Q<dim>(parameters.stokes_velocity_degree - 1))),
2727 *   1)
2728 *   ,
2729 *  
2731 *   ,
2732 *  
2735 *   ,
2736 *  
2737 *   time_step(0)
2738 *   , old_time_step(0)
2739 *   , timestep_number(0)
2744 *   ,
2745 *  
2747 *   pcout,
2748 *   TimerOutput::summary,
2749 *   TimerOutput::wall_times)
2750 *   {}
2751 *  
2752 *  
2753 *  
2754 * @endcode
2755 *
2756 *
2757 * <a name="TheBoussinesqFlowProblemhelperfunctions"></a>
2759 *
2760 * <a name="BoussinesqFlowProblemget_maximal_velocity"></a>
2762 *
2763
2764 *
2765 * Except for two small details, the function to compute the global maximum
2766 * of the velocity is the same as in @ref step_31 "step-31". The first detail is actually
2767 * common to all functions that implement loops over all cells in the
2772 * @ref step_18 "step-18". All we need to change is hence to perform the cell-related
2773 * operations only on cells that are owned by the current process (as
2774 * opposed to ghost or artificial cells), i.e. for which the subdomain id
2775 * equals the number of the process ID. Since this is a commonly used
2776 * operation, there is a shortcut for this operation: we can ask whether the
2778 * <code>cell-@>is_locally_owned()</code>.
2779 *
2780
2781 *
2782 * The second difference is the way we calculate the maximum value. Before,
2783 * we could simply have a <code>double</code> variable that we checked
2784 * against on each quadrature point for each cell. Now, we have to be a bit
2786 * cells. What we do is to first let each processor calculate the maximum
2787 * among its cells, and then do a global communication operation
2790 * call, but it's even simpler to use the respective function in namespace
2791 * Utilities::MPI using the MPI communicator object since that will do the
2792 * right thing even if we work without MPI and on a single machine only. The
2793 * call to <code>Utilities::MPI::max</code> needs two arguments, namely the
2794 * local maximum (input) and the MPI communicator, which is MPI_COMM_WORLD
2795 * in this example.
2796 *
2797 * @code
2798 *   template <int dim>
2799 *   double BoussinesqFlowProblem<dim>::get_maximal_velocity() const
2800 *   {
2801 *   const QIterated<dim> quadrature_formula(QTrapezoid<1>(),
2802 *   parameters.stokes_velocity_degree);
2803 *   const unsigned int n_q_points = quadrature_formula.size();
2804 *  
2805 *   FEValues<dim> fe_values(mapping,
2806 *   stokes_fe,
2807 *   quadrature_formula,
2808 *   update_values);
2809 *   std::vector<Tensor<1, dim>> velocity_values(n_q_points);
2810 *  
2811 *   const FEValuesExtractors::Vector velocities(0);
2812 *  
2813 *   double max_local_velocity = 0;
2814 *  
2815 *   for (const auto &cell : stokes_dof_handler.active_cell_iterators())
2816 *   if (cell->is_locally_owned())
2817 *   {
2818 *   fe_values.reinit(cell);
2819 *   fe_values[velocities].get_function_values(stokes_solution,
2820 *   velocity_values);
2821 *  
2822 *   for (unsigned int q = 0; q < n_q_points; ++q)
2823 *   max_local_velocity =
2824 *   std::max(max_local_velocity, velocity_values[q].norm());
2825 *   }
2826 *  
2827 *   return Utilities::MPI::max(max_local_velocity, MPI_COMM_WORLD);
2828 *   }
2829 *  
2830 *  
2831 * @endcode
2832 *
2833 *
2834 * <a name="BoussinesqFlowProblemget_cfl_number"></a>
2835 * <h5>BoussinesqFlowProblem::get_cfl_number</h5>
2836 *
2837
2838 *
2839 * The next function does something similar, but we now compute the CFL
2840 * number, i.e., maximal velocity on a cell divided by the cell
2841 * diameter. This number is necessary to determine the time step size, as we
2842 * use a semi-explicit time stepping scheme for the temperature equation
2843 * (see @ref step_31 "step-31" for a discussion). We compute it in the same way as above:
2844 * Compute the local maximum over all locally owned cells, then exchange it
2845 * via MPI to find the global maximum.
2846 *
2847 * @code
2848 *   template <int dim>
2849 *   double BoussinesqFlowProblem<dim>::get_cfl_number() const
2850 *   {
2851 *   const QIterated<dim> quadrature_formula(QTrapezoid<1>(),
2852 *   parameters.stokes_velocity_degree);
2853 *   const unsigned int n_q_points = quadrature_formula.size();
2854 *  
2855 *   FEValues<dim> fe_values(mapping,
2856 *   stokes_fe,
2857 *   quadrature_formula,
2858 *   update_values);
2859 *   std::vector<Tensor<1, dim>> velocity_values(n_q_points);
2860 *  
2861 *   const FEValuesExtractors::Vector velocities(0);
2862 *  
2863 *   double max_local_cfl = 0;
2864 *  
2865 *   for (const auto &cell : stokes_dof_handler.active_cell_iterators())
2866 *   if (cell->is_locally_owned())
2867 *   {
2868 *   fe_values.reinit(cell);
2869 *   fe_values[velocities].get_function_values(stokes_solution,
2870 *   velocity_values);
2871 *  
2872 *   double max_local_velocity = 1e-10;
2873 *   for (unsigned int q = 0; q < n_q_points; ++q)
2874 *   max_local_velocity =
2875 *   std::max(max_local_velocity, velocity_values[q].norm());
2876 *   max_local_cfl =
2877 *   std::max(max_local_cfl, max_local_velocity / cell->diameter());
2878 *   }
2879 *  
2880 *   return Utilities::MPI::max(max_local_cfl, MPI_COMM_WORLD);
2881 *   }
2882 *  
2883 *  
2884 * @endcode
2885 *
2886 *
2887 * <a name="BoussinesqFlowProblemget_entropy_variation"></a>
2888 * <h5>BoussinesqFlowProblem::get_entropy_variation</h5>
2889 *
2890
2891 *
2892 * Next comes the computation of the global entropy variation
2893 * @f$\|E(T)-\bar{E}(T)\|_\infty@f$ where the entropy @f$E@f$ is defined as
2894 * discussed in the introduction. This is needed for the evaluation of the
2895 * stabilization in the temperature equation as explained in the
2896 * introduction. The entropy variation is actually only needed if we use
2897 * @f$\alpha=2@f$ as a power in the residual computation. The infinity norm is
2898 * computed by the maxima over quadrature points, as usual in discrete
2899 * computations.
2900 *
2901
2902 *
2903 * In order to compute this quantity, we first have to find the
2904 * space-average @f$\bar{E}(T)@f$ and then evaluate the maximum. However, that
2905 * means that we would need to perform two loops. We can avoid the overhead
2906 * by noting that @f$\|E(T)-\bar{E}(T)\|_\infty =
2907 * \max\big(E_{\textrm{max}}(T)-\bar{E}(T),
2908 * \bar{E}(T)-E_{\textrm{min}}(T)\big)@f$, i.e., the maximum out of the
2909 * deviation from the average entropy in positive and negative
2910 * directions. The four quantities we need for the latter formula (maximum
2911 * entropy, minimum entropy, average entropy, area) can all be evaluated in
2912 * the same loop over all cells, so we choose this simpler variant.
2913 *
2914 * @code
2915 *   template <int dim>
2916 *   double BoussinesqFlowProblem<dim>::get_entropy_variation(
2917 *   const double average_temperature) const
2918 *   {
2919 *   if (parameters.stabilization_alpha != 2)
2920 *   return 1.;
2921 *  
2922 *   const QGauss<dim> quadrature_formula(parameters.temperature_degree + 1);
2923 *   const unsigned int n_q_points = quadrature_formula.size();
2924 *  
2925 *   FEValues<dim> fe_values(temperature_fe,
2926 *   quadrature_formula,
2927 *   update_values | update_JxW_values);
2928 *   std::vector<double> old_temperature_values(n_q_points);
2929 *   std::vector<double> old_old_temperature_values(n_q_points);
2930 *  
2931 * @endcode
2932 *
2933 * In the two functions above we computed the maximum of numbers that were
2934 * all non-negative, so we knew that zero was certainly a lower bound. On
2935 * the other hand, here we need to find the maximum deviation from the
2936 * average value, i.e., we will need to know the maximal and minimal
2937 * values of the entropy for which we don't a priori know the sign.
2938 *
2939
2940 *
2941 * To compute it, we can therefore start with the largest and smallest
2942 * possible values we can store in a double precision number: The minimum
2943 * is initialized with a bigger and the maximum with a smaller number than
2946 * processor does not own any cells, in the communication step at the
2950 *
2951 * @code
2952 *   double min_entropy = std::numeric_limits<double>::max(),
2953 *   max_entropy = -std::numeric_limits<double>::max(), area = 0,
2954 *   entropy_integrated = 0;
2955 *  
2956 *   for (const auto &cell : temperature_dof_handler.active_cell_iterators())
2957 *   if (cell->is_locally_owned())
2958 *   {
2959 *   fe_values.reinit(cell);
2960 *   fe_values.get_function_values(old_temperature_solution,
2962 *   fe_values.get_function_values(old_old_temperature_solution,
2964 *   for (unsigned int q = 0; q < n_q_points; ++q)
2965 *   {
2966 *   const double T =
2968 *   const double entropy =
2969 *   ((T - average_temperature) * (T - average_temperature));
2970 *  
2973 *   area += fe_values.JxW(q);
2974 *   entropy_integrated += fe_values.JxW(q) * entropy;
2975 *   }
2976 *   }
2977 *  
2978 * @endcode
2979 *
2982 * and get the extrema for maximum and minimum. We could do this through
2984 * Utilities::MPI::sum also exists in a variant that takes an array of
2990 *
2991 * @code
2992 *   const double local_sums[2] = {entropy_integrated, area},
2994 *   double global_sums[2], global_maxima[2];
2995 *  
2998 *  
2999 * @endcode
3000 *
3001 * Having computed everything this way, we can then compute the average
3004 *
3005 * @code
3006 *   const double average_entropy = global_sums[0] / global_sums[1];
3008 *   average_entropy - (-global_maxima[0]));
3009 *   return entropy_diff;
3010 *   }
3011 *  
3012 *  
3013 *  
3014 * @endcode
3015 *
3016 *
3017 * <a name="BoussinesqFlowProblemget_extrapolated_temperature_range"></a>
3019 *
3020
3021 *
3022 * The next function computes the minimal and maximal value of the
3024 * slightly modified version of the respective function in @ref step_31 "step-31". As in
3025 * the function above, we collect local minima and maxima and then compute
3026 * the global extrema using the same trick as above.
3027 *
3028
3029 *
3030 * As already discussed in @ref step_31 "step-31", the function needs to distinguish
3031 * between the first and all following time steps because it uses a higher
3032 * order temperature extrapolation scheme when at least two previous time
3033 * steps are available.
3034 *
3035 * @code
3036 *   template <int dim>
3037 *   std::pair<double, double>
3039 *   {
3041 *   parameters.temperature_degree);
3042 *   const unsigned int n_q_points = quadrature_formula.size();
3043 *  
3044 *   FEValues<dim> fe_values(mapping,
3047 *   update_values);
3048 *   std::vector<double> old_temperature_values(n_q_points);
3049 *   std::vector<double> old_old_temperature_values(n_q_points);
3050 *  
3051 *   double min_local_temperature = std::numeric_limits<double>::max(),
3052 *   max_local_temperature = -std::numeric_limits<double>::max();
3053 *  
3054 *   if (timestep_number != 0)
3055 *   {
3056 *   for (const auto &cell : temperature_dof_handler.active_cell_iterators())
3057 *   if (cell->is_locally_owned())
3058 *   {
3059 *   fe_values.reinit(cell);
3060 *   fe_values.get_function_values(old_temperature_solution,
3062 *   fe_values.get_function_values(old_old_temperature_solution,
3064 *  
3065 *   for (unsigned int q = 0; q < n_q_points; ++q)
3066 *   {
3067 *   const double temperature =
3068 *   (1. + time_step / old_time_step) *
3071 *  
3076 *   }
3077 *   }
3078 *   }
3079 *   else
3080 *   {
3081 *   for (const auto &cell : temperature_dof_handler.active_cell_iterators())
3082 *   if (cell->is_locally_owned())
3083 *   {
3084 *   fe_values.reinit(cell);
3085 *   fe_values.get_function_values(old_temperature_solution,
3087 *  
3088 *   for (unsigned int q = 0; q < n_q_points; ++q)
3089 *   {
3090 *   const double temperature = old_temperature_values[q];
3091 *  
3096 *   }
3097 *   }
3098 *   }
3099 *  
3101 *   double global_extrema[2];
3103 *  
3104 *   return std::make_pair(-global_extrema[0], global_extrema[1]);
3105 *   }
3106 *  
3107 *  
3108 * @endcode
3109 *
3110 *
3111 * <a name="BoussinesqFlowProblemcompute_viscosity"></a>
3113 *
3114
3115 *
3116 * The function that calculates the viscosity is purely local and so needs
3117 * no communication at all. It is mostly the same as in @ref step_31 "step-31" but with an
3118 * updated formulation of the viscosity if @f$\alpha=2@f$ is chosen:
3119 *
3120 * @code
3121 *   template <int dim>
3123 *   const std::vector<double> & old_temperature,
3124 *   const std::vector<double> & old_old_temperature,
3125 *   const std::vector<Tensor<1, dim>> & old_temperature_grads,
3126 *   const std::vector<Tensor<1, dim>> & old_old_temperature_grads,
3127 *   const std::vector<double> & old_temperature_laplacians,
3128 *   const std::vector<double> & old_old_temperature_laplacians,
3129 *   const std::vector<Tensor<1, dim>> & old_velocity_values,
3130 *   const std::vector<Tensor<1, dim>> & old_old_velocity_values,
3131 *   const std::vector<SymmetricTensor<2, dim>> &old_strain_rates,
3132 *   const std::vector<SymmetricTensor<2, dim>> &old_old_strain_rates,
3133 *   const double global_u_infty,
3134 *   const double global_T_variation,
3135 *   const double average_temperature,
3136 *   const double global_entropy_variation,
3137 *   const double cell_diameter) const
3138 *   {
3139 *   if (global_u_infty == 0)
3140 *   return 5e-3 * cell_diameter;
3141 *  
3142 *   const unsigned int n_q_points = old_temperature.size();
3143 *  
3144 *   double max_residual = 0;
3145 *   double max_velocity = 0;
3146 *  
3147 *   for (unsigned int q = 0; q < n_q_points; ++q)
3148 *   {
3149 *   const Tensor<1, dim> u =
3151 *  
3154 *  
3155 *   const double T = (old_temperature[q] + old_old_temperature[q]) / 2;
3156 *   const double dT_dt =
3158 *   const double u_grad_T =
3160 *  
3161 *   const double kappa_Delta_T =
3164 *   2;
3165 *   const double gamma =
3169 *  
3170 *   double residual = std::abs(dT_dt + u_grad_T - kappa_Delta_T - gamma);
3171 *   if (parameters.stabilization_alpha == 2)
3172 *   residual *= std::abs(T - average_temperature);
3173 *  
3174 *   max_residual = std::max(residual, max_residual);
3176 *   }
3177 *  
3178 *   const double max_viscosity =
3179 *   (parameters.stabilization_beta * max_velocity * cell_diameter);
3180 *   if (timestep_number == 0)
3181 *   return max_viscosity;
3182 *   else
3183 *   {
3185 *  
3186 *   double entropy_viscosity;
3187 *   if (parameters.stabilization_alpha == 2)
3189 *   (parameters.stabilization_c_R * cell_diameter * cell_diameter *
3191 *   else
3193 *   (parameters.stabilization_c_R * cell_diameter *
3196 *  
3198 *   }
3199 *   }
3200 *  
3201 *  
3202 *  
3203 * @endcode
3204 *
3205 *
3206 * <a name="TheBoussinesqFlowProblemsetupfunctions"></a>
3208 *
3209
3210 *
3212 * for the Stokes preconditioner, and the temperature matrix. The code is
3213 * mostly the same as in @ref step_31 "step-31", but it has been broken out into three
3215 *
3216
3217 *
3218 * The main functional difference between the code here and that in @ref step_31 "step-31"
3219 * is that the matrices we want to set up are distributed across multiple
3220 * processors. Since we still want to build up the sparsity pattern first
3221 * for efficiency reasons, we could continue to build the <i>entire</i>
3222 * sparsity pattern as a BlockDynamicSparsityPattern, as we did in
3223 * @ref step_31 "step-31". However, that would be inefficient: every processor would build
3224 * the same sparsity pattern, but only initialize a small part of the matrix
3226 * work on those cells it owns (and, if necessary the layer of ghost cells
3227 * around it).
3228 *
3229
3230 *
3232 * which is (obviously) a wrapper around a sparsity pattern object provided
3233 * by Trilinos. The advantage is that the Trilinos sparsity pattern class
3235 * all the nonzero entries that result from the cells it owns, and every
3238 * the globally assembled sparsity pattern available with which the global
3239 * matrix can be initialized.
3240 *
3241
3242 *
3243 * There is one important aspect when initializing Trilinos sparsity
3244 * patterns in parallel: In addition to specifying the locally owned rows
3245 * and columns of the matrices via the @p stokes_partitioning index set, we
3248 * rows contains all such rows (possibly also a few unnecessary ones, but it
3249 * is difficult to find the exact row indices before actually getting
3250 * indices on all cells and resolving constraints). This additional
3252 * off-processor data found during assembly. While Trilinos matrices are
3254 * them from some other reinit method), it is less efficient and leads to
3255 * problems when assembling matrices with multiple threads. In this program,
3256 * we pessimistically assume that only one processor at a time can write
3258 * which is fine for Trilinos matrices. In practice, one can do better by
3259 * hinting WorkStream at cells that do not share vertices, allowing for
3260 * parallelism among those cells (see the graph coloring algorithms and
3262 * when only one MPI processor is present because Trilinos' internal data
3263 * structures for accumulating off-processor data on the fly are not thread
3264 * safe. With the initialization presented here, there is no such problem
3265 * and one could safely introduce graph coloring for this algorithm.
3266 *
3267
3268 *
3269 * The only other change we need to make is to tell the
3270 * DoFTools::make_sparsity_pattern() function that it is only supposed to
3271 * work on a subset of cells, namely the ones whose
3272 * <code>subdomain_id</code> equals the number of the current processor, and
3273 * to ignore all other cells.
3274 *
3275
3276 *
3277 * This strategy is replicated across all three of the following functions.
3278 *
3279
3280 *
3281 * Note that Trilinos matrices store the information contained in the
3282 * sparsity patterns, so we can safely release the <code>sp</code> variable
3283 * once the matrix has been given the sparsity structure.
3284 *
3285 * @code
3286 *   template <int dim>
3287 *   void BoussinesqFlowProblem<dim>::setup_stokes_matrix(
3288 *   const std::vector<IndexSet> &stokes_partitioning,
3289 *   const std::vector<IndexSet> &stokes_relevant_partitioning)
3290 *   {
3291 *   stokes_matrix.clear();
3292 *  
3293 *   TrilinosWrappers::BlockSparsityPattern sp(stokes_partitioning,
3294 *   stokes_partitioning,
3295 *   stokes_relevant_partitioning,
3296 *   MPI_COMM_WORLD);
3297 *  
3298 *   Table<2, DoFTools::Coupling> coupling(dim + 1, dim + 1);
3299 *   for (unsigned int c = 0; c < dim + 1; ++c)
3300 *   for (unsigned int d = 0; d < dim + 1; ++d)
3301 *   if (!((c == dim) && (d == dim)))
3302 *   coupling[c][d] = DoFTools::always;
3303 *   else
3304 *   coupling[c][d] = DoFTools::none;
3305 *  
3306 *   DoFTools::make_sparsity_pattern(stokes_dof_handler,
3307 *   coupling,
3308 *   sp,
3309 *   stokes_constraints,
3310 *   false,
3311 *   Utilities::MPI::this_mpi_process(
3312 *   MPI_COMM_WORLD));
3313 *   sp.compress();
3314 *  
3315 *   stokes_matrix.reinit(sp);
3316 *   }
3317 *  
3318 *  
3319 *  
3320 *   template <int dim>
3321 *   void BoussinesqFlowProblem<dim>::setup_stokes_preconditioner(
3322 *   const std::vector<IndexSet> &stokes_partitioning,
3323 *   const std::vector<IndexSet> &stokes_relevant_partitioning)
3324 *   {
3325 *   Amg_preconditioner.reset();
3326 *   Mp_preconditioner.reset();
3327 *  
3328 *   stokes_preconditioner_matrix.clear();
3329 *  
3330 *   TrilinosWrappers::BlockSparsityPattern sp(stokes_partitioning,
3331 *   stokes_partitioning,
3332 *   stokes_relevant_partitioning,
3333 *   MPI_COMM_WORLD);
3334 *  
3335 *   Table<2, DoFTools::Coupling> coupling(dim + 1, dim + 1);
3336 *   for (unsigned int c = 0; c < dim + 1; ++c)
3337 *   for (unsigned int d = 0; d < dim + 1; ++d)
3338 *   if (c == d)
3339 *   coupling[c][d] = DoFTools::always;
3340 *   else
3341 *   coupling[c][d] = DoFTools::none;
3342 *  
3343 *   DoFTools::make_sparsity_pattern(stokes_dof_handler,
3344 *   coupling,
3345 *   sp,
3346 *   stokes_constraints,
3347 *   false,
3348 *   Utilities::MPI::this_mpi_process(
3349 *   MPI_COMM_WORLD));
3350 *   sp.compress();
3351 *  
3352 *   stokes_preconditioner_matrix.reinit(sp);
3353 *   }
3354 *  
3355 *  
3356 *   template <int dim>
3357 *   void BoussinesqFlowProblem<dim>::setup_temperature_matrices(
3358 *   const IndexSet &temperature_partitioner,
3359 *   const IndexSet &temperature_relevant_partitioner)
3360 *   {
3361 *   T_preconditioner.reset();
3362 *   temperature_mass_matrix.clear();
3363 *   temperature_stiffness_matrix.clear();
3364 *   temperature_matrix.clear();
3365 *  
3366 *   TrilinosWrappers::SparsityPattern sp(temperature_partitioner,
3367 *   temperature_partitioner,
3368 *   temperature_relevant_partitioner,
3369 *   MPI_COMM_WORLD);
3370 *   DoFTools::make_sparsity_pattern(temperature_dof_handler,
3371 *   sp,
3372 *   temperature_constraints,
3373 *   false,
3374 *   Utilities::MPI::this_mpi_process(
3375 *   MPI_COMM_WORLD));
3376 *   sp.compress();
3377 *  
3378 *   temperature_matrix.reinit(sp);
3379 *   temperature_mass_matrix.reinit(sp);
3380 *   temperature_stiffness_matrix.reinit(sp);
3381 *   }
3382 *  
3383 *  
3384 *  
3385 * @endcode
3386 *
3387 * The remainder of the setup function (after splitting out the three
3388 * functions above) mostly has to deal with the things we need to do for
3389 * parallelization across processors. Because setting all of this up is a
3390 * significant compute time expense of the program, we put everything we do
3391 * here into a timer group so that we can get summary information about the
3392 * fraction of time spent in this part of the program at its end.
3393 *
3394
3395 *
3396 * At the top as usual we enumerate degrees of freedom and sort them by
3397 * component/block, followed by writing their numbers to the screen from
3398 * processor zero. The DoFHandler::distributed_dofs() function, when applied
3399 * to a parallel::distributed::Triangulation object, sorts degrees of
3400 * freedom in such a way that all degrees of freedom associated with
3401 * subdomain zero come before all those associated with subdomain one,
3402 * etc. For the Stokes part, this entails, however, that velocities and
3403 * pressures become intermixed, but this is trivially solved by sorting
3404 * again by blocks; it is worth noting that this latter operation leaves the
3405 * relative ordering of all velocities and pressures alone, i.e. within the
3406 * velocity block we will still have all those associated with subdomain
3407 * zero before all velocities associated with subdomain one, etc. This is
3408 * important since we store each of the blocks of this matrix distributed
3409 * across all processors and want this to be done in such a way that each
3410 * processor stores that part of the matrix that is roughly equal to the
3411 * degrees of freedom located on those cells that it will actually work on.
3412 *
3413
3414 *
3415 * When printing the numbers of degrees of freedom, note that these numbers
3416 * are going to be large if we use many processors. Consequently, we let the
3417 * stream put a comma separator in between every three digits. The state of
3418 * the stream, using the locale, is saved from before to after this
3419 * operation. While slightly opaque, the code works because the default
3420 * locale (which we get using the constructor call
3421 * <code>std::locale("")</code>) implies printing numbers with a comma
3422 * separator for every third digit (i.e., thousands, millions, billions).
3423 *
3424
3425 *
3426 * In this function as well as many below, we measure how much time
3427 * we spend here and collect that in a section called "Setup dof
3428 * systems" across function invocations. This is done using an
3429 * TimerOutput::Scope object that gets a timer going in the section
3430 * with above name of the `computing_timer` object upon construction
3431 * of the local variable; the timer is stopped again when the
3432 * destructor of the `timing_section` variable is called. This, of
3433 * course, happens either at the end of the function, or if we leave
3434 * the function through a `return` statement or when an exception is
3435 * thrown somewhere -- in other words, whenever we leave this
3436 * function in any way. The use of such "scope" objects therefore
3437 * makes sure that we do not have to manually add code that tells
3438 * the timer to stop at every location where this function may be
3439 * left.
3440 *
3441 * @code
3442 *   template <int dim>
3443 *   void BoussinesqFlowProblem<dim>::setup_dofs()
3444 *   {
3445 *   TimerOutput::Scope timing_section(computing_timer, "Setup dof systems");
3446 *  
3447 *   stokes_dof_handler.distribute_dofs(stokes_fe);
3448 *  
3449 *   std::vector<unsigned int> stokes_sub_blocks(dim + 1, 0);
3450 *   stokes_sub_blocks[dim] = 1;
3451 *   DoFRenumbering::component_wise(stokes_dof_handler, stokes_sub_blocks);
3452 *  
3453 *   temperature_dof_handler.distribute_dofs(temperature_fe);
3454 *  
3455 *   const std::vector<types::global_dof_index> stokes_dofs_per_block =
3456 *   DoFTools::count_dofs_per_fe_block(stokes_dof_handler, stokes_sub_blocks);
3457 *  
3458 *   const types::global_dof_index n_u = stokes_dofs_per_block[0],
3459 *   n_p = stokes_dofs_per_block[1],
3460 *   n_T = temperature_dof_handler.n_dofs();
3461 *  
3462 *   std::locale s = pcout.get_stream().getloc();
3463 *   pcout.get_stream().imbue(std::locale(""));
3464 *   pcout << "Number of active cells: " << triangulation.n_global_active_cells()
3465 *   << " (on " << triangulation.n_levels() << " levels)" << std::endl
3466 *   << "Number of degrees of freedom: " << n_u + n_p + n_T << " (" << n_u
3467 *   << '+' << n_p << '+' << n_T << ')' << std::endl
3468 *   << std::endl;
3469 *   pcout.get_stream().imbue(s);
3470 *  
3471 *  
3472 * @endcode
3473 *
3474 * After this, we have to set up the various partitioners (of type
3475 * <code>IndexSet</code>, see the introduction) that describe which parts
3476 * of each matrix or vector will be stored where, then call the functions
3477 * that actually set up the matrices, and at the end also resize the
3478 * various vectors we keep around in this program.
3479 *
3480 * @code
3481 *   std::vector<IndexSet> stokes_partitioning, stokes_relevant_partitioning;
3482 *   IndexSet temperature_partitioning(n_T),
3483 *   temperature_relevant_partitioning(n_T);
3484 *   IndexSet stokes_relevant_set;
3485 *   {
3486 *   IndexSet stokes_index_set = stokes_dof_handler.locally_owned_dofs();
3487 *   stokes_partitioning.push_back(stokes_index_set.get_view(0, n_u));
3488 *   stokes_partitioning.push_back(stokes_index_set.get_view(n_u, n_u + n_p));
3489 *  
3490 *   stokes_relevant_set =
3491 *   DoFTools::extract_locally_relevant_dofs(stokes_dof_handler);
3492 *   stokes_relevant_partitioning.push_back(
3493 *   stokes_relevant_set.get_view(0, n_u));
3494 *   stokes_relevant_partitioning.push_back(
3495 *   stokes_relevant_set.get_view(n_u, n_u + n_p));
3496 *  
3497 *   temperature_partitioning = temperature_dof_handler.locally_owned_dofs();
3498 *   temperature_relevant_partitioning =
3499 *   DoFTools::extract_locally_relevant_dofs(temperature_dof_handler);
3500 *   }
3501 *  
3502 * @endcode
3503 *
3504 * Following this, we can compute constraints for the solution vectors,
3505 * including hanging node constraints and homogeneous and inhomogeneous
3506 * boundary values for the Stokes and temperature fields. Note that as for
3507 * everything else, the constraint objects can not hold <i>all</i>
3508 * constraints on every processor. Rather, each processor needs to store
3509 * only those that are actually necessary for correctness given that it
3510 * only assembles linear systems on cells it owns. As discussed in the
3511 * @ref distributed_paper "this paper", the set of constraints we need to
3512 * know about is exactly the set of constraints on all locally relevant
3513 * degrees of freedom, so this is what we use to initialize the constraint
3514 * objects.
3515 *
3516 * @code
3517 *   {
3518 *   stokes_constraints.clear();
3519 *   stokes_constraints.reinit(stokes_relevant_set);
3520 *  
3521 *   DoFTools::make_hanging_node_constraints(stokes_dof_handler,
3522 *   stokes_constraints);
3523 *  
3524 *   const FEValuesExtractors::Vector velocity_components(0);
3525 *   VectorTools::interpolate_boundary_values(
3526 *   stokes_dof_handler,
3527 *   0,
3528 *   Functions::ZeroFunction<dim>(dim + 1),
3529 *   stokes_constraints,
3530 *   stokes_fe.component_mask(velocity_components));
3531 *  
3532 *   std::set<types::boundary_id> no_normal_flux_boundaries;
3533 *   no_normal_flux_boundaries.insert(1);
3534 *   VectorTools::compute_no_normal_flux_constraints(stokes_dof_handler,
3535 *   0,
3536 *   no_normal_flux_boundaries,
3537 *   stokes_constraints,
3538 *   mapping);
3539 *   stokes_constraints.close();
3540 *   }
3541 *   {
3542 *   temperature_constraints.clear();
3543 *   temperature_constraints.reinit(temperature_relevant_partitioning);
3544 *  
3545 *   DoFTools::make_hanging_node_constraints(temperature_dof_handler,
3546 *   temperature_constraints);
3547 *   VectorTools::interpolate_boundary_values(
3548 *   temperature_dof_handler,
3549 *   0,
3550 *   EquationData::TemperatureInitialValues<dim>(),
3551 *   temperature_constraints);
3552 *   VectorTools::interpolate_boundary_values(
3553 *   temperature_dof_handler,
3554 *   1,
3555 *   EquationData::TemperatureInitialValues<dim>(),
3556 *   temperature_constraints);
3557 *   temperature_constraints.close();
3558 *   }
3559 *  
3560 * @endcode
3561 *
3562 * All this done, we can then initialize the various matrix and vector
3563 * objects to their proper sizes. At the end, we also record that all
3564 * matrices and preconditioners have to be re-computed at the beginning of
3565 * the next time step. Note how we initialize the vectors for the Stokes
3566 * and temperature right hand sides: These are writable vectors (last
3567 * boolean argument set to @p true) that have the correct one-to-one
3568 * partitioning of locally owned elements but are still given the relevant
3569 * partitioning for means of figuring out the vector entries that are
3570 * going to be set right away. As for matrices, this allows for writing
3571 * local contributions into the vector with multiple threads (always
3572 * assuming that the same vector entry is not accessed by multiple threads
3573 * at the same time). The other vectors only allow for read access of
3574 * individual elements, including ghosts, but are not suitable for
3575 * solvers.
3576 *
3577 * @code
3578 *   setup_stokes_matrix(stokes_partitioning, stokes_relevant_partitioning);
3579 *   setup_stokes_preconditioner(stokes_partitioning,
3580 *   stokes_relevant_partitioning);
3581 *   setup_temperature_matrices(temperature_partitioning,
3582 *   temperature_relevant_partitioning);
3583 *  
3584 *   stokes_rhs.reinit(stokes_partitioning,
3585 *   stokes_relevant_partitioning,
3586 *   MPI_COMM_WORLD,
3587 *   true);
3588 *   stokes_solution.reinit(stokes_relevant_partitioning, MPI_COMM_WORLD);
3589 *   old_stokes_solution.reinit(stokes_solution);
3590 *  
3591 *   temperature_rhs.reinit(temperature_partitioning,
3592 *   temperature_relevant_partitioning,
3593 *   MPI_COMM_WORLD,
3594 *   true);
3595 *   temperature_solution.reinit(temperature_relevant_partitioning,
3596 *   MPI_COMM_WORLD);
3597 *   old_temperature_solution.reinit(temperature_solution);
3598 *   old_old_temperature_solution.reinit(temperature_solution);
3599 *  
3600 *   rebuild_stokes_matrix = true;
3601 *   rebuild_stokes_preconditioner = true;
3602 *   rebuild_temperature_matrices = true;
3603 *   rebuild_temperature_preconditioner = true;
3604 *   }
3605 *  
3606 *  
3607 *  
3608 * @endcode
3609 *
3610 *
3611 * <a name="TheBoussinesqFlowProblemassemblyfunctions"></a>
3612 * <h4>The BoussinesqFlowProblem assembly functions</h4>
3613 *
3614
3615 *
3616 * Following the discussion in the introduction and in the @ref threads
3617 * module, we split the assembly functions into different parts:
3618 *
3619
3620 *
3621 * <ul> <li> The local calculations of matrices and right hand sides, given
3622 * a certain cell as input (these functions are named
3623 * <code>local_assemble_*</code> below). The resulting function is, in other
3624 * words, essentially the body of the loop over all cells in @ref step_31 "step-31". Note,
3625 * however, that these functions store the result from the local
3626 * calculations in variables of classes from the CopyData namespace.
3627 *
3628
3629 *
3630 * <li>These objects are then given to the second step which writes the
3631 * local data into the global data structures (these functions are named
3632 * <code>copy_local_to_global_*</code> below). These functions are pretty
3633 * trivial.
3634 *
3635
3636 *
3637 * <li>These two subfunctions are then used in the respective assembly
3638 * routine (called <code>assemble_*</code> below), where a WorkStream object
3639 * is set up and runs over all the cells that belong to the processor's
3640 * subdomain. </ul>
3641 *
3642
3643 *
3644 *
3645 * <a name="Stokespreconditionerassembly"></a>
3646 * <h5>Stokes preconditioner assembly</h5>
3647 *
3648
3649 *
3650 * Let us start with the functions that builds the Stokes
3651 * preconditioner. The first two of these are pretty trivial, given the
3652 * discussion above. Note in particular that the main point in using the
3653 * scratch data object is that we want to avoid allocating any objects on
3654 * the free space each time we visit a new cell. As a consequence, the
3655 * assembly function below only has automatic local variables, and
3656 * everything else is accessed through the scratch data object, which is
3657 * allocated only once before we start the loop over all cells:
3658 *
3659 * @code
3660 *   template <int dim>
3662 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
3663 *   Assembly::Scratch::StokesPreconditioner<dim> & scratch,
3664 *   Assembly::CopyData::StokesPreconditioner<dim> & data)
3665 *   {
3666 *   const unsigned int dofs_per_cell = stokes_fe.n_dofs_per_cell();
3667 *   const unsigned int n_q_points =
3668 *   scratch.stokes_fe_values.n_quadrature_points;
3669 *  
3672 *  
3673 *   scratch.stokes_fe_values.reinit(cell);
3674 *   cell->get_dof_indices(data.local_dof_indices);
3675 *  
3676 *   data.local_matrix = 0;
3677 *  
3678 *   for (unsigned int q = 0; q < n_q_points; ++q)
3679 *   {
3680 *   for (unsigned int k = 0; k < dofs_per_cell; ++k)
3681 *   {
3682 *   scratch.grad_phi_u[k] =
3683 *   scratch.stokes_fe_values[velocities].gradient(k, q);
3684 *   scratch.phi_p[k] = scratch.stokes_fe_values[pressure].value(k, q);
3685 *   }
3686 *  
3687 *   for (unsigned int i = 0; i < dofs_per_cell; ++i)
3688 *   for (unsigned int j = 0; j < dofs_per_cell; ++j)
3689 *   data.local_matrix(i, j) +=
3691 *   scalar_product(scratch.grad_phi_u[i], scratch.grad_phi_u[j]) +
3694 *   (scratch.phi_p[i] * scratch.phi_p[j])) *
3695 *   scratch.stokes_fe_values.JxW(q);
3696 *   }
3697 *   }
3698 *  
3699 *  
3700 *  
3701 *   template <int dim>
3703 *   const Assembly::CopyData::StokesPreconditioner<dim> &data)
3704 *   {
3705 *   stokes_constraints.distribute_local_to_global(data.local_matrix,
3706 *   data.local_dof_indices,
3708 *   }
3709 *  
3710 *  
3711 * @endcode
3712 *
3713 * Now for the function that actually puts things together, using the
3715 * enumerate the cells it is supposed to work on. Typically, one would use
3720 * iterates over that subset of cells that satisfy a certain predicate (a
3721 * predicate is a function of one argument that either returns true or
3722 * false). The predicate we use here is IteratorFilters::LocallyOwnedCell,
3725 *
3726
3727 *
3728 * With this obstacle out of the way, we call the WorkStream::run
3729 * function with this set of cells, scratch and copy objects, and
3730 * with pointers to two functions: the local assembly and
3731 * copy-local-to-global function. These functions need to have very
3732 * specific signatures: three arguments in the first and one
3734 * WorkStream::run function for the meaning of these arguments).
3735 * Note how we use a lambda functions to
3736 * create a function object that satisfies this requirement. It uses
3737 * function arguments for the local assembly function that specify
3738 * cell, scratch data, and copy data, as well as function argument
3739 * for the copy function that expects the
3740 * data to be written into the global matrix (also see the discussion in
3741 * @ref step_13 "step-13"'s <code>assemble_linear_system()</code> function). On the other
3743 * the <code>this</code> pointer of the object on which that member
3744 * function is to operate on) is <i>bound</i> to the
3745 * <code>this</code> pointer of the current function and is captured. The
3746 * WorkStream::run function, as a consequence, does not need to know
3747 * anything about the object these functions work on.
3748 *
3749
3750 *
3751 * When the WorkStream is executed, it will create several local assembly
3753 * processors work on them. The function that needs to be synchronized,
3754 * i.e., the write operation into the global matrix, however, is executed by
3755 * only one thread at a time in the prescribed order. Of course, this only
3756 * holds for the parallelization on a single MPI process. Different MPI
3759 * distributed calculation, some data will accumulate at degrees of freedom
3762 * instead is that the Trilinos sparse matrix will keep that data and send
3764 * <code>compress()</code> command.
3765 *
3766 * @code
3767 *   template <int dim>
3769 *   {
3771 *  
3772 *   const QGauss<dim> quadrature_formula(parameters.stokes_velocity_degree + 1);
3773 *  
3774 *   using CellFilter =
3776 *  
3777 *   auto worker =
3778 *   [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
3779 *   Assembly::Scratch::StokesPreconditioner<dim> & scratch,
3780 *   Assembly::CopyData::StokesPreconditioner<dim> & data) {
3781 *   this->local_assemble_stokes_preconditioner(cell, scratch, data);
3782 *   };
3783 *  
3784 *   auto copier =
3785 *   [this](const Assembly::CopyData::StokesPreconditioner<dim> &data) {
3787 *   };
3788 *  
3790 *   stokes_dof_handler.begin_active()),
3793 *   worker,
3794 *   copier,
3795 *   Assembly::Scratch::StokesPreconditioner<dim>(
3796 *   stokes_fe,
3798 *   mapping,
3800 *   Assembly::CopyData::StokesPreconditioner<dim>(stokes_fe));
3801 *  
3803 *   }
3804 *  
3805 *  
3806 *  
3807 * @endcode
3808 *
3809 * The final function in this block initiates assembly of the Stokes
3810 * preconditioner matrix and then in fact builds the Stokes
3811 * preconditioner. It is mostly the same as in the serial case. The only
3812 * difference to @ref step_31 "step-31" is that we use a Jacobi preconditioner for the
3814 *
3815 * @code
3816 *   template <int dim>
3818 *   {
3819 *   if (rebuild_stokes_preconditioner == false)
3820 *   return;
3821 *  
3823 *   " Build Stokes preconditioner");
3824 *   pcout << " Rebuilding Stokes preconditioner..." << std::flush;
3825 *  
3827 *  
3828 *   std::vector<std::vector<bool>> constant_modes;
3831 *   stokes_fe.component_mask(
3833 *   constant_modes);
3834 *  
3836 *   std::make_shared<TrilinosWrappers::PreconditionJacobi>();
3837 *   Amg_preconditioner = std::make_shared<TrilinosWrappers::PreconditionAMG>();
3838 *  
3840 *   Amg_data.constant_modes = constant_modes;
3841 *   Amg_data.elliptic = true;
3842 *   Amg_data.higher_order_elements = true;
3843 *   Amg_data.smoother_sweeps = 2;
3844 *   Amg_data.aggregation_threshold = 0.02;
3845 *  
3846 *   Mp_preconditioner->initialize(stokes_preconditioner_matrix.block(1, 1));
3847 *   Amg_preconditioner->initialize(stokes_preconditioner_matrix.block(0, 0),
3848 *   Amg_data);
3849 *  
3851 *  
3852 *   pcout << std::endl;
3853 *   }
3854 *  
3855 *  
3856 * @endcode
3857 *
3858 *
3859 * <a name="Stokessystemassembly"></a>
3861 *
3862
3863 *
3866 * the local data into the global matrix and vector, and one for actually
3867 * running the loop over all cells with the help of the WorkStream
3869 * in case we have changed the mesh. Otherwise, just the
3871 * here. Since we are working with distributed matrices and vectors, we have
3873 * the assembly in order to send non-local data to the owner process.
3874 *
3875 * @code
3876 *   template <int dim>
3878 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
3879 *   Assembly::Scratch::StokesSystem<dim> & scratch,
3880 *   Assembly::CopyData::StokesSystem<dim> & data)
3881 *   {
3882 *   const unsigned int dofs_per_cell =
3883 *   scratch.stokes_fe_values.get_fe().n_dofs_per_cell();
3884 *   const unsigned int n_q_points =
3885 *   scratch.stokes_fe_values.n_quadrature_points;
3886 *  
3889 *  
3890 *   scratch.stokes_fe_values.reinit(cell);
3891 *  
3893 *   cell->as_dof_handler_iterator(temperature_dof_handler);
3894 *   scratch.temperature_fe_values.reinit(temperature_cell);
3895 *  
3897 *   data.local_matrix = 0;
3898 *   data.local_rhs = 0;
3899 *  
3900 *   scratch.temperature_fe_values.get_function_values(
3901 *   old_temperature_solution, scratch.old_temperature_values);
3902 *  
3903 *   for (unsigned int q = 0; q < n_q_points; ++q)
3904 *   {
3905 *   const double old_temperature = scratch.old_temperature_values[q];
3906 *  
3907 *   for (unsigned int k = 0; k < dofs_per_cell; ++k)
3908 *   {
3909 *   scratch.phi_u[k] = scratch.stokes_fe_values[velocities].value(k, q);
3911 *   {
3912 *   scratch.grads_phi_u[k] =
3913 *   scratch.stokes_fe_values[velocities].symmetric_gradient(k, q);
3914 *   scratch.div_phi_u[k] =
3915 *   scratch.stokes_fe_values[velocities].divergence(k, q);
3916 *   scratch.phi_p[k] =
3917 *   scratch.stokes_fe_values[pressure].value(k, q);
3918 *   }
3919 *   }
3920 *  
3921 *   if (rebuild_stokes_matrix == true)
3922 *   for (unsigned int i = 0; i < dofs_per_cell; ++i)
3923 *   for (unsigned int j = 0; j < dofs_per_cell; ++j)
3924 *   data.local_matrix(i, j) +=
3925 *   (EquationData::eta * 2 *
3926 *   (scratch.grads_phi_u[i] * scratch.grads_phi_u[j]) -
3927 *   (EquationData::pressure_scaling * scratch.div_phi_u[i] *
3928 *   scratch.phi_p[j]) -
3929 *   (EquationData::pressure_scaling * scratch.phi_p[i] *
3930 *   scratch.div_phi_u[j])) *
3931 *   scratch.stokes_fe_values.JxW(q);
3932 *  
3934 *   scratch.stokes_fe_values.quadrature_point(q));
3935 *  
3936 *   for (unsigned int i = 0; i < dofs_per_cell; ++i)
3937 *   data.local_rhs(i) += (EquationData::density(old_temperature) *
3938 *   gravity * scratch.phi_u[i]) *
3939 *   scratch.stokes_fe_values.JxW(q);
3940 *   }
3941 *  
3942 *   cell->get_dof_indices(data.local_dof_indices);
3943 *   }
3944 *  
3945 *  
3946 *  
3947 *   template <int dim>
3949 *   const Assembly::CopyData::StokesSystem<dim> &data)
3950 *   {
3951 *   if (rebuild_stokes_matrix == true)
3952 *   stokes_constraints.distribute_local_to_global(data.local_matrix,
3953 *   data.local_rhs,
3954 *   data.local_dof_indices,
3955 *   stokes_matrix,
3956 *   stokes_rhs);
3957 *   else
3958 *   stokes_constraints.distribute_local_to_global(data.local_rhs,
3959 *   data.local_dof_indices,
3960 *   stokes_rhs);
3961 *   }
3962 *  
3963 *  
3964 *  
3965 *   template <int dim>
3967 *   {
3969 *   " Assemble Stokes system");
3970 *  
3971 *   if (rebuild_stokes_matrix == true)
3972 *   stokes_matrix = 0;
3973 *  
3974 *   stokes_rhs = 0;
3975 *  
3976 *   const QGauss<dim> quadrature_formula(parameters.stokes_velocity_degree + 1);
3977 *  
3978 *   using CellFilter =
3980 *  
3983 *   stokes_dof_handler.begin_active()),
3985 *   [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
3986 *   Assembly::Scratch::StokesSystem<dim> & scratch,
3987 *   Assembly::CopyData::StokesSystem<dim> & data) {
3988 *   this->local_assemble_stokes_system(cell, scratch, data);
3989 *   },
3990 *   [this](const Assembly::CopyData::StokesSystem<dim> &data) {
3991 *   this->copy_local_to_global_stokes_system(data);
3992 *   },
3993 *   Assembly::Scratch::StokesSystem<dim>(
3994 *   stokes_fe,
3995 *   mapping,
4000 *   update_values),
4001 *   Assembly::CopyData::StokesSystem<dim>(stokes_fe));
4002 *  
4003 *   if (rebuild_stokes_matrix == true)
4006 *  
4007 *   rebuild_stokes_matrix = false;
4008 *  
4009 *   pcout << std::endl;
4010 *   }
4011 *  
4012 *  
4013 * @endcode
4014 *
4015 *
4016 * <a name="Temperaturematrixassembly"></a>
4018 *
4019
4020 *
4021 * The task to be performed by the next three functions is to calculate a
4024 * consists of the mass matrix plus a time step-dependent weight factor
4025 * times the Laplace matrix. This function is again essentially the body of
4026 * the loop over all cells from @ref step_31 "step-31".
4027 *
4028
4029 *
4031 *
4032 * @code
4033 *   template <int dim>
4035 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
4036 *   Assembly::Scratch::TemperatureMatrix<dim> & scratch,
4037 *   Assembly::CopyData::TemperatureMatrix<dim> & data)
4038 *   {
4039 *   const unsigned int dofs_per_cell =
4040 *   scratch.temperature_fe_values.get_fe().n_dofs_per_cell();
4041 *   const unsigned int n_q_points =
4042 *   scratch.temperature_fe_values.n_quadrature_points;
4043 *  
4044 *   scratch.temperature_fe_values.reinit(cell);
4045 *   cell->get_dof_indices(data.local_dof_indices);
4046 *  
4047 *   data.local_mass_matrix = 0;
4048 *   data.local_stiffness_matrix = 0;
4049 *  
4050 *   for (unsigned int q = 0; q < n_q_points; ++q)
4051 *   {
4052 *   for (unsigned int k = 0; k < dofs_per_cell; ++k)
4053 *   {
4054 *   scratch.grad_phi_T[k] =
4055 *   scratch.temperature_fe_values.shape_grad(k, q);
4056 *   scratch.phi_T[k] = scratch.temperature_fe_values.shape_value(k, q);
4057 *   }
4058 *  
4059 *   for (unsigned int i = 0; i < dofs_per_cell; ++i)
4060 *   for (unsigned int j = 0; j < dofs_per_cell; ++j)
4061 *   {
4062 *   data.local_mass_matrix(i, j) +=
4063 *   (scratch.phi_T[i] * scratch.phi_T[j] *
4064 *   scratch.temperature_fe_values.JxW(q));
4065 *   data.local_stiffness_matrix(i, j) +=
4066 *   (EquationData::kappa * scratch.grad_phi_T[i] *
4067 *   scratch.grad_phi_T[j] * scratch.temperature_fe_values.JxW(q));
4068 *   }
4069 *   }
4070 *   }
4071 *  
4072 *  
4073 *  
4074 *   template <int dim>
4076 *   const Assembly::CopyData::TemperatureMatrix<dim> &data)
4077 *   {
4078 *   temperature_constraints.distribute_local_to_global(data.local_mass_matrix,
4079 *   data.local_dof_indices,
4081 *   temperature_constraints.distribute_local_to_global(
4082 *   data.local_stiffness_matrix,
4083 *   data.local_dof_indices,
4085 *   }
4086 *  
4087 *  
4088 *   template <int dim>
4090 *   {
4091 *   if (rebuild_temperature_matrices == false)
4092 *   return;
4093 *  
4095 *   " Assemble temperature matrices");
4098 *  
4099 *   const QGauss<dim> quadrature_formula(parameters.temperature_degree + 2);
4100 *  
4101 *   using CellFilter =
4103 *  
4106 *   temperature_dof_handler.begin_active()),
4109 *   [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
4110 *   Assembly::Scratch::TemperatureMatrix<dim> & scratch,
4111 *   Assembly::CopyData::TemperatureMatrix<dim> & data) {
4112 *   this->local_assemble_temperature_matrix(cell, scratch, data);
4113 *   },
4114 *   [this](const Assembly::CopyData::TemperatureMatrix<dim> &data) {
4115 *   this->copy_local_to_global_temperature_matrix(data);
4116 *   },
4117 *   Assembly::Scratch::TemperatureMatrix<dim>(temperature_fe,
4118 *   mapping,
4120 *   Assembly::CopyData::TemperatureMatrix<dim>(temperature_fe));
4121 *  
4124 *  
4127 *   }
4128 *  
4129 *  
4130 * @endcode
4131 *
4132 *
4133 * <a name="Temperaturerighthandsideassembly"></a>
4134 * <h5>Temperature right hand side assembly</h5>
4135 *
4136
4137 *
4138 * This is the last assembly function. It calculates the right hand side of
4141 * the quadrature points (which are necessary for calculating the artificial
4144 * having inhomogeneous boundary conditions, by just making a right hand
4145 * side at this point (compare the comments for the <code>project()</code>
4146 * function above): We create some matrix columns with exactly the values
4149 * balance of the right hand side vector with the matrix system of
4150 * temperature.
4151 *
4152 * @code
4153 *   template <int dim>
4155 *   const std::pair<double, double> global_T_range,
4156 *   const double global_max_velocity,
4158 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
4159 *   Assembly::Scratch::TemperatureRHS<dim> & scratch,
4160 *   Assembly::CopyData::TemperatureRHS<dim> & data)
4161 *   {
4163 *  
4164 *   const unsigned int dofs_per_cell =
4165 *   scratch.temperature_fe_values.get_fe().n_dofs_per_cell();
4166 *   const unsigned int n_q_points =
4167 *   scratch.temperature_fe_values.n_quadrature_points;
4168 *  
4170 *  
4171 *   data.local_rhs = 0;
4172 *   data.matrix_for_bc = 0;
4173 *   cell->get_dof_indices(data.local_dof_indices);
4174 *  
4175 *   scratch.temperature_fe_values.reinit(cell);
4176 *  
4178 *   cell->as_dof_handler_iterator(stokes_dof_handler);
4179 *   scratch.stokes_fe_values.reinit(stokes_cell);
4180 *  
4181 *   scratch.temperature_fe_values.get_function_values(
4182 *   old_temperature_solution, scratch.old_temperature_values);
4183 *   scratch.temperature_fe_values.get_function_values(
4184 *   old_old_temperature_solution, scratch.old_old_temperature_values);
4185 *  
4186 *   scratch.temperature_fe_values.get_function_gradients(
4187 *   old_temperature_solution, scratch.old_temperature_grads);
4188 *   scratch.temperature_fe_values.get_function_gradients(
4189 *   old_old_temperature_solution, scratch.old_old_temperature_grads);
4190 *  
4191 *   scratch.temperature_fe_values.get_function_laplacians(
4192 *   old_temperature_solution, scratch.old_temperature_laplacians);
4193 *   scratch.temperature_fe_values.get_function_laplacians(
4194 *   old_old_temperature_solution, scratch.old_old_temperature_laplacians);
4195 *  
4196 *   scratch.stokes_fe_values[velocities].get_function_values(
4197 *   stokes_solution, scratch.old_velocity_values);
4198 *   scratch.stokes_fe_values[velocities].get_function_values(
4199 *   old_stokes_solution, scratch.old_old_velocity_values);
4200 *   scratch.stokes_fe_values[velocities].get_function_symmetric_gradients(
4201 *   stokes_solution, scratch.old_strain_rates);
4202 *   scratch.stokes_fe_values[velocities].get_function_symmetric_gradients(
4203 *   old_stokes_solution, scratch.old_old_strain_rates);
4204 *  
4205 *   const double nu =
4206 *   compute_viscosity(scratch.old_temperature_values,
4207 *   scratch.old_old_temperature_values,
4208 *   scratch.old_temperature_grads,
4209 *   scratch.old_old_temperature_grads,
4210 *   scratch.old_temperature_laplacians,
4211 *   scratch.old_old_temperature_laplacians,
4212 *   scratch.old_velocity_values,
4213 *   scratch.old_old_velocity_values,
4214 *   scratch.old_strain_rates,
4215 *   scratch.old_old_strain_rates,
4217 *   global_T_range.second - global_T_range.first,
4218 *   0.5 * (global_T_range.second + global_T_range.first),
4220 *   cell->diameter());
4221 *  
4222 *   for (unsigned int q = 0; q < n_q_points; ++q)
4223 *   {
4224 *   for (unsigned int k = 0; k < dofs_per_cell; ++k)
4225 *   {
4226 *   scratch.phi_T[k] = scratch.temperature_fe_values.shape_value(k, q);
4227 *   scratch.grad_phi_T[k] =
4228 *   scratch.temperature_fe_values.shape_grad(k, q);
4229 *   }
4230 *  
4231 *  
4232 *   const double T_term_for_rhs =
4233 *   (use_bdf2_scheme ?
4234 *   (scratch.old_temperature_values[q] *
4235 *   (1 + time_step / old_time_step) -
4236 *   scratch.old_old_temperature_values[q] * (time_step * time_step) /
4238 *   scratch.old_temperature_values[q]);
4239 *  
4240 *   const double ext_T =
4241 *   (use_bdf2_scheme ? (scratch.old_temperature_values[q] *
4242 *   (1 + time_step / old_time_step) -
4243 *   scratch.old_old_temperature_values[q] *
4245 *   scratch.old_temperature_values[q]);
4246 *  
4247 *   const Tensor<1, dim> ext_grad_T =
4248 *   (use_bdf2_scheme ? (scratch.old_temperature_grads[q] *
4249 *   (1 + time_step / old_time_step) -
4250 *   scratch.old_old_temperature_grads[q] * time_step /
4251 *   old_time_step) :
4252 *   scratch.old_temperature_grads[q]);
4253 *  
4255 *   (use_bdf2_scheme ?
4256 *   (scratch.old_velocity_values[q] * (1 + time_step / old_time_step) -
4257 *   scratch.old_old_velocity_values[q] * time_step / old_time_step) :
4258 *   scratch.old_velocity_values[q]);
4259 *  
4261 *   (use_bdf2_scheme ?
4262 *   (scratch.old_strain_rates[q] * (1 + time_step / old_time_step) -
4263 *   scratch.old_old_strain_rates[q] * time_step / old_time_step) :
4264 *   scratch.old_strain_rates[q]);
4265 *  
4266 *   const double gamma =
4271 *  
4272 *   for (unsigned int i = 0; i < dofs_per_cell; ++i)
4273 *   {
4274 *   data.local_rhs(i) +=
4275 *   (T_term_for_rhs * scratch.phi_T[i] -
4276 *   time_step * extrapolated_u * ext_grad_T * scratch.phi_T[i] -
4277 *   time_step * nu * ext_grad_T * scratch.grad_phi_T[i] +
4278 *   time_step * gamma * scratch.phi_T[i]) *
4279 *   scratch.temperature_fe_values.JxW(q);
4280 *  
4281 *   if (temperature_constraints.is_inhomogeneously_constrained(
4282 *   data.local_dof_indices[i]))
4283 *   {
4284 *   for (unsigned int j = 0; j < dofs_per_cell; ++j)
4285 *   data.matrix_for_bc(j, i) +=
4286 *   (scratch.phi_T[i] * scratch.phi_T[j] *
4288 *   (time_step + old_time_step)) :
4289 *   1.) +
4290 *   scratch.grad_phi_T[i] * scratch.grad_phi_T[j] *
4292 *   scratch.temperature_fe_values.JxW(q);
4293 *   }
4294 *   }
4295 *   }
4296 *   }
4297 *  
4298 *  
4299 *   template <int dim>
4301 *   const Assembly::CopyData::TemperatureRHS<dim> &data)
4302 *   {
4303 *   temperature_constraints.distribute_local_to_global(data.local_rhs,
4304 *   data.local_dof_indices,
4306 *   data.matrix_for_bc);
4307 *   }
4308 *  
4309 *  
4310 *  
4311 * @endcode
4312 *
4314 * right hand side, we also generate the final matrix. As mentioned above,
4315 * it is a sum of the mass matrix and the Laplace matrix, times some time
4316 * step-dependent weight. This weight is specified by the BDF-2 time
4317 * integration scheme, see the introduction in @ref step_31 "step-31". What is new in this
4320 * preconditioner as well. The reason is that the setup of the Jacobi
4321 * preconditioner takes a noticeable time compared to the solver because we
4324 * consists of a diagonal, but in Trilinos it is derived from more general
4325 * framework for point relaxation preconditioners which is a bit
4327 * preconditioner, even though the matrix entries may slightly change
4329 * we remesh every few time steps (and regenerate the preconditioner then).
4330 *
4331 * @code
4332 *   template <int dim>
4334 *   const double maximal_velocity)
4335 *   {
4336 *   const bool use_bdf2_scheme = (timestep_number != 0);
4337 *  
4338 *   if (use_bdf2_scheme == true)
4339 *   {
4344 *   }
4345 *   else
4346 *   {
4349 *   }
4350 *  
4352 *   {
4354 *   std::make_shared<TrilinosWrappers::PreconditionJacobi>();
4355 *   T_preconditioner->initialize(temperature_matrix);
4357 *   }
4358 *  
4359 * @endcode
4360 *
4361 * The next part is computing the right hand side vectors. To do so, we
4362 * first compute the average temperature @f$T_m@f$ that we use for evaluating
4363 * the artificial viscosity stabilization through the residual @f$E(T) =
4364 * (T-T_m)^2@f$. We do this by defining the midpoint between maximum and
4366 * entropy viscosity. An alternative would be to use the integral average,
4367 * but the results are not very sensitive to this choice. The rest then
4368 * only requires calling WorkStream::run again, binding the arguments to
4370 * same in every call to the correct values:
4371 *
4372 * @code
4373 *   temperature_rhs = 0;
4374 *  
4375 *   const QGauss<dim> quadrature_formula(parameters.temperature_degree + 2);
4376 *   const std::pair<double, double> global_T_range =
4378 *  
4379 *   const double average_temperature =
4380 *   0.5 * (global_T_range.first + global_T_range.second);
4381 *   const double global_entropy_variation =
4383 *  
4384 *   using CellFilter =
4386 *  
4387 *   auto worker =
4389 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
4390 *   Assembly::Scratch::TemperatureRHS<dim> & scratch,
4391 *   Assembly::CopyData::TemperatureRHS<dim> & data) {
4392 *   this->local_assemble_temperature_rhs(global_T_range,
4395 *   cell,
4396 *   scratch,
4397 *   data);
4398 *   };
4399 *  
4400 *   auto copier = [this](const Assembly::CopyData::TemperatureRHS<dim> &data) {
4402 *   };
4403 *  
4405 *   temperature_dof_handler.begin_active()),
4408 *   worker,
4409 *   copier,
4410 *   Assembly::Scratch::TemperatureRHS<dim>(
4412 *   Assembly::CopyData::TemperatureRHS<dim>(temperature_fe));
4413 *  
4415 *   }
4416 *  
4417 *  
4418 *  
4419 * @endcode
4420 *
4421 *
4422 * <a name="BoussinesqFlowProblemsolve"></a>
4423 * <h4>BoussinesqFlowProblem::solve</h4>
4424 *
4425
4426 *
4427 * This function solves the linear systems in each time step of the
4430 * function in @ref step_31 "step-31". However, there are a few changes here.
4431 *
4432
4433 *
4434 * The first change is related to the way we store our solution: we keep the
4435 * vectors with locally owned degrees of freedom plus ghost nodes on each
4436 * MPI node. When we enter a solver which is supposed to perform
4437 * matrix-vector products with a distributed matrix, this is not the
4438 * appropriate form, though. There, we will want to have the solution vector
4439 * to be distributed in the same way as the matrix, i.e. without any
4440 * ghosts. So what we do first is to generate a distributed vector called
4442 * dofs into that, which is neatly done by the <code>operator=</code> of the
4443 * Trilinos vector.
4444 *
4445
4446 *
4447 * Next, we scale the pressure solution (or rather, the initial guess) for
4448 * the solver so that it matches with the length scales in the matrices, as
4450 * solution back to the correct units after the solution is completed. We
4452 * also did in @ref step_31 "step-31" in order not to disturb the Schur complement by some
4453 * vector entries that actually are irrelevant during the solve stage. As a
4454 * difference to @ref step_31 "step-31", here we do it only for the locally owned pressure
4455 * dofs. After solving for the Stokes solution, each processor copies the
4456 * distributed solution back into the solution vector that also includes
4457 * ghost elements.
4458 *
4459
4460 *
4462 * Stokes solver: A fast solver that sometimes breaks down, and a robust
4463 * solver that is slower. This is what we already discussed in the
4465 * with the fast solver based on the simple preconditioner based on the AMG
4466 * V-cycle instead of an approximate solve (this is indicated by the
4469 * converge, everything is fine. If we do not converge, the solver control
4470 * object will throw an exception SolverControl::NoConvergence. Usually,
4471 * this would abort the program because we don't catch them in our usual
4472 * <code>solve()</code> functions. This is certainly not what we want to
4473 * happen here. Rather, we want to switch to the strong solver and continue
4474 * the solution process with whatever vector we got so far. Hence, we catch
4475 * the exception with the C++ try/catch mechanism. We then simply go through
4476 * the same solver sequence again in the <code>catch</code> clause, this
4477 * time passing the @p true flag to the preconditioner for the strong
4478 * solver, signaling an approximate CG solve.
4479 *
4480 * @code
4481 *   template <int dim>
4482 *   void BoussinesqFlowProblem<dim>::solve()
4483 *   {
4484 *   {
4485 *   TimerOutput::Scope timer_section(computing_timer,
4486 *   " Solve Stokes system");
4487 *  
4488 *   pcout << " Solving Stokes system... " << std::flush;
4489 *  
4490 *   TrilinosWrappers::MPI::BlockVector distributed_stokes_solution(
4491 *   stokes_rhs);
4492 *   distributed_stokes_solution = stokes_solution;
4493 *  
4494 *   distributed_stokes_solution.block(1) /= EquationData::pressure_scaling;
4495 *  
4496 *   const unsigned int
4497 *   start = (distributed_stokes_solution.block(0).size() +
4498 *   distributed_stokes_solution.block(1).local_range().first),
4499 *   end = (distributed_stokes_solution.block(0).size() +
4500 *   distributed_stokes_solution.block(1).local_range().second);
4501 *   for (unsigned int i = start; i < end; ++i)
4502 *   if (stokes_constraints.is_constrained(i))
4503 *   distributed_stokes_solution(i) = 0;
4504 *  
4505 *  
4506 *   PrimitiveVectorMemory<TrilinosWrappers::MPI::BlockVector> mem;
4507 *  
4508 *   unsigned int n_iterations = 0;
4509 *   const double solver_tolerance = 1e-8 * stokes_rhs.l2_norm();
4510 *   SolverControl solver_control(30, solver_tolerance);
4511 *  
4512 *   try
4513 *   {
4514 *   const LinearSolvers::BlockSchurPreconditioner<
4515 *   TrilinosWrappers::PreconditionAMG,
4516 *   TrilinosWrappers::PreconditionJacobi>
4517 *   preconditioner(stokes_matrix,
4518 *   stokes_preconditioner_matrix,
4519 *   *Mp_preconditioner,
4520 *   *Amg_preconditioner,
4521 *   false);
4522 *  
4523 *   SolverFGMRES<TrilinosWrappers::MPI::BlockVector> solver(
4524 *   solver_control,
4525 *   mem,
4526 *   SolverFGMRES<TrilinosWrappers::MPI::BlockVector>::AdditionalData(
4527 *   30));
4528 *   solver.solve(stokes_matrix,
4529 *   distributed_stokes_solution,
4530 *   stokes_rhs,
4531 *   preconditioner);
4532 *  
4533 *   n_iterations = solver_control.last_step();
4534 *   }
4535 *  
4536 *   catch (SolverControl::NoConvergence &)
4537 *   {
4538 *   const LinearSolvers::BlockSchurPreconditioner<
4539 *   TrilinosWrappers::PreconditionAMG,
4540 *   TrilinosWrappers::PreconditionJacobi>
4541 *   preconditioner(stokes_matrix,
4542 *   stokes_preconditioner_matrix,
4543 *   *Mp_preconditioner,
4544 *   *Amg_preconditioner,
4545 *   true);
4546 *  
4547 *   SolverControl solver_control_refined(stokes_matrix.m(),
4548 *   solver_tolerance);
4549 *   SolverFGMRES<TrilinosWrappers::MPI::BlockVector> solver(
4550 *   solver_control_refined,
4551 *   mem,
4552 *   SolverFGMRES<TrilinosWrappers::MPI::BlockVector>::AdditionalData(
4553 *   50));
4554 *   solver.solve(stokes_matrix,
4555 *   distributed_stokes_solution,
4556 *   stokes_rhs,
4557 *   preconditioner);
4558 *  
4559 *   n_iterations =
4560 *   (solver_control.last_step() + solver_control_refined.last_step());
4561 *   }
4562 *  
4563 *  
4564 *   stokes_constraints.distribute(distributed_stokes_solution);
4565 *  
4566 *   distributed_stokes_solution.block(1) *= EquationData::pressure_scaling;
4567 *  
4568 *   stokes_solution = distributed_stokes_solution;
4569 *   pcout << n_iterations << " iterations." << std::endl;
4570 *   }
4571 *  
4572 *  
4573 * @endcode
4574 *
4575 * Now let's turn to the temperature part: First, we compute the time step
4576 * size. We found that we need smaller time steps for 3d than for 2d for
4577 * the shell geometry. This is because the cells are more distorted in
4578 * that case (it is the smallest edge length that determines the CFL
4579 * number). Instead of computing the time step from maximum velocity and
4580 * minimal mesh size as in @ref step_31 "step-31", we compute local CFL numbers, i.e., on
4581 * each cell we compute the maximum velocity times the mesh size, and
4582 * compute the maximum of them. Hence, we need to choose the factor in
4583 * front of the time step slightly smaller.
4584 *
4585
4586 *
4587 * After temperature right hand side assembly, we solve the linear system
4588 * for temperature (with fully distributed vectors without any ghosts),
4589 * apply constraints and copy the vector back to one with ghosts.
4590 *
4591
4592 *
4593 * In the end, we extract the temperature range similarly to @ref step_31 "step-31" to
4594 * produce some output (for example in order to help us choose the
4596 * difference is that we need to exchange maxima over all processors.
4597 *
4598 * @code
4599 *   {
4601 *   " Assemble temperature rhs");
4602 *  
4604 *  
4605 *   const double scaling = (dim == 3 ? 0.25 : 1.0);
4606 *   time_step = (scaling / (2.1 * dim * std::sqrt(1. * dim)) /
4607 *   (parameters.temperature_degree * get_cfl_number()));
4608 *  
4609 *   const double maximal_velocity = get_maximal_velocity();
4610 *   pcout << " Maximal velocity: "
4612 *   << " cm/year" << std::endl;
4613 *   pcout << " "
4614 *   << "Time step: " << time_step / EquationData::year_in_seconds
4615 *   << " years" << std::endl;
4616 *  
4619 *   }
4620 *  
4621 *   {
4623 *   " Solve temperature system");
4624 *  
4625 *   SolverControl solver_control(temperature_matrix.m(),
4626 *   1e-12 * temperature_rhs.l2_norm());
4627 *   SolverCG<TrilinosWrappers::MPI::Vector> cg(solver_control);
4628 *  
4632 *  
4633 *   cg.solve(temperature_matrix,
4636 *   *T_preconditioner);
4637 *  
4640 *  
4641 *   pcout << " " << solver_control.last_step()
4642 *   << " CG iterations for temperature" << std::endl;
4643 *  
4644 *   double temperature[2] = {std::numeric_limits<double>::max(),
4645 *   -std::numeric_limits<double>::max()};
4646 *   double global_temperature[2];
4647 *  
4648 *   for (unsigned int i =
4649 *   distributed_temperature_solution.local_range().first;
4650 *   i < distributed_temperature_solution.local_range().second;
4651 *   ++i)
4652 *   {
4653 *   temperature[0] =
4654 *   std::min<double>(temperature[0],
4656 *   temperature[1] =
4657 *   std::max<double>(temperature[1],
4659 *   }
4660 *  
4661 *   temperature[0] *= -1.0;
4663 *   global_temperature[0] *= -1.0;
4664 *  
4665 *   pcout << " Temperature range: " << global_temperature[0] << ' '
4666 *   << global_temperature[1] << std::endl;
4667 *   }
4668 *   }
4669 *  
4670 *  
4671 * @endcode
4672 *
4673 *
4674 * <a name="BoussinesqFlowProblemoutput_results"></a>
4676 *
4677
4678 *
4679 * Next comes the function that generates the output. The quantities to
4680 * output could be introduced manually like we did in @ref step_31 "step-31". An
4683 * DataOut. This allows us to output derived quantities from the solution,
4685 * virtual function DataPostprocessor::evaluate_vector_field(),
4687 * give it values of the numerical solution, its derivatives, normals to the
4688 * cell, the actual evaluation points and any additional quantities. This
4689 * follows the same procedure as discussed in @ref step_29 "step-29" and other programs.
4690 *
4691 * @code
4692 *   template <int dim>
4694 *   : public DataPostprocessor<dim>
4695 *   {
4696 *   public:
4697 *   Postprocessor(const unsigned int partition, const double minimal_pressure);
4698 *  
4699 *   virtual void evaluate_vector_field(
4701 *   std::vector<Vector<double>> &computed_quantities) const override;
4702 *  
4703 *   virtual std::vector<std::string> get_names() const override;
4704 *  
4705 *   virtual std::vector<
4707 *   get_data_component_interpretation() const override;
4708 *  
4709 *   virtual UpdateFlags get_needed_update_flags() const override;
4710 *  
4711 *   private:
4712 *   const unsigned int partition;
4713 *   const double minimal_pressure;
4714 *   };
4715 *  
4716 *  
4717 *   template <int dim>
4719 *   const unsigned int partition,
4720 *   const double minimal_pressure)
4721 *   : partition(partition)
4723 *   {}
4724 *  
4725 *  
4726 * @endcode
4727 *
4728 * Here we define the names for the variables we want to output. These are
4733 * all other quantities are scalar.
4734 *
4735 * @code
4736 *   template <int dim>
4737 *   std::vector<std::string>
4739 *   {
4740 *   std::vector<std::string> solution_names(dim, "velocity");
4741 *   solution_names.emplace_back("p");
4742 *   solution_names.emplace_back("T");
4743 *   solution_names.emplace_back("friction_heating");
4744 *   solution_names.emplace_back("partition");
4745 *  
4746 *   return solution_names;
4747 *   }
4748 *  
4749 *  
4750 *   template <int dim>
4751 *   std::vector<DataComponentInterpretation::DataComponentInterpretation>
4753 *   const
4754 *   {
4755 *   std::vector<DataComponentInterpretation::DataComponentInterpretation>
4756 *   interpretation(dim,
4758 *  
4763 *  
4764 *   return interpretation;
4765 *   }
4766 *  
4767 *  
4768 *   template <int dim>
4769 *   UpdateFlags
4771 *   {
4773 *   }
4774 *  
4775 *  
4776 * @endcode
4777 *
4779 * also did for the output, we rescale the velocity from its SI units to
4785 *
4786
4787 *
4788 * The quantities we output here are more for illustration, rather than for
4789 * actual scientific value. We come back to this briefly in the results
4790 * section of this program and explain what one may in fact be interested in.
4791 *
4792 * @code
4796 *   std::vector<Vector<double>> & computed_quantities) const
4797 *   {
4798 *   const unsigned int n_evaluation_points = inputs.solution_values.size();
4799 *   Assert(inputs.solution_gradients.size() == n_evaluation_points,
4800 *   ExcInternalError());
4802 *   ExcInternalError());
4803 *   Assert(inputs.solution_values[0].size() == dim + 2, ExcInternalError());
4804 *  
4805 *   for (unsigned int p = 0; p < n_evaluation_points; ++p)
4806 *   {
4807 *   for (unsigned int d = 0; d < dim; ++d)
4808 *   computed_quantities[p](d) = (inputs.solution_values[p](d) *
4810 *  
4811 *   const double pressure =
4812 *   (inputs.solution_values[p](dim) - minimal_pressure);
4813 *   computed_quantities[p](dim) = pressure;
4814 *  
4815 *   const double temperature = inputs.solution_values[p](dim + 1);
4816 *   computed_quantities[p](dim + 1) = temperature;
4817 *  
4819 *   for (unsigned int d = 0; d < dim; ++d)
4820 *   grad_u[d] = inputs.solution_gradients[p][d];
4822 *   computed_quantities[p](dim + 2) =
4824 *  
4825 *   computed_quantities[p](dim + 3) = partition;
4826 *   }
4827 *   }
4828 *  
4829 *  
4830 * @endcode
4831 *
4832 * The <code>output_results()</code> function has a similar task to the one
4833 * in @ref step_31 "step-31". However, here we are going to demonstrate a different
4834 * technique on how to merge output from different DoFHandler objects. The
4835 * way we're going to achieve this recombination is to create a joint
4836 * DoFHandler that collects both components, the Stokes solution and the
4837 * temperature solution. This can be nicely done by combining the finite
4838 * elements from the two systems to form one FESystem, and let this
4839 * collective system define a new DoFHandler object. To be sure that
4840 * everything was done correctly, we perform a sanity check that ensures
4841 * that we got all the dofs from both Stokes and temperature even in the
4842 * combined system. We then combine the data vectors. Unfortunately, there
4843 * is no straight-forward relation that tells us how to sort Stokes and
4844 * temperature vector into the joint vector. The way we can get around this
4845 * trouble is to rely on the information collected in the FESystem. For each
4846 * dof on a cell, the joint finite element knows to which equation component
4847 * (velocity component, pressure, or temperature) it belongs – that's the
4848 * information we need! So we step through all cells (with iterators into
4849 * all three DoFHandlers moving in sync), and for each joint cell dof, we
4850 * read out that component using the FiniteElement::system_to_base_index
4851 * function (see there for a description of what the various parts of its
4852 * return value contain). We also need to keep track whether we're on a
4853 * Stokes dof or a temperature dof, which is contained in
4854 * joint_fe.system_to_base_index(i).first.first. Eventually, the dof_indices
4855 * data structures on either of the three systems tell us how the relation
4856 * between global vector and local dofs looks like on the present cell,
4857 * which concludes this tedious work. We make sure that each processor only
4858 * works on the subdomain it owns locally (and not on ghost or artificial
4859 * cells) when building the joint solution vector. The same will then have
4860 * to be done in DataOut::build_patches(), but that function does so
4861 * automatically.
4862 *
4863
4864 *
4865 * What we end up with is a set of patches that we can write using the
4866 * functions in DataOutBase in a variety of output formats. Here, we then
4867 * have to pay attention that what each processor writes is really only its
4868 * own part of the domain, i.e. we will want to write each processor's
4870 * number to the filename when we write the solution. This is not really
4871 * new, we did it similarly in @ref step_40 "step-40". Note that we write in the compressed
4873 * storage.
4874 *
4875
4876 *
4877 * All the rest of the work is done in the PostProcessor class.
4878 *
4879 * @code
4880 *   template <int dim>
4882 *   {
4884 *  
4886 *  
4888 *   joint_dof_handler.distribute_dofs(joint_fe);
4889 *   Assert(joint_dof_handler.n_dofs() ==
4890 *   stokes_dof_handler.n_dofs() + temperature_dof_handler.n_dofs(),
4891 *   ExcInternalError());
4892 *  
4894 *   joint_solution.reinit(joint_dof_handler.locally_owned_dofs(),
4895 *   MPI_COMM_WORLD);
4896 *  
4897 *   {
4898 *   std::vector<types::global_dof_index> local_joint_dof_indices(
4899 *   joint_fe.n_dofs_per_cell());
4900 *   std::vector<types::global_dof_index> local_stokes_dof_indices(
4901 *   stokes_fe.n_dofs_per_cell());
4902 *   std::vector<types::global_dof_index> local_temperature_dof_indices(
4903 *   temperature_fe.n_dofs_per_cell());
4904 *  
4906 *   joint_cell = joint_dof_handler.begin_active(),
4908 *   stokes_cell = stokes_dof_handler.begin_active(),
4909 *   temperature_cell = temperature_dof_handler.begin_active();
4910 *   for (; joint_cell != joint_endc;
4912 *   if (joint_cell->is_locally_owned())
4913 *   {
4914 *   joint_cell->get_dof_indices(local_joint_dof_indices);
4915 *   stokes_cell->get_dof_indices(local_stokes_dof_indices);
4917 *  
4918 *   for (unsigned int i = 0; i < joint_fe.n_dofs_per_cell(); ++i)
4919 *   if (joint_fe.system_to_base_index(i).first.first == 0)
4920 *   {
4921 *   Assert(joint_fe.system_to_base_index(i).second <
4923 *   ExcInternalError());
4924 *  
4926 *   local_stokes_dof_indices[joint_fe.system_to_base_index(i)
4927 *   .second]);
4928 *   }
4929 *   else
4930 *   {
4931 *   Assert(joint_fe.system_to_base_index(i).first.first == 1,
4932 *   ExcInternalError());
4933 *   Assert(joint_fe.system_to_base_index(i).second <
4935 *   ExcInternalError());
4939 *   [joint_fe.system_to_base_index(i).second]);
4940 *   }
4941 *   }
4942 *   }
4943 *  
4945 *  
4951 *   MPI_COMM_WORLD);
4953 *  
4955 *   MPI_COMM_WORLD),
4956 *   stokes_solution.block(1).min());
4957 *  
4958 *   DataOut<dim> data_out;
4959 *   data_out.attach_dof_handler(joint_dof_handler);
4960 *   data_out.add_data_vector(locally_relevant_joint_solution, postprocessor);
4961 *   data_out.build_patches();
4962 *  
4963 *   static int out_index = 0;
4964 *   data_out.write_vtu_with_pvtu_record(
4965 *   "./", "solution", out_index, MPI_COMM_WORLD, 5);
4966 *  
4967 *   out_index++;
4968 *   }
4969 *  
4970 *  
4971 *  
4972 * @endcode
4973 *
4974 *
4975 * <a name="BoussinesqFlowProblemrefine_mesh"></a>
4977 *
4978
4979 *
4980 * This function isn't really new either. Since the <code>setup_dofs</code>
4981 * function that we call in the middle has its own timer section, we split
4982 * timing this function into two sections. It will also allow us to easily
4983 * identify which of the two is more expensive.
4984 *
4985
4986 *
4987 * One thing of note, however, is that we only want to compute error
4988 * indicators on the locally owned subdomain. In order to achieve this, we
4989 * pass one additional argument to the KellyErrorEstimator::estimate
4990 * function. Note that the vector for error estimates is resized to the
4991 * number of active cells present on the current process, which is less than
4992 * the total number of active cells on all processors (but more than the
4993 * number of locally owned active cells); each processor only has a few
4994 * coarse cells around the locally owned ones, as also explained in @ref step_40 "step-40".
4995 *
4996
4997 *
4998 * The local error estimates are then handed to a %parallel version of
4999 * GridRefinement (in namespace parallel::distributed::GridRefinement, see
5000 * also @ref step_40 "step-40") which looks at the errors and finds the cells that need
5001 * refinement by comparing the error values across processors. As in
5002 * @ref step_31 "step-31", we want to limit the maximum grid level. So in case some cells
5003 * have been marked that are already at the finest level, we simply clear
5004 * the refine flags.
5005 *
5006 * @code
5007 *   template <int dim>
5008 *   void
5009 *   BoussinesqFlowProblem<dim>::refine_mesh(const unsigned int max_grid_level)
5010 *   {
5011 *   parallel::distributed::SolutionTransfer<dim, TrilinosWrappers::MPI::Vector>
5012 *   temperature_trans(temperature_dof_handler);
5013 *   parallel::distributed::SolutionTransfer<dim,
5014 *   TrilinosWrappers::MPI::BlockVector>
5015 *   stokes_trans(stokes_dof_handler);
5016 *  
5017 *   {
5018 *   TimerOutput::Scope timer_section(computing_timer,
5019 *   "Refine mesh structure, part 1");
5020 *  
5021 *   Vector<float> estimated_error_per_cell(triangulation.n_active_cells());
5022 *  
5023 *   KellyErrorEstimator<dim>::estimate(
5024 *   temperature_dof_handler,
5025 *   QGauss<dim - 1>(parameters.temperature_degree + 1),
5026 *   std::map<types::boundary_id, const Function<dim> *>(),
5027 *   temperature_solution,
5028 *   estimated_error_per_cell,
5029 *   ComponentMask(),
5030 *   nullptr,
5031 *   0,
5032 *   triangulation.locally_owned_subdomain());
5033 *  
5034 *   parallel::distributed::GridRefinement::refine_and_coarsen_fixed_fraction(
5035 *   triangulation, estimated_error_per_cell, 0.3, 0.1);
5036 *  
5037 *   if (triangulation.n_levels() > max_grid_level)
5038 *   for (typename Triangulation<dim>::active_cell_iterator cell =
5039 *   triangulation.begin_active(max_grid_level);
5040 *   cell != triangulation.end();
5041 *   ++cell)
5042 *   cell->clear_refine_flag();
5043 *  
5044 * @endcode
5045 *
5046 * With all flags marked as necessary, we can then tell the
5047 * parallel::distributed::SolutionTransfer objects to get ready to
5048 * transfer data from one mesh to the next, which they will do when
5049 * notified by
5050 * Triangulation as part of the @p execute_coarsening_and_refinement() call.
5051 * The syntax is similar to the non-%parallel solution transfer (with the
5052 * exception that here a pointer to the vector entries is enough). The
5053 * remainder of the function further down below is then concerned with
5054 * setting up the data structures again after mesh refinement and
5055 * restoring the solution vectors on the new mesh.
5056 *
5057 * @code
5058 *   std::vector<const TrilinosWrappers::MPI::Vector *> x_temperature(2);
5059 *   x_temperature[0] = &temperature_solution;
5060 *   x_temperature[1] = &old_temperature_solution;
5061 *   std::vector<const TrilinosWrappers::MPI::BlockVector *> x_stokes(2);
5062 *   x_stokes[0] = &stokes_solution;
5063 *   x_stokes[1] = &old_stokes_solution;
5064 *  
5065 *   triangulation.prepare_coarsening_and_refinement();
5066 *  
5067 *   temperature_trans.prepare_for_coarsening_and_refinement(x_temperature);
5068 *   stokes_trans.prepare_for_coarsening_and_refinement(x_stokes);
5069 *  
5070 *   triangulation.execute_coarsening_and_refinement();
5071 *   }
5072 *  
5073 *   setup_dofs();
5074 *  
5075 *   {
5076 *   TimerOutput::Scope timer_section(computing_timer,
5077 *   "Refine mesh structure, part 2");
5078 *  
5079 *   {
5080 *   TrilinosWrappers::MPI::Vector distributed_temp1(temperature_rhs);
5081 *   TrilinosWrappers::MPI::Vector distributed_temp2(temperature_rhs);
5082 *  
5083 *   std::vector<TrilinosWrappers::MPI::Vector *> tmp(2);
5084 *   tmp[0] = &(distributed_temp1);
5085 *   tmp[1] = &(distributed_temp2);
5086 *   temperature_trans.interpolate(tmp);
5087 *  
5088 * @endcode
5089 *
5090 * enforce constraints to make the interpolated solution conforming on
5091 * the new mesh:
5092 *
5093 * @code
5094 *   temperature_constraints.distribute(distributed_temp1);
5095 *   temperature_constraints.distribute(distributed_temp2);
5096 *  
5097 *   temperature_solution = distributed_temp1;
5098 *   old_temperature_solution = distributed_temp2;
5099 *   }
5100 *  
5101 *   {
5102 *   TrilinosWrappers::MPI::BlockVector distributed_stokes(stokes_rhs);
5103 *   TrilinosWrappers::MPI::BlockVector old_distributed_stokes(stokes_rhs);
5104 *  
5105 *   std::vector<TrilinosWrappers::MPI::BlockVector *> stokes_tmp(2);
5106 *   stokes_tmp[0] = &(distributed_stokes);
5107 *   stokes_tmp[1] = &(old_distributed_stokes);
5108 *  
5109 *   stokes_trans.interpolate(stokes_tmp);
5110 *  
5111 * @endcode
5112 *
5113 * enforce constraints to make the interpolated solution conforming on
5114 * the new mesh:
5115 *
5116 * @code
5117 *   stokes_constraints.distribute(distributed_stokes);
5118 *   stokes_constraints.distribute(old_distributed_stokes);
5119 *  
5120 *   stokes_solution = distributed_stokes;
5121 *   old_stokes_solution = old_distributed_stokes;
5122 *   }
5123 *   }
5124 *   }
5125 *  
5126 *  
5127 *  
5128 * @endcode
5129 *
5130 *
5131 * <a name="BoussinesqFlowProblemrun"></a>
5132 * <h4>BoussinesqFlowProblem::run</h4>
5133 *
5134
5135 *
5136 * This is the final and controlling function in this class. It, in fact,
5137 * runs the entire rest of the program and is, once more, very similar to
5138 * @ref step_31 "step-31". The only substantial difference is that we use a different mesh
5139 * now (a GridGenerator::hyper_shell instead of a simple cube geometry).
5140 *
5141 * @code
5142 *   template <int dim>
5143 *   void BoussinesqFlowProblem<dim>::run()
5144 *   {
5145 *   GridGenerator::hyper_shell(triangulation,
5146 *   Point<dim>(),
5147 *   EquationData::R0,
5148 *   EquationData::R1,
5149 *   (dim == 3) ? 96 : 12,
5150 *   true);
5151 *  
5152 *   global_Omega_diameter = GridTools::diameter(triangulation);
5153 *  
5154 *   triangulation.refine_global(parameters.initial_global_refinement);
5155 *  
5156 *   setup_dofs();
5157 *  
5158 *   unsigned int pre_refinement_step = 0;
5159 *  
5160 *   start_time_iteration:
5161 *  
5162 *   {
5163 *   TrilinosWrappers::MPI::Vector solution(
5164 *   temperature_dof_handler.locally_owned_dofs());
5165 * @endcode
5166 *
5167 * VectorTools::project supports parallel vector classes with most
5168 * standard finite elements via deal.II's own native MatrixFree framework:
5169 * since we use standard Lagrange elements of moderate order this function
5170 * works well here.
5171 *
5172 * @code
5175 *   QGauss<dim>(parameters.temperature_degree + 2),
5177 *   solution);
5178 * @endcode
5179 *
5183 * thing we will do is to compute the Stokes solution that only requires
5184 * the previous time step's temperature field. That said, nothing good can
5185 * come from not initializing the other vectors as well (especially since
5186 * it's a relatively cheap operation and we only have to do it once at the
5188 * method or physical model, and so we initialize
5191 * sure that the vectors on the left hand side (which where initialized to
5192 * contain ghost elements as well) also get the correct ghost elements. In
5194 * processors:
5195 *
5196 * @code
5197 *   temperature_solution = solution;
5198 *   old_temperature_solution = solution;
5199 *   old_old_temperature_solution = solution;
5200 *   }
5201 *  
5202 *   timestep_number = 0;
5203 *   time_step = old_time_step = 0;
5204 *  
5205 *   double time = 0;
5206 *  
5207 *   do
5208 *   {
5209 *   pcout << "Timestep " << timestep_number
5210 *   << ": t=" << time / EquationData::year_in_seconds << " years"
5211 *   << std::endl;
5212 *  
5216 *  
5217 *   solve();
5218 *  
5219 *   pcout << std::endl;
5220 *  
5221 *   if ((timestep_number == 0) &&
5222 *   (pre_refinement_step < parameters.initial_adaptive_refinement))
5223 *   {
5224 *   refine_mesh(parameters.initial_global_refinement +
5225 *   parameters.initial_adaptive_refinement);
5227 *   goto start_time_iteration;
5228 *   }
5229 *   else if ((timestep_number > 0) &&
5230 *   (timestep_number % parameters.adaptive_refinement_interval ==
5231 *   0))
5232 *   refine_mesh(parameters.initial_global_refinement +
5233 *   parameters.initial_adaptive_refinement);
5234 *  
5235 *   if ((parameters.generate_graphical_output == true) &&
5236 *   (timestep_number % parameters.graphical_output_interval == 0))
5237 *   output_results();
5238 *  
5239 * @endcode
5240 *
5241 * In order to speed up linear solvers, we extrapolate the solutions
5242 * from the old time levels to the new one. This gives a very good
5244 * by more than one half. We do not need to extrapolate in the last
5245 * iteration, so if we reached the final time, we stop here.
5246 *
5247
5248 *
5249 * As the last thing during a time step (before actually bumping up
5250 * the number of the time step), we check whether the current time
5251 * step number is divisible by 100, and if so we let the computing
5252 * timer print a summary of CPU times spent so far.
5253 *
5254 * @code
5255 *   if (time > parameters.end_time * EquationData::year_in_seconds)
5256 *   break;
5257 *  
5263 *   if (old_time_step > 0)
5264 *   {
5265 * @endcode
5266 *
5267 * Trilinos sadd does not like ghost vectors even as input. Copy
5268 * into distributed vectors for now:
5269 *
5270 * @code
5271 *   {
5280 *   }
5281 *   {
5290 *   }
5291 *   }
5292 *  
5293 *   if ((timestep_number > 0) && (timestep_number % 100 == 0))
5294 *   computing_timer.print_summary();
5295 *  
5296 *   time += time_step;
5297 *   ++timestep_number;
5298 *   }
5299 *   while (true);
5300 *  
5301 * @endcode
5302 *
5303 * If we are generating graphical output, do so also for the last time
5304 * step unless we had just done so before we left the do-while loop
5305 *
5306 * @code
5307 *   if ((parameters.generate_graphical_output == true) &&
5308 *   !((timestep_number - 1) % parameters.graphical_output_interval == 0))
5309 *   output_results();
5310 *   }
5311 *   } // namespace Step32
5312 *  
5313 *  
5314 *  
5315 * @endcode
5316 *
5317 *
5318 * <a name="Thecodemaincodefunction"></a>
5319 * <h3>The <code>main</code> function</h3>
5320 *
5321
5322 *
5323 * The main function is short as usual and very similar to the one in
5324 * @ref step_31 "step-31". Since we use a parameter file which is specified as an argument in
5325 * the command line, we have to read it in here and pass it on to the
5326 * Parameters class for parsing. If no filename is given in the command line,
5327 * we simply use the <code>step-32.prm</code> file which is distributed
5329 *
5330
5331 *
5333 * processors at them, the program defaults to 2d. You can get the 3d version
5334 * by changing the constant dimension below to 3.
5335 *
5336 * @code
5337 *   int main(int argc, char *argv[])
5338 *   {
5339 *   try
5340 *   {
5341 *   using namespace Step32;
5342 *   using namespace dealii;
5343 *  
5346 *  
5347 *   std::string parameter_filename;
5348 *   if (argc >= 2)
5349 *   parameter_filename = argv[1];
5350 *   else
5351 *   parameter_filename = "step-32.prm";
5352 *  
5353 *   const int dim = 2;
5356 *   flow_problem.run();
5357 *   }
5358 *   catch (std::exception &exc)
5359 *   {
5360 *   std::cerr << std::endl
5361 *   << std::endl
5362 *   << "----------------------------------------------------"
5363 *   << std::endl;
5364 *   std::cerr << "Exception on processing: " << std::endl
5365 *   << exc.what() << std::endl
5366 *   << "Aborting!" << std::endl
5367 *   << "----------------------------------------------------"
5368 *   << std::endl;
5369 *  
5370 *   return 1;
5371 *   }
5372 *   catch (...)
5373 *   {
5374 *   std::cerr << std::endl
5375 *   << std::endl
5376 *   << "----------------------------------------------------"
5377 *   << std::endl;
5378 *   std::cerr << "Unknown exception!" << std::endl
5379 *   << "Aborting!" << std::endl
5380 *   << "----------------------------------------------------"
5381 *   << std::endl;
5382 *   return 1;
5383 *   }
5384 *  
5385 *   return 0;
5386 *   }
5387 * @endcode
5388<a name="Results"></a><h1>Results</h1>
5389
5390
5392as @ref step_31 "step-31" did, though with an entirely different testcase.
5393
5394
5395<a name="Comparisonofresultswithstep31"></a><h3>Comparison of results with step-31</h3>
5396
5397
5398Before we go to this testcase, however, let us show a few results from a
5400testcase we used in @ref step_31 "step-31", just that we now solve it in parallel and with
5401much higher resolution. We show these results mainly for comparison.
5402
5403Here are two images that show this higher resolution if we choose a 3d
5406<code>n_pre_refinement_steps=4</code>. At the time steps shown, the
5407meshes had around 72,000 and 236,000 cells, for a total of 2,680,000
5408and 8,250,000 degrees of freedom, respectively, more than an order of
5409magnitude more than we had available in @ref step_31 "step-31":
5410
5411<table align="center" class="doxtable">
5412 <tr>
5413 <td>
5414 <img src="https://www.dealii.org/images/steps/developer/step-32.3d.cube.0.png" alt="">
5415 </td>
5416 </tr>
5417 <tr>
5418 <td>
5419 <img src="https://www.dealii.org/images/steps/developer/step-32.3d.cube.1.png" alt="">
5420 </td>
5421 </tr>
5422</table>
5423
5426
5427
5428<a name="Resultsfora2dcircularshelltestcase"></a><h3>Results for a 2d circular shell testcase</h3>
5429
5430
5431Next, we will run @ref step_32 "step-32" with the parameter file in the directory with one
5432change: we increase the final time to 1e9. Here we are using 16 processors. The
5433command to launch is (note that @ref step_32 "step-32".prm is the default):
5434
5435<code>
5436<pre>
5437\$ mpirun -np 16 ./step-32
5438</pre>
5439</code>
5440
5442scheduler, which we won't discuss here. The output will look roughly like
5443this:
5444
5445<code>
5446<pre>
5447\$ mpirun -np 16 ./step-32
5448Number of active cells: 12,288 (on 6 levels)
5449Number of degrees of freedom: 186,624 (99,840+36,864+49,920)
5450
5451Timestep 0: t=0 years
5452
5453 Rebuilding Stokes preconditioner...
5454 Solving Stokes system... 41 iterations.
5455 Maximal velocity: 60.4935 cm/year
5456 Time step: 18166.9 years
5457 17 CG iterations for temperature
5458 Temperature range: 973 4273.16
5459
5460Number of active cells: 15,921 (on 7 levels)
5461Number of degrees of freedom: 252,723 (136,640+47,763+68,320)
5462
5463Timestep 0: t=0 years
5464
5465 Rebuilding Stokes preconditioner...
5466 Solving Stokes system... 50 iterations.
5467 Maximal velocity: 60.3223 cm/year
5468 Time step: 10557.6 years
5469 19 CG iterations for temperature
5470 Temperature range: 973 4273.16
5471
5472Number of active cells: 19,926 (on 8 levels)
5473Number of degrees of freedom: 321,246 (174,312+59,778+87,156)
5474
5475Timestep 0: t=0 years
5476
5477 Rebuilding Stokes preconditioner...
5478 Solving Stokes system... 50 iterations.
5479 Maximal velocity: 57.8396 cm/year
5480 Time step: 5453.78 years
5481 18 CG iterations for temperature
5482 Temperature range: 973 4273.16
5483
5484Timestep 1: t=5453.78 years
5485
5486 Solving Stokes system... 49 iterations.
5487 Maximal velocity: 59.0231 cm/year
5488 Time step: 5345.86 years
5489 18 CG iterations for temperature
5490 Temperature range: 973 4273.16
5491
5492Timestep 2: t=10799.6 years
5493
5494 Solving Stokes system... 24 iterations.
5495 Maximal velocity: 60.2139 cm/year
5496 Time step: 5241.51 years
5497 17 CG iterations for temperature
5498 Temperature range: 973 4273.16
5499
5500[...]
5501
5502Timestep 100: t=272151 years
5503
5504 Solving Stokes system... 21 iterations.
5505 Maximal velocity: 161.546 cm/year
5506 Time step: 1672.96 years
5507 17 CG iterations for temperature
5508 Temperature range: 973 4282.57
5509
5510Number of active cells: 56,085 (on 8 levels)
5511Number of degrees of freedom: 903,408 (490,102+168,255+245,051)
5512
5513
5514
5515+---------------------------------------------+------------+------------+
5516| Total wallclock time elapsed since start | 115s | |
5517| | | |
5518| Section | no. calls | wall time | % of total |
5519+---------------------------------+-----------+------------+------------+
5520| Assemble Stokes system | 103 | 2.82s | 2.5% |
5521| Assemble temperature matrices | 12 | 0.452s | 0.39% |
5522| Assemble temperature rhs | 103 | 11.5s | 10% |
5523| Build Stokes preconditioner | 12 | 2.09s | 1.8% |
5524| Solve Stokes system | 103 | 90.4s | 79% |
5525| Solve temperature system | 103 | 1.53s | 1.3% |
5526| Postprocessing | 3 | 0.532s | 0.46% |
5527| Refine mesh structure, part 1 | 12 | 0.93s | 0.81% |
5528| Refine mesh structure, part 2 | 12 | 0.384s | 0.33% |
5529| Setup dof systems | 13 | 2.96s | 2.6% |
5530+---------------------------------+-----------+------------+------------+
5531
5532[...]
5533
5534+---------------------------------------------+------------+------------+
5535| Total wallclock time elapsed since start | 9.14e+04s | |
5536| | | |
5537| Section | no. calls | wall time | % of total |
5538+---------------------------------+-----------+------------+------------+
5539| Assemble Stokes system | 47045 | 2.05e+03s | 2.2% |
5540| Assemble temperature matrices | 4707 | 310s | 0.34% |
5541| Assemble temperature rhs | 47045 | 8.7e+03s | 9.5% |
5542| Build Stokes preconditioner | 4707 | 1.48e+03s | 1.6% |
5543| Solve Stokes system | 47045 | 7.34e+04s | 80% |
5544| Solve temperature system | 47045 | 1.46e+03s | 1.6% |
5545| Postprocessing | 1883 | 222s | 0.24% |
5546| Refine mesh structure, part 1 | 4706 | 641s | 0.7% |
5547| Refine mesh structure, part 2 | 4706 | 259s | 0.28% |
5548| Setup dof systems | 4707 | 1.86e+03s | 2% |
5549+---------------------------------+-----------+------------+------------+
5550</pre>
5551</code>
5552
5553The simulation terminates when the time reaches the 1 billion years
5554selected in the input file. You can extrapolate from this how long a
5555simulation would take for a different final time (the time step size
5556ultimately settles on somewhere around 20,000 years, so computing for
5557two billion years will take 100,000 time steps, give or take 20%). As
5558can be seen here, we spend most of the compute time in assembling
5559linear systems and &mdash; above all &mdash; in solving Stokes
5560systems.
5561
5562
5563To demonstrate the output we show the output from every 1250th time step here:
5564<table>
5565 <tr>
5566 <td>
5567 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-000.png" alt="">
5568 </td>
5569 <td>
5570 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-050.png" alt="">
5571 </td>
5572 <td>
5573 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-100.png" alt="">
5574 </td>
5575 </tr>
5576 <tr>
5577 <td>
5578 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-150.png" alt="">
5579 </td>
5580 <td>
5581 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-200.png" alt="">
5582 </td>
5583 <td>
5584 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-250.png" alt="">
5585 </td>
5586 </tr>
5587 <tr>
5588 <td>
5589 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-300.png" alt="">
5590 </td>
5591 <td>
5592 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-350.png" alt="">
5593 </td>
5594 <td>
5595 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-400.png" alt="">
5596 </td>
5597 </tr>
5598 <tr>
5599 <td>
5600 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-450.png" alt="">
5601 </td>
5602 <td>
5603 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-500.png" alt="">
5604 </td>
5605 <td>
5606 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-550.png" alt="">
5607 </td>
5608 </tr>
5609 <tr>
5610 <td>
5611 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-600.png" alt="">
5612 </td>
5613 <td>
5614 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-cells.png" alt="">
5615 </td>
5616 <td>
5617 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-partition.png" alt="">
5618 </td>
5619 </tr>
5620</table>
5621
5622The last two images show the grid as well as the partitioning of the mesh for
5623the same computation with 16 subdomains and 16 processors. The full dynamics of
5624this simulation are really only visible by looking at an animation, for example
5625the one <a
5626href="https://www.dealii.org/images/steps/developer/step-32-2d-temperature.webm">shown
5627on this site</a>. This image is well worth watching due to its artistic quality
5628and entrancing depiction of the evolution of the magma plumes.
5629
5630If you watch the movie, you'll see that the convection pattern goes
5634stable situation, a few blobs start to separate from the hot boundary
5636dropping down from the outer boundary layer. During this phase, the solution
5639stirring in which all symmetries are lost. This is a pattern that then
5640continues to dominate flow.
5641
5643maximal velocity as a function of time in the simulation:
5644
5645<img src="https://www.dealii.org/images/steps/developer/step-32.2d.t_vs_vmax.png" alt="">
5646
5653
5654
5655<a name="Resultsfora3dsphericalshelltestcase"></a><h3>Results for a 3d spherical shell testcase</h3>
5656
5657
5664%Solver for Problems in Earth's ConvecTion</i>), that is being
5665developed independently of deal.II and that already incorporates some
5666of the extensions discussed below. The following two pictures show
5667isocontours of the temperature and the partition of the domain (along
5668with the mesh) onto 512 processors:
5669
5670<p align="center">
5671<img src="https://www.dealii.org/images/steps/developer/step-32.3d-sphere.solution.png" alt="">
5672
5673<img src="https://www.dealii.org/images/steps/developer/step-32.3d-sphere.partition.png" alt="">
5674</p>
5675
5676
5677<a name="extensions"></a>
5678<a name="Possibilitiesforextensions"></a><h3>Possibilities for extensions</h3>
5679
5680
5681There are many directions in which this program could be extended. As
5682mentioned at the end of the introduction, most of these are under active
5683development in the <i>ASPECT</i> (short for <i>Advanced %Solver for Problems
5684in Earth's ConvecTion</i>) code at the time this tutorial program is being
5687
5688<ul>
5693 everything. Yet, this doesn't match our expectation that things
5694 closer to the earth core should be hotter than closer to the
5695 surface. The reason is that the energy equation we have used does
5696 not include a term that describes adiabatic cooling and heating:
5697 rock, like gas, heats up as you compress it. Consequently, material
5698 that rises up cools adiabatically, and cold material that sinks down
5699 heats adiabatically. The correct temperature equation would
5700 therefore look somewhat like this:
5701 @f{eqnarray*}
5702 \frac{D T}{Dt}
5703 -
5704 \nabla \cdot \kappa \nabla T &=& \gamma + \tau\frac{Dp}{Dt},
5705 @f}
5706 or, expanding the advected derivative @f$\frac{D}{Dt} =
5707 \frac{\partial}{\partial t} + \mathbf u \cdot \nabla@f$:
5708 @f{eqnarray*}
5709 \frac{\partial T}{\partial t}
5710 +
5711 {\mathbf u} \cdot \nabla T
5712 -
5713 \nabla \cdot \kappa \nabla T &=& \gamma +
5714 \tau\left\{\frac{\partial
5715 p}{\partial t} + \mathbf u \cdot \nabla p \right\}.
5716 @f}
5717 In other words, as pressure increases in a rock volume
5718 (@f$\frac{Dp}{Dt}>0@f$) we get an additional heat source, and vice
5719 versa.
5720
5721 The time derivative of the pressure is a bit awkward to
5722 implement. If necessary, one could approximate using the fact
5723 outlined in the introduction that the pressure can be decomposed
5724 into a dynamic component due to temperature differences and the
5725 resulting flow, and a static component that results solely from the
5726 static pressure of the overlying rock. Since the latter is much
5727 bigger, one may approximate @f$p\approx p_{\text{static}}=-\rho_{\text{ref}}
5728 [1+\beta T_{\text{ref}}] \varphi@f$, and consequently
5729 @f$\frac{Dp}{Dt} \approx \left\{- \mathbf u \cdot \nabla \rho_{\text{ref}}
5730 [1+\beta T_{\text{ref}}]\varphi\right\} = \rho_{\text{ref}}
5731 [1+\beta T_{\text{ref}}] \mathbf u \cdot \mathbf g@f$.
5732 In other words, if the fluid is moving in the direction of gravity
5733 (downward) it will be compressed and because in that case @f$\mathbf u
5734 \cdot \mathbf g > 0@f$ we get a positive heat source. Conversely, the
5735 fluid will cool down if it moves against the direction of gravity.
5736
5737<li> <b>Compressibility:</b>
5738 As already hinted at in the temperature model above,
5739 mantle rocks are not incompressible. Rather, given the enormous pressures in
5740 the earth mantle (at the core-mantle boundary, the pressure is approximately
5741 140 GPa, equivalent to 1,400,000 times atmospheric pressure), rock actually
5742 does compress to something around 1.5 times the density it would have
5743 at surface pressure. Modeling this presents any number of
5744 difficulties. Primarily, the mass conservation equation is no longer
5745 @f$\textrm{div}\;\mathbf u=0@f$ but should read
5746 @f$\textrm{div}(\rho\mathbf u)=0@f$ where the density @f$\rho@f$ is now no longer
5747 spatially constant but depends on temperature and pressure. A consequence is
5748 that the model is now no longer linear; a linearized version of the Stokes
5749 equation is also no longer symmetric requiring us to rethink preconditioners
5750 and, possibly, even the discretization. We won't go into detail here as to
5752
5754 material parameters such as the density, the viscosity, and the various
5759 iterate this dependence out in each time step, rather than simply freezing
5760 coefficients at values extrapolated from the previous time step(s).
5761
5762<li> <b>Checkpoint/restart:</b> Running this program in 2d on a number of
5764 compute times are so large that one runs into two typical problems: (i) On
5766 jobs are to 2 or 3 days; (ii) losing the results of a computation due to
5768 running on hundreds of processors for a couple of days. Both of these
5772 state of the program is written to a permanent storage location (e.g. a hard
5775 data), but it can be made easier by realizing that one can save the state
5777 solution vectors; during restart one would then first re-enumerate degrees
5779 matrices. Nevertheless, given the distributed nature of the data structures
5783 one may wish to continue computing on a mesh that is finer than the one used
5785
5789 measure at the earth surface, in order to find which models are realistic
5790 and which are contradicted by reality. To this end, we need to compute
5791 quantities from our solution vectors that are related to what we can
5792 observe. Among these are, for example, heatfluxes at the surface of the
5795
5798boundary. This is because the boundary layer there is stronger than
5802solution we are really interested in better than the criterion used
5804able to.
5805</ul>
5806
5807
5810source code ASPECT (see https://aspect.geodynamics.org/ ) that constitutes the
5813 *
5814 *
5815<a name="PlainProg"></a>
5816<h1> The plain program</h1>
5817@include "step-32.cc"
5818*/
value_type * iterator
Definition array_view.h:97
iterator end() const
Definition array_view.h:603
value_type * data() const noexcept
Definition array_view.h:553
void reinit(value_type *starting_element, const std::size_t n_elements)
Definition array_view.h:413
std::size_t size() const
Definition array_view.h:576
virtual void build_patches(const unsigned int n_subdivisions=0)
Definition data_out.cc:1063
virtual void evaluate_vector_field(const DataPostprocessorInputs::Vector< dim > &input_data, std::vector< Vector< double > > &computed_quantities) const
active_cell_iterator begin_active(const unsigned int level=0) const
Definition fe_q.h:551
std::pair< std::pair< unsigned int, unsigned int >, unsigned int > system_to_base_index(const unsigned int index) const
Point< 3 > vertices[4]
Point< 2 > second
Definition grid_out.cc:4616
Point< 2 > first
Definition grid_out.cc:4615
unsigned int level
Definition grid_out.cc:4618
__global__ void set(Number *val, const Number s, const size_type N)
__global__ void sadd(const Number s, Number *val, const Number a, const Number *V_val, const size_type N)
#define Assert(cond, exc)
static ::ExceptionBase & ExcInternalError()
static ::ExceptionBase & ExcMessage(std::string arg1)
#define AssertThrow(cond, exc)
typename ActiveSelector::active_cell_iterator active_cell_iterator
void loop(ITERATOR begin, std_cxx20::type_identity_t< ITERATOR > end, DOFINFO &dinfo, INFOBOX &info, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &cell_worker, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &boundary_worker, const std::function< void(DOFINFO &, DOFINFO &, typename INFOBOX::CellInfo &, typename INFOBOX::CellInfo &)> &face_worker, ASSEMBLER &assembler, const LoopControl &lctrl=LoopControl())
Definition loop.h:439
UpdateFlags
@ update_values
Shape function values.
@ update_JxW_values
Transformed quadrature weights.
@ update_gradients
Shape function gradients.
@ update_quadrature_points
Transformed quadrature points.
std::vector< value_type > split(const typename ::Triangulation< dim, spacedim >::cell_iterator &parent, const value_type parent_value)
const Event initial
Definition event.cc:65
const Event remesh
Definition event.cc:66
void apply(const Kokkos::TeamPolicy< MemorySpace::Default::kokkos_space::execution_space >::member_type &team_member, const Kokkos::View< Number *, MemorySpace::Default::kokkos_space > shape_data, const ViewTypeIn in, ViewTypeOut out)
void approximate(SynchronousIterators< std::tuple< typename DoFHandler< dim, spacedim >::active_cell_iterator, Vector< float >::iterator > > const &cell, const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof_handler, const InputVector &solution, const unsigned int component)
Expression sign(const Expression &x)
IndexSet extract_locally_relevant_dofs(const DoFHandler< dim, spacedim > &dof_handler)
void extract_constant_modes(const DoFHandler< dim, spacedim > &dof_handler, const ComponentMask &component_mask, std::vector< std::vector< bool > > &constant_modes)
void extrapolate(const DoFHandler< dim, spacedim > &dof1, const InVector &z1, const DoFHandler< dim, spacedim > &dof2, OutVector &z2)
void refine(Triangulation< dim, spacedim > &tria, const Vector< Number > &criteria, const double threshold, const unsigned int max_to_mark=numbers::invalid_unsigned_int)
void scale(const double scaling_factor, Triangulation< dim, spacedim > &triangulation)
double volume(const Triangulation< dim, spacedim > &tria)
double diameter(const Triangulation< dim, spacedim > &tria)
Definition grid_tools.cc:88
@ valid
Iterator points to a valid object.
@ matrix
Contents is actually a matrix.
@ symmetric
Matrix is symmetric.
double norm(const FEValuesBase< dim > &fe, const ArrayView< const std::vector< Tensor< 1, dim > > > &Du)
Definition divergence.h:472
Point< spacedim > point(const gp_Pnt &p, const double tolerance=1e-10)
Definition utilities.cc:189
SymmetricTensor< 2, dim, Number > e(const Tensor< 2, dim, Number > &F)
SymmetricTensor< 2, dim, Number > b(const Tensor< 2, dim, Number > &F)
SymmetricTensor< 2, dim, Number > d(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
void partition(const SparsityPattern &sparsity_pattern, const unsigned int n_partitions, std::vector< unsigned int > &partition_indices, const Partitioner partitioner=Partitioner::metis)
constexpr ReturnType< rank, T >::value_type & extract(T &t, const ArrayType &indices)
void call(const std::function< RT()> &function, internal::return_value< RT > &ret_val)
std::vector< unsigned int > serial(const std::vector< unsigned int > &targets, const std::function< RequestType(const unsigned int)> &create_request, const std::function< AnswerType(const unsigned int, const RequestType &)> &answer_request, const std::function< void(const unsigned int, const AnswerType &)> &process_answer, const MPI_Comm comm)
T sum(const T &t, const MPI_Comm mpi_communicator)
T max(const T &t, const MPI_Comm mpi_communicator)
unsigned int this_mpi_process(const MPI_Comm mpi_communicator)
Definition mpi.cc:161
std::string compress(const std::string &input)
Definition utilities.cc:390
void project(const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof, const AffineConstraints< typename VectorType::value_type > &constraints, const Quadrature< dim > &quadrature, const Function< spacedim, typename VectorType::value_type > &function, VectorType &vec, const bool enforce_zero_boundary=false, const Quadrature< dim - 1 > &q_boundary=(dim > 1 ? QGauss< dim - 1 >(2) :Quadrature< dim - 1 >(0)), const bool project_to_boundary_first=false)
void run(const Iterator &begin, const std_cxx20::type_identity_t< Iterator > &end, Worker worker, Copier copier, const ScratchData &sample_scratch_data, const CopyData &sample_copy_data, const unsigned int queue_length, const unsigned int chunk_size)
void run(const std::vector< std::vector< Iterator > > &colored_iterators, Worker worker, Copier copier, const ScratchData &sample_scratch_data, const CopyData &sample_copy_data, const unsigned int queue_length=2 *MultithreadInfo::n_threads(), const unsigned int chunk_size=8)
void abort(const ExceptionBase &exc) noexcept
bool check(const ConstraintKinds kind_in, const unsigned int dim)
long double gamma(const unsigned int n)
DEAL_II_HOST constexpr TableIndices< 2 > merge(const TableIndices< 2 > &previous_indices, const unsigned int new_index, const unsigned int position)
void copy(const T *begin, const T *end, U *dest)
int(&) functions(const void *v1, const void *v2)
void assemble(const MeshWorker::DoFInfoBox< dim, DOFINFO > &dinfo, A *assembler)
Definition loop.h:71
static constexpr double PI
Definition numbers.h:259
static const unsigned int invalid_unsigned_int
Definition types.h:213
STL namespace.
::VectorizedArray< Number, width > min(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > max(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > cos(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > sin(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > sqrt(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > abs(const ::VectorizedArray< Number, width > &)
Definition types.h:33
unsigned int subdomain_id
Definition types.h:44
const ::parallel::distributed::Triangulation< dim, spacedim > * triangulation
DEAL_II_HOST constexpr SymmetricTensor< 2, dim, Number > symmetrize(const Tensor< 2, dim, Number > &t)