Reference documentation for deal.II version 9.5.1
\(\newcommand{\dealvcentcolon}{\mathrel{\mathop{:}}}\) \(\newcommand{\dealcoloneq}{\dealvcentcolon\mathrel{\mkern-1.2mu}=}\) \(\newcommand{\jump}[1]{\left[\!\left[ #1 \right]\!\right]}\) \(\newcommand{\average}[1]{\left\{\!\left\{ #1 \right\}\!\right\}}\)
Loading...
Searching...
No Matches
step-69.h
Go to the documentation of this file.
1
1504 *   cell->get_dof_indices(dof_indices);
1505 *   std::transform(dof_indices.begin(),
1506 *   dof_indices.end(),
1507 *   dof_indices.begin(),
1508 *   [&](types::global_dof_index index) {
1509 *   return partitioner->global_to_local(index);
1510 *   });
1511 *  
1512 *   /* And simply add, for each dof, a coupling to all other "local"
1513 *   * dofs on the cell: */
1514 *   for (const auto dof : dof_indices)
1515 *   dsp.add_entries(dof, dof_indices.begin(), dof_indices.end());
1516 *   }
1517 *  
1518 *   sparsity_pattern.copy_from(dsp);
1519 *  
1520 *   lumped_mass_matrix.reinit(sparsity_pattern);
1521 *   norm_matrix.reinit(sparsity_pattern);
1522 *   for (auto &matrix : cij_matrix)
1523 *   matrix.reinit(sparsity_pattern);
1524 *   for (auto &matrix : nij_matrix)
1525 *   matrix.reinit(sparsity_pattern);
1526 *   }
1527 *   }
1528 *  
1529 * @endcode
1530 *
1532 * Next, we have to assemble various matrices. We define a number of
1533 * helper functions and data structures in an anonymous namespace.
1534 *
1535
1536 *
1537 *
1538 * @code
1539 *   namespace
1540 *   {
1541 * @endcode
1542 *
1543 * <code>CopyData</code> class that will be used to assemble the
1544 * offline data matrices using WorkStream. It acts as a container: it
1545 * is just a struct where WorkStream stores the local cell
1547 * <code>local_boundary_normal_map</code> used to store the local
1548 * contributions required to compute the normals at the boundary.
1549 *
1550
1551 *
1552 *
1553 * @code
1554 *   template <int dim>
1555 *   struct CopyData
1556 *   {
1557 *   bool is_artificial;
1558 *   std::vector<types::global_dof_index> local_dof_indices;
1561 *   std::array<FullMatrix<double>, dim> cell_cij_matrix;
1562 *   };
1563 *  
1564 * @endcode
1565 *
1566 * Next we introduce a number of helper functions that are all
1567 * concerned about reading and writing matrix and vector entries. They
1569 * <a href="https://en.wikipedia.org/wiki/Syntactic_sugar"> syntactic
1571 *
1572
1573 *
1574 * The first function we introduce, <code>get_entry()</code>, will be
1575 * used to read the value stored at the entry pointed by a
1580 * iterator already knows the global index of the corresponding matrix
1581 * entry in the low-level vector stored in the SparseMatrix object. Due
1582 * to the lack of an interface in the SparseMatrix for accessing the
1583 * element directly with a SparsityPattern iterator, we unfortunately
1584 * have to create a temporary SparseMatrix iterator. We simply hide
1585 * this in the <code>get_entry()</code> function.
1586 *
1587
1588 *
1589 *
1590 * @code
1591 *   template <typename IteratorType>
1593 *   get_entry(const SparseMatrix<double> &matrix, const IteratorType &it)
1594 *   {
1596 *   &matrix, it->global_index());
1597 *   return matrix_iterator->value();
1598 *   }
1599 *  
1600 * @endcode
1601 *
1602 * The <code>set_entry()</code> helper is the inverse operation of
1603 * <code>get_value()</code>: Given an iterator and a value, it sets the
1604 * entry pointed to by the iterator in the matrix.
1605 *
1606
1607 *
1608 *
1609 * @code
1610 *   template <typename IteratorType>
1611 *   DEAL_II_ALWAYS_INLINE inline void
1613 *   const IteratorType & it,
1615 *   {
1617 *   it->global_index());
1618 *   matrix_iterator->value() = value;
1619 *   }
1620 *  
1621 * @endcode
1622 *
1624 * \mathbb{R}^d@f$. If @f$d=2@f$ then @f$\mathbf{c}_{ij} =
1625 * [\mathbf{c}_{ij}^1,\mathbf{c}_{ij}^2]^\top@f$. Which basically implies
1626 * that we need one matrix per space dimension to store the
1627 * @f$\mathbf{c}_{ij}@f$ vectors. Similar observation follows for the
1629 * <code>gather_get_entry()</code> is to retrieve those entries and store
1631 *
1632
1633 *
1634 *
1635 * @code
1636 *   template <std::size_t k, typename IteratorType>
1638 *   gather_get_entry(const std::array<SparseMatrix<double>, k> &c_ij,
1639 *   const IteratorType it)
1640 *   {
1642 *   for (unsigned int j = 0; j < k; ++j)
1643 *   result[j] = get_entry(c_ij[j], it);
1644 *   return result;
1645 *   }
1646 *  
1647 * @endcode
1648 *
1649 * <code>gather()</code> (first interface): this first function
1650 * signature, having three input arguments, will be used to retrieve
1651 * the individual components <code>(i,l)</code> of a matrix. The
1654 * different: the function <code>gather()</code> does not rely on an
1656 * indices <code>(i,l)</code> of the entry in order to retrieve its
1660 * algebraic viscosity @f$d_{ij}@f$ in the particular case that when
1661 * both @f$i@f$ and @f$j@f$ lie at the boundary.
1662 *
1663
1664 *
1666 * <code>(i,l)</code> entry of a matrix (say for instance Trilinos or PETSc
1668 * want to keep an eye on complexity: we want this operation to have
1670 * using deal.II matrices.
1671 *
1672
1673 *
1674 *
1675 * @code
1676 *   template <std::size_t k>
1678 *   gather(const std::array<SparseMatrix<double>, k> &n_ij,
1679 *   const unsigned int i,
1680 *   const unsigned int j)
1681 *   {
1682 *   Tensor<1, k> result;
1683 *   for (unsigned int l = 0; l < k; ++l)
1684 *   result[l] = n_ij[l](i, j);
1685 *   return result;
1686 *   }
1687 *  
1688 * @endcode
1689 *
1690 * <code>gather()</code> (second interface): this second function
1691 * signature having two input arguments will be used to gather the
1692 * state at a node <code>i</code> and return it as a
1694 *
1695
1696 *
1697 *
1698 * @code
1699 *   template <std::size_t k>
1701 *   gather(const std::array<LinearAlgebra::distributed::Vector<double>, k> &U,
1702 *   const unsigned int i)
1703 *   {
1704 *   Tensor<1, k> result;
1705 *   for (unsigned int j = 0; j < k; ++j)
1706 *   result[j] = U[j].local_element(i);
1707 *   return result;
1708 *   }
1709 *  
1710 * @endcode
1711 *
1712 * <code>scatter()</code>: this function has three input arguments, the
1713 * first one is meant to be a "global object" (say a locally owned or
1716 * which represents a index of the global object. This function will be
1717 * primarily used to write the updated nodal values, stored as
1719 *
1720
1721 *
1722 *
1723 * @code
1725 *   DEAL_II_ALWAYS_INLINE inline void
1727 *   const Tensor<1, k2> & tensor,
1728 *   const unsigned int i)
1729 *   {
1730 *   static_assert(k == k2,
1731 *   "The dimensions of the input arguments must agree");
1732 *   for (unsigned int j = 0; j < k; ++j)
1733 *   U[j].local_element(i) = tensor[j];
1734 *   }
1735 *   } // namespace
1736 *  
1737 * @endcode
1738 *
1739 * We are now in a position to assemble all matrices stored in
1740 * <code>OfflineData</code>: the lumped mass entries @f$m_i@f$, the
1741 * vector-valued matrices @f$\mathbf{c}_{ij}@f$ and @f$\mathbf{n}_{ij} =
1742 * \frac{\mathbf{c}_{ij}}{|\mathbf{c}_{ij}|}@f$, and the boundary normals
1743 * @f$\boldsymbol{\nu}_i@f$.
1744 *
1745
1746 *
1748 * detailed in the @ref threads "Parallel computing with multiple processors"
1749 * accessing shared memory. As customary this requires
1750 * definition of
1751 * - Scratch data (i.e. input info required to carry out computations): in
1752 * this case it is <code>scratch_data</code>.
1753 * - The worker: in our case this is the <code>local_assemble_system()</code>
1754 * function that
1756 * scratch data.
1757 * - A copy data: a struct that contains all the local assembly
1758 * contributions, in this case <code>CopyData<dim>()</code>.
1759 * - A copy data routine: in this case it is
1761 * local contributions into the global objects (matrices and/or vectors)
1762 *
1763
1764 *
1765 * Most of the following lines are spent in the definition of the worker
1769 * well-documented in @ref step_9 "step-9", @ref step_13 "step-13" and @ref step_32 "step-32" among others.
1770 *
1771
1772 *
1773 * Finally, assuming that @f$\mathbf{x}_i@f$ is a support point at the boundary,
1774 * the (nodal) normals are defined as:
1775 *
1776
1777 *
1778 * @f{align*}
1780 * \frac{\int_{\partial\Omega} \phi_i \widehat{\boldsymbol{\nu}} \,
1781 * \, \mathrm{d}\mathbf{s}}{\big|\int_{\partial\Omega} \phi_i
1782 * \widehat{\boldsymbol{\nu}} \, \mathrm{d}\mathbf{s}\big|}
1783 * @f}
1784 *
1785
1786 *
1787 * We will compute the numerator of this expression first and store it in
1789 * vectors in a posterior loop.
1790 *
1791
1792 *
1793 *
1794 * @code
1795 *   template <int dim>
1797 *   {
1798 *   lumped_mass_matrix = 0.;
1799 *   norm_matrix = 0.;
1800 *   for (auto &matrix : cij_matrix)
1801 *   matrix = 0.;
1802 *   for (auto &matrix : nij_matrix)
1803 *   matrix = 0.;
1804 *  
1805 *   unsigned int dofs_per_cell =
1806 *   discretization->finite_element.n_dofs_per_cell();
1807 *   unsigned int n_q_points = discretization->quadrature.size();
1808 *  
1809 * @endcode
1810 *
1812 * WorkStream
1813 *
1814
1815 *
1816 *
1817 * @code
1818 *   MeshWorker::ScratchData<dim> scratch_data(
1819 *   discretization->mapping,
1820 *   discretization->finite_element,
1821 *   discretization->quadrature,
1824 *   discretization->face_quadrature,
1826 *  
1827 *   {
1830 *   "offline_data - assemble lumped mass matrix, and c_ij");
1831 *  
1832 *   const auto local_assemble_system =
1833 *   [&](const typename DoFHandler<dim>::cell_iterator &cell,
1834 *   MeshWorker::ScratchData<dim> & scratch,
1835 *   CopyData<dim> & copy) {
1836 *   copy.is_artificial = cell->is_artificial();
1837 *   if (copy.is_artificial)
1838 *   return;
1839 *  
1840 *   copy.local_boundary_normal_map.clear();
1841 *   copy.cell_lumped_mass_matrix.reinit(dofs_per_cell, dofs_per_cell);
1842 *   for (auto &matrix : copy.cell_cij_matrix)
1843 *   matrix.reinit(dofs_per_cell, dofs_per_cell);
1844 *  
1845 *   const auto &fe_values = scratch.reinit(cell);
1846 *  
1847 *   copy.local_dof_indices.resize(dofs_per_cell);
1848 *   cell->get_dof_indices(copy.local_dof_indices);
1849 *  
1850 *   std::transform(copy.local_dof_indices.begin(),
1851 *   copy.local_dof_indices.end(),
1852 *   copy.local_dof_indices.begin(),
1853 *   [&](types::global_dof_index index) {
1854 *   return partitioner->global_to_local(index);
1855 *   });
1856 *  
1857 * @endcode
1858 *
1859 * We compute the local contributions for the lumped mass matrix
1860 * entries @f$m_i@f$ and and vectors @f$c_{ij}@f$ in the usual fashion:
1861 *
1862 * @code
1863 *   for (unsigned int q_point = 0; q_point < n_q_points; ++q_point)
1864 *   {
1865 *   const auto JxW = fe_values.JxW(q_point);
1866 *  
1867 *   for (unsigned int j = 0; j < dofs_per_cell; ++j)
1868 *   {
1869 *   const auto value_JxW =
1870 *   fe_values.shape_value(j, q_point) * JxW;
1871 *   const auto grad_JxW = fe_values.shape_grad(j, q_point) * JxW;
1872 *  
1873 *   copy.cell_lumped_mass_matrix(j, j) += value_JxW;
1874 *  
1875 *   for (unsigned int i = 0; i < dofs_per_cell; ++i)
1876 *   {
1877 *   const auto value = fe_values.shape_value(i, q_point);
1878 *   for (unsigned int d = 0; d < dim; ++d)
1879 *   copy.cell_cij_matrix[d](i, j) += value * grad_JxW[d];
1880 *  
1881 *   } /* i */
1882 *   } /* j */
1883 *   } /* q */
1884 *  
1885 * @endcode
1886 *
1887 * Now we have to compute the boundary normals. Note that the
1888 * following loop does not do much unless the element has faces on
1889 * the boundary of the domain.
1890 *
1891 * @code
1892 *   for (const auto f : cell->face_indices())
1893 *   {
1894 *   const auto face = cell->face(f);
1895 *   const auto id = face->boundary_id();
1896 *  
1897 *   if (!face->at_boundary())
1898 *   continue;
1899 *  
1900 *   const auto &fe_face_values = scratch.reinit(cell, f);
1901 *  
1902 *   const unsigned int n_face_q_points =
1903 *   fe_face_values.get_quadrature().size();
1904 *  
1905 *   for (unsigned int j = 0; j < dofs_per_cell; ++j)
1906 *   {
1907 *   if (!discretization->finite_element.has_support_on_face(j, f))
1908 *   continue;
1909 *  
1910 * @endcode
1911 *
1913 * from one of the faces in the support of the shape
1914 * function phi_j. So we cannot normalize this local
1915 * contribution right here, we have to take it "as is",
1916 * store it and pass it to the copy data routine. The
1918 * nodes. This is done in the copy function below.
1919 *
1920 * @code
1921 *   Tensor<1, dim> normal;
1922 *   if (id == Boundaries::free_slip)
1923 *   {
1924 *   for (unsigned int q = 0; q < n_face_q_points; ++q)
1925 *   normal += fe_face_values.normal_vector(q) *
1926 *   fe_face_values.shape_value(j, q);
1927 *   }
1928 *  
1929 *   const auto index = copy.local_dof_indices[j];
1930 *  
1931 *   Point<dim> position;
1932 *   for (const auto v : cell->vertex_indices())
1933 *   if (cell->vertex_dof_index(v, 0) ==
1934 *   partitioner->local_to_global(index))
1935 *   {
1936 *   position = cell->vertex(v);
1937 *   break;
1938 *   }
1939 *  
1940 *   const auto old_id =
1941 *   std::get<1>(copy.local_boundary_normal_map[index]);
1942 *   copy.local_boundary_normal_map[index] =
1943 *   std::make_tuple(normal, std::max(old_id, id), position);
1944 *   }
1945 *   }
1946 *   };
1947 *  
1948 * @endcode
1949 *
1950 * Last, we provide a copy_local_to_global function as required for
1951 * the WorkStream
1952 *
1953 * @code
1954 *   const auto copy_local_to_global = [&](const CopyData<dim> &copy) {
1955 *   if (copy.is_artificial)
1956 *   return;
1957 *  
1958 *   for (const auto &it : copy.local_boundary_normal_map)
1959 *   {
1960 *   std::get<0>(boundary_normal_map[it.first]) +=
1961 *   std::get<0>(it.second);
1962 *   std::get<1>(boundary_normal_map[it.first]) =
1963 *   std::max(std::get<1>(boundary_normal_map[it.first]),
1964 *   std::get<1>(it.second));
1965 *   std::get<2>(boundary_normal_map[it.first]) = std::get<2>(it.second);
1966 *   }
1967 *  
1968 *   lumped_mass_matrix.add(copy.local_dof_indices,
1969 *   copy.cell_lumped_mass_matrix);
1970 *  
1971 *   for (int k = 0; k < dim; ++k)
1972 *   {
1973 *   cij_matrix[k].add(copy.local_dof_indices, copy.cell_cij_matrix[k]);
1974 *   nij_matrix[k].add(copy.local_dof_indices, copy.cell_cij_matrix[k]);
1975 *   }
1976 *   };
1977 *  
1978 *   WorkStream::run(dof_handler.begin_active(),
1979 *   dof_handler.end(),
1982 *   scratch_data,
1983 *   CopyData<dim>());
1984 *   }
1985 *  
1986 * @endcode
1987 *
1988 * At this point in time we are done with the computation of @f$m_i@f$ and
1991 * That's not what we really want: we have to normalize its entries. In
1992 * addition, we have not filled the entries of the matrix
1993 * <code>norm_matrix</code> and the vectors stored in the map
1994 * <code>OfflineData<dim>::BoundaryNormalMap</code> are not normalized.
1995 *
1996
1997 *
1998 * In principle, this is just offline data, it doesn't make much sense
2000 * over the many time steps that we are going to use. However,
2001 * computing/storing the entries of the matrix
2004 * - we want to visit every node @f$i@f$ in the mesh/sparsity graph,
2006 * @f$\mathbf{c}_{ij} \not \equiv 0@f$.
2007 *
2008
2009 *
2011 * every row in the matrix and for each one of these rows execute a loop on
2012 * the columns. Node-loops is a core theme of this tutorial step (see
2014 * again. That's why this is the right time to introduce them.
2015 *
2016
2017 *
2018 * We have the thread parallelization capability
2019 * parallel::apply_to_subranges() that is somehow more general than the
2020 * WorkStream framework. In particular, parallel::apply_to_subranges() can
2021 * be used for our node-loops. This functionality requires four input
2022 * arguments which we explain in detail (for the specific case of our
2023 * thread-parallel node loops):
2024 * - The iterator <code>indices.begin()</code> points to a row index.
2025 * - The iterator <code>indices.end()</code> points to a numerically higher
2026 * row index.
2027 * - The function <code>on_subranges(index_begin, index_end)</code>
2028 * (where <code>index_begin</code> and <code>index_end</code>
2029 * define a sub-range within the range spanned by
2030 * the begin and end iterators defined in the two previous bullets)
2031 * applies an operation to every iterator in such subrange. We may as
2032 * well call <code>on_subranges</code> the "worker".
2033 * - Grainsize: minimum number of iterators (in this case representing
2034 * rows) processed by each thread. We decided for a minimum of 4096
2035 * rows.
2036 *
2037
2038 *
2039 * A minor caveat here is that the iterators <code>indices.begin()</code>
2040 * and <code>indices.end()</code> supplied to
2041 * parallel::apply_to_subranges() have to be random access iterators:
2042 * internally, parallel::apply_to_subranges() will break the range
2043 * defined by the <code>indices.begin()</code> and
2044 * <code>indices.end()</code> iterators into subranges (we want to be
2045 * able to read any entry in those subranges with constant complexity).
2046 * In order to provide such iterators we resort to
2047 * std_cxx20::ranges::iota_view.
2048 *
2049
2050 *
2051 * The bulk of the following piece of code is spent defining
2052 * the "worker" <code>on_subranges</code>: i.e. the operation applied at
2053 * each row of the sub-range. Given a fixed <code>row_index</code>
2054 * we want to visit every column/entry in such row. In order to execute
2055 * such columns-loops we use
2056 * <a href="http://www.cplusplus.com/reference/algorithm/for_each/">
2057 * std::for_each</a>
2058 * from the standard library, where:
2059 * - <code>sparsity_pattern.begin(row_index)</code>
2060 * gives us an iterator starting at the first column of the row,
2061 * - <code>sparsity_pattern.end(row_index)</code> is an iterator pointing
2062 * at the last column of the row,
2063 * - the last argument required by `std::for_each` is the operation
2064 * applied at each nonzero entry (a lambda expression in this case)
2065 * of such row.
2066 *
2067
2068 *
2069 * We note that, parallel::apply_to_subranges() will operate on
2070 * disjoint sets of rows (the subranges) and our goal is to write into
2071 * these rows. Because of the simple nature of the operations we want
2072 * to carry out (computation and storage of normals, and normalization
2073 * of the @f$\mathbf{c}_{ij}@f$ of entries) threads cannot conflict
2074 * attempting to write the same entry (we do not need a scheduler).
2075 *
2076 * @code
2077 *   {
2078 *   TimerOutput::Scope scope(computing_timer,
2079 *   "offline_data - compute |c_ij|, and n_ij");
2080 *  
2081 *   const std_cxx20::ranges::iota_view<unsigned int, unsigned int> indices(
2082 *   0, n_locally_relevant);
2083 *  
2084 *   const auto on_subranges =
2085 *   [&](const auto index_begin, const auto index_end) {
2086 *   for (const auto row_index :
2087 *   std_cxx20::ranges::iota_view<unsigned int, unsigned int>(
2088 *   *index_begin, *index_end))
2089 *   {
2090 * @endcode
2091 *
2092 * First column-loop: we compute and store the entries of the
2093 * matrix norm_matrix and write normalized entries into the
2094 * matrix nij_matrix:
2095 *
2096 * @code
2097 *   std::for_each(sparsity_pattern.begin(row_index),
2098 *   sparsity_pattern.end(row_index),
2099 *   [&](const SparsityPatternIterators::Accessor &jt) {
2100 *   const auto c_ij =
2101 *   gather_get_entry(cij_matrix, &jt);
2102 *   const double norm = c_ij.norm();
2103 *  
2104 *   set_entry(norm_matrix, &jt, norm);
2105 *   for (unsigned int j = 0; j < dim; ++j)
2106 *   set_entry(nij_matrix[j], &jt, c_ij[j] / norm);
2107 *   });
2108 *   }
2109 *   };
2110 *  
2111 *   parallel::apply_to_subranges(indices.begin(),
2112 *   indices.end(),
2113 *   on_subranges,
2114 *   4096);
2115 *  
2116 * @endcode
2117 *
2118 * Finally, we normalize the vectors stored in
2119 * <code>OfflineData<dim>::BoundaryNormalMap</code>. This operation has
2120 * not been thread parallelized as it would neither illustrate any
2121 * important concept nor lead to any noticeable speed gain.
2122 *
2123 * @code
2124 *   for (auto &it : boundary_normal_map)
2125 *   {
2126 *   auto &normal = std::get<0>(it.second);
2127 *   normal /= (normal.norm() + std::numeric_limits<double>::epsilon());
2128 *   }
2129 *   }
2130 *   }
2131 *  
2132 * @endcode
2133 *
2134 * At this point we are very much done with anything related to offline data.
2135 *
2136
2137 *
2138 *
2139 * <a name="EquationofstateandapproximateRiemannsolver"></a>
2140 * <h4>Equation of state and approximate Riemann solver</h4>
2141 *
2142
2143 *
2144 * In this section we describe the implementation of the class members of
2145 * the <code>ProblemDescription</code> class. Most of the code here is
2146 * specific to the compressible Euler's equations with an ideal gas law.
2147 * If we wanted to re-purpose @ref step_69 "step-69" for a different conservation law
2151 * remain unchanged.
2152 *
2153
2154 *
2155 * We start by implementing a number of small member functions for
2160 *
2161
2162 *
2163 *
2164 * @code
2165 *   template <int dim>
2168 *   {
2170 *   std::copy_n(&U[1], dim, &result[0]);
2171 *   return result;
2172 *   }
2173 *  
2174 *   template <int dim>
2175 *   DEAL_II_ALWAYS_INLINE inline double
2177 *   {
2178 *   const double &rho = U[0];
2179 *   const auto m = momentum(U);
2180 *   const double &E = U[dim + 1];
2181 *   return E - 0.5 * m.norm_square() / rho;
2182 *   }
2183 *  
2184 *   template <int dim>
2185 *   DEAL_II_ALWAYS_INLINE inline double
2187 *   {
2188 *   return (gamma - 1.) * internal_energy(U);
2189 *   }
2190 *  
2191 *   template <int dim>
2192 *   DEAL_II_ALWAYS_INLINE inline double
2194 *   {
2195 *   const double &rho = U[0];
2196 *   const double p = pressure(U);
2197 *  
2198 *   return std::sqrt(gamma * p / rho);
2199 *   }
2200 *  
2201 *   template <int dim>
2204 *   {
2205 *   const double &rho = U[0];
2206 *   const auto m = momentum(U);
2207 *   const auto p = pressure(U);
2208 *   const double &E = U[dim + 1];
2209 *  
2211 *  
2212 *   result[0] = m;
2213 *   for (unsigned int i = 0; i < dim; ++i)
2214 *   {
2215 *   result[1 + i] = m * m[i] / rho;
2216 *   result[1 + i][i] += p;
2217 *   }
2218 *   result[dim + 1] = m / rho * (E + p);
2219 *  
2220 *   return result;
2221 *   }
2222 *  
2223 * @endcode
2224 *
2226 * (\mathbf{U}_i^{n},\mathbf{U}_j^{n}, \textbf{n}_{ij})@f$. The analysis
2231 * of our implementation functions and point to specific academic
2234 *
2235
2236 *
2239 * This is typically done with an iterative solver. In order to simplify
2243 * equations (2.11) (3.7), (3.8) and (4.3) of @cite GuermondPopov2016b
2245 * wavespeed. This estimate is returned by a call to the function
2249 * Equation (4.46), page 128 in @cite Toro2009.
2250 *
2251
2252 *
2254 * guaranteed to be an upper bound, it is in general quite sharp, and
2256 * situations (in particular when one of states is close to vacuum
2258 * used a second estimate to avoid this degeneracy that will be invoked
2259 * by a call to the function <code>lambda_max_expansion()</code>. The most
2264 *
2265
2266 *
2267 * We start again by defining a couple of helper functions:
2268 *
2269
2270 *
2271 * The first function takes a state <code>U</code> and a unit vector
2272 * <code>n_ij</code> and computes the <i>projected</i> 1d state in
2273 * direction of the unit vector.
2274 *
2275 * @code
2276 *   namespace
2277 *   {
2278 *   template <int dim>
2279 *   DEAL_II_ALWAYS_INLINE inline std::array<double, 4> riemann_data_from_state(
2280 *   const typename ProblemDescription<dim>::state_type U,
2281 *   const Tensor<1, dim> & n_ij)
2282 *   {
2284 *   projected_U[0] = U[0];
2285 *  
2286 * @endcode
2287 *
2289 * n_{ij}@f$ and have to subtract the kinetic energy of the
2290 * perpendicular part from the total energy:
2291 *
2292 * @code
2293 *   const auto m = ProblemDescription<dim>::momentum(U);
2294 *   projected_U[1] = n_ij * m;
2295 *  
2296 *   const auto perpendicular_m = m - projected_U[1] * n_ij;
2297 *   projected_U[2] = U[1 + dim] - 0.5 * perpendicular_m.norm_square() / U[0];
2298 *  
2299 * @endcode
2300 *
2301 * We return the 1d state in <i>primitive</i> variables instead of
2304 *
2305
2306 *
2307 *
2308 * @code
2309 *   return {{projected_U[0],
2310 *   projected_U[1] / projected_U[0],
2313 *   }
2314 *  
2315 * @endcode
2316 *
2317 * At this point we also define two small functions that return the
2318 * positive and negative part of a double.
2319 *
2320
2321 *
2322 *
2323 * @code
2324 *   DEAL_II_ALWAYS_INLINE inline double positive_part(const double number)
2325 *   {
2326 *   return std::max(number, 0.);
2327 *   }
2328 *  
2329 *  
2330 *   DEAL_II_ALWAYS_INLINE inline double negative_part(const double number)
2331 *   {
2332 *   return -std::min(number, 0.);
2333 *   }
2334 *  
2335 * @endcode
2336 *
2338 * primitive state @f$[\rho, u, p, a]@f$ and a given pressure @f$p^\ast@f$
2339 * @cite GuermondPopov2016 Eqn. (3.7):
2340 * @f{align*}
2341 * \lambda^- = u - a\,\sqrt{1 + \frac{\gamma+1}{2\gamma}
2342 * \left(\frac{p^\ast-p}{p}\right)_+}
2343 * @f}
2345 * argument.
2346 *
2347
2348 *
2349 *
2350 * @code
2351 *   DEAL_II_ALWAYS_INLINE inline double
2352 *   lambda1_minus(const std::array<double, 4> &riemann_data,
2353 *   const double p_star)
2354 *   {
2355 *   /* Implements formula (3.7) in Guermond-Popov-2016 */
2356 *  
2357 *   constexpr double gamma = ProblemDescription<1>::gamma;
2358 *   const auto u = riemann_data[1];
2359 *   const auto p = riemann_data[2];
2360 *   const auto a = riemann_data[3];
2361 *  
2362 *   const double factor = (gamma + 1.0) / 2.0 / gamma;
2363 *   const double tmp = positive_part((p_star - p) / p);
2364 *   return u - a * std::sqrt(1.0 + factor * tmp);
2365 *   }
2366 *  
2367 * @endcode
2368 *
2370 * @f{align*}
2371 * \lambda^+ = u + a\,\sqrt{1 + \frac{\gamma+1}{2\gamma}
2372 * \left(\frac{p^\ast-p}{p}\right)_+}
2373 * @f}
2374 *
2375
2376 *
2377 *
2378 * @code
2379 *   DEAL_II_ALWAYS_INLINE inline double
2380 *   lambda3_plus(const std::array<double, 4> &riemann_data, const double p_star)
2381 *   {
2382 *   /* Implements formula (3.8) in Guermond-Popov-2016 */
2383 *  
2384 *   constexpr double gamma = ProblemDescription<1>::gamma;
2385 *   const auto u = riemann_data[1];
2386 *   const auto p = riemann_data[2];
2387 *   const auto a = riemann_data[3];
2388 *  
2389 *   const double factor = (gamma + 1.0) / 2.0 / gamma;
2390 *   const double tmp = positive_part((p_star - p) / p);
2391 *   return u + a * std::sqrt(1.0 + factor * tmp);
2392 *   }
2393 *  
2394 * @endcode
2395 *
2396 * All that is left to do is to compute the maximum of @f$\lambda^-@f$ and
2397 * @f$\lambda^+@f$ computed from the left and right primitive state
2398 * (@cite GuermondPopov2016 Eqn. (2.11)), where @f$p^\ast@f$ is given by
2399 * @cite GuermondPopov2016 Eqn (4.3):
2400 *
2401
2402 *
2403 *
2404 * @code
2407 *   const std::array<double, 4> &riemann_data_j)
2408 *   {
2409 *   constexpr double gamma = ProblemDescription<1>::gamma;
2410 *   const auto u_i = riemann_data_i[1];
2411 *   const auto p_i = riemann_data_i[2];
2412 *   const auto a_i = riemann_data_i[3];
2413 *   const auto u_j = riemann_data_j[1];
2414 *   const auto p_j = riemann_data_j[2];
2415 *   const auto a_j = riemann_data_j[3];
2416 *  
2417 *   const double numerator = a_i + a_j - (gamma - 1.) / 2. * (u_j - u_i);
2418 *  
2419 *   const double denominator =
2420 *   a_i * std::pow(p_i / p_j, -1. * (gamma - 1.) / 2. / gamma) + a_j * 1.;
2421 *  
2422 *   /* Formula (4.3) in Guermond-Popov-2016 */
2423 *  
2424 *   const double p_star =
2425 *   p_j * std::pow(numerator / denominator, 2. * gamma / (gamma - 1));
2426 *  
2427 *   const double lambda1 = lambda1_minus(riemann_data_i, p_star);
2428 *   const double lambda3 = lambda3_plus(riemann_data_j, p_star);
2429 *  
2430 *   /* Formula (2.11) in Guermond-Popov-2016 */
2431 *  
2433 *   }
2434 *  
2435 * @endcode
2436 *
2441 * @f{align*}
2442 * \lambda_{\text{exp}} = \max(u_i,u_j) + 5. \max(a_i, a_j).
2443 * @f}
2445 * is <i>neither</i> an ad-hoc constant, <i>nor</i> a tuning parameter.
2446 * It defines an upper bound for any @f$\gamma \in [0,5/3]@f$. Do not play
2447 * with it!
2448 *
2449
2450 *
2451 *
2452 * @code
2453 *   DEAL_II_ALWAYS_INLINE inline double
2454 *   lambda_max_expansion(const std::array<double, 4> &riemann_data_i,
2455 *   const std::array<double, 4> &riemann_data_j)
2456 *   {
2457 *   const auto u_i = riemann_data_i[1];
2458 *   const auto a_i = riemann_data_i[3];
2459 *   const auto u_j = riemann_data_j[1];
2460 *   const auto a_j = riemann_data_j[3];
2461 *  
2462 *   return std::max(std::abs(u_i), std::abs(u_j)) + 5. * std::max(a_i, a_j);
2463 *   }
2464 *   } // namespace
2465 *  
2466 * @endcode
2467 *
2468 * The following is the main function that we are going to call in order to
2469 * compute @f$\lambda_{\text{max}} (\mathbf{U}_i^{n},\mathbf{U}_j^{n},
2470 * \textbf{n}_{ij})@f$. We simply compute both maximal wavespeed estimates
2471 * and return the minimum.
2472 *
2473
2474 *
2475 *
2476 * @code
2478 *   DEAL_II_ALWAYS_INLINE inline double
2480 *   const state_type & U_j,
2481 *   const Tensor<1, dim> &n_ij)
2482 *   {
2485 *  
2486 *   const double lambda_1 =
2488 *  
2489 *   const double lambda_2 =
2491 *  
2492 *   return std::min(lambda_1, lambda_2);
2493 *   }
2494 *  
2495 * @endcode
2496 *
2497 * We conclude this section by defining static arrays
2498 * <code>component_names</code> that contain strings describing the
2499 * component names of our state vector. We have template specializations
2500 * for dimensions one, two and three, that are used later in DataOut for
2501 * naming the corresponding components:
2502 *
2503
2504 *
2505 *
2506 * @code
2507 *   template <>
2508 *   const std::array<std::string, 3> ProblemDescription<1>::component_names{
2509 *   {"rho", "m", "E"}};
2510 *  
2511 *   template <>
2512 *   const std::array<std::string, 4> ProblemDescription<2>::component_names{
2513 *   {"rho", "m_1", "m_2", "E"}};
2514 *  
2515 *   template <>
2516 *   const std::array<std::string, 5> ProblemDescription<3>::component_names{
2517 *   {"rho", "m_1", "m_2", "m_3", "E"}};
2518 *  
2519 * @endcode
2520 *
2521 *
2522 * <a name="Initialvalues"></a>
2523 * <h4>Initial values</h4>
2524 *
2525
2526 *
2528 * forward Euler scheme, is to briefly implement the `InitialValues` class.
2529 *
2530
2531 *
2532 * In the constructor we initialize all parameters with default values,
2533 * declare all parameters for the `ParameterAcceptor` class and connect the
2534 * <code>parse_parameters_call_back</code> slot to the respective signal.
2535 *
2536
2537 *
2538 * The <code>parse_parameters_call_back</code> slot will be invoked from
2541 * parameters have to be postprocessed (in some sense) or some
2543 *
2544
2545 *
2546 *
2547 * @code
2548 *   template <int dim>
2549 *   InitialValues<dim>::InitialValues(const std::string &subsection)
2551 *   {
2552 *   /* We wire up the slot InitialValues<dim>::parse_parameters_callback to
2553 *   the ParameterAcceptor::parse_parameters_call_back signal: */
2555 *   [&]() { this->parse_parameters_callback(); });
2556 *  
2557 *   initial_direction[0] = 1.;
2558 *   add_parameter("initial direction",
2560 *   "Initial direction of the uniform flow field");
2561 *  
2563 *   initial_1d_state[1] = 3.;
2564 *   initial_1d_state[2] = 1.;
2565 *   add_parameter("initial 1d state",
2567 *   "Initial 1d state (rho, u, p) of the uniform flow field");
2568 *   }
2569 *  
2570 * @endcode
2571 *
2573 * default values for the two private members
2575 * added them to the parameter list. But we have not defined an
2578 * call to actually evaluate the initial solution at the mesh nodes). At
2579 * the top of the function, we have to ensure that the provided initial
2580 * direction is not the zero vector.
2581 *
2582
2583 *
2584 * @note As commented, we could have avoided using the method
2586 * <code>setup()</code> in order to define the implementation of
2590 *
2591
2592 *
2593 *
2594 * @code
2595 *   template <int dim>
2597 *   {
2598 *   AssertThrow(initial_direction.norm() != 0.,
2599 *   ExcMessage(
2600 *   "Initial shock front direction is set to the zero vector."));
2602 *  
2603 * @endcode
2604 *
2605 * Next, we implement the <code>initial_state</code> function object
2606 * with a lambda function computing a uniform flow field. For this we
2607 * have to translate a given primitive 1d state (density @f$\rho@f$,
2609 * (density @f$\rho@f$, momentum @f$\mathbf{m}@f$, and total energy @f$E@f$).
2610 *
2611
2612 *
2613 *
2614 * @code
2615 *   initial_state = [this](const Point<dim> & /*point*/, double /*t*/) {
2616 *   const double rho = initial_1d_state[0];
2617 *   const double u = initial_1d_state[1];
2618 *   const double p = initial_1d_state[2];
2619 *   static constexpr double gamma = ProblemDescription<dim>::gamma;
2620 *  
2621 *   state_type state;
2622 *  
2623 *   state[0] = rho;
2624 *   for (unsigned int i = 0; i < dim; ++i)
2625 *   state[1 + i] = rho * u * initial_direction[i];
2626 *  
2627 *   state[dim + 1] = p / (gamma - 1.) + 0.5 * rho * u * u;
2628 *  
2629 *   return state;
2630 *   };
2631 *   }
2632 *  
2633 * @endcode
2634 *
2635 *
2636 * <a name="TheForwardEulerstep"></a>
2637 * <h4>The Forward Euler step</h4>
2638 *
2639
2640 *
2642 * any surprising code:
2643 *
2644
2645 *
2646 *
2647 * @code
2648 *   template <int dim>
2650 *   const MPI_Comm mpi_communicator,
2654 *   const std::string & subsection /*= "TimeStepping"*/)
2656 *   , mpi_communicator(mpi_communicator)
2660 *   {
2661 *   cfl_update = 0.80;
2662 *   add_parameter("cfl update",
2663 *   cfl_update,
2664 *   "Relative CFL constant used for update");
2665 *   }
2666 *  
2667 * @endcode
2668 *
2669 * In the class member <code>prepare()</code> we initialize the temporary
2671 * vector <code>temp</code> will be used to store the solution update
2673 *
2674
2675 *
2676 *
2677 * @code
2678 *   template <int dim>
2679 *   void TimeStepping<dim>::prepare()
2680 *   {
2682 *   "time_stepping - prepare scratch space");
2683 *  
2684 *   for (auto &it : temporary_vector)
2685 *   it.reinit(offline_data->partitioner);
2686 *  
2687 *   dij_matrix.reinit(offline_data->sparsity_pattern);
2688 *   }
2689 *  
2690 * @endcode
2691 *
2692 * It is now time to implement the forward Euler step. Given a (writable
2693 * reference) to the old state <code>U</code> at time @f$t@f$ we update the
2694 * state <code>U</code> in place and return the chosen time-step size. We
2697 * variable names (e.g., <code>sparsity</code> instead of
2698 * <code>offline_data->sparsity_pattern</code>).
2699 *
2700
2701 *
2702 *
2703 * @code
2704 *   template <int dim>
2705 *   double TimeStepping<dim>::make_one_step(vector_type &U, const double t)
2706 *   {
2707 *   const auto &n_locally_owned = offline_data->n_locally_owned;
2708 *   const auto &n_locally_relevant = offline_data->n_locally_relevant;
2709 *  
2714 *  
2715 *   const auto &sparsity = offline_data->sparsity_pattern;
2716 *  
2717 *   const auto &lumped_mass_matrix = offline_data->lumped_mass_matrix;
2718 *   const auto &norm_matrix = offline_data->norm_matrix;
2719 *   const auto &nij_matrix = offline_data->nij_matrix;
2720 *   const auto &cij_matrix = offline_data->cij_matrix;
2721 *  
2722 *   const auto &boundary_normal_map = offline_data->boundary_normal_map;
2723 *  
2724 * @endcode
2725 *
2726 * <b>Step 1</b>: Computing the @f$d_{ij}@f$ graph viscosity matrix.
2727 *
2728
2729 *
2731 * symmetric, i.e., @f$d_{ij} = d_{ji}@f$. In this regard we note here that
2733 * \int_{\Omega} \nabla \phi_i \phi_j \, \mathrm{d}\mathbf{x}@f$ (or
2736 * from the boundary. In this case we can check that
2737 * @f$\lambda_{\text{max}} (\mathbf{U}_i^{n}, \mathbf{U}_j^{n},
2738 * \textbf{n}_{ij}) = \lambda_{\text{max}} (\mathbf{U}_j^{n},
2740 * the property @f$d_{ij} = d_{ji}@f$.
2741 *
2742
2743 *
2744 * However, if both support points @f$\mathbf{x}_i@f$ or @f$\mathbf{x}_j@f$
2745 * happen to lie on the boundary, then, the equalities @f$\mathbf{c}_{ij} =
2746 * - \mathbf{c}_{ji}@f$ and @f$\lambda_{\text{max}} (\mathbf{U}_i^{n},
2747 * \mathbf{U}_j^{n}, \textbf{n}_{ij}) = \lambda_{\text{max}}
2748 * (\mathbf{U}_j^{n}, \mathbf{U}_i^{n}, \textbf{n}_{ji})@f$ do not
2749 * necessarily hold true. The only mathematically safe solution for this
2750 * dilemma is to compute both of them @f$d_{ij}@f$ and @f$d_{ji}@f$ and
2751 * take the maximum.
2752 *
2753
2754 *
2756 * order to save some computing time we exploit the fact that the viscosity
2758 * the upper-triangular entries of @f$d_{ij}@f$ and copy the
2760 *
2761
2762 *
2768 *
2769
2770 *
2771 * We define again a "worker" function <code>on_subranges</code> that
2772 * computes the viscosity @f$d_{ij}@f$ for a subrange `[*index_begin,
2773 * *index_end)` of column indices:
2774 *
2775 * @code
2776 *   {
2778 *   "time_stepping - 1 compute d_ij");
2779 *  
2780 *   const auto on_subranges =
2781 *   [&](const auto index_begin, const auto index_end) {
2782 *   for (const auto i :
2783 *   std_cxx20::ranges::iota_view<unsigned int, unsigned int>(
2784 *   *index_begin, *index_end))
2785 *   {
2786 *   const auto U_i = gather(U, i);
2787 *  
2788 * @endcode
2789 *
2790 * For a given column index i we iterate over the columns of the
2791 * sparsity pattern from <code>sparsity.begin(i)</code> to
2792 * <code>sparsity.end(i)</code>:
2793 *
2794 * @code
2795 *   for (auto jt = sparsity.begin(i); jt != sparsity.end(i); ++jt)
2796 *   {
2797 *   const auto j = jt->column();
2798 *  
2799 * @endcode
2800 *
2801 * We only compute @f$d_{ij}@f$ if @f$j < i@f$ (upper triangular
2802 * entries) and later copy the values over to @f$d_{ji}@f$.
2803 *
2804 * @code
2805 *   if (j >= i)
2806 *   continue;
2807 *  
2808 *   const auto U_j = gather(U, j);
2809 *  
2810 *   const auto n_ij = gather_get_entry(nij_matrix, jt);
2811 *   const double norm = get_entry(norm_matrix, jt);
2812 *  
2813 *   const auto lambda_max =
2815 *  
2816 *   double d = norm * lambda_max;
2817 *  
2818 * @endcode
2819 *
2820 * If both support points happen to be at the boundary we
2821 * have to compute @f$d_{ji}@f$ as well and then take
2822 * @f$\max(d_{ij},d_{ji})@f$. After this we can finally set the
2824 *
2825 * @code
2826 *   if (boundary_normal_map.count(i) != 0 &&
2827 *   boundary_normal_map.count(j) != 0)
2828 *   {
2829 *   const auto n_ji = gather(nij_matrix, j, i);
2830 *   const auto lambda_max_2 =
2832 *   U_i,
2833 *   n_ji);
2834 *   const double norm_2 = norm_matrix(j, i);
2835 *  
2836 *   d = std::max(d, norm_2 * lambda_max_2);
2837 *   }
2838 *  
2839 *   set_entry(dij_matrix, jt, d);
2840 *   dij_matrix(j, i) = d;
2841 *   }
2842 *   }
2843 *   };
2844 *  
2847 *   on_subranges,
2848 *   4096);
2849 *   }
2850 *  
2851 * @endcode
2852 *
2853 * <b>Step 2</b>: Compute diagonal entries @f$d_{ii}@f$ and
2854 * @f$\tau_{\text{max}}@f$.
2855 *
2856
2857 *
2858 * So far we have computed all off-diagonal entries of the matrix
2859 * <code>dij_matrix</code>. We still have to fill its diagonal entries
2860 * defined as @f$d_{ii}^n = - \sum_{j \in \mathcal{I}(i)\backslash \{i\}}
2863 * largest admissible time-step, which is defined as
2864 * \f[
2866 * \left(\frac{m_i}{-2\,d_{ii}^{n}}\right) \, .
2867 * \f]
2869 * global, it operates on all nodes: first we have to take the minimum
2870 * over all threads (of a given node) and then we have to take the
2873 * <a
2874 * href="http://www.cplusplus.com/reference/atomic/atomic/"><code>std::atomic<double></code></a>.
2875 * The internal implementation of <code>std::atomic</code> will take
2878 * same time.
2879 * - In order to take the minimum over all MPI process we use the utility
2880 * function <code>Utilities::MPI::min</code>.
2881 *
2882
2883 *
2884 *
2885 * @code
2886 *   std::atomic<double> tau_max{std::numeric_limits<double>::infinity()};
2887 *  
2888 *   {
2890 *   "time_stepping - 2 compute d_ii, and tau_max");
2891 *  
2892 * @endcode
2893 *
2894 * on_subranges() will be executed on every thread individually. The
2895 * variable <code>tau_max_on_subrange</code> is thus stored thread
2896 * locally.
2897 *
2898
2899 *
2900 *
2901 * @code
2904 *   double tau_max_on_subrange = std::numeric_limits<double>::infinity();
2905 *  
2906 *   for (const auto i :
2907 *   std_cxx20::ranges::iota_view<unsigned int, unsigned int>(
2908 *   *index_begin, *index_end))
2909 *   {
2910 *   double d_sum = 0.;
2911 *  
2912 *   for (auto jt = sparsity.begin(i); jt != sparsity.end(i); ++jt)
2913 *   {
2914 *   const auto j = jt->column();
2915 *  
2916 *   if (j == i)
2917 *   continue;
2918 *  
2920 *   }
2921 *  
2922 * @endcode
2923 *
2924 * We store the negative sum of the d_ij entries at the
2925 * diagonal position
2926 *
2927 * @code
2928 *   dij_matrix.diag_element(i) = d_sum;
2929 * @endcode
2930 *
2931 * and compute the maximal local time-step size
2932 * <code>tau</code>:
2933 *
2934 * @code
2935 *   const double mass = lumped_mass_matrix.diag_element(i);
2936 *   const double tau = cfl_update * mass / (-2. * d_sum);
2938 *   }
2939 *  
2940 * @endcode
2941 *
2943 * time-step size computed for the (thread local) subrange. At this
2944 * point we have to synchronize the value over all threads. This is
2945 * were we use the <a
2946 * href="http://www.cplusplus.com/reference/atomic/atomic/"><code>std::atomic<double></code></a>
2947 * <i>compare exchange</i> update mechanism:
2948 *
2949 * @code
2950 *   double current_tau_max = tau_max.load();
2952 *   !tau_max.compare_exchange_weak(current_tau_max,
2954 *   ;
2955 *   };
2956 *  
2959 *   on_subranges,
2960 *   4096);
2961 *  
2962 * @endcode
2963 *
2965 * value over all MPI processes:
2966 *
2967
2968 *
2969 *
2970 * @code
2971 *   tau_max.store(Utilities::MPI::min(tau_max.load(), mpi_communicator));
2972 *  
2973 * @endcode
2974 *
2976 * <code>tau_max</code> is indeed a valid floating point number.
2977 *
2978 * @code
2979 *   AssertThrow(
2980 *   !std::isnan(tau_max.load()) && !std::isinf(tau_max.load()) &&
2981 *   tau_max.load() > 0.,
2982 *   ExcMessage(
2983 *   "I'm sorry, Dave. I'm afraid I can't do that. - We crashed."));
2984 *   }
2985 *  
2986 * @endcode
2987 *
2988 * <b>Step 3</b>: Perform update.
2989 *
2990
2991 *
2992 * At this point, we have computed all viscosity coefficients @f$d_{ij}@f$
2993 * and we know the maximal admissible time-step size
2994 * @f$\tau_{\text{max}}@f$. This means we can now compute the update:
2995 *
2996
2997 *
2998 * \f[
2999 * \mathbf{U}_i^{n+1} = \mathbf{U}_i^{n} - \frac{\tau_{\text{max}} }{m_i}
3000 * \sum_{j \in \mathcal{I}(i)} (\mathbb{f}(\mathbf{U}_j^{n}) -
3001 * \mathbb{f}(\mathbf{U}_i^{n})) \cdot \mathbf{c}_{ij} - d_{ij}
3002 * (\mathbf{U}_j^{n} - \mathbf{U}_i^{n})
3003 * \f]
3004 *
3005
3006 *
3007 * This update formula is slightly different from what was discussed in
3010 * same numerical values). We favor this second formula since it has
3012 * artifacts.
3013 *
3014
3015 *
3016 *
3017 * @code
3018 *   {
3020 *   "time_stepping - 3 perform update");
3021 *  
3022 *   const auto on_subranges =
3023 *   [&](const auto index_begin, const auto index_end) {
3024 *   for (const auto i :
3026 *   {
3028 *  
3029 *   const auto U_i = gather(U, i);
3030 *  
3031 *   const auto f_i = ProblemDescription<dim>::flux(U_i);
3032 *   const double m_i = lumped_mass_matrix.diag_element(i);
3033 *  
3034 *   auto U_i_new = U_i;
3035 *  
3036 *   for (auto jt = sparsity.begin(i); jt != sparsity.end(i); ++jt)
3037 *   {
3038 *   const auto j = jt->column();
3039 *  
3040 *   const auto U_j = gather(U, j);
3041 *   const auto f_j = ProblemDescription<dim>::flux(U_j);
3042 *  
3043 *   const auto c_ij = gather_get_entry(cij_matrix, jt);
3044 *   const auto d_ij = get_entry(dij_matrix, jt);
3045 *  
3046 *   for (unsigned int k = 0; k < n_solution_variables; ++k)
3047 *   {
3048 *   U_i_new[k] +=
3049 *   tau_max / m_i *
3050 *   (-(f_j[k] - f_i[k]) * c_ij + d_ij * (U_j[k] - U_i[k]));
3051 *   }
3052 *   }
3053 *  
3055 *   }
3056 *   };
3057 *  
3059 *   indices_owned.end(),
3060 *   on_subranges,
3061 *   4096);
3062 *   }
3063 *  
3064 * @endcode
3065 *
3066 * <b>Step 4</b>: Fix up boundary states.
3067 *
3068
3069 *
3070 * As a last step in the Forward Euler method, we have to fix up all
3071 * boundary states. As discussed in the intro we
3072 * - advance in time satisfying no boundary condition at all,
3073 * - at the end of the time step enforce boundary conditions strongly
3074 * in a post-processing step.
3075 *
3076
3077 *
3078 * Here, we compute the correction
3079 * \f[
3081 * \mathbf{m}_i) \boldsymbol{\nu}_i,
3082 * \f]
3083 * which removes the normal component of @f$\mathbf{m}@f$.
3084 *
3085
3086 *
3087 *
3088 * @code
3089 *   {
3091 *   "time_stepping - 4 fix boundary states");
3092 *  
3093 *   for (const auto &it : boundary_normal_map)
3094 *   {
3095 *   const auto i = it.first;
3096 *  
3097 * @endcode
3098 *
3099 * We only iterate over the locally owned subset:
3100 *
3101 * @code
3102 *   if (i >= n_locally_owned)
3103 *   continue;
3104 *  
3105 *   const auto &normal = std::get<0>(it.second);
3106 *   const auto &id = std::get<1>(it.second);
3107 *   const auto &position = std::get<2>(it.second);
3108 *  
3109 *   auto U_i = gather(temporary_vector, i);
3110 *  
3111 * @endcode
3112 *
3113 * On free slip boundaries we remove the normal component of the
3114 * momentum:
3115 *
3116 * @code
3117 *   if (id == Boundaries::free_slip)
3118 *   {
3120 *   m -= (m * normal) * normal;
3121 *   for (unsigned int k = 0; k < dim; ++k)
3122 *   U_i[k + 1] = m[k];
3123 *   }
3124 *  
3125 * @endcode
3126 *
3128 * strongly:
3129 *
3130 * @code
3131 *   else if (id == Boundaries::dirichlet)
3132 *   {
3133 *   U_i = initial_values->initial_state(position, t + tau_max);
3134 *   }
3135 *  
3137 *   }
3138 *   }
3139 *  
3140 * @endcode
3141 *
3142 * <b>Step 5</b>: We now update the ghost layer over all MPI ranks,
3143 * swap the temporary vector with the solution vector <code>U</code>
3144 * (that will get returned by reference) and return the chosen
3145 * time-step size @f$\tau_{\text{max}}@f$:
3146 *
3147
3148 *
3149 *
3150 * @code
3151 *   for (auto &it : temporary_vector)
3152 *   it.update_ghost_values();
3153 *  
3154 *   U.swap(temporary_vector);
3155 *  
3156 *   return tau_max;
3157 *   }
3158 *  
3159 * @endcode
3160 *
3161 *
3162 * <a name="Schlierenpostprocessing"></a>
3164 *
3165
3166 *
3167 * At various intervals we will output the current state <code>U</code>
3168 * of the solution together with a so-called Schlieren plot.
3170 * contains no surprises. We simply supply default values to and register
3171 * two parameters:
3172 * - schlieren_beta:
3173 * is an ad-hoc positive amplification factor in order to enhance the
3175 * taste.
3177 * state @f$[\rho, \mathbf{m},E]@f$ are we going to use in order to generate
3179 *
3180
3181 *
3182 *
3183 * @code
3184 *   template <int dim>
3186 *   const MPI_Comm mpi_communicator,
3189 *   const std::string & subsection /*= "SchlierenPostprocessor"*/)
3191 *   , mpi_communicator(mpi_communicator)
3194 *   {
3195 *   schlieren_beta = 10.;
3196 *   add_parameter("schlieren beta",
3198 *   "Beta factor used in Schlieren-type postprocessor");
3199 *  
3200 *   schlieren_index = 0;
3201 *   add_parameter("schlieren index",
3203 *   "Use the corresponding component of the state vector for the "
3204 *   "schlieren plot");
3205 *   }
3206 *  
3207 * @endcode
3208 *
3211 *
3212
3213 *
3214 *
3215 * @code
3216 *   template <int dim>
3218 *   {
3220 *   "schlieren_postprocessor - prepare scratch space");
3221 *  
3222 *   r.reinit(offline_data->n_locally_relevant);
3223 *   schlieren.reinit(offline_data->partitioner);
3224 *   }
3225 *  
3226 * @endcode
3227 *
3230 * basically takes a component of the state vector <code>U</code> and
3231 * computes the Schlieren indicator for such component (the formula of the
3234 * that this formula requires the "nodal gradients" @f$\nabla r_j@f$.
3237 * gradients are not defined for @f$W^{1,p}(\Omega)@f$ functions. The
3238 * simplest technique we can use to recover gradients at nodes is
3239 * weighted-averaging i.e.
3240 *
3241
3242 *
3243 * \f[ \nabla r_j \dealcoloneq \frac{1}{\int_{S_i} \omega_i(\mathbf{x}) \,
3244 * \mathrm{d}\mathbf{x}}
3245 * \int_{S_i} r_h(\mathbf{x}) \omega_i(\mathbf{x}) \, \mathrm{d}\mathbf{x}
3246 * \ \ \ \ \ \mathbf{(*)} \f]
3247 *
3248
3249 *
3250 * where @f$S_i@f$ is the support of the shape function @f$\phi_i@f$, and
3251 * @f$\omega_i(\mathbf{x})@f$ is the weight. The weight could be any
3252 * positive function such as
3254 * notion of mean value). But as usual, the goal is to reuse the off-line
3256 * choice of weight is @f$\omega_i = \phi_i@f$. Inserting this choice of
3257 * weight and the expansion @f$r_h(\mathbf{x}) = \sum_{j \in \mathcal{V}}
3258 * r_j \phi_j(\mathbf{x})@f$ into @f$\mathbf{(*)}@f$ we get :
3259 *
3260
3261 *
3263 * \mathbf{c}_{ij} \ \ \ \ \ \mathbf{(**)} \, . \f]
3264 *
3265
3266 *
3267 * Using this last formula we can recover averaged nodal gradients without
3268 * resorting to any form of quadrature. This idea aligns quite well with
3269 * the whole spirit of edge-based schemes (or algebraic schemes) where
3270 * we want to operate on matrices and vectors as directly as
3272 * forms, cell-loops, quadrature, or any other
3273 * intermediate construct/operation between the input arguments (the state
3274 * from the previous time-step) and the actual matrices and vectors
3275 * required to compute the update.
3276 *
3277
3278 *
3279 * The second thing to note is that we have to compute global minimum and
3281 * same ideas used to compute the time step size in the class member
3282 * <code>%TimeStepping\<dim>::%step()</code> we define @f$\max_j |\nabla r_j|@f$
3283 * and @f$\min_j |\nabla r_j|@f$ as atomic doubles in order to resolve any
3284 * conflicts between threads. As usual, we use
3286 * <code>Utilities::MPI::min()</code> to find the global maximum/minimum
3287 * among all MPI processes.
3288 *
3289
3290 *
3292 * loop over all nodes. The entire operation requires two loops over nodes:
3293 *
3294
3295 *
3296 * - The first loop computes @f$|\nabla r_i|@f$ for all @f$i \in \mathcal{V}@f$ in
3297 * the mesh, and the bounds @f$\max_j |\nabla r_j|@f$ and
3298 * @f$\min_j |\nabla r_j|@f$.
3299 * - The second loop finally computes the Schlieren indicator using the
3300 * formula
3301 *
3302
3303 *
3304 * \f[ \text{schlieren}[i] = e^{\beta \frac{ |\nabla r_i|
3305 * - \min_j |\nabla r_j| }{\max_j |\nabla r_j| - \min_j |\nabla r_j| } }
3306 * \, . \f]
3307 *
3308
3309 *
3311 * <code>on_subranges</code> for each one of these stages.
3312 *
3313
3314 *
3315 *
3316 * @code
3317 *   template <int dim>
3318 *   void SchlierenPostprocessor<dim>::compute_schlieren(const vector_type &U)
3319 *   {
3321 *   computing_timer, "schlieren_postprocessor - compute schlieren plot");
3322 *  
3323 *   const auto &sparsity = offline_data->sparsity_pattern;
3324 *   const auto &lumped_mass_matrix = offline_data->lumped_mass_matrix;
3325 *   const auto &cij_matrix = offline_data->cij_matrix;
3326 *   const auto &boundary_normal_map = offline_data->boundary_normal_map;
3327 *   const auto &n_locally_owned = offline_data->n_locally_owned;
3328 *  
3329 *   const auto indices =
3332 *  
3333 * @endcode
3334 *
3335 * We define the r_i_max and r_i_min in the current MPI process as
3336 * atomic doubles in order to avoid race conditions between threads:
3337 *
3338 * @code
3339 *   std::atomic<double> r_i_max{0.};
3340 *   std::atomic<double> r_i_min{std::numeric_limits<double>::infinity()};
3341 *  
3342 * @endcode
3343 *
3344 * First loop: compute the averaged gradient at each node and the
3345 * global maxima and minima of the gradients.
3346 *
3347 * @code
3348 *   {
3349 *   const auto on_subranges =
3350 *   [&](const auto index_begin, const auto index_end) {
3351 *   double r_i_max_on_subrange = 0.;
3352 *   double r_i_min_on_subrange = std::numeric_limits<double>::infinity();
3353 *  
3354 *   for (const auto i :
3356 *   {
3358 *  
3360 *  
3361 *   for (auto jt = sparsity.begin(i); jt != sparsity.end(i); ++jt)
3362 *   {
3363 *   const auto j = jt->column();
3364 *  
3365 *   if (i == j)
3366 *   continue;
3367 *  
3368 *   const auto U_js = U[schlieren_index].local_element(j);
3369 *   const auto c_ij = gather_get_entry(cij_matrix, jt);
3370 *   r_i += c_ij * U_js;
3371 *   }
3372 *  
3373 * @endcode
3374 *
3376 * how we fixed up boundary states in the forward Euler step.
3379 *
3380
3381 *
3382 *
3383 * @code
3384 *   const auto bnm_it = boundary_normal_map.find(i);
3385 *   if (bnm_it != boundary_normal_map.end())
3386 *   {
3387 *   const auto &normal = std::get<0>(bnm_it->second);
3388 *   const auto &id = std::get<1>(bnm_it->second);
3389 *  
3390 *   if (id == Boundaries::free_slip)
3391 *   r_i -= 1. * (r_i * normal) * normal;
3392 *   else
3393 *   r_i = 0.;
3394 *   }
3395 *  
3396 * @endcode
3397 *
3399 * gradients per se. We only want their norms in order to
3401 * mass matrix @f$m_i@f$):
3402 *
3403 * @code
3404 *   const double m_i = lumped_mass_matrix.diag_element(i);
3405 *   r[i] = r_i.norm() / m_i;
3408 *   }
3409 *  
3410 * @endcode
3411 *
3414 * process) and update them if necessary:
3415 *
3416
3417 *
3418 *
3419 * @code
3420 *   double current_r_i_max = r_i_max.load();
3422 *   !r_i_max.compare_exchange_weak(current_r_i_max,
3424 *   ;
3425 *  
3426 *   double current_r_i_min = r_i_min.load();
3428 *   !r_i_min.compare_exchange_weak(current_r_i_min,
3430 *   ;
3431 *   };
3432 *  
3434 *   indices.end(),
3435 *   on_subranges,
3436 *   4096);
3437 *   }
3438 *  
3439 * @endcode
3440 *
3442 * all MPI processes.
3443 *
3444
3445 *
3446 *
3447 * @code
3448 *   r_i_max.store(Utilities::MPI::max(r_i_max.load(), mpi_communicator));
3449 *   r_i_min.store(Utilities::MPI::min(r_i_min.load(), mpi_communicator));
3450 *  
3451 * @endcode
3452 *
3453 * Second loop: we now have the vector <code>r</code> and the scalars
3455 * are thus in a position to actually compute the Schlieren indicator.
3456 *
3457
3458 *
3459 *
3460 * @code
3461 *   {
3462 *   const auto on_subranges =
3463 *   [&](const auto index_begin, const auto index_end) {
3464 *   for (const auto i :
3466 *   {
3468 *  
3469 *   schlieren.local_element(i) =
3470 *   1. - std::exp(-schlieren_beta * (r[i] - r_i_min) /
3471 *   (r_i_max - r_i_min));
3472 *   }
3473 *   };
3474 *  
3476 *   indices.end(),
3477 *   on_subranges,
3478 *   4096);
3479 *   }
3480 *  
3481 * @endcode
3482 *
3483 * And finally, exchange ghost elements.
3484 *
3485 * @code
3486 *   schlieren.update_ghost_values();
3487 *   }
3488 *  
3489 * @endcode
3490 *
3491 *
3492 * <a name="Themainloop"></a>
3493 * <h4>The main loop</h4>
3494 *
3495
3496 *
3497 * With all classes implemented it is time to create an instance of
3500 * <code>SchlierenPostprocessor<dim></code>, and run the forward Euler
3501 * step in a loop.
3502 *
3503
3504 *
3505 * In the constructor of <code>MainLoop<dim></code> we now initialize an
3506 * instance of all classes, and declare a number of parameters
3507 * controlling output. Most notable, we declare a boolean parameter
3509 * restart from an interrupted computation, or not.
3510 *
3511
3512 *
3513 *
3514 * @code
3515 *   template <int dim>
3516 *   MainLoop<dim>::MainLoop(const MPI_Comm mpi_communicator)
3517 *   : ParameterAcceptor("A - MainLoop")
3518 *   , mpi_communicator(mpi_communicator)
3519 *   , computing_timer(mpi_communicator,
3520 *   timer_output,
3523 *   , pcout(std::cout, Utilities::MPI::this_mpi_process(mpi_communicator) == 0)
3524 *   , discretization(mpi_communicator, computing_timer, "B - Discretization")
3525 *   , offline_data(mpi_communicator,
3528 *   "C - OfflineData")
3529 *   , initial_values("D - InitialValues")
3530 *   , time_stepping(mpi_communicator,
3532 *   offline_data,
3534 *   "E - TimeStepping")
3535 *   , schlieren_postprocessor(mpi_communicator,
3537 *   offline_data,
3538 *   "F - SchlierenPostprocessor")
3539 *   {
3540 *   base_name = "test";
3541 *   add_parameter("basename", base_name, "Base name for all output files");
3542 *  
3543 *   t_final = 4.;
3544 *   add_parameter("final time", t_final, "Final time");
3545 *  
3546 *   output_granularity = 0.02;
3547 *   add_parameter("output granularity",
3549 *   "time interval for output");
3550 *  
3551 *   asynchronous_writeback = true;
3552 *   add_parameter("asynchronous writeback",
3554 *   "Write out solution in a background thread performing IO");
3555 *  
3556 *   resume = false;
3557 *   add_parameter("resume", resume, "Resume an interrupted computation.");
3558 *   }
3559 *  
3560 * @endcode
3561 *
3563 * in an anonymous namespace that is used to output messages in the
3565 *
3566
3567 *
3568 *
3569 * @code
3570 *   namespace
3571 *   {
3573 *   const std::string & header,
3574 *   const std::string & secondary = "")
3575 *   {
3576 *   const auto header_size = header.size();
3577 *   const auto padded_header = std::string((34 - header_size) / 2, ' ') +
3578 *   header +
3579 *   std::string((35 - header_size) / 2, ' ');
3580 *  
3581 *   const auto secondary_size = secondary.size();
3582 *   const auto padded_secondary =
3583 *   std::string((34 - secondary_size) / 2, ' ') + secondary +
3584 *   std::string((35 - secondary_size) / 2, ' ');
3585 *  
3586 *   /* clang-format off */
3587 *   pcout << std::endl;
3588 *   pcout << " ####################################################" << std::endl;
3589 *   pcout << " ######### #########" << std::endl;
3590 *   pcout << " #########" << padded_header << "#########" << std::endl;
3591 *   pcout << " #########" << padded_secondary << "#########" << std::endl;
3592 *   pcout << " ######### #########" << std::endl;
3593 *   pcout << " ####################################################" << std::endl;
3594 *   pcout << std::endl;
3595 *   /* clang-format on */
3596 *   }
3597 *   } // namespace
3598 *  
3599 * @endcode
3600 *
3603 * program.
3604 *
3605
3606 *
3607 *
3608 * @code
3609 *   template <int dim>
3610 *   void MainLoop<dim>::run()
3611 *   {
3612 * @endcode
3613 *
3614 * We start by reading in parameters and initializing all objects. We
3616 * all parameters from the parameter file (whose name is given as a
3617 * string argument). ParameterAcceptor handles a global
3618 * ParameterHandler that is initialized with subsections and parameter
3619 * declarations for all class instances that are derived from
3620 * ParameterAceptor. The call to initialize enters the subsection for
3623 *
3624
3625 *
3626 *
3627 * @code
3628 *   pcout << "Reading parameters and allocating objects... " << std::flush;
3629 *  
3630 *   ParameterAcceptor::initialize("step-69.prm");
3631 *   pcout << "done" << std::endl;
3632 *  
3633 * @endcode
3634 *
3635 * Next we create the triangulation, assemble all matrices, set up
3636 * scratch space, and initialize the DataOut<dim> object. All of these
3639 *
3640
3641 *
3642 *
3643 * @code
3644 *   {
3645 *   print_head(pcout, "create triangulation");
3646 *  
3647 *   discretization.setup();
3648 *  
3649 *   if (resume)
3650 *   discretization.triangulation.load(base_name + "-checkpoint.mesh");
3651 *   else
3652 *   discretization.triangulation.refine_global(discretization.refinement);
3653 *  
3654 *   pcout << "Number of active cells: "
3655 *   << discretization.triangulation.n_global_active_cells()
3656 *   << std::endl;
3657 *  
3658 *   print_head(pcout, "compute offline data");
3659 *   offline_data.setup();
3660 *   offline_data.assemble();
3661 *  
3662 *   pcout << "Number of degrees of freedom: "
3663 *   << offline_data.dof_handler.n_dofs() << std::endl;
3664 *  
3665 *   print_head(pcout, "set up time step");
3666 *   time_stepping.prepare();
3667 *   schlieren_postprocessor.prepare();
3668 *   }
3669 *  
3670 * @endcode
3671 *
3672 * We will store the current time and state in the variable
3673 * <code>t</code> and vector <code>U</code>:
3674 *
3675
3676 *
3677 *
3678 * @code
3679 *   double t = 0.;
3680 *   unsigned int output_cycle = 0;
3681 *  
3682 *   vector_type U;
3683 *   for (auto &it : U)
3684 *   it.reinit(offline_data.partitioner);
3685 *  
3686 * @endcode
3687 *
3688 *
3689 * <a name="Resume"></a>
3690 * <h5>Resume</h5>
3691 *
3692
3693 *
3694 * By default the boolean <code>resume</code> is set to false, i.e. the
3698 * an old state consisting of <code>t</code>,
3700 * checkpoint files.
3701 *
3702
3703 *
3704 * A this point we have already read in the stored refinement history
3705 * of our parallel distributed mesh. What is missing are the actual
3706 * state vector <code>U</code>, the time and output cycle. We use the
3708 * distributed::Triangulation::load() /
3709 * distributed::Triangulation::save() mechanism to read in the state
3710 * vector. A separate <code>boost::archive</code> is used to retrieve
3712 * will be created in the <code>output()</code> routine discussed
3713 * below.
3714 *
3715
3716 *
3717 *
3718 * @code
3719 *   if (resume)
3720 *   {
3721 *   print_head(pcout, "resume interrupted computation");
3722 *  
3725 *   solution_transfer(offline_data.dof_handler);
3726 *  
3727 *   std::vector<LinearAlgebra::distributed::Vector<double> *> vectors;
3728 *   std::transform(U.begin(),
3729 *   U.end(),
3730 *   std::back_inserter(vectors),
3731 *   [](auto &it) { return &it; });
3732 *   solution_transfer.deserialize(vectors);
3733 *  
3734 *   for (auto &it : U)
3735 *   it.update_ghost_values();
3736 *  
3737 *   std::ifstream file(base_name + "-checkpoint.metadata",
3738 *   std::ios::binary);
3739 *  
3740 *   boost::archive::binary_iarchive ia(file);
3741 *   ia >> t >> output_cycle;
3742 *   }
3743 *   else
3744 *   {
3745 *   print_head(pcout, "interpolate initial values");
3747 *   }
3748 *  
3749 * @endcode
3750 *
3751 * With either the initial state set up, or an interrupted state
3752 * restored it is time to enter the main loop:
3753 *
3754
3755 *
3756 *
3757 * @code
3758 *   output(U, base_name, t, output_cycle++);
3759 *  
3760 *   print_head(pcout, "enter main loop");
3761 *  
3762 *   unsigned int timestep_number = 1;
3763 *   while (t < t_final)
3764 *   {
3765 * @endcode
3766 *
3767 * We first print an informative status message
3768 *
3769
3770 *
3771 *
3772 * @code
3773 *   std::ostringstream head;
3774 *   std::ostringstream secondary;
3775 *  
3776 *   head << "Cycle " << Utilities::int_to_string(timestep_number, 6)
3777 *   << " ("
3778 *   << std::fixed << std::setprecision(1) << t / t_final * 100
3779 *   << "%)";
3780 *   secondary << "at time t = " << std::setprecision(8) << std::fixed << t;
3781 *  
3782 *   print_head(pcout, head.str(), secondary.str());
3783 *  
3784 * @endcode
3785 *
3786 * and then perform a single forward Euler step. Note that the
3787 * state vector <code>U</code> is updated in place and that
3788 * <code>time_stepping.make_one_step()</code> returns the chosen step
3789 * size.
3790 *
3791
3792 *
3793 *
3794 * @code
3795 *   t += time_stepping.make_one_step(U, t);
3796 *  
3797 * @endcode
3798 *
3800 * state is a CPU and IO intensive task that we cannot afford to do
3801 * every time step - in particular with explicit time stepping. We
3802 * thus only schedule output by calling the <code>output()</code>
3803 * function if we are past a threshold set by
3805 *
3806
3807 *
3808 *
3809 * @code
3811 *   {
3812 *   checkpoint(U, base_name, t, output_cycle);
3813 *   output(U, base_name, t, output_cycle);
3814 *   ++output_cycle;
3815 *   }
3816 *  
3817 *   ++timestep_number;
3818 *   }
3819 *  
3820 * @endcode
3821 *
3822 * We wait for any remaining background output thread to finish before
3823 * printing a summary and exiting.
3824 *
3825 * @code
3826 *   if (background_thread_state.valid())
3827 *   background_thread_state.wait();
3828 *  
3829 *   computing_timer.print_summary();
3830 *   pcout << timer_output.str() << std::endl;
3831 *   }
3832 *  
3833 * @endcode
3834 *
3836 * as input argument and populates a state vector <code>U</code> with the
3838 *
3839
3840 *
3841 *
3842 * @code
3843 *   template <int dim>
3846 *   {
3847 *   pcout << "MainLoop<dim>::interpolate_initial_values(t = " << t << ')'
3848 *   << std::endl;
3850 *   "main_loop - setup scratch space");
3851 *  
3852 *   vector_type U;
3853 *  
3854 *   for (auto &it : U)
3855 *   it.reinit(offline_data.partitioner);
3856 *  
3857 *   constexpr auto n_solution_variables =
3859 *  
3860 * @endcode
3861 *
3862 * The function signature of
3864 * for VectorTools::interpolate(). We work around this issue by, first,
3865 * creating a lambda function that for a given position <code>x</code>
3866 * returns just the value of the <code>i</code>th component. This
3869 *
3870 * @code
3871 *   for (unsigned int i = 0; i < n_solution_variables; ++i)
3874 *   [&](const Point<dim> &x) {
3875 *   return initial_values.initial_state(x, t)[i];
3876 *   }),
3877 *   U[i]);
3878 *  
3879 *   for (auto &it : U)
3880 *   it.update_ghost_values();
3881 *  
3882 *   return U;
3883 *   }
3884 *  
3885 * @endcode
3886 *
3887 *
3888 * <a name="Outputandcheckpointing"></a>
3890 *
3891
3892 *
3893 * We checkpoint the current state by doing the precise inverse
3894 * operation to what we discussed for the <a href="Resume">resume
3895 * logic</a>:
3896 *
3897
3898 *
3899 *
3900 * @code
3901 *   template <int dim>
3903 *   const std::string &name,
3904 *   const double t,
3905 *   const unsigned int cycle)
3906 *   {
3907 *   print_head(pcout, "checkpoint computation");
3908 *  
3911 *   solution_transfer(offline_data.dof_handler);
3912 *  
3913 *   std::vector<const LinearAlgebra::distributed::Vector<double> *> vectors;
3914 *   std::transform(U.begin(),
3915 *   U.end(),
3916 *   std::back_inserter(vectors),
3917 *   [](auto &it) { return &it; });
3918 *  
3919 *   solution_transfer.prepare_for_serialization(vectors);
3920 *  
3921 *   discretization.triangulation.save(name + "-checkpoint.mesh");
3922 *  
3923 *   if (Utilities::MPI::this_mpi_process(mpi_communicator) == 0)
3924 *   {
3925 *   std::ofstream file(name + "-checkpoint.metadata", std::ios::binary);
3926 *   boost::archive::binary_oarchive oa(file);
3927 *   oa << t << cycle;
3928 *   }
3929 *   }
3930 *  
3931 * @endcode
3932 *
3934 * stall the main loop for a while. In order to avoid this we use an <a
3935 * href="https://en.wikipedia.org/wiki/Asynchronous_I/O">asynchronous
3936 * IO</a> strategy by creating a background thread that will perform IO
3937 * while the main loop is allowed to continue. In order for this to work
3939 * - Before running the <code>output_worker</code> thread, we have to create
3940 * a copy of the state vector <code>U</code>. We store it in the
3942 * - We have to avoid any MPI communication in the background thread,
3944 * run the postprocessing outside of the worker thread.
3945 *
3946
3947 *
3948 *
3949 * @code
3950 *   template <int dim>
3951 *   void MainLoop<dim>::output(const typename MainLoop<dim>::vector_type &U,
3952 *   const std::string & name,
3953 *   const double t,
3954 *   const unsigned int cycle)
3955 *   {
3956 *   pcout << "MainLoop<dim>::output(t = " << t << ')' << std::endl;
3957 *  
3958 * @endcode
3959 *
3961 * thread performing all the slow IO to disc. In that case we have to
3962 * make sure that the background thread actually finished running. If
3963 * not, we have to wait to for it to finish. We launch said background
3964 * thread with <a
3965 * href="https://en.cppreference.com/w/cpp/thread/async"><code>std::async()</code></a>
3966 * that returns a <a
3967 * href="https://en.cppreference.com/w/cpp/thread/future"><code>std::future</code></a>
3968 * object. This <code>std::future</code> object contains the return
3969 * value of the function, which is in our case simply
3970 * <code>void</code>.
3971 *
3972
3973 *
3974 *
3975 * @code
3976 *   if (background_thread_state.valid())
3977 *   {
3978 *   TimerOutput::Scope timer(computing_timer, "main_loop - stalled output");
3979 *   background_thread_state.wait();
3980 *   }
3981 *  
3982 *   constexpr auto n_solution_variables =
3984 *  
3985 * @endcode
3986 *
3987 * At this point we make a copy of the state vector, run the schlieren
3988 * postprocessor, and run DataOut<dim>::build_patches() The actual
3989 * output code is standard: We create a DataOut instance, attach all
3990 * data vectors we want to output and call
3991 * DataOut<dim>::build_patches(). There is one twist, however. In order
3992 * to perform asynchronous IO on a background thread we create the
3993 * DataOut<dim> object as a shared pointer that we pass on to the
3994 * worker thread to ensure that once we exit this function and the
3995 * worker thread finishes the DataOut<dim> object gets destroyed again.
3996 *
3997
3998 *
3999 *
4000 * @code
4001 *   for (unsigned int i = 0; i < n_solution_variables; ++i)
4002 *   {
4003 *   output_vector[i] = U[i];
4004 *   output_vector[i].update_ghost_values();
4005 *   }
4006 *  
4007 *   schlieren_postprocessor.compute_schlieren(output_vector);
4008 *  
4009 *   std::unique_ptr<DataOut<dim>> data_out = std::make_unique<DataOut<dim>>();
4010 *   data_out->attach_dof_handler(offline_data.dof_handler);
4011 *  
4012 *   for (unsigned int i = 0; i < n_solution_variables; ++i)
4013 *   data_out->add_data_vector(output_vector[i],
4015 *  
4016 *   data_out->add_data_vector(schlieren_postprocessor.schlieren,
4017 *   "schlieren_plot");
4018 *  
4019 *   data_out->build_patches(discretization.mapping,
4020 *   discretization.finite_element.degree - 1);
4021 *  
4022 * @endcode
4023 *
4024 * Next we create a lambda function for the background thread. We <a
4025 * href="https://en.cppreference.com/w/cpp/language/lambda">capture</a>
4026 * the <code>this</code> pointer as well as most of the arguments of
4027 * the output function by value so that we have access to them inside
4028 * the lambda function.
4029 *
4030
4031 *
4033 * in essence creates a local variable inside the lambda function into
4034 * which we "move" the `data_out` variable from above. The way this works
4035 * is that we create a `std::unique_ptr` above that points to the DataOut
4036 * object. But we have no use for it any more after this point: We really
4038 * function implemented in the following few lines. We could have done
4039 * this by using a `std::shared_ptr` above and giving the lambda function
4040 * a copy of that shared pointer; once the current function returns (but
4041 * maybe while the lambda function is still running), our local shared
4042 * pointer would go out of scope and stop pointing at the actual
4043 * object, at which point the lambda function simply becomes the sole
4044 * owner. But using the `std::unique_ptr` is conceptually cleaner as it
4045 * makes it clear that the current function's `data_out` variable isn't
4046 * even pointing to the object any more.
4047 *
4048 * @code
4049 *   auto output_worker =
4050 *   [data_out_copy = std::move(data_out), this, name, t, cycle]() {
4051 *   const DataOutBase::VtkFlags flags(
4053 *   data_out_copy->set_flags(flags);
4054 *  
4055 *   data_out_copy->write_vtu_with_pvtu_record(
4056 *   "", name + "-solution", cycle, mpi_communicator, 6);
4057 *   };
4058 *  
4059 * @endcode
4060 *
4062 * background thread with the help of
4063 * <a
4064 * href="https://en.cppreference.com/w/cpp/thread/async"><code>std::async</code></a>
4065 * function. The function returns a <a
4066 * href="https://en.cppreference.com/w/cpp/thread/future"><code>std::future</code></a>
4067 * object that we can use to query the status of the background thread.
4068 * At this point we can return from the <code>output()</code> function
4069 * and resume with the time stepping in the main loop - the thread will
4070 * run in the background.
4071 *
4072 * @code
4074 *   {
4076 *   std::async(std::launch::async, std::move(output_worker));
4077 *   }
4078 *   else
4079 *   {
4080 *   output_worker();
4081 *   }
4082 *   }
4083 *  
4084 *   } // namespace Step69
4085 *  
4086 * @endcode
4087 *
4088 * And finally, the main function.
4089 *
4090
4091 *
4092 *
4093 * @code
4094 *   int main(int argc, char *argv[])
4095 *   {
4096 *   try
4097 *   {
4098 *   constexpr int dim = 2;
4099 *  
4100 *   using namespace dealii;
4101 *   using namespace Step69;
4102 *  
4104 *  
4105 *   MPI_Comm mpi_communicator(MPI_COMM_WORLD);
4106 *   MainLoop<dim> main_loop(mpi_communicator);
4107 *  
4108 *   main_loop.run();
4109 *   }
4110 *   catch (std::exception &exc)
4111 *   {
4112 *   std::cerr << std::endl
4113 *   << std::endl
4114 *   << "----------------------------------------------------"
4115 *   << std::endl;
4116 *   std::cerr << "Exception on processing: " << std::endl
4117 *   << exc.what() << std::endl
4118 *   << "Aborting!" << std::endl
4119 *   << "----------------------------------------------------"
4120 *   << std::endl;
4121 *   return 1;
4122 *   }
4123 *   catch (...)
4124 *   {
4125 *   std::cerr << std::endl
4126 *   << std::endl
4127 *   << "----------------------------------------------------"
4128 *   << std::endl;
4129 *   std::cerr << "Unknown exception!" << std::endl
4130 *   << "Aborting!" << std::endl
4131 *   << "----------------------------------------------------"
4132 *   << std::endl;
4133 *   return 1;
4134 *   };
4135 *   }
4136 * @endcode
4137<a name="Results"></a>
4138<a name="Results"></a><h1>Results</h1>
4139
4140
4141Running the program with default parameters in release mode takes about 1
4143@verbatim
4144# mpirun -np 4 ./step-69 | tee output
4145Reading parameters and allocating objects... done
4146
4147 ####################################################
4148 ######### #########
4149 ######### create triangulation #########
4150 ######### #########
4151 ####################################################
4152
4153Number of active cells: 36864
4154
4155 ####################################################
4156 ######### #########
4157 ######### compute offline data #########
4158 ######### #########
4159 ####################################################
4160
4161Number of degrees of freedom: 37376
4162
4163 ####################################################
4164 ######### #########
4165 ######### set up time step #########
4166 ######### #########
4167 ####################################################
4168
4169 ####################################################
4170 ######### #########
4171 ######### interpolate initial values #########
4172 ######### #########
4173 ######### #########
4174 ####################################################
4175
4177TimeLoop<dim>::output(t = 0, checkpoint = 0)
4178
4179 ####################################################
4180 ######### #########
4181 ######### enter main loop #########
4182 ######### #########
4183 ######### #########
4184 ####################################################
4185
4186 ####################################################
4187 ######### #########
4188 ######### Cycle 000001 (0.0%) #########
4189 ######### at time t = 0.00000000 #########
4190 ######### #########
4191 ####################################################
4192
4193[...]
4194
4195 ####################################################
4196 ######### #########
4197 ######### Cycle 007553 (100.0%) #########
4198 ######### at time t = 3.99984036 #########
4199 ######### #########
4200 ####################################################
4201
4202TimeLoop<dim>::output(t = 4.00038, checkpoint = 1)
4203
4204+------------------------------------------------------------------------+------------+------------+
4205| Total CPU time elapsed since start | 357s | |
4206| | | |
4207| Section | no. calls | CPU time | % of total |
4208+------------------------------------------------------------+-----------+------------+------------+
4209| discretization - setup | 1 | 0.113s | 0% |
4210| offline_data - assemble lumped mass matrix, and c_ij | 1 | 0.167s | 0% |
4211| offline_data - compute |c_ij|, and n_ij | 1 | 0.00255s | 0% |
4212| offline_data - create sparsity pattern and set up matrices | 1 | 0.0224s | 0% |
4213| offline_data - distribute dofs | 1 | 0.0617s | 0% |
4214| offline_data - fix slip boundary c_ij | 1 | 0.0329s | 0% |
4215| schlieren_postprocessor - compute schlieren plot | 201 | 0.811s | 0.23% |
4216| schlieren_postprocessor - prepare scratch space | 1 | 7.6e-05s | 0% |
4217| time_loop - setup scratch space | 1 | 0.127s | 0% |
4218| time_loop - stalled output | 200 | 0.000685s | 0% |
4219| time_step - 1 compute d_ij | 7553 | 240s | 67% |
4220| time_step - 2 compute d_ii, and tau_max | 7553 | 11.5s | 3.2% |
4221| time_step - 3 perform update | 7553 | 101s | 28% |
4222| time_step - 4 fix boundary states | 7553 | 0.724s | 0.2% |
4223| time_step - prepare scratch space | 1 | 0.00245s | 0% |
4224+------------------------------------------------------------+-----------+------------+------------+
4226
4228thirds of the execution time computing the graph viscosity d_ij and about a
4233
4234<img src="https://www.dealii.org/images/steps/developer/step-69.coarse.gif" alt="" height="300">
4235
4238here is a "reference" computation with a second-order method and about 9.5M
4239gridpoints (<a href="https://github.com/conservation-laws/ryujin">github
4240project page</a>):
4241
4242<img src="https://www.dealii.org/images/steps/developer/step-69.2nd-order.t400.jpg" alt="" height="300">
4243
4244So, we give the first-order method a second chance and run it with about
42452.4M gridpoints on a small compute server:
4246
4247@verbatim
4248# mpirun -np 16 ./step-69 | tee output
4249
4250[...]
4251
4252 ####################################################
4253 ######### #########
4254 ######### Cycle 070216 (100.0%) #########
4255 ######### at time t = 3.99999231 #########
4256 ######### #########
4257 ####################################################
4258
4259TimeLoop<dim>::output(t = 4.00006, checkpoint = 1)
4260
4261[...]
4262
4263+------------------------------------------------------------------------+------------+------------+
4264| Total wallclock time elapsed since start | 6.75e+03s | |
4265| | | |
4266| Section | no. calls | wall time | % of total |
4267+------------------------------------------------------------+-----------+------------+------------+
4268| discretization - setup | 1 | 1.97s | 0% |
4269| offline_data - assemble lumped mass matrix, and c_ij | 1 | 1.19s | 0% |
4270| offline_data - compute |c_ij|, and n_ij | 1 | 0.0172s | 0% |
4271| offline_data - create sparsity pattern and set up matrices | 1 | 0.413s | 0% |
4272| offline_data - distribute dofs | 1 | 1.05s | 0% |
4273| offline_data - fix slip boundary c_ij | 1 | 0.252s | 0% |
4274| schlieren_postprocessor - compute schlieren plot | 201 | 1.82s | 0% |
4275| schlieren_postprocessor - prepare scratch space | 1 | 0.000497s | 0% |
4276| time_loop - setup scratch space | 1 | 1.45s | 0% |
4277| time_loop - stalled output | 200 | 0.00342s | 0% |
4278| time_step - 1 compute d_ij | 70216 | 4.38e+03s | 65% |
4279| time_step - 2 compute d_ii, and tau_max | 70216 | 419s | 6.2% |
4280| time_step - 3 perform update | 70216 | 1.87e+03s | 28% |
4281| time_step - 4 fix boundary states | 70216 | 24s | 0.36% |
4282| time_step - prepare scratch space | 1 | 0.0227s | 0% |
4283+------------------------------------------------------------+-----------+------------+------------+
4285
4287
4288<img src="https://www.dealii.org/images/steps/developer/step-69.fine.gif" alt="" height="300">
4289
4290That's substantially better, although of course at the price of having run
4291the code for roughly 2 hours on 16 cores.
4292
4293
4294
4295<a name="extensions"></a>
4296<a name="Possibilitiesforextensions"></a><h3>Possibilities for extensions</h3>
4297
4298
4299The program showcased here is really only first-order accurate, as
4300discussed above. The pictures above illustrate how much diffusion that
4301introduces and how far the solution is from one that actually resolves
4302the features we care about.
4303
4304This can be fixed, but it would exceed what a *tutorial* is about.
4305Nevertheless, it is worth showing what one can achieve by adding a
4306second-order scheme. For example, here is a video computed with <a
4307href=https://conservation-laws.org/>the following research code</a>
4308that shows (with a different color scheme) a 2d simulation that corresponds
4309to the cases shown above:
4310
4311@htmlonly
4312<p align="center">
4313 <iframe width="560" height="315" src="https://www.youtube.com/embed/xIwJZlsXpZ4"
4314 frameborder="0"
4315 allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
4316 allowfullscreen></iframe>
4317 </p>
4318@endhtmlonly
4319
4320This simulation was done with 38 million degrees of freedom
4321(continuous @f$Q_1@f$ finite elements) per component of the solution
4322vector. The exquisite detail of the solution is remarkable for these
4323kinds of simulations, including in the sub-sonic region behind the
4324obstacle.
4325
4326One can also with relative ease further extend this to the 3d case:
4327
4328@htmlonly
4329<p align="center">
4330 <iframe width="560" height="315" src="https://www.youtube.com/embed/vBCRAF_c8m8"
4331 frameborder="0"
4332 allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
4333 allowfullscreen></iframe>
4334 </p>
4335@endhtmlonly
4336
4337Solving this becomes expensive, however: The simulation was done with
43381,817 million degrees of freedom (continuous @f$Q_1@f$ finite elements)
4339per component (for a total of 9.09 billion spatial degrees of freedom)
4340and ran on 30,720 MPI ranks. The code achieved an average throughput of
4341969M grid points per second (0.04M gridpoints per second per CPU). The
4342front and back wall show a "Schlieren plot": the magnitude of the
4343gradient of the density on an exponential scale from white (low) to
4344black (high). All other cutplanes and the surface of the obstacle show
4345the magnitude of the vorticity on a white (low) - yellow (medium) -
4346red (high) scale. (The scales of the individual cutplanes have been
4347adjusted for a nicer visualization.)
4348 *
4349 *
4350<a name="PlainProg"></a>
4351<h1> The plain program</h1>
4352@include "step-69.cc"
4353*/
iterator begin() const
Definition array_view.h:594
iterator end() const
Definition array_view.h:603
value_type * data() const noexcept
Definition array_view.h:553
void reinit(value_type *starting_element, const std::size_t n_elements)
Definition array_view.h:413
std::size_t size() const
Definition array_view.h:576
virtual void build_patches(const unsigned int n_subdivisions=0)
Definition data_out.cc:1063
static void initialize(const std::string &filename="", const std::string &output_filename="", const ParameterHandler::OutputStyle output_style_for_output_filename=ParameterHandler::Short, ParameterHandler &prm=ParameterAcceptor::prm, const ParameterHandler::OutputStyle output_style_for_filename=ParameterHandler::DefaultStyle)
boost::signals2::signal< void()> parse_parameters_call_back
void add_parameter(const std::string &entry, ParameterType &parameter, const std::string &documentation="", ParameterHandler &prm_=prm, const Patterns::PatternBase &pattern= *Patterns::Tools::Convert< ParameterType >::to_pattern())
@ cpu_and_wall_times
Definition timer.h:656
#define DEAL_II_ALWAYS_INLINE
Definition config.h:106
Point< 2 > second
Definition grid_out.cc:4616
Point< 2 > first
Definition grid_out.cc:4615
unsigned int level
Definition grid_out.cc:4618
unsigned int vertex_indices[2]
__global__ void set(Number *val, const Number s, const size_type N)
#define Assert(cond, exc)
static ::ExceptionBase & ExcInternalError()
static ::ExceptionBase & ExcMessage(std::string arg1)
#define AssertThrow(cond, exc)
typename ActiveSelector::cell_iterator cell_iterator
void loop(ITERATOR begin, std_cxx20::type_identity_t< ITERATOR > end, DOFINFO &dinfo, INFOBOX &info, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &cell_worker, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &boundary_worker, const std::function< void(DOFINFO &, DOFINFO &, typename INFOBOX::CellInfo &, typename INFOBOX::CellInfo &)> &face_worker, ASSEMBLER &assembler, const LoopControl &lctrl=LoopControl())
Definition loop.h:439
@ update_values
Shape function values.
@ update_normal_vectors
Normal vectors.
@ update_JxW_values
Transformed quadrature weights.
@ update_gradients
Shape function gradients.
@ update_quadrature_points
Transformed quadrature points.
IteratorRange< BaseIterator > make_iterator_range(const BaseIterator &begin, const std_cxx20::type_identity_t< BaseIterator > &end)
const Event initial
Definition event.cc:65
void interpolate(const DoFHandler< dim, spacedim > &dof1, const InVector &u1, const DoFHandler< dim, spacedim > &dof2, OutVector &u2)
@ valid
Iterator points to a valid object.
@ matrix
Contents is actually a matrix.
@ symmetric
Matrix is symmetric.
@ diagonal
Matrix is diagonal.
@ general
No special properties.
double norm(const FEValuesBase< dim > &fe, const ArrayView< const std::vector< Tensor< 1, dim > > > &Du)
Definition divergence.h:472
Point< spacedim > point(const gp_Pnt &p, const double tolerance=1e-10)
Definition utilities.cc:189
SymmetricTensor< 2, dim, Number > C(const Tensor< 2, dim, Number > &F)
SymmetricTensor< 2, dim, Number > E(const Tensor< 2, dim, Number > &F)
SymmetricTensor< 2, dim, Number > e(const Tensor< 2, dim, Number > &F)
Tensor< 2, dim, Number > l(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
SymmetricTensor< 2, dim, Number > b(const Tensor< 2, dim, Number > &F)
SymmetricTensor< 2, dim, Number > d(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
void call(const std::function< RT()> &function, internal::return_value< RT > &ret_val)
VectorType::value_type * end(VectorType &V)
VectorType::value_type * begin(VectorType &V)
void free(T *&pointer)
Definition cuda.h:97
T sum(const T &t, const MPI_Comm mpi_communicator)
T max(const T &t, const MPI_Comm mpi_communicator)
T min(const T &t, const MPI_Comm mpi_communicator)
unsigned int this_mpi_process(const MPI_Comm mpi_communicator)
Definition mpi.cc:161
T scatter(const MPI_Comm comm, const std::vector< T > &objects_to_send, const unsigned int root_process=0)
std::string int_to_string(const unsigned int value, const unsigned int digits=numbers::invalid_unsigned_int)
Definition utilities.cc:471
void interpolate(const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof, const Function< spacedim, typename VectorType::value_type > &function, VectorType &vec, const ComponentMask &component_mask=ComponentMask())
void project(const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof, const AffineConstraints< typename VectorType::value_type > &constraints, const Quadrature< dim > &quadrature, const Function< spacedim, typename VectorType::value_type > &function, VectorType &vec, const bool enforce_zero_boundary=false, const Quadrature< dim - 1 > &q_boundary=(dim > 1 ? QGauss< dim - 1 >(2) :Quadrature< dim - 1 >(0)), const bool project_to_boundary_first=false)
void run(const Iterator &begin, const std_cxx20::type_identity_t< Iterator > &end, Worker worker, Copier copier, const ScratchData &sample_scratch_data, const CopyData &sample_copy_data, const unsigned int queue_length, const unsigned int chunk_size)
void run(const std::vector< std::vector< Iterator > > &colored_iterators, Worker worker, Copier copier, const ScratchData &sample_scratch_data, const CopyData &sample_copy_data, const unsigned int queue_length=2 *MultithreadInfo::n_threads(), const unsigned int chunk_size=8)
bool check(const ConstraintKinds kind_in, const unsigned int dim)
long double gamma(const unsigned int n)
void copy(const T *begin, const T *end, U *dest)
int(&) functions(const void *v1, const void *v2)
void assemble(const MeshWorker::DoFInfoBox< dim, DOFINFO > &dinfo, A *assembler)
Definition loop.h:71
void reinit(MatrixBlock< MatrixType > &v, const BlockSparsityPattern &p)
void apply_to_subranges(const tbb::blocked_range< Iterator > &range, const Function &f)
Definition parallel.h:353
void apply_to_subranges(const Iterator &begin, const std_cxx20::type_identity_t< Iterator > &end, const Function &f, const unsigned int grainsize)
Definition parallel.h:435
boost::integer_range< IncrementableType > iota_view
Definition iota_view.h:46
STL namespace.
::VectorizedArray< Number, width > exp(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > min(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > max(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > sqrt(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > pow(const ::VectorizedArray< Number, width > &, const Number p)
::VectorizedArray< Number, width > abs(const ::VectorizedArray< Number, width > &)
const ::parallel::distributed::Triangulation< dim, spacedim > * triangulation
void swap(SmartPointer< T, P > &t1, SmartPointer< T, Q > &t2)
DEAL_II_HOST constexpr Number trace(const SymmetricTensor< 2, dim2, Number > &)
void advance(std::tuple< I1, I2 > &t, const unsigned int n)
void gather(VectorizedArray< Number, width > &out, const std::array< Number *, width > &ptrs, const unsigned int offset)