481 *
const unsigned int = 0)
const override
487 *
template <
typename number>
490 *
const unsigned int = 0)
const
511 *
const unsigned int = 0)
const override;
513 *
template <
typename number>
515 *
const unsigned int = 0)
const;
517 *
template <
typename number>
527 *
template <
typename number>
537 *
for (
int d = 0;
d < dim; ++
d)
548 *
template <
typename number>
551 *
const unsigned int)
const
556 *
for (
int d = 0;
d < dim; ++
d)
557 *
if (p[d][i] < -0.5)
559 *
return_value[i] = 100.0;
564 *
return return_value;
570 *
template <
typename number>
575 *
for (
unsigned int i = 0; i < points.size(); ++i)
576 *
average +=
value(points[i]);
577 *
average /= points.
size();
585 *
template <
typename number>
586 *
std::shared_ptr<Table<2, VectorizedArray<number>>>
591 *
std::make_shared<Table<2, VectorizedArray<number>>>();
596 *
const unsigned int n_q_points = fe_eval.n_q_points;
600 *
for (
unsigned int cell = 0; cell <
n_cells; ++cell)
605 *
for (
unsigned int q = 0;
q < n_q_points; ++
q)
606 *
average_value +=
value(fe_eval.quadrature_point(
q));
607 *
average_value /= n_q_points;
620 * <a name=
"Runtimeparameters"></a>
652 *
bool Settings::try_parse(
const std::string &
prm_filename)
656 *
prm.declare_entry(
"n_steps",
659 *
"Number of adaptive refinement steps.");
660 *
prm.declare_entry(
"smoother dampen",
663 *
"Dampen factor for the smoother.");
664 *
prm.declare_entry(
"smoother steps",
667 *
"Number of smoother steps.");
668 *
prm.declare_entry(
"solver",
671 *
"Switch between matrix-free GMG, "
672 *
"matrix-based GMG, and AMG.");
673 *
prm.declare_entry(
"output",
676 *
"Output graphical results.");
680 *
std::cout <<
"**** Error: No input file provided!\n"
681 *
<<
"**** Error: Call this program as './step-50 input.prm\n"
683 *
<<
"**** You may want to use one of the input files in this\n"
684 *
<<
"**** directory, or use the following default values\n"
685 *
<<
"**** to create an input file:\n";
695 *
catch (std::exception &e)
698 *
std::cerr <<
e.what() << std::endl;
702 *
if (prm.get(
"solver") ==
"MF")
704 *
else if (prm.get(
"solver") ==
"MB")
706 *
else if (prm.get(
"solver") ==
"AMG")
707 *
this->solver =
amg;
711 *
this->dimension = prm.get_integer(
"dim");
712 *
this->
n_steps = prm.get_integer(
"n_steps");
715 *
this->output = prm.get_bool(
"output");
725 * <a name=
"LaplaceProblemclass"></a>
734 *
the polynomial degree
is a
template parameter
of this class.
This is
738 *
template <
int dim,
int degree>
753 *
using MatrixType = LA::MPI::SparseMatrix;
754 *
using VectorType = LA::MPI::Vector;
780 *
void refine_grid();
798 *
MatrixType system_matrix;
800 *
VectorType solution;
821 *
template <
int dim,
int degree>
828 *
(
settings.solver == Settings::amg) ?
846 * <a name=
"LaplaceProblemsetup_system"></a>
863 *
dof_handler.distribute_dofs(fe);
866 *
locally_owned_dofs = dof_handler.locally_owned_dofs();
868 *
solution.
reinit(locally_owned_dofs, mpi_communicator);
875 *
constraints.close();
879 *
case Settings::gmg_mf:
882 *
additional_data.tasks_parallel_scheme =
884 *
additional_data.mapping_update_flags =
886 *
std::shared_ptr<MatrixFree<dim, double>>
mf_storage =
887 *
std::make_shared<MatrixFree<dim, double>>();
903 *
case Settings::gmg_mb:
904 *
case Settings::amg:
911 *
locally_owned_dofs,
915 *
system_matrix.
reinit(locally_owned_dofs,
916 *
locally_owned_dofs,
921 *
locally_owned_dofs,
940 * <a name=
"LaplaceProblemsetup_multigrid"></a>
948 * distributed sparsity patterns.
959 *
template <
int dim,
int degree>
964 *
dof_handler.distribute_mg_dofs();
966 *
mg_constrained_dofs.clear();
967 *
mg_constrained_dofs.initialize(dof_handler);
970 *
mg_constrained_dofs.make_zero_boundary_constraints(dof_handler, boundary_ids);
972 *
const unsigned int n_levels =
triangulation.n_global_levels();
976 *
case Settings::gmg_mf:
987 *
level_constraints.add_lines(
988 *
mg_constrained_dofs.get_boundary_indices(
level));
989 *
level_constraints.close();
992 *
additional_data.tasks_parallel_scheme =
994 *
additional_data.mapping_update_flags =
997 *
additional_data.mg_level =
level;
1002 *
level_constraints,
1007 *
mg_constrained_dofs,
1020 *
case Settings::gmg_mb:
1040 *
dof_handler.locally_owned_mg_dofs(
level),
1045 *
dof_handler.locally_owned_mg_dofs(
level),
1046 *
dof_handler.locally_owned_mg_dofs(
level),
1048 *
mpi_communicator);
1051 *
dof_handler.locally_owned_mg_dofs(
level),
1052 *
dof_handler.locally_owned_mg_dofs(
level),
1054 *
mpi_communicator);
1066 *
mg_constrained_dofs,
1072 *
dof_handler.locally_owned_mg_dofs(
level),
1077 *
dof_handler.locally_owned_mg_dofs(
level),
1078 *
dof_handler.locally_owned_mg_dofs(
level),
1080 *
mpi_communicator);
1083 *
dof_handler.locally_owned_mg_dofs(
level),
1084 *
dof_handler.locally_owned_mg_dofs(
level),
1086 *
mpi_communicator);
1089 *
mg_constrained_dofs,
1109 * <a name=
"LaplaceProblemassemble_system"></a>
1127 *
template <
int dim,
int degree>
1139 *
const unsigned int dofs_per_cell = fe.n_dofs_per_cell();
1145 *
std::vector<types::global_dof_index> local_dof_indices(dofs_per_cell);
1149 *
std::vector<double>
rhs_values(n_q_points);
1151 *
for (
const auto &cell : dof_handler.active_cell_iterators())
1152 *
if (cell->is_locally_owned())
1157 *
fe_values.
reinit(cell);
1160 *
coefficient.average_value(fe_values.get_quadrature_points());
1161 *
rhs.value_list(fe_values.get_quadrature_points(),
rhs_values);
1164 *
for (
unsigned int i = 0; i < dofs_per_cell; ++i)
1166 *
for (
unsigned int j = 0;
j < dofs_per_cell; ++
j)
1169 *
fe_values.shape_grad(i,
q_point) *
1170 *
fe_values.shape_grad(
j,
q_point) *
1174 *
fe_values.shape_value(i,
q_point) *
1179 *
cell->get_dof_indices(local_dof_indices);
1180 *
constraints.distribute_local_to_global(cell_matrix,
1182 *
local_dof_indices,
1195 * <a name=
"LaplaceProblemassemble_multigrid"></a>
1219 *
const unsigned int dofs_per_cell = fe.n_dofs_per_cell();
1224 *
std::vector<types::global_dof_index> local_dof_indices(dofs_per_cell);
1236 *
mg_constrained_dofs.get_refinement_edge_indices(
level));
1238 *
mg_constrained_dofs.get_boundary_indices(
level));
1243 *
for (
const auto &cell : dof_handler.cell_iterators())
1244 *
if (cell->level_subdomain_id() ==
triangulation.locally_owned_subdomain())
1247 *
fe_values.
reinit(cell);
1250 *
coefficient.average_value(fe_values.get_quadrature_points());
1253 *
for (
unsigned int i = 0; i < dofs_per_cell; ++i)
1254 *
for (
unsigned int j = 0;
j < dofs_per_cell; ++
j)
1259 *
cell->get_mg_dof_indices(local_dof_indices);
1262 *
cell_matrix, local_dof_indices,
mg_matrix[cell->level()]);
1264 *
for (
unsigned int i = 0; i < dofs_per_cell; ++i)
1265 *
for (
unsigned int j = 0;
j < dofs_per_cell; ++
j)
1266 *
if (mg_constrained_dofs.is_interface_matrix_entry(
1267 *
cell->level(), local_dof_indices[i], local_dof_indices[
j]))
1269 *
local_dof_indices[
j],
1273 *
for (
unsigned int i = 0; i <
triangulation.n_global_levels(); ++i)
1285 * <a name=
"LaplaceProblemassemble_rhs"></a>
1292 *
framework,
we don't have to assemble the matrix and can get away
1293 * with only assembling the right hand side. We could do this by extracting the
1294 * code from the `assemble_system()` function above that deals with the right
1295 * hand side, but we decide instead to go all in on the matrix-free approach and
1296 * do the assembly using that way as well.
1300 * The result is a function that is similar
1301 * to the one found in the "Use FEEvaluation::read_dof_values_plain()
1302 * to avoid resolving constraints" subsection in the "Possibilities
1303 * for extensions" section of @ref step_37 "step-37".
1307 * The reason for this function is that the MatrixFree operators do not take
1308 * into account non-homogeneous Dirichlet constraints, instead treating all
1309 * Dirichlet constraints as homogeneous. To account for this, the right-hand
1310 * side here is assembled as the residual @f$r_0 = f-Au_0@f$, where @f$u_0@f$ is a
1311 * zero vector except in the Dirichlet values. Then when solving, we have that
1312 * the solution is @f$u = u_0 + A^{-1}r_0@f$. This can be seen as a Newton
1313 * iteration on a linear system with initial guess @f$u_0@f$. The CG solve in the
1314 * `solve()` function below computes @f$A^{-1}r_0@f$ and the call to
1315 * `constraints.distribute()` (which directly follows) adds the @f$u_0@f$.
1319 * Obviously, since we are considering a problem with zero Dirichlet boundary,
1320 * we could have taken a similar approach to @ref step_37 "step-37" `assemble_rhs()`, but this
1321 * additional work allows us to change the problem declaration if we so
1326 * This function has two parts in the integration loop: applying the negative
1327 * of matrix @f$A@f$ to @f$u_0@f$ by submitting the negative of the gradient, and adding
1328 * the right-hand side contribution by submitting the value @f$f@f$. We must be sure
1329 * to use `read_dof_values_plain()` for evaluating @f$u_0@f$ as `read_dof_values()`
1330 * would set all Dirichlet values to zero.
1334 * Finally, the system_rhs vector is of type LA::MPI::Vector, but the
1335 * MatrixFree class only work for
1336 * LinearAlgebra::distributed::Vector. Therefore we must
1337 * compute the right-hand side using MatrixFree functionality and then
1338 * use the functions in the `ChangeVectorType` namespace to copy it to
1342 * template <int dim, int degree>
1343 * void LaplaceProblem<dim, degree>::assemble_rhs()
1345 * TimerOutput::Scope timing(computing_timer, "Assemble right-hand side");
1347 * MatrixFreeActiveVector solution_copy;
1348 * MatrixFreeActiveVector right_hand_side_copy;
1349 * mf_system_matrix.initialize_dof_vector(solution_copy);
1350 * mf_system_matrix.initialize_dof_vector(right_hand_side_copy);
1352 * solution_copy = 0.;
1353 * constraints.distribute(solution_copy);
1354 * solution_copy.update_ghost_values();
1355 * right_hand_side_copy = 0;
1356 * const Table<2, VectorizedArray<double>> &coefficient =
1357 * *(mf_system_matrix.get_coefficient());
1359 * RightHandSide<dim> right_hand_side_function;
1361 * FEEvaluation<dim, degree, degree + 1, 1, double> phi(
1362 * *mf_system_matrix.get_matrix_free());
1364 * for (unsigned int cell = 0;
1365 * cell < mf_system_matrix.get_matrix_free()->n_cell_batches();
1369 * phi.read_dof_values_plain(solution_copy);
1370 * phi.evaluate(EvaluationFlags::gradients);
1372 * for (unsigned int q = 0; q < phi.n_q_points; ++q)
1374 * phi.submit_gradient(-1.0 *
1375 * (coefficient(cell, 0) * phi.get_gradient(q)),
1378 * right_hand_side_function.value(phi.quadrature_point(q)), q);
1381 * phi.integrate_scatter(EvaluationFlags::values |
1382 * EvaluationFlags::gradients,
1383 * right_hand_side_copy);
1386 * right_hand_side_copy.compress(VectorOperation::add);
1388 * ChangeVectorTypes::copy(right_hand_side, right_hand_side_copy);
1396 * <a name="LaplaceProblemsolve"></a>
1397 * <h4>LaplaceProblem::solve()</h4>
1401 * Here we set up the multigrid preconditioner, test the timing of a single
1402 * V-cycle, and solve the linear system. Unsurprisingly, this is one of the
1403 * places where the three methods differ the most.
1406 * template <int dim, int degree>
1407 * void LaplaceProblem<dim, degree>::solve()
1409 * TimerOutput::Scope timing(computing_timer, "Solve");
1411 * SolverControl solver_control(1000, 1.e-10 * right_hand_side.l2_norm());
1412 * solver_control.enable_history_data();
1418 * The solver for the matrix-free GMG method is similar to @ref step_37 "step-37", apart
1419 * from adding some interface matrices in complete analogy to @ref step_16 "step-16".
1422 * switch (settings.solver)
1424 * case Settings::gmg_mf:
1426 * computing_timer.enter_subsection("Solve: Preconditioner setup");
1428 * MGTransferMatrixFree<dim, float> mg_transfer(mg_constrained_dofs);
1429 * mg_transfer.build(dof_handler);
1431 * SolverControl coarse_solver_control(1000, 1e-12, false, false);
1432 * SolverCG<MatrixFreeLevelVector> coarse_solver(coarse_solver_control);
1433 * PreconditionIdentity identity;
1434 * MGCoarseGridIterativeSolver<MatrixFreeLevelVector,
1435 * SolverCG<MatrixFreeLevelVector>,
1436 * MatrixFreeLevelMatrix,
1437 * PreconditionIdentity>
1438 * coarse_grid_solver(coarse_solver, mf_mg_matrix[0], identity);
1440 * using Smoother = PreconditionJacobi<MatrixFreeLevelMatrix>;
1441 * MGSmootherPrecondition<MatrixFreeLevelMatrix,
1443 * MatrixFreeLevelVector>
1445 * smoother.initialize(mf_mg_matrix,
1446 * typename Smoother::AdditionalData(
1447 * settings.smoother_dampen));
1448 * smoother.set_steps(settings.smoother_steps);
1450 * mg::Matrix<MatrixFreeLevelVector> mg_m(mf_mg_matrix);
1453 * MatrixFreeOperators::MGInterfaceOperator<MatrixFreeLevelMatrix>>
1454 * mg_interface_matrices;
1455 * mg_interface_matrices.resize(0, triangulation.n_global_levels() - 1);
1456 * for (unsigned int level = 0; level < triangulation.n_global_levels();
1458 * mg_interface_matrices[level].initialize(mf_mg_matrix[level]);
1459 * mg::Matrix<MatrixFreeLevelVector> mg_interface(mg_interface_matrices);
1461 * Multigrid<MatrixFreeLevelVector> mg(
1462 * mg_m, coarse_grid_solver, mg_transfer, smoother, smoother);
1463 * mg.set_edge_matrices(mg_interface, mg_interface);
1465 * PreconditionMG<dim,
1466 * MatrixFreeLevelVector,
1467 * MGTransferMatrixFree<dim, float>>
1468 * preconditioner(dof_handler, mg, mg_transfer);
1472 * Copy the solution vector and right-hand side from LA::MPI::Vector
1473 * to LinearAlgebra::distributed::Vector so that we can solve.
1476 * MatrixFreeActiveVector solution_copy;
1477 * MatrixFreeActiveVector right_hand_side_copy;
1478 * mf_system_matrix.initialize_dof_vector(solution_copy);
1479 * mf_system_matrix.initialize_dof_vector(right_hand_side_copy);
1481 * ChangeVectorTypes::copy(solution_copy, solution);
1482 * ChangeVectorTypes::copy(right_hand_side_copy, right_hand_side);
1483 * computing_timer.leave_subsection("Solve: Preconditioner setup");
1487 * Timing for 1 V-cycle.
1491 * TimerOutput::Scope timing(computing_timer,
1492 * "Solve: 1 multigrid V-cycle");
1493 * preconditioner.vmult(solution_copy, right_hand_side_copy);
1495 * solution_copy = 0.;
1499 * Solve the linear system, update the ghost values of the solution,
1500 * copy back to LA::MPI::Vector and distribute constraints.
1504 * SolverCG<MatrixFreeActiveVector> solver(solver_control);
1506 * TimerOutput::Scope timing(computing_timer, "Solve: CG");
1507 * solver.solve(mf_system_matrix,
1509 * right_hand_side_copy,
1513 * solution_copy.update_ghost_values();
1514 * ChangeVectorTypes::copy(solution, solution_copy);
1515 * constraints.distribute(solution);
1522 * Solver for the matrix-based GMG method, similar to @ref step_16 "step-16", only
1523 * using a Jacobi smoother instead of a SOR smoother (which is not
1524 * implemented in parallel).
1527 * case Settings::gmg_mb:
1529 * computing_timer.enter_subsection("Solve: Preconditioner setup");
1531 * MGTransferPrebuilt<VectorType> mg_transfer(mg_constrained_dofs);
1532 * mg_transfer.build(dof_handler);
1534 * SolverControl coarse_solver_control(1000, 1e-12, false, false);
1535 * SolverCG<VectorType> coarse_solver(coarse_solver_control);
1536 * PreconditionIdentity identity;
1537 * MGCoarseGridIterativeSolver<VectorType,
1538 * SolverCG<VectorType>,
1540 * PreconditionIdentity>
1541 * coarse_grid_solver(coarse_solver, mg_matrix[0], identity);
1543 * using Smoother = LA::MPI::PreconditionJacobi;
1544 * MGSmootherPrecondition<MatrixType, Smoother, VectorType> smoother;
1546 * #ifdef USE_PETSC_LA
1547 * smoother.initialize(mg_matrix);
1549 * settings.smoother_dampen == 1.0,
1550 * ExcNotImplemented(
1553 * smoother.initialize(mg_matrix, settings.smoother_dampen);
1556 * smoother.set_steps(settings.smoother_steps);
1558 * mg::Matrix<VectorType> mg_m(mg_matrix);
1559 * mg::Matrix<VectorType> mg_in(mg_interface_in);
1560 * mg::Matrix<VectorType> mg_out(mg_interface_in);
1562 * Multigrid<VectorType> mg(
1563 * mg_m, coarse_grid_solver, mg_transfer, smoother, smoother);
1564 * mg.set_edge_matrices(mg_out, mg_in);
1567 * PreconditionMG<dim, VectorType, MGTransferPrebuilt<VectorType>>
1568 * preconditioner(dof_handler, mg, mg_transfer);
1570 * computing_timer.leave_subsection("Solve:
Preconditioner setup
");
1574 * Timing for 1 V-cycle.
1578 * TimerOutput::Scope timing(computing_timer,
1579 * "Solve: 1 multigrid V-cycle
");
1580 * preconditioner.vmult(solution, right_hand_side);
1586 * Solve the linear system and distribute constraints.
1590 * SolverCG<VectorType> solver(solver_control);
1592 * TimerOutput::Scope timing(computing_timer, "Solve: CG
");
1593 * solver.solve(system_matrix,
1599 * constraints.distribute(solution);
1606 * Solver for the AMG method, similar to @ref step_40 "step-40
".
1609 * case Settings::amg:
1613 * PreconditionAMG preconditioner;
1614 * PreconditionAMG::AdditionalData Amg_data;
1616 * #ifdef USE_PETSC_LA
1617 * Amg_data.symmetric_operator = true;
1619 * Amg_data.elliptic = true;
1620 * Amg_data.smoother_type = "Jacobi
";
1621 * Amg_data.higher_order_elements = true;
1622 * Amg_data.smoother_sweeps = settings.smoother_steps;
1623 * Amg_data.aggregation_threshold = 0.02;
1626 * Amg_data.output_details = false;
1628 * preconditioner.initialize(system_matrix, Amg_data);
1633 * Timing for 1 V-cycle.
1637 * TimerOutput::Scope timing(computing_timer,
1638 * "Solve: 1 multigrid V-cycle
");
1639 * preconditioner.vmult(solution, right_hand_side);
1645 * Solve the linear system and distribute constraints.
1649 * SolverCG<VectorType> solver(solver_control);
1651 * TimerOutput::Scope timing(computing_timer, "Solve: CG
");
1652 * solver.solve(system_matrix,
1657 * constraints.distribute(solution);
1663 * Assert(false, ExcInternalError());
1666 * pcout << " Number
of CG
iterations:
" << solver_control.last_step()
1675 * <h3>The error estimator</h3>
1679 * We use the FEInterfaceValues class to assemble an error estimator to decide
1680 * which cells to refine. See the exact definition of the cell and face
1681 * integrals in the introduction. To use the method, we define Scratch and
1682 * Copy objects for the MeshWorker::mesh_loop() with much of the following code
1683 * being in essence as was set up in @ref step_12 "step-12
" already (or at least similar in
1687 * template <int dim>
1688 * struct ScratchData
1690 * ScratchData(const Mapping<dim> & mapping,
1691 * const FiniteElement<dim> &fe,
1692 * const unsigned int quadrature_degree,
1693 * const UpdateFlags update_flags,
1694 * const UpdateFlags interface_update_flags)
1695 * : fe_values(mapping, fe, QGauss<dim>(quadrature_degree), update_flags)
1696 * , fe_interface_values(mapping,
1698 * QGauss<dim - 1>(quadrature_degree),
1699 * interface_update_flags)
1703 * ScratchData(const ScratchData<dim> &scratch_data)
1704 * : fe_values(scratch_data.fe_values.get_mapping(),
1705 * scratch_data.fe_values.get_fe(),
1706 * scratch_data.fe_values.get_quadrature(),
1707 * scratch_data.fe_values.get_update_flags())
1708 * , fe_interface_values(scratch_data.fe_values.get_mapping(),
1709 * scratch_data.fe_values.get_fe(),
1710 * scratch_data.fe_interface_values.get_quadrature(),
1711 * scratch_data.fe_interface_values.get_update_flags())
1714 * FEValues<dim> fe_values;
1715 * FEInterfaceValues<dim> fe_interface_values;
1723 * : cell_index(numbers::invalid_unsigned_int)
1729 * unsigned int cell_indices[2];
1733 * unsigned int cell_index;
1735 * std::vector<FaceData> face_data;
1739 * template <int dim, int degree>
1740 * void LaplaceProblem<dim, degree>::estimate()
1742 * TimerOutput::Scope timing(computing_timer, "Estimate");
1744 * VectorType temp_solution;
1745 * temp_solution.reinit(locally_owned_dofs,
1746 * locally_relevant_dofs,
1747 * mpi_communicator);
1748 * temp_solution = solution;
1750 * const Coefficient<dim> coefficient;
1752 * estimated_error_square_per_cell.reinit(triangulation.n_active_cells());
1754 * using Iterator = typename DoFHandler<dim>::active_cell_iterator;
1758 * Assembler for cell residual @f$h^2 \| f + \epsilon \triangle u \|_K^2@f$
1761 * auto cell_worker = [&](const Iterator & cell,
1762 * ScratchData<dim> &scratch_data,
1763 * CopyData & copy_data) {
1764 * FEValues<dim> &fe_values = scratch_data.fe_values;
1765 * fe_values.reinit(cell);
1767 * RightHandSide<dim> rhs;
1768 * const double rhs_value = rhs.value(cell->center());
1770 * const double nu = coefficient.value(cell->center());
1772 * std::vector<Tensor<2, dim>> hessians(fe_values.n_quadrature_points);
1773 * fe_values.get_function_hessians(temp_solution, hessians);
1775 * copy_data.cell_index = cell->active_cell_index();
1777 * double residual_norm_square = 0.;
1778 * for (unsigned k = 0; k < fe_values.n_quadrature_points; ++k)
1780 * const double residual = (rhs_value + nu * trace(hessians[k]));
1781 * residual_norm_square += residual * residual * fe_values.JxW(k);
1785 * cell->diameter() * cell->diameter() * residual_norm_square;
1790 * Assembler for face term @f$\sum_F h_F \| \jump{\epsilon \nabla u \cdot n}
1794 * auto face_worker = [&](const Iterator & cell,
1795 * const unsigned int &f,
1796 * const unsigned int &sf,
1797 * const Iterator & ncell,
1798 * const unsigned int &nf,
1799 * const unsigned int &nsf,
1800 * ScratchData<dim> & scratch_data,
1801 * CopyData & copy_data) {
1802 * FEInterfaceValues<dim> &fe_interface_values =
1803 * scratch_data.fe_interface_values;
1804 * fe_interface_values.reinit(cell, f, sf, ncell, nf, nsf);
1806 * copy_data.face_data.emplace_back();
1807 * CopyData::FaceData ©_data_face = copy_data.face_data.back();
1809 * copy_data_face.cell_indices[0] = cell->active_cell_index();
1810 * copy_data_face.cell_indices[1] = ncell->active_cell_index();
1812 * const double coeff1 = coefficient.value(cell->center());
1813 * const double coeff2 = coefficient.value(ncell->center());
1815 * std::vector<Tensor<1, dim>> grad_u[2];
1817 * for (unsigned int i = 0; i < 2; ++i)
1819 * grad_u[i].resize(fe_interface_values.n_quadrature_points);
1820 * fe_interface_values.get_fe_face_values(i).get_function_gradients(
1821 * temp_solution, grad_u[i]);
1824 * double jump_norm_square = 0.;
1826 * for (unsigned int qpoint = 0;
1827 * qpoint < fe_interface_values.n_quadrature_points;
1830 * const double jump =
1831 * coeff1 * grad_u[0][qpoint] * fe_interface_values.normal(qpoint) -
1832 * coeff2 * grad_u[1][qpoint] * fe_interface_values.normal(qpoint);
1834 * jump_norm_square += jump * jump * fe_interface_values.JxW(qpoint);
1837 * const double h = cell->face(f)->measure();
1838 * copy_data_face.values[0] = 0.5 * h * jump_norm_square;
1839 * copy_data_face.values[1] = copy_data_face.values[0];
1842 * auto copier = [&](const CopyData ©_data) {
1843 * if (copy_data.cell_index != numbers::invalid_unsigned_int)
1844 * estimated_error_square_per_cell[copy_data.cell_index] += copy_data.value;
1846 * for (auto &cdf : copy_data.face_data)
1847 * for (unsigned int j = 0; j < 2; ++j)
1848 * estimated_error_square_per_cell[cdf.cell_indices[j]] += cdf.values[j];
1851 * const unsigned int n_gauss_points = degree + 1;
1852 * ScratchData<dim> scratch_data(mapping,
1855 * update_hessians | update_quadrature_points |
1856 * update_JxW_values,
1857 * update_values | update_gradients |
1858 * update_JxW_values | update_normal_vectors);
1859 * CopyData copy_data;
1863 * We need to assemble each interior face once but we need to make sure that
1864 * both processes assemble the face term between a locally owned and a ghost
1865 * cell. This is achieved by setting the
1866 * MeshWorker::assemble_ghost_faces_both flag. We need to do this, because
1867 * we do not communicate the error estimator contributions here.
1870 * MeshWorker::mesh_loop(dof_handler.begin_active(),
1871 * dof_handler.end(),
1876 * MeshWorker::assemble_own_cells |
1877 * MeshWorker::assemble_ghost_faces_both |
1878 * MeshWorker::assemble_own_interior_faces_once,
1879 * /*boundary_worker=*/nullptr,
1882 * const double global_error_estimate =
1883 * std::sqrt(Utilities::MPI::sum(estimated_error_square_per_cell.l1_norm(),
1884 * mpi_communicator));
1885 * pcout << " Global error estimate:
" << global_error_estimate
1894 * <h4>LaplaceProblem::refine_grid()</h4>
1898 * We use the cell-wise estimator stored in the vector @p estimate_vector and
1899 * refine a fixed number of cells (chosen here to roughly double the number of
1900 * DoFs in each step).
1903 * template <int dim, int degree>
1904 * void LaplaceProblem<dim, degree>::refine_grid()
1906 * TimerOutput::Scope timing(computing_timer, "Refine grid
");
1908 * const double refinement_fraction = 1. / (std::pow(2.0, dim) - 1.);
1909 * parallel::distributed::GridRefinement::refine_and_coarsen_fixed_number(
1910 * triangulation, estimated_error_square_per_cell, refinement_fraction, 0.0);
1912 * triangulation.execute_coarsening_and_refinement();
1920 * <h4>LaplaceProblem::output_results()</h4>
1924 * The output_results() function is similar to the ones found in many of the
1925 * tutorials (see @ref step_40 "step-40
" for example).
1928 * template <int dim, int degree>
1929 * void LaplaceProblem<dim, degree>::output_results(const unsigned int cycle)
1931 * TimerOutput::Scope timing(computing_timer, "Output results
");
1933 * VectorType temp_solution;
1934 * temp_solution.reinit(locally_owned_dofs,
1935 * locally_relevant_dofs,
1936 * mpi_communicator);
1937 * temp_solution = solution;
1939 * DataOut<dim> data_out;
1940 * data_out.attach_dof_handler(dof_handler);
1941 * data_out.add_data_vector(temp_solution, "solution
");
1943 * Vector<float> subdomain(triangulation.n_active_cells());
1944 * for (unsigned int i = 0; i < subdomain.size(); ++i)
1945 * subdomain(i) = triangulation.locally_owned_subdomain();
1946 * data_out.add_data_vector(subdomain, "subdomain");
1948 * Vector<float> level(triangulation.n_active_cells());
1949 * for (const auto &cell : triangulation.active_cell_iterators())
1950 * level(cell->active_cell_index()) = cell->level();
1951 * data_out.add_data_vector(level, "level");
1953 * if (estimated_error_square_per_cell.size() > 0)
1954 * data_out.add_data_vector(estimated_error_square_per_cell,
1957 * data_out.build_patches();
1959 * const std::string pvtu_filename = data_out.write_vtu_with_pvtu_record(
1960 * "", "solution
", cycle, mpi_communicator, 2 /*n_digits*/, 1 /*n_groups*/);
1962 * pcout << " Wrote " << pvtu_filename << std::endl;
1970 * <h4>LaplaceProblem::run()</h4>
1974 * As in most tutorials, this function calls the various functions defined
1975 * above to set up, assemble, solve, and output the results.
1978 * template <int dim, int degree>
1979 * void LaplaceProblem<dim, degree>::run()
1981 * for (unsigned int cycle = 0; cycle < settings.n_steps; ++cycle)
1983 * pcout << "Cycle
" << cycle << ':' << std::endl;
1987 * pcout << " Number
of active cells:
"
1988 * << triangulation.n_global_active_cells();
1992 * We only output level cell data for the GMG methods (same with DoF
1993 * data below). Note that the partition efficiency is irrelevant for AMG
1994 * since the level hierarchy is not distributed or used during the
1998 * if (settings.solver == Settings::gmg_mf ||
1999 * settings.solver == Settings::gmg_mb)
2000 * pcout << " (
" << triangulation.n_global_levels() << " global levels)
"
2003 * << 1.0 / MGTools::workload_imbalance(triangulation);
2004 * pcout << std::endl;
2010 * Only set up the multilevel hierarchy for GMG.
2013 * if (settings.solver == Settings::gmg_mf ||
2014 * settings.solver == Settings::gmg_mb)
2015 * setup_multigrid();
2017 * pcout << " Number
of degrees
of freedom:
" << dof_handler.n_dofs();
2018 * if (settings.solver == Settings::gmg_mf ||
2019 * settings.solver == Settings::gmg_mb)
2022 * for (unsigned int level = 0; level < triangulation.n_global_levels();
2024 * pcout << dof_handler.n_dofs(level)
2025 * << (level == triangulation.n_global_levels() - 1 ? ")
" :
2028 * pcout << std::endl;
2032 * For the matrix-free method, we only assemble the right-hand side.
2033 * For both matrix-based methods, we assemble both active matrix and
2034 * right-hand side, and only assemble the multigrid matrices for
2038 * if (settings.solver == Settings::gmg_mf)
2040 * else /*gmg_mb or amg*/
2042 * assemble_system();
2043 * if (settings.solver == Settings::gmg_mb)
2044 * assemble_multigrid();
2050 * if (settings.output)
2051 * output_results(cycle);
2053 * computing_timer.print_summary();
2054 * computing_timer.reset();
2063 * <h3>The main() function</h3>
2067 * This is a similar main function to @ref step_40 "step-40
", with the exception that
2068 * we require the user to pass a .prm file as a sole command line
2069 * argument (see @ref step_29 "step-29
" and the documentation of the ParameterHandler
2070 * class for a complete discussion of parameter files).
2073 * int main(int argc, char *argv[])
2075 * using namespace dealii;
2076 * Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, 1);
2078 * Settings settings;
2079 * if (!settings.try_parse((argc > 1) ? (argv[1]) : ""))
2084 * constexpr unsigned int fe_degree = 2;
2086 * switch (settings.dimension)
2090 * LaplaceProblem<2, fe_degree> test(settings);
2098 * LaplaceProblem<3, fe_degree> test(settings);
2108 * catch (std::exception &exc)
2110 * std::cerr << std::endl
2112 * << "----------------------------------------------------
"
2115 * << exc.what() << std::endl
2117 * << "----------------------------------------------------
"
2119 * MPI_Abort(MPI_COMM_WORLD, 1);
2124 * std::cerr << std::endl
2126 * << "----------------------------------------------------
"
2128 * std::cerr << "Unknown exception!
" << std::endl
2130 * << "----------------------------------------------------
"
2132 * MPI_Abort(MPI_COMM_WORLD, 2);
2139<a name="Results"></a><h1>Results</h1>
2142When you run the program using the following command
2144mpirun -np 16 ./step-50 gmg_mf_2d.prm
2146the screen output should look like the following:
2149 Number of active cells: 12 (2 global levels)
2150 Partition efficiency: 0.1875
2151 Number of degrees of freedom: 65 (by level: 21, 65)
2152 Number of CG iterations: 10
2153 Global error estimate: 0.355373
2154 Wrote solution_00.pvtu
2157+---------------------------------------------+------------+------------+
2158| Total wallclock time elapsed since start | 0.0163s | |
2160| Section | no. calls | wall time | % of total |
2161+---------------------------------+-----------+------------+------------+
2162| Assemble right-hand side | 1 | 0.000374s | 2.3% |
2163| Estimate | 1 | 0.000724s | 4.4% |
2164| Output results | 1 | 0.00277s | 17% |
2165| Setup | 1 | 0.00225s | 14% |
2166| Setup multigrid | 1 | 0.00181s | 11% |
2167| Solve | 1 | 0.00364s | 22% |
2168| Solve: 1 multigrid V-cycle | 1 | 0.000354s | 2.2% |
2169| Solve: CG | 1 | 0.00151s | 9.3% |
2170| Solve: Preconditioner setup | 1 | 0.00125s | 7.7% |
2171+---------------------------------+-----------+------------+------------+
2174 Number of active cells: 24 (3 global levels)
2175 Partition efficiency: 0.276786
2176 Number of degrees of freedom: 139 (by level: 21, 65, 99)
2177 Number of CG iterations: 10
2178 Global error estimate: 0.216726
2179 Wrote solution_01.pvtu
2182+---------------------------------------------+------------+------------+
2183| Total wallclock time elapsed since start | 0.0169s | |
2185| Section | no. calls | wall time | % of total |
2186+---------------------------------+-----------+------------+------------+
2187| Assemble right-hand side | 1 | 0.000309s | 1.8% |
2188| Estimate | 1 | 0.00156s | 9.2% |
2189| Output results | 1 | 0.00222s | 13% |
2190| Refine grid | 1 | 0.00278s | 16% |
2191| Setup | 1 | 0.00196s | 12% |
2192| Setup multigrid | 1 | 0.0023s | 14% |
2193| Solve | 1 | 0.00565s | 33% |
2194| Solve: 1 multigrid V-cycle | 1 | 0.000349s | 2.1% |
2195| Solve: CG | 1 | 0.00285s | 17% |
2196| Solve: Preconditioner setup | 1 | 0.00195s | 12% |
2197+---------------------------------+-----------+------------+------------+
2200 Number of active cells: 51 (4 global levels)
2201 Partition efficiency: 0.41875
2202 Number of degrees of freedom: 245 (by level: 21, 65, 225, 25)
2203 Number of CG iterations: 11
2204 Global error estimate: 0.112098
2205 Wrote solution_02.pvtu
2208+---------------------------------------------+------------+------------+
2209| Total wallclock time elapsed since start | 0.0183s | |
2211| Section | no. calls | wall time | % of total |
2212+---------------------------------+-----------+------------+------------+
2213| Assemble right-hand side | 1 | 0.000274s | 1.5% |
2214| Estimate | 1 | 0.00127s | 6.9% |
2215| Output results | 1 | 0.00227s | 12% |
2216| Refine grid | 1 | 0.0024s | 13% |
2217| Setup | 1 | 0.00191s | 10% |
2218| Setup multigrid | 1 | 0.00295s | 16% |
2219| Solve | 1 | 0.00702s | 38% |
2220| Solve: 1 multigrid V-cycle | 1 | 0.000398s | 2.2% |
2221| Solve: CG | 1 | 0.00376s | 21% |
2222| Solve: Preconditioner setup | 1 | 0.00238s | 13% |
2223+---------------------------------+-----------+------------+------------+
2228Here, the timing of the `solve()` function is split up in 3 parts: setting
2229up the multigrid preconditioner, execution of a single multigrid V-cycle, and
2230the CG solver. The V-cycle that is timed is unnecessary for the overall solve
2231and only meant to give an insight at the different costs for AMG and GMG.
2233is not included in the output since the hierarchy of coarse meshes is not
2236All results in this section are gathered on Intel Xeon Platinum 8280 (Cascade
2237Lake) nodes which have 56 cores and 192GB per node and support AVX-512 instructions,
2238allowing for vectorization over 8 doubles (vectorization used only in the matrix-free
2239computations). The code is compiled using gcc 7.1.0 with intel-mpi 17.0.3. Trilinos
224012.10.1 is used for the matrix-based GMG/AMG computations.
2242We can then gather a variety of information by calling the program
2243with the input files that are provided in the directory in which
2244@ref step_50 "step-50
" is located. Using these, and adjusting the number of mesh
2245refinement steps, we can produce information about how well the
2248The following table gives weak scaling timings for this program on up to 256M DoFs
2249and 7,168 processors. (Recall that weak scaling keeps the number of
2250degrees of freedom per processor constant while increasing the number of
2251processors; i.e., it considers larger and larger problems.)
2252Here, @f$\mathbb{E}@f$ is the partition efficiency from the
2253 introduction (also equal to 1.0/workload imbalance), "Setup" is a combination
2254of setup, setup multigrid, assemble, and assemble multigrid from the timing blocks,
2255and "Prec" is the preconditioner setup. Ideally all times would stay constant
2256over each problem size for the individual solvers, but since the partition
2257efficiency decreases from 0.371 to 0.161 from largest to smallest problem size,
2258we expect to see an approximately @f$0.371/0.161=2.3@f$ times increase in timings
2259for GMG. This is, in fact, pretty close to what we really get:
2263 <th colspan="4
"></th>
2265 <th colspan="4
">MF-GMG</th>
2267 <th colspan="4
">MB-GMG</th>
2269 <th colspan="4
">AMG</th>
2272 <th align="right
">Procs</th>
2273 <th align="right
">Cycle</th>
2274 <th align="right
">DoFs</th>
2275 <th align="right
">@f$\mathbb{E}@f$</th>
2277 <th align="right
">Setup</th>
2278 <th align="right
">Prec</th>
2279 <th align="right
">Solve</th>
2280 <th align="right
">Total</th>
2282 <th align="right
">Setup</th>
2283 <th align="right
">Prec</th>
2284 <th align="right
">Solve</th>
2285 <th align="right
">Total</th>
2287 <th align="right
">Setup</th>
2288 <th align="right
">Prec</th>
2289 <th align="right
">Solve</th>
2290 <th align="right
">Total</th>
2293 <td align="right
">112</th>
2294 <td align="right
">13</th>
2295 <td align="right
">4M</th>
2296 <td align="right
">0.37</th>
2298 <td align="right
">0.742</th>
2299 <td align="right
">0.393</th>
2300 <td align="right
">0.200</th>
2301 <td align="right
">1.335</th>
2303 <td align="right
">1.714</th>
2304 <td align="right
">2.934</th>
2305 <td align="right
">0.716</th>
2306 <td align="right
">5.364</th>
2308 <td align="right
">1.544</th>
2309 <td align="right
">0.456</th>
2310 <td align="right
">1.150</th>
2311 <td align="right
">3.150</th>
2314 <td align="right
">448</th>
2315 <td align="right
">15</th>
2316 <td align="right
">16M</th>
2317 <td align="right
">0.29</th>
2319 <td align="right
">0.884</th>
2320 <td align="right
">0.535</th>
2321 <td align="right
">0.253</th>
2322 <td align="right
">1.672</th>
2324 <td align="right
">1.927</th>
2325 <td align="right
">3.776</th>
2326 <td align="right
">1.190</th>
2327 <td align="right
">6.893</th>
2329 <td align="right
">1.544</th>
2330 <td align="right
">0.456</th>
2331 <td align="right
">1.150</th>
2332 <td align="right
">3.150</th>
2335 <td align="right
">1,792</th>
2336 <td align="right
">17</th>
2337 <td align="right
">65M</th>
2338 <td align="right
">0.22</th>
2340 <td align="right
">1.122</th>
2341 <td align="right
">0.686</th>
2342 <td align="right
">0.309</th>
2343 <td align="right
">2.117</th>
2345 <td align="right
">2.171</th>
2346 <td align="right
">4.862</th>
2347 <td align="right
">1.660</th>
2348 <td align="right
">8.693</th>
2350 <td align="right
">1.654</th>
2351 <td align="right
">0.546</th>
2352 <td align="right
">1.460</th>
2353 <td align="right
">3.660</th>
2356 <td align="right
">7,168</th>
2357 <td align="right
">19</th>
2358 <td align="right
">256M</th>
2359 <td align="right
">0.16</th>
2361 <td align="right
">1.214</th>
2362 <td align="right
">0.893</th>
2363 <td align="right
">0.521</th>
2364 <td align="right
">2.628</th>
2366 <td align="right
">2.386</th>
2367 <td align="right
">7.260</th>
2368 <td align="right
">2.560</th>
2369 <td align="right
">12.206</th>
2371 <td align="right
">1.844</th>
2372 <td align="right
">1.010</th>
2373 <td align="right
">1.890</th>
2374 <td align="right
">4.744</th>
2378On the other hand, the algebraic multigrid in the last set of columns
2379is relatively unaffected by the increasing imbalance of the mesh
2380hierarchy (because it doesn't use the mesh hierarchy) and the growth
2381in time is rather driven by other factors that are well documented in
2382the literature (most notably that the algorithmic complexity of
2383some parts of algebraic multigrid methods appears to be @f${\cal O}(N
2384\log N)@f$ instead of @f${\cal O}(N)@f$ for geometric multigrid).
2386The upshort of the table above is that the matrix-free geometric multigrid
2387method appears to be the fastest approach to solving this equation if
2388not by a huge margin. Matrix-based methods, on the other hand, are
2389consistently the worst.
2391The following figure provides strong scaling results for each method, i.e.,
2392we solve the same problem on more and more processors. Specifically,
2393we consider the problems after 16 mesh refinement cycles
2394(32M DoFs) and 19 cycles (256M DoFs), on between 56 to 28,672 processors:
2419<a name=
"Coarsesolver"></a><
h4>
Coarse solver </
h4>
2445<a name=
"PlainProg"></a>
void reinit(value_type *starting_element, const std::size_t n_elements)
__global__ void set(Number *val, const Number s, const size_type N)
static ::ExceptionBase & ExcNotImplemented()
#define Assert(cond, exc)
#define AssertThrow(cond, exc)
void make_hanging_node_constraints(const DoFHandler< dim, spacedim > &dof_handler, AffineConstraints< number > &constraints)
void make_sparsity_pattern(const DoFHandler< dim, spacedim > &dof_handler, SparsityPatternBase &sparsity_pattern, const AffineConstraints< number > &constraints=AffineConstraints< number >(), const bool keep_constrained_dofs=true, const types::subdomain_id subdomain_id=numbers::invalid_subdomain_id)
@ update_values
Shape function values.
@ update_JxW_values
Transformed quadrature weights.
@ update_gradients
Shape function gradients.
@ update_quadrature_points
Transformed quadrature points.
std::vector< value_type > split(const typename ::Triangulation< dim, spacedim >::cell_iterator &parent, const value_type parent_value)
void hyper_L(Triangulation< dim > &tria, const double left=-1., const double right=1., const bool colorize=false)
@ matrix
Contents is actually a matrix.
PETScWrappers::PreconditionBoomerAMG PreconditionAMG
void cell_matrix(FullMatrix< double > &M, const FEValuesBase< dim > &fe, const FEValuesBase< dim > &fetest, const ArrayView< const std::vector< double > > &velocity, const double factor=1.)
Point< spacedim > point(const gp_Pnt &p, const double tolerance=1e-10)
SymmetricTensor< 2, dim, Number > e(const Tensor< 2, dim, Number > &F)
SymmetricTensor< 2, dim, Number > d(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
void call(const std::function< RT()> &function, internal::return_value< RT > &ret_val)
@ construct_multigrid_hierarchy
unsigned int this_mpi_process(const MPI_Comm mpi_communicator)
std::string compress(const std::string &input)
void run(const Iterator &begin, const std_cxx20::type_identity_t< Iterator > &end, Worker worker, Copier copier, const ScratchData &sample_scratch_data, const CopyData &sample_copy_data, const unsigned int queue_length, const unsigned int chunk_size)
unsigned int n_cells(const internal::TriangulationImplementation::NumberCache< 1 > &c)
int(&) functions(const void *v1, const void *v2)
void assemble(const MeshWorker::DoFInfoBox< dim, DOFINFO > &dinfo, A *assembler)
const ::parallel::distributed::Triangulation< dim, spacedim > * triangulation
const TriangulationDescription::Settings settings