24#include <deal.II/matrix_free/dof_info.templates.h>
32 namespace MatrixFreeFunctions
36 static_assert(std::is_same<compressed_constraint_kind, std::uint8_t>::value,
37 "Unexpected type for compressed hanging node indicators!");
66 for (
unsigned int i = 0; i < 3; ++i)
83 const unsigned int cell,
87 const unsigned int fe_index =
103 unsigned int total_size = 0;
106 const unsigned int ib =
108 const unsigned int ie =
122 auto do_copy = [&](
const unsigned int *begin,
123 const unsigned int *end) {
124 const unsigned int shift = total_size;
125 total_size += (end - begin);
132 const unsigned int *begin =
134 const unsigned int *end =
143 const unsigned int *begin =
179 std::vector<std::pair<types::global_dof_index, unsigned int>>
181 for (std::size_t i = 0; i <
n_ghosts; ++i)
190 for (std::size_t i = 1; i <
n_ghosts; ++i)
211 for (std::size_t i = 0; i <
n_ghosts; ++i)
238 bool has_hanging_nodes =
false;
240 const unsigned int fe_index =
255 if (has_hanging_nodes ||
275 std::vector<types::global_dof_index> empty;
283 vec_part->set_ghost_indices(ghost_indices);
291 std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
301 const std::vector<unsigned int> & constraint_pool_row_index,
304 (void)constraint_pool_row_index;
313 for (
unsigned int cell = 0;
317 const unsigned int n_comp =
323 unsigned int fe_index =
325 for (
unsigned int j = 1;
j <
n_comp; ++
j)
341 std::vector<std::pair<unsigned int, unsigned int>>
new_row_starts(
346 std::vector<std::pair<unsigned short, unsigned short>>
374 const unsigned int n_vect =
379 this->dofs_per_cell[0];
381 for (
unsigned int j = 0;
j <
n_vect; ++
j)
386 bool has_hanging_nodes =
false;
498 const std::pair<unsigned short, unsigned short> *
509 constraint_pool_row_index.
size() - 1);
514 unsigned int n_active_cells = 0;
563 static_cast<unsigned int>(
569 const unsigned int ndofs =
571 const unsigned int n_comp =
576 for (
unsigned int j = 0;
j <
n_comp; ++
j)
592 for (
unsigned int j = 0;
j <
n_comp; ++
j)
596 this->dof_indices.data() +
602 for (
unsigned int i = 1; i <
ndofs; ++i)
615 this->dof_indices.data() +
617 for (
unsigned int k = 0;
620 for (
unsigned int j = 0;
j <
n_comp; ++
j)
632 for (
unsigned int j = 0;
j <
n_comp; ++
j)
652 for (
unsigned int j = 0;
j <
n_comp; ++
j)
660 for (
unsigned int j = 0;
j <
n_comp; ++
j)
671 for (
unsigned int j = 0;
j <
n_comp; ++
j)
674 for (
unsigned int k = 0;
677 for (
unsigned int j = 0;
j <
n_comp; ++
j)
689 for (
unsigned int j = 0;
j <
n_comp; ++
j)
693 for (
unsigned int j = 0;
j <
n_comp; ++
j)
697 for (
unsigned int j = 0;
j <
n_comp; ++
j)
715 this->dof_indices.data() +
734 for (
unsigned int l = 1; l <
n_comp; ++l)
744 for (
unsigned int j = 0;
j < l; ++
j)
776 index_kinds[
static_cast<unsigned int>(variant)] > 0)
861 const unsigned int ndofs =
870 this->dof_indices_interleaved.
size());
880 for (
unsigned int k = 0;
k <
ndofs; ++
k)
883 const unsigned int *end =
898 const unsigned int n_lanes,
909 std::vector<types::global_dof_index> ghost_indices;
920 const unsigned int fe_index =
929 ghost_indices.push_back(
932 std::sort(ghost_indices.begin(), ghost_indices.end());
933 ghost_indices.erase(std::unique(ghost_indices.begin(),
934 ghost_indices.end()),
935 ghost_indices.end());
937 compressed_set.add_indices(ghost_indices.begin(), ghost_indices.end());
940 Utilities::MPI::min<int>(
static_cast<int>(
943 part.get_mpi_communicator()) != 0;
945 std::shared_ptr<const Utilities::MPI::Partitioner>
temp_0;
951 temp_0 = std::make_shared<Utilities::MPI::Partitioner>(
952 part.locally_owned_range(),
part.get_mpi_communicator());
963 std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
968 std::vector<FaceToCellTopology<1>>
all_faces(inner_faces);
974 2 * shape_info(0, 0).n_dimensions);
989 [&](
const std::function<
990 void(
const unsigned int,
const unsigned int,
const bool)> &
fu) {
991 for (
const auto &face : inner_faces)
994 fu(face.cells_exterior[0], face.exterior_face_no,
false );
999 [&](
const std::function<
1000 void(
const unsigned int,
const unsigned int,
const bool)> &
fu) {
1027 std::shared_ptr<const Utilities::MPI::Partitioner>
1029 const std::function<void(
1030 const std::function<
void(
1031 const unsigned int,
const unsigned int,
const bool)> &)> &loop) {
1039 if (!si.nodal_at_cell_boundaries ||
1051 loop([&](
const unsigned int cell_no,
1054 const unsigned int index =
1059 const unsigned int stride =
1060 dof_indices_interleave_strides[dof_access_cell][cell_no];
1062 for (unsigned int e = 0; e < n_base_elements; ++e)
1063 for (unsigned int c = 0; c < n_components[e]; ++c)
1065 const ShapeInfo<double> &shape =
1066 shape_info(global_base_element_offset + e, 0);
1067 for (unsigned int j = 0;
1068 j < shape.dofs_per_component_on_face;
1070 ghost_indices.push_back(part.local_to_global(
1072 shape.face_to_cell_index_nodal(face_no, j) *
1074 i += shape.dofs_per_component_on_cell * stride;
1082 Utilities::MPI::min<int>(
static_cast<int>(
1084 part.get_mpi_communicator()) != 0;
1086 std::sort(ghost_indices.begin(), ghost_indices.end());
1087 ghost_indices.erase(std::unique(ghost_indices.begin(),
1088 ghost_indices.end()),
1089 ghost_indices.end());
1092 ghost_indices.end());
1095 Utilities::MPI::min<int>(
static_cast<int>(
1098 part.get_mpi_communicator()) != 0;
1104 std::make_shared<Utilities::MPI::Partitioner>(
1105 part.locally_owned_range(),
part.get_mpi_communicator());
1116 const std::shared_ptr<const Utilities::MPI::Partitioner>
1118 std::shared_ptr<const Utilities::MPI::Partitioner>
1120 const std::function<void(
1121 const std::function<
void(
1122 const unsigned int,
const unsigned int,
const bool)> &)> &loop) {
1126 for (
unsigned int c = 0; c < n_base_elements; ++c)
1127 if (shape_info(global_base_element_offset + c, 0).element_type !=
1135 loop([&](
const unsigned int cell_no,
1138 const unsigned int index =
1139 dof_indices_contiguous[dof_access_cell][
cell_no];
1143 const unsigned int stride =
1144 dof_indices_interleave_strides[dof_access_cell][cell_no];
1146 for (unsigned int e = 0; e < n_base_elements; ++e)
1147 for (unsigned int c = 0; c < n_components[e]; ++c)
1149 const ShapeInfo<double> &shape =
1150 shape_info(global_base_element_offset + e, 0);
1151 for (unsigned int j = 0;
1152 j < 2 * shape.dofs_per_component_on_face;
1154 ghost_indices.push_back(part.local_to_global(
1156 shape.face_to_cell_index_hermite(face_no, j) *
1158 i += shape.dofs_per_component_on_cell * stride;
1163 std::sort(ghost_indices.begin(), ghost_indices.end());
1164 ghost_indices.erase(std::unique(ghost_indices.begin(),
1165 ghost_indices.end()),
1166 ghost_indices.end());
1169 ghost_indices.end());
1172 Utilities::MPI::min<int>(
static_cast<int>(
1175 part.get_mpi_communicator()) != 0;
1181 std::make_shared<Utilities::MPI::Partitioner>(
1182 part.locally_owned_range(),
part.get_mpi_communicator());
1201 ghost_indices.clear();
1209 temp_3 = std::make_shared<Utilities::MPI::Partitioner>(
1210 part.locally_owned_range(),
part.get_mpi_communicator());
1211 temp_4 = std::make_shared<Utilities::MPI::Partitioner>(
1212 part.locally_owned_range(),
part.get_mpi_communicator());
1217 vector_exchanger_face_variants[1] = std::make_shared<
1218 MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
1220 vector_exchanger_face_variants[2] = std::make_shared<
1221 MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
1223 vector_exchanger_face_variants[3] = std::make_shared<
1224 MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
1226 vector_exchanger_face_variants[4] = std::make_shared<
1227 MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
1232 vector_exchanger_face_variants[1] =
1233 std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
1234 temp_1, communicator_sm);
1235 vector_exchanger_face_variants[2] =
1236 std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
1237 temp_2, communicator_sm);
1238 vector_exchanger_face_variants[3] =
1239 std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
1240 temp_3, communicator_sm);
1241 vector_exchanger_face_variants[4] =
1242 std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
1243 temp_4, communicator_sm);
1250 DoFInfo::compute_shared_memory_contiguous_indices(
1251 std::array<std::vector<std::pair<unsigned int, unsigned int>>, 3>
1256 for (
unsigned int i = 0; i < 3; ++i)
1258 dof_indices_contiguous_sm[i].resize(
1265 dof_indices_contiguous_sm[i][
j] = {
1287 const unsigned int end,
1289 std::vector<std::mutex> &
mutexes,
1292 std::vector<unsigned int> scratch;
1294 for (
unsigned int block = begin; block < end; ++block)
1300 dof_info.
row_starts[block * n_components].first,
1302 dof_info.
row_starts[(block + 1) * n_components].first);
1303 std::sort(scratch.begin(), scratch.end());
1304 std::vector<unsigned int>::const_iterator
end_unique =
1305 std::unique(scratch.begin(), scratch.end());
1306 std::vector<unsigned int>::const_iterator
it = scratch.
begin();
1313 std::lock_guard<std::mutex> lock(
1326 const unsigned int end,
1329 std::vector<std::mutex> &
mutexes,
1332 std::vector<unsigned int> scratch;
1334 for (
unsigned int block = begin; block < end; ++block)
1340 dof_info.
row_starts[block * n_components].first,
1342 dof_info.
row_starts[(block + 1) * n_components].first);
1343 std::sort(scratch.begin(), scratch.end());
1344 std::vector<unsigned int>::const_iterator
end_unique =
1345 std::unique(scratch.begin(), scratch.end());
1346 std::vector<unsigned int>::const_iterator
it = scratch.
begin();
1351 std::lock_guard<std::mutex> lock(
1364 const unsigned int end,
1372 for (
unsigned int block = begin; block < end; ++block)
1378 dof_info.
row_starts[block * n_components].first,
1384 std::vector<types::global_dof_index>::iterator
insert_pos =
1387 if (
sp->column() != block)
1399 DoFInfo::make_connectivity_graph(
1404 unsigned int n_rows = (vector_partitioner->local_range().second -
1405 vector_partitioner->local_range().first) +
1406 vector_partitioner->ghost_indices().n_elements();
1414 std::vector<std::mutex>
mutexes(n_rows / internal::bucket_size_threading +
1420 const unsigned int end) {
1421 internal::compute_row_lengths(
1422 begin, end, *this, mutexes, row_lengths);
1428 for (
unsigned int row = 0; row < n_rows; ++row)
1442 const unsigned int begin,
const unsigned int end) {
1443 internal::fill_connectivity_dofs(
1444 begin, end, *this, row_lengths, mutexes, connectivity_dof);
1462 const unsigned int begin,
const unsigned int end) {
1463 internal::fill_connectivity(begin,
1476 DoFInfo::compute_dof_renumbering(
1477 std::vector<types::global_dof_index> &
renumbering)
1479 const unsigned int locally_owned_size =
1480 vector_partitioner->locally_owned_size();
1485 const unsigned int n_components = start_components.back();
1486 const unsigned int n_cell_batches =
1487 n_vectorization_lanes_filled[dof_access_cell].size();
1489 (row_starts.size() - 1) / vectorization_length / n_components,
1494 if (row_starts[
cell_no * n_components * vectorization_length]
1496 row_starts[(
cell_no + 1) * n_components * vectorization_length]
1499 const unsigned int ndofs =
1500 dofs_per_cell.
size() == 1 ?
1502 (dofs_per_cell[cell_active_fe_index.size() > 0 ?
1503 cell_active_fe_index[
cell_no] :
1506 dof_indices.
data() +
1507 row_starts[
cell_no * n_components * vectorization_length].first;
1508 for (
unsigned int i = 0; i <
ndofs; ++i)
1509 for (
unsigned int j = 0;
1510 j < n_vectorization_lanes_filled[dof_access_cell][
cell_no];
1522 dof_index = counter++;
1526 dof_index = vector_partitioner->local_to_global(dof_index);
1534 DoFInfo::memory_consumption()
const
1536 std::size_t memory =
sizeof(*this);
1538 (row_starts.capacity() *
sizeof(std::pair<unsigned int, unsigned int>));
1553 namespace MatrixFreeFunctions
1556 DoFInfo::read_dof_indices<double>(
1557 const std::vector<types::global_dof_index> &,
1558 const std::vector<types::global_dof_index> &,
1560 const ::AffineConstraints<double> &,
1566 DoFInfo::read_dof_indices<float>(
1567 const std::vector<types::global_dof_index> &,
1568 const std::vector<types::global_dof_index> &,
1576 DoFInfo::process_hanging_node_constraints<1>(
1578 const std::vector<std::vector<unsigned int>> &,
1581 std::vector<types::global_dof_index> &);
1583 DoFInfo::process_hanging_node_constraints<2>(
1585 const std::vector<std::vector<unsigned int>> &,
1588 std::vector<types::global_dof_index> &);
1590 DoFInfo::process_hanging_node_constraints<3>(
1592 const std::vector<std::vector<unsigned int>> &,
1595 std::vector<types::global_dof_index> &);
1598 DoFInfo::compute_face_index_compression<1>(
1601 DoFInfo::compute_face_index_compression<2>(
1604 DoFInfo::compute_face_index_compression<4>(
1607 DoFInfo::compute_face_index_compression<8>(
1610 DoFInfo::compute_face_index_compression<16>(
1614 DoFInfo::compute_vector_zero_access_pattern<1>(
1618 DoFInfo::compute_vector_zero_access_pattern<2>(
1622 DoFInfo::compute_vector_zero_access_pattern<4>(
1626 DoFInfo::compute_vector_zero_access_pattern<8>(
1630 DoFInfo::compute_vector_zero_access_pattern<16>(
1635 DoFInfo::print_memory_consumption<std::ostream>(std::ostream &,
1638 DoFInfo::print_memory_consumption<ConditionalOStream>(
1643 DoFInfo::print<double>(
const std::vector<double> &,
1644 const std::vector<unsigned int> &,
1645 std::ostream &)
const;
1648 DoFInfo::print<float>(
const std::vector<float> &,
1649 const std::vector<unsigned int> &,
1650 std::ostream &)
const;
value_type * data() const noexcept
void add_entries(const size_type row, ForwardIterator begin, ForwardIterator end, const bool indices_are_unique_and_sorted=false)
size_type index_within_set(const size_type global_index) const
size_type n_elements() const
void add_range(const size_type begin, const size_type end)
#define DEAL_II_NAMESPACE_OPEN
#define DEAL_II_NAMESPACE_CLOSE
#define Assert(cond, exc)
#define AssertDimension(dim1, dim2)
#define AssertIndexRange(index, range)
static ::ExceptionBase & ExcInternalError()
static ::ExceptionBase & ExcNotInitialized()
@ tensor_symmetric_hermite
std::enable_if_t< std::is_fundamental< T >::value, std::size_t > memory_consumption(const T &t)
std::vector< Integer > invert_permutation(const std::vector< Integer > &permutation)
void fill_connectivity_dofs(const unsigned int begin, const unsigned int end, const DoFInfo &dof_info, const std::vector< unsigned int > &row_lengths, std::vector< std::mutex > &mutexes, ::SparsityPattern &connectivity_dof)
void fill_connectivity(const unsigned int begin, const unsigned int end, const DoFInfo &dof_info, const std::vector< unsigned int > &renumbering, const ::SparsityPattern &connectivity_dof, DynamicSparsityPattern &connectivity)
void compute_row_lengths(const unsigned int begin, const unsigned int end, const DoFInfo &dof_info, std::vector< std::mutex > &mutexes, std::vector< unsigned int > &row_lengths)
static constexpr unsigned int bucket_size_threading
constexpr compressed_constraint_kind unconstrained_compressed_constraint_kind
static const unsigned int invalid_unsigned_int
const types::global_dof_index invalid_dof_index
void apply_to_subranges(const Iterator &begin, const std_cxx20::type_identity_t< Iterator > &end, const Function &f, const unsigned int grainsize)
::VectorizedArray< Number, width > max(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
void reorder_cells(const TaskInfo &task_info, const std::vector< unsigned int > &renumbering, const std::vector< unsigned int > &constraint_pool_row_index, const std::vector< unsigned char > &irregular_cells)
std::vector< std::pair< unsigned short, unsigned short > > constraint_indicator
std::vector< std::pair< unsigned int, unsigned int > > row_starts
unsigned int n_base_elements
void compute_tight_partitioners(const Table< 2, ShapeInfo< double > > &shape_info, const unsigned int n_owned_cells, const unsigned int n_lanes, const std::vector< FaceToCellTopology< 1 > > &inner_faces, const std::vector< FaceToCellTopology< 1 > > &ghosted_faces, const bool fill_cell_centric, const MPI_Comm communicator_sm, const bool use_vector_data_exchanger_full)
std::vector< std::vector< bool > > hanging_node_constraint_masks_comp
unsigned int global_base_element_offset
unsigned int max_fe_index
void get_dof_indices_on_cell_batch(std::vector< unsigned int > &local_indices, const unsigned int cell_batch, const bool with_constraints=true) const
void compute_cell_index_compression(const std::vector< unsigned char > &irregular_cells)
std::vector< unsigned int > dofs_per_cell
void assign_ghosts(const std::vector< unsigned int > &boundary_cells, const MPI_Comm communicator_sm, const bool use_vector_data_exchanger_full)
unsigned int vectorization_length
std::shared_ptr< const internal::MatrixFreeFunctions::VectorDataExchange::Base > vector_exchanger
@ interleaved_contiguous_strided
@ interleaved_contiguous_mixed_strides
std::vector< std::vector< unsigned int > > fe_index_conversion
std::vector< unsigned int > dof_indices
std::shared_ptr< const Utilities::MPI::Partitioner > vector_partitioner
std::vector< compressed_constraint_kind > hanging_node_constraint_masks
std::array< std::vector< unsigned int >, 3 > dof_indices_interleave_strides
std::vector< unsigned int > row_starts_plain_indices
std::vector< unsigned int > dofs_per_face
std::vector< unsigned int > n_components
std::vector< types::global_dof_index > ghost_dofs
std::array< std::vector< unsigned int >, 3 > dof_indices_contiguous
std::vector< unsigned int > cell_active_fe_index
std::array< std::shared_ptr< const internal::MatrixFreeFunctions::VectorDataExchange::Base >, 5 > vector_exchanger_face_variants
std::vector< unsigned int > plain_dof_indices
std::array< std::vector< unsigned char >, 3 > n_vectorization_lanes_filled
std::vector< unsigned int > start_components
std::vector< unsigned int > dof_indices_interleaved
std::array< std::vector< IndexStorageVariants >, 3 > index_storage_variants
unsigned int n_active_cells
std::vector< unsigned int > cell_partition_data