Reference documentation for deal.II version 9.4.0
\(\newcommand{\dealvcentcolon}{\mathrel{\mathop{:}}}\) \(\newcommand{\dealcoloneq}{\dealvcentcolon\mathrel{\mkern-1.2mu}=}\) \(\newcommand{\jump}[1]{\left[\!\left[ #1 \right]\!\right]}\) \(\newcommand{\average}[1]{\left\{\!\left\{ #1 \right\}\!\right\}}\)
dof_info.cc
Go to the documentation of this file.
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2020 - 2022 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15 
16 
18 #include <deal.II/base/utilities.h>
20 
21 #include <deal.II/matrix_free/dof_info.templates.h>
22 
23 #include <iostream>
24 
26 
27 namespace internal
28 {
29  namespace MatrixFreeFunctions
30  {
31  // ensure that the type defined in both dof_info.h and
32  // hanging_nodes_internal.h is consistent
33  static_assert(std::is_same<compressed_constraint_kind, std::uint8_t>::value,
34  "Unexpected type for compressed hanging node indicators!");
35 
36 
37 
39  {
40  clear();
41  }
42 
43 
44 
45  void
47  {
48  row_starts.clear();
49  dof_indices.clear();
50  constraint_indicator.clear();
51  vector_partitioner.reset();
52  ghost_dofs.clear();
53  dofs_per_cell.clear();
54  dofs_per_face.clear();
56  dimension = 2;
58  n_base_elements = 0;
59  n_components.clear();
60  start_components.clear();
62  plain_dof_indices.clear();
64  for (unsigned int i = 0; i < 3; ++i)
65  {
66  index_storage_variants[i].clear();
67  dof_indices_contiguous[i].clear();
70  }
71  store_plain_indices = false;
72  cell_active_fe_index.clear();
73  max_fe_index = 0;
74  fe_index_conversion.clear();
75  }
76 
77 
78 
79  void
80  DoFInfo::get_dof_indices_on_cell_batch(std::vector<unsigned int> &my_rows,
81  const unsigned int cell,
82  const bool apply_constraints) const
83  {
84  const unsigned int n_fe_components = start_components.back();
85  const unsigned int fe_index =
86  dofs_per_cell.size() == 1 ? 0 : cell_active_fe_index[cell];
87  const unsigned int dofs_this_cell = dofs_per_cell[fe_index];
88 
89  const unsigned int n_vectorization = vectorization_length;
90  constexpr auto dof_access_index = dof_access_cell;
91  AssertIndexRange(cell,
92  n_vectorization_lanes_filled[dof_access_index].size());
93  const unsigned int n_vectorization_actual =
94  n_vectorization_lanes_filled[dof_access_index][cell];
95 
96  // we might have constraints, so the final number
97  // of indices is not known a priori.
98  // conservatively reserve the maximum without constraints
99  my_rows.reserve(n_vectorization * dofs_this_cell);
100  my_rows.resize(0);
101  unsigned int total_size = 0;
102  for (unsigned int v = 0; v < n_vectorization_actual; ++v)
103  {
104  const unsigned int ib =
105  (cell * n_vectorization + v) * n_fe_components;
106  const unsigned int ie =
107  (cell * n_vectorization + v + 1) * n_fe_components;
108 
109  // figure out constraints by comparing constraint_indicator row
110  // shift for this cell within the block as compared to the next
111  // one
112  const bool has_constraints =
113  (hanging_node_constraint_masks.size() != 0 &&
115  hanging_node_constraint_masks[cell * n_vectorization + v] !=
117  hanging_node_constraint_masks_comp[fe_index][0 /*TODO*/]) ||
118  (row_starts[ib].second != row_starts[ib + n_fe_components].second);
119 
120  auto do_copy = [&](const unsigned int *begin,
121  const unsigned int *end) {
122  const unsigned int shift = total_size;
123  total_size += (end - begin);
124  my_rows.resize(total_size);
125  std::copy(begin, end, my_rows.begin() + shift);
126  };
127 
128  if (!has_constraints || apply_constraints)
129  {
130  const unsigned int *begin =
131  dof_indices.data() + row_starts[ib].first;
132  const unsigned int *end =
133  dof_indices.data() + row_starts[ie].first;
134  do_copy(begin, end);
135  }
136  else
137  {
138  Assert(row_starts_plain_indices[cell * n_vectorization + v] !=
141  const unsigned int *begin =
142  plain_dof_indices.data() +
143  row_starts_plain_indices[cell * n_vectorization + v];
144  const unsigned int *end = begin + dofs_this_cell;
145  do_copy(begin, end);
146  }
147  }
148  }
149 
150 
151 
152  void
153  DoFInfo::assign_ghosts(const std::vector<unsigned int> &boundary_cells,
154  const MPI_Comm & communicator_sm,
155  const bool use_vector_data_exchanger_full)
156  {
157  Assert(boundary_cells.size() < row_starts.size(), ExcInternalError());
158 
159  // sort ghost dofs and compress out duplicates
160  const unsigned int n_owned = (vector_partitioner->local_range().second -
161  vector_partitioner->local_range().first);
162  const std::size_t n_ghosts = ghost_dofs.size();
163 #ifdef DEBUG
164  for (const auto dof_index : dof_indices)
165  AssertIndexRange(dof_index, n_owned + n_ghosts);
166 #endif
167 
168  const unsigned int n_components = start_components.back();
169  std::vector<unsigned int> ghost_numbering(n_ghosts);
170  IndexSet ghost_indices(vector_partitioner->size());
171  if (n_ghosts > 0)
172  {
173  unsigned int n_unique_ghosts = 0;
174  // since we need to go back to the local_to_global indices and
175  // replace the temporary numbering of ghosts by the real number in
176  // the index set, we need to store these values
177  std::vector<std::pair<types::global_dof_index, unsigned int>>
178  ghost_origin(n_ghosts);
179  for (std::size_t i = 0; i < n_ghosts; ++i)
180  {
181  ghost_origin[i].first = ghost_dofs[i];
182  ghost_origin[i].second = i;
183  }
184  std::sort(ghost_origin.begin(), ghost_origin.end());
185 
186  types::global_dof_index last_contiguous_start = ghost_origin[0].first;
187  ghost_numbering[ghost_origin[0].second] = 0;
188  for (std::size_t i = 1; i < n_ghosts; ++i)
189  {
190  if (ghost_origin[i].first > ghost_origin[i - 1].first + 1)
191  {
192  ghost_indices.add_range(last_contiguous_start,
193  ghost_origin[i - 1].first + 1);
194  last_contiguous_start = ghost_origin[i].first;
195  }
196  if (ghost_origin[i].first > ghost_origin[i - 1].first)
197  ++n_unique_ghosts;
198  ghost_numbering[ghost_origin[i].second] = n_unique_ghosts;
199  }
200  ++n_unique_ghosts;
201  ghost_indices.add_range(last_contiguous_start,
202  ghost_origin.back().first + 1);
203  ghost_indices.compress();
204 
205  // make sure that we got the correct local numbering of the ghost
206  // dofs. the ghost index set should store the same number
207  {
208  AssertDimension(n_unique_ghosts, ghost_indices.n_elements());
209  for (std::size_t i = 0; i < n_ghosts; ++i)
210  Assert(ghost_numbering[i] ==
211  ghost_indices.index_within_set(ghost_dofs[i]),
212  ExcInternalError());
213  }
214 
215  // apply correct numbering for ghost indices: We previously just
216  // enumerated them according to their appearance in the
217  // local_to_global structure. Above, we derived a relation between
218  // this enumeration and the actual number
219  const unsigned int n_boundary_cells = boundary_cells.size();
220  for (unsigned int i = 0; i < n_boundary_cells; ++i)
221  {
222  unsigned int *data_ptr =
223  dof_indices.data() +
224  row_starts[boundary_cells[i] * n_components].first;
225  const unsigned int *row_end =
226  dof_indices.data() +
227  row_starts[(boundary_cells[i] + 1) * n_components].first;
228  for (; data_ptr != row_end; ++data_ptr)
229  *data_ptr = ((*data_ptr < n_owned) ?
230  *data_ptr :
231  n_owned + ghost_numbering[*data_ptr - n_owned]);
232 
233  // now the same procedure for plain indices
234  if (store_plain_indices == true)
235  {
236  bool has_hanging_nodes = false;
237 
238  const unsigned int fe_index =
239  (cell_active_fe_index.size() == 0 ||
240  dofs_per_cell.size() == 1) ?
241  0 :
242  cell_active_fe_index[boundary_cells[i]];
243  AssertIndexRange(fe_index, dofs_per_cell.size());
244 
245  if (hanging_node_constraint_masks.size() > 0 &&
247  hanging_node_constraint_masks[boundary_cells[i]] !=
249  for (unsigned int comp = 0; comp < n_components; ++comp)
250  has_hanging_nodes |=
251  hanging_node_constraint_masks_comp[fe_index][comp];
252 
253  if (has_hanging_nodes ||
254  row_starts[boundary_cells[i] * n_components].second !=
255  row_starts[(boundary_cells[i] + 1) * n_components]
256  .second)
257  {
258  unsigned int *data_ptr =
259  plain_dof_indices.data() +
260  row_starts_plain_indices[boundary_cells[i]];
261  const unsigned int *row_end =
262  data_ptr + dofs_per_cell[fe_index];
263  for (; data_ptr != row_end; ++data_ptr)
264  *data_ptr =
265  ((*data_ptr < n_owned) ?
266  *data_ptr :
267  n_owned + ghost_numbering[*data_ptr - n_owned]);
268  }
269  }
270  }
271  }
272 
273  std::vector<types::global_dof_index> empty;
274  ghost_dofs.swap(empty);
275 
276  // set the ghost indices now. need to cast away constness here, but that
277  // is uncritical since we reset the Partitioner in the same initialize
278  // call as this call here.
279  Utilities::MPI::Partitioner *vec_part =
280  const_cast<Utilities::MPI::Partitioner *>(vector_partitioner.get());
281  vec_part->set_ghost_indices(ghost_indices);
282 
283  if (use_vector_data_exchanger_full == false)
284  vector_exchanger = std::make_shared<
287  else
289  std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
290  vector_partitioner, communicator_sm);
291  }
292 
293 
294 
295  void
297  const TaskInfo & task_info,
298  const std::vector<unsigned int> & renumbering,
299  const std::vector<unsigned int> & constraint_pool_row_index,
300  const std::vector<unsigned char> &irregular_cells)
301  {
302  (void)constraint_pool_row_index;
303 
304  // first reorder the active FE index.
305  const bool have_hp = dofs_per_cell.size() > 1;
306  if (cell_active_fe_index.size() > 0)
307  {
308  std::vector<unsigned int> new_active_fe_index;
309  new_active_fe_index.reserve(task_info.cell_partition_data.back());
310  unsigned int position_cell = 0;
311  for (unsigned int cell = 0;
312  cell < task_info.cell_partition_data.back();
313  ++cell)
314  {
315  const unsigned int n_comp =
316  (irregular_cells[cell] > 0 ? irregular_cells[cell] :
318 
319  // take maximum FE index among the ones present (we might have
320  // lumped some lower indices into higher ones)
321  unsigned int fe_index =
322  cell_active_fe_index[renumbering[position_cell]];
323  for (unsigned int j = 1; j < n_comp; ++j)
324  fe_index = std::max(
325  fe_index,
326  cell_active_fe_index[renumbering[position_cell + j]]);
327 
328  new_active_fe_index.push_back(fe_index);
329  position_cell += n_comp;
330  }
331  std::swap(new_active_fe_index, cell_active_fe_index);
332  }
333  if (have_hp)
335  task_info.cell_partition_data.back());
336 
337  const unsigned int n_components = start_components.back();
338 
339  std::vector<std::pair<unsigned int, unsigned int>> new_row_starts(
341  task_info.cell_partition_data.back() +
342  1);
343  std::vector<unsigned int> new_dof_indices;
344  std::vector<std::pair<unsigned short, unsigned short>>
345  new_constraint_indicator;
346  std::vector<unsigned int> new_plain_indices, new_rowstart_plain;
347  unsigned int position_cell = 0;
348  new_dof_indices.reserve(dof_indices.size());
349  new_constraint_indicator.reserve(constraint_indicator.size());
350 
351  std::vector<compressed_constraint_kind> new_hanging_node_constraint_masks;
352  new_hanging_node_constraint_masks.reserve(
354 
355  if (store_plain_indices == true)
356  {
357  new_rowstart_plain.resize(vectorization_length *
358  task_info.cell_partition_data.back() +
359  1,
361  new_plain_indices.reserve(plain_dof_indices.size());
362  }
363 
364  // copy the indices and the constraint indicators to the new data field,
365  // where we will go through the cells in the renumbered way. in case the
366  // vectorization length does not exactly match up, we fill invalid
367  // numbers to the rowstart data. for contiguous cell indices, we skip
368  // the rowstarts field completely and directly go into the
369  // new_dof_indices field (this layout is used in FEEvaluation).
370  for (unsigned int i = 0; i < task_info.cell_partition_data.back(); ++i)
371  {
372  const unsigned int n_vect =
373  (irregular_cells[i] > 0 ? irregular_cells[i] :
375  const unsigned int dofs_per_cell =
376  have_hp ? this->dofs_per_cell[cell_active_fe_index[i]] :
377  this->dofs_per_cell[0];
378 
379  for (unsigned int j = 0; j < n_vect; ++j)
380  {
381  const unsigned int cell_no =
382  renumbering[position_cell + j] * n_components;
383 
384  bool has_hanging_nodes = false;
385 
386  if (hanging_node_constraint_masks.size() > 0 &&
388  {
389  const auto mask =
390  hanging_node_constraint_masks[renumbering[position_cell +
391  j]];
392  new_hanging_node_constraint_masks.push_back(mask);
393 
395  for (unsigned int comp = 0; comp < n_components; ++comp)
396  has_hanging_nodes |= hanging_node_constraint_masks_comp
397  [have_hp ? cell_active_fe_index[i] : 0][comp];
398  }
399 
400  for (unsigned int comp = 0; comp < n_components; ++comp)
401  {
402  new_row_starts[(i * vectorization_length + j) * n_components +
403  comp]
404  .first = new_dof_indices.size();
405  new_row_starts[(i * vectorization_length + j) * n_components +
406  comp]
407  .second = new_constraint_indicator.size();
408 
409  new_dof_indices.insert(
410  new_dof_indices.end(),
411  dof_indices.data() + row_starts[cell_no + comp].first,
412  dof_indices.data() + row_starts[cell_no + comp + 1].first);
413  for (unsigned int index = row_starts[cell_no + comp].second;
414  index != row_starts[cell_no + comp + 1].second;
415  ++index)
416  new_constraint_indicator.push_back(
418  }
419  if (store_plain_indices &&
420  ((row_starts[cell_no].second !=
421  row_starts[cell_no + n_components].second) ||
422  has_hanging_nodes))
423  {
424  new_rowstart_plain[i * vectorization_length + j] =
425  new_plain_indices.size();
426  new_plain_indices.insert(
427  new_plain_indices.end(),
428  plain_dof_indices.data() +
430  plain_dof_indices.data() +
432  dofs_per_cell);
433  }
434  }
435  for (unsigned int j = n_vect; j < vectorization_length; ++j)
436  for (unsigned int comp = 0; comp < n_components; ++comp)
437  {
438  new_row_starts[(i * vectorization_length + j) * n_components +
439  comp]
440  .first = new_dof_indices.size();
441  new_row_starts[(i * vectorization_length + j) * n_components +
442  comp]
443  .second = new_constraint_indicator.size();
444  }
445 
446  for (unsigned int j = n_vect; j < vectorization_length; ++j)
447  if (hanging_node_constraint_masks.size() > 0)
448  new_hanging_node_constraint_masks.push_back(
450 
451  position_cell += n_vect;
452  }
453  AssertDimension(position_cell * n_components + 1, row_starts.size());
454 
455  AssertDimension(dof_indices.size(), new_dof_indices.size());
456  new_row_starts[task_info.cell_partition_data.back() *
458  .first = new_dof_indices.size();
459  new_row_starts[task_info.cell_partition_data.back() *
461  .second = new_constraint_indicator.size();
462 
464  new_constraint_indicator.size());
465 
466  new_row_starts.swap(row_starts);
467  new_dof_indices.swap(dof_indices);
468  new_constraint_indicator.swap(constraint_indicator);
469  new_plain_indices.swap(plain_dof_indices);
470  new_rowstart_plain.swap(row_starts_plain_indices);
471  new_hanging_node_constraint_masks.swap(hanging_node_constraint_masks);
472 
473 #ifdef DEBUG
474  // sanity check 1: all indices should be smaller than the number of dofs
475  // locally owned plus the number of ghosts
476  const unsigned int index_range =
477  (vector_partitioner->local_range().second -
478  vector_partitioner->local_range().first) +
479  vector_partitioner->ghost_indices().n_elements();
480  for (const auto dof_index : dof_indices)
481  AssertIndexRange(dof_index, index_range);
482 
483  // sanity check 2: for the constraint indicators, the first index should
484  // be smaller than the number of indices in the row, and the second
485  // index should be smaller than the number of constraints in the
486  // constraint pool.
487  for (unsigned int row = 0; row < task_info.cell_partition_data.back();
488  ++row)
489  {
490  const unsigned int row_length_ind =
495  constraint_indicator.size() + 1);
496  const std::pair<unsigned short, unsigned short> *
497  con_it =
498  constraint_indicator.data() +
500  *end_con =
501  constraint_indicator.data() +
503  for (; con_it != end_con; ++con_it)
504  {
505  AssertIndexRange(con_it->first, row_length_ind + 1);
506  AssertIndexRange(con_it->second,
507  constraint_pool_row_index.size() - 1);
508  }
509  }
510 
511  // sanity check 3: check the number of cells once again
512  unsigned int n_active_cells = 0;
513  for (unsigned int c = 0; c < *(task_info.cell_partition_data.end() - 2);
514  ++c)
515  if (irregular_cells[c] > 0)
516  n_active_cells += irregular_cells[c];
517  else
520 #endif
521 
522  compute_cell_index_compression(irregular_cells);
523  }
524 
525 
526 
527  void
529  const std::vector<unsigned char> &irregular_cells)
530  {
531  const bool have_hp = dofs_per_cell.size() > 1;
532  const unsigned int n_components = start_components.back();
533 
535  row_starts.size() % vectorization_length == 1,
536  ExcInternalError());
537  if (vectorization_length > 1)
539  irregular_cells.size());
541  irregular_cells.size(), IndexStorageVariants::full);
543  irregular_cells.size());
544  for (unsigned int i = 0; i < irregular_cells.size(); ++i)
545  if (irregular_cells[i] > 0)
546  n_vectorization_lanes_filled[dof_access_cell][i] = irregular_cells[i];
547  else
550 
552  irregular_cells.size() * vectorization_length,
554  dof_indices_interleaved.resize(dof_indices.size(),
557  irregular_cells.size() * vectorization_length,
559 
560  std::vector<unsigned int> index_kinds(
561  static_cast<unsigned int>(
563  1);
564  std::vector<unsigned int> offsets(vectorization_length);
565  for (unsigned int i = 0; i < irregular_cells.size(); ++i)
566  {
567  const unsigned int ndofs =
568  dofs_per_cell[have_hp ? cell_active_fe_index[i] : 0];
569  const unsigned int n_comp =
571 
572  // check 1: Check if there are constraints -> no compression possible
573  bool has_constraints = false;
574  for (unsigned int j = 0; j < n_comp; ++j)
575  {
576  const unsigned int cell_no = i * vectorization_length + j;
577  if (row_starts[cell_no * n_components].second !=
578  row_starts[(cell_no + 1) * n_components].second)
579  {
580  has_constraints = true;
581  break;
582  }
583  }
584  if (has_constraints)
587  else
588  {
589  bool indices_are_contiguous = true;
590  for (unsigned int j = 0; j < n_comp; ++j)
591  {
592  const unsigned int cell_no = i * vectorization_length + j;
593  const unsigned int *dof_indices =
594  this->dof_indices.data() +
595  row_starts[cell_no * n_components].first;
597  ndofs,
598  row_starts[(cell_no + 1) * n_components].first -
599  row_starts[cell_no * n_components].first);
600  for (unsigned int i = 1; i < ndofs; ++i)
601  if (dof_indices[i] != dof_indices[0] + i)
602  {
603  indices_are_contiguous = false;
604  break;
605  }
606  }
607 
608  bool indices_are_interleaved_and_contiguous =
609  (ndofs > 1 && n_comp == vectorization_length);
610 
611  {
612  const unsigned int *dof_indices =
613  this->dof_indices.data() +
615  for (unsigned int k = 0; k < ndofs; ++k)
616  for (unsigned int j = 0; j < n_comp; ++j)
617  if (dof_indices[j * ndofs + k] !=
618  dof_indices[0] + k * n_comp + j)
619  {
620  indices_are_interleaved_and_contiguous = false;
621  break;
622  }
623  }
624 
625  if (indices_are_contiguous ||
626  indices_are_interleaved_and_contiguous)
627  {
628  for (unsigned int j = 0; j < n_comp; ++j)
631  this->dof_indices.size() == 0 ?
632  0 :
633  this->dof_indices
634  [row_starts[(i * vectorization_length + j) *
635  n_components]
636  .first];
637  }
638 
639  if (indices_are_interleaved_and_contiguous)
640  {
644  for (unsigned int j = 0; j < n_comp; ++j)
646  j] = n_comp;
647  }
648  else if (indices_are_contiguous)
649  {
652  for (unsigned int j = 0; j < n_comp; ++j)
654  j] = 1;
655  }
656  else
657  {
658  int indices_are_interleaved_and_mixed = 2;
659  const unsigned int *dof_indices =
660  &this->dof_indices[row_starts[i * vectorization_length *
661  n_components]
662  .first];
663  for (unsigned int j = 0; j < n_comp; ++j)
664  offsets[j] =
665  dof_indices[j * ndofs + 1] - dof_indices[j * ndofs];
666  for (unsigned int k = 0; k < ndofs; ++k)
667  for (unsigned int j = 0; j < n_comp; ++j)
668  // the first if case is to avoid negative offsets
669  // (invalid)
670  if (dof_indices[j * ndofs + 1] < dof_indices[j * ndofs] ||
671  dof_indices[j * ndofs + k] !=
672  dof_indices[j * ndofs] + k * offsets[j])
673  {
674  indices_are_interleaved_and_mixed = 0;
675  break;
676  }
677  if (indices_are_interleaved_and_mixed == 2)
678  {
679  for (unsigned int j = 0; j < n_comp; ++j)
682  offsets[j];
683  for (unsigned int j = 0; j < n_comp; ++j)
685  [i * vectorization_length + j] =
686  dof_indices[j * ndofs];
687  for (unsigned int j = 0; j < n_comp; ++j)
688  if (offsets[j] != vectorization_length)
689  {
690  indices_are_interleaved_and_mixed = 1;
691  break;
692  }
693  if (indices_are_interleaved_and_mixed == 1 ||
694  n_comp != vectorization_length)
698  else
701  }
702  else
703  {
704  const unsigned int *dof_indices =
705  this->dof_indices.data() +
707  .first;
708  if (n_comp == vectorization_length)
711  else
714 
715  // do not use interleaved storage if two vectorized
716  // components point to the same field (scatter not
717  // possible)
718  for (unsigned int k = 0; k < ndofs; ++k)
719  for (unsigned int l = 0; l < n_comp; ++l)
720  for (unsigned int j = l + 1; j < n_comp; ++j)
721  if (dof_indices[j * ndofs + k] ==
722  dof_indices[l * ndofs + k])
723  {
726  break;
727  }
728  }
729  }
730  }
731  index_kinds[static_cast<unsigned int>(
733  }
734 
735  // Cleanup phase: we want to avoid single cells with different properties
736  // than the bulk of the domain in order to avoid extra checks in the face
737  // identification.
738 
739  // Step 1: check whether the interleaved indices were only assigned to
740  // the single cell within a vectorized array.
741  auto fix_single_interleaved_indices =
742  [&](const IndexStorageVariants variant) {
743  if (index_kinds[static_cast<unsigned int>(
745  0 &&
746  index_kinds[static_cast<unsigned int>(variant)] > 0)
747  for (unsigned int i = 0; i < irregular_cells.size(); ++i)
748  {
751  interleaved_contiguous_mixed_strides &&
753  (variant != IndexStorageVariants::contiguous ||
755  [i * vectorization_length] ==
756  1))
757  {
759  index_kinds[static_cast<unsigned int>(
762  index_kinds[static_cast<unsigned int>(variant)]++;
763  }
764  }
765  };
766 
767  fix_single_interleaved_indices(IndexStorageVariants::full);
768  fix_single_interleaved_indices(IndexStorageVariants::contiguous);
769  fix_single_interleaved_indices(IndexStorageVariants::interleaved);
770 
771  unsigned int n_interleaved =
772  index_kinds[static_cast<unsigned int>(
774  index_kinds[static_cast<unsigned int>(
776  index_kinds[static_cast<unsigned int>(
778 
779  // Step 2: fix single contiguous cell among others with interleaved
780  // storage
781  if (n_interleaved > 0 && index_kinds[static_cast<unsigned int>(
783  for (unsigned int i = 0; i < irregular_cells.size(); ++i)
786  {
789  index_kinds[static_cast<unsigned int>(
791  index_kinds[static_cast<unsigned int>(
793  }
794 
795  // Step 3: Interleaved cells are left but also some non-contiguous ones
796  // -> revert all to full storage
797  if (n_interleaved > 0 &&
798  index_kinds[static_cast<unsigned int>(IndexStorageVariants::full)] +
799  index_kinds[static_cast<unsigned int>(
801  0)
802  for (unsigned int i = 0; i < irregular_cells.size(); ++i)
805  {
806  index_kinds[static_cast<unsigned int>(
807  index_storage_variants[2][i])]--;
812  else
815  index_kinds[static_cast<unsigned int>(
817  }
818 
819  // Step 4: Copy the interleaved indices into their own data structure
820  for (unsigned int i = 0; i < irregular_cells.size(); ++i)
823  {
826  {
829  continue;
830  }
831  const unsigned int ndofs =
832  dofs_per_cell[have_hp ? cell_active_fe_index[i] : 0];
833  const unsigned int *dof_indices =
834  &this->dof_indices
836  unsigned int *interleaved_dof_indices =
838  [row_starts[i * vectorization_length * n_components].first];
839  AssertDimension(this->dof_indices.size(),
840  this->dof_indices_interleaved.size());
845  this->dof_indices_interleaved.size());
847  row_starts[i * vectorization_length * n_components].first +
848  ndofs * vectorization_length,
849  this->dof_indices_interleaved.size() + 1);
850  for (unsigned int k = 0; k < ndofs; ++k)
851  for (unsigned int j = 0; j < vectorization_length; ++j)
852  interleaved_dof_indices[k * vectorization_length + j] =
853  dof_indices[j * ndofs + k];
854  }
855  }
856 
857 
858 
859  void
861  const Table<2, ShapeInfo<double>> & shape_info,
862  const unsigned int n_owned_cells,
863  const unsigned int n_lanes,
864  const std::vector<FaceToCellTopology<1>> &inner_faces,
865  const std::vector<FaceToCellTopology<1>> &ghosted_faces,
866  const bool fill_cell_centric,
867  const MPI_Comm & communicator_sm,
868  const bool use_vector_data_exchanger_full)
869  {
871 
872  // partitioner 0: no face integrals, simply use the indices present
873  // on the cells
874  std::vector<types::global_dof_index> ghost_indices;
875  {
876  const unsigned int n_components = start_components.back();
877  for (unsigned int cell = 0; cell < n_owned_cells; ++cell)
878  {
879  for (unsigned int i = row_starts[cell * n_components].first;
880  i < row_starts[(cell + 1) * n_components].first;
881  ++i)
882  if (dof_indices[i] >= part.local_size())
883  ghost_indices.push_back(part.local_to_global(dof_indices[i]));
884 
885  const unsigned int fe_index =
886  dofs_per_cell.size() == 1 ? 0 :
887  cell_active_fe_index[cell / n_lanes];
888  const unsigned int dofs_this_cell = dofs_per_cell[fe_index];
889 
890  for (unsigned int i = row_starts_plain_indices[cell];
891  i < row_starts_plain_indices[cell] + dofs_this_cell;
892  ++i)
893  if (plain_dof_indices[i] >= part.local_size())
894  ghost_indices.push_back(
896  }
897  std::sort(ghost_indices.begin(), ghost_indices.end());
898  ghost_indices.erase(std::unique(ghost_indices.begin(),
899  ghost_indices.end()),
900  ghost_indices.end());
901  IndexSet compressed_set(part.size());
902  compressed_set.add_indices(ghost_indices.begin(), ghost_indices.end());
903  compressed_set.subtract_set(part.locally_owned_range());
904  const bool all_ghosts_equal =
905  Utilities::MPI::min<int>(static_cast<int>(
906  compressed_set.n_elements() ==
907  part.ghost_indices().n_elements()),
908  part.get_mpi_communicator()) != 0;
909 
910  std::shared_ptr<const Utilities::MPI::Partitioner> temp_0;
911 
912  if (all_ghosts_equal)
913  temp_0 = vector_partitioner;
914  else
915  {
916  temp_0 = std::make_shared<Utilities::MPI::Partitioner>(
918  const_cast<Utilities::MPI::Partitioner *>(temp_0.get())
919  ->set_ghost_indices(compressed_set, part.ghost_indices());
920  }
921 
922  if (use_vector_data_exchanger_full == false)
923  vector_exchanger_face_variants[0] = std::make_shared<
925  temp_0);
926  else
928  std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
929  temp_0, communicator_sm);
930  }
931 
932  // construct a numbering of faces
933  std::vector<FaceToCellTopology<1>> all_faces(inner_faces);
934  all_faces.insert(all_faces.end(),
935  ghosted_faces.begin(),
936  ghosted_faces.end());
937  Table<2, unsigned int> cell_and_face_to_faces(
938  (row_starts.size() - 1) / start_components.back(),
939  2 * shape_info(0, 0).n_dimensions);
940  cell_and_face_to_faces.fill(numbers::invalid_unsigned_int);
941  for (unsigned int f = 0; f < all_faces.size(); ++f)
942  {
943  cell_and_face_to_faces(all_faces[f].cells_interior[0],
944  all_faces[f].interior_face_no) = f;
945  Assert(all_faces[f].cells_exterior[0] !=
947  ExcInternalError());
948  cell_and_face_to_faces(all_faces[f].cells_exterior[0],
949  all_faces[f].exterior_face_no) = f;
950  }
951 
952  // lambda function to detect objects on face pairs
953  const auto loop_over_faces =
954  [&](const std::function<
955  void(const unsigned int, const unsigned int, const bool)> &fu) {
956  for (const auto &face : inner_faces)
957  {
958  AssertIndexRange(face.cells_interior[0], n_owned_cells);
959  fu(face.cells_exterior[0], face.exterior_face_no, false /*flag*/);
960  }
961  };
962 
963  const auto loop_over_all_faces =
964  [&](const std::function<
965  void(const unsigned int, const unsigned int, const bool)> &fu) {
966  for (unsigned int c = 0; c < cell_and_face_to_faces.size(0); ++c)
967  for (unsigned int d = 0; d < cell_and_face_to_faces.size(1); ++d)
968  {
969  const unsigned int f = cell_and_face_to_faces(c, d);
971  continue;
972 
973  const unsigned int cell_m = all_faces[f].cells_interior[0];
974  const unsigned int cell_p = all_faces[f].cells_exterior[0];
975 
976  const bool ext = c == cell_m;
977 
978  if (ext && cell_p == numbers::invalid_unsigned_int)
979  continue;
980 
981  const unsigned int p = ext ? cell_p : cell_m;
982  const unsigned int face_no = ext ?
983  all_faces[f].exterior_face_no :
984  all_faces[f].interior_face_no;
985 
986  fu(p, face_no, true);
987  }
988  };
989 
990  const auto process_values =
991  [&](
992  std::shared_ptr<const Utilities::MPI::Partitioner>
993  &vector_partitioner_values,
994  const std::function<void(
995  const std::function<void(
996  const unsigned int, const unsigned int, const bool)> &)> &loop) {
997  bool all_nodal_and_tensorial = true;
998  for (unsigned int c = 0; c < n_base_elements; ++c)
999  {
1000  const auto &si =
1001  shape_info(global_base_element_offset + c, 0).data.front();
1002  if (!si.nodal_at_cell_boundaries ||
1003  (si.element_type ==
1005  all_nodal_and_tensorial = false;
1006  }
1007  if (all_nodal_and_tensorial == false)
1008  vector_partitioner_values = vector_partitioner;
1009  else
1010  {
1011  bool has_noncontiguous_cell = false;
1012 
1013  loop([&](const unsigned int cell_no,
1014  const unsigned int face_no,
1015  const bool flag) {
1016  const unsigned int index =
1018  if (flag || (index != numbers::invalid_unsigned_int &&
1019  index >= part.local_size()))
1020  {
1021  const unsigned int stride =
1022  dof_indices_interleave_strides[dof_access_cell][cell_no];
1023  unsigned int i = 0;
1024  for (unsigned int e = 0; e < n_base_elements; ++e)
1025  for (unsigned int c = 0; c < n_components[e]; ++c)
1026  {
1027  const ShapeInfo<double> &shape =
1028  shape_info(global_base_element_offset + e, 0);
1029  for (unsigned int j = 0;
1030  j < shape.dofs_per_component_on_face;
1031  ++j)
1032  ghost_indices.push_back(part.local_to_global(
1033  index + i +
1034  shape.face_to_cell_index_nodal(face_no, j) *
1035  stride));
1036  i += shape.dofs_per_component_on_cell * stride;
1037  }
1038  AssertDimension(i, dofs_per_cell[0] * stride);
1039  }
1041  has_noncontiguous_cell = true;
1042  });
1043  has_noncontiguous_cell =
1044  Utilities::MPI::min<int>(static_cast<int>(
1045  has_noncontiguous_cell),
1046  part.get_mpi_communicator()) != 0;
1047 
1048  std::sort(ghost_indices.begin(), ghost_indices.end());
1049  ghost_indices.erase(std::unique(ghost_indices.begin(),
1050  ghost_indices.end()),
1051  ghost_indices.end());
1052  IndexSet compressed_set(part.size());
1053  compressed_set.add_indices(ghost_indices.begin(),
1054  ghost_indices.end());
1055  compressed_set.subtract_set(part.locally_owned_range());
1056  const bool all_ghosts_equal =
1057  Utilities::MPI::min<int>(static_cast<int>(
1058  compressed_set.n_elements() ==
1059  part.ghost_indices().n_elements()),
1060  part.get_mpi_communicator()) != 0;
1061  if (all_ghosts_equal || has_noncontiguous_cell)
1062  vector_partitioner_values = vector_partitioner;
1063  else
1064  {
1065  vector_partitioner_values =
1066  std::make_shared<Utilities::MPI::Partitioner>(
1067  part.locally_owned_range(), part.get_mpi_communicator());
1068  const_cast<Utilities::MPI::Partitioner *>(
1069  vector_partitioner_values.get())
1070  ->set_ghost_indices(compressed_set, part.ghost_indices());
1071  }
1072  }
1073  };
1074 
1075 
1076  const auto process_gradients =
1077  [&](
1078  const std::shared_ptr<const Utilities::MPI::Partitioner>
1079  &vector_partitoner_values,
1080  std::shared_ptr<const Utilities::MPI::Partitioner>
1081  &vector_partitioner_gradients,
1082  const std::function<void(
1083  const std::function<void(
1084  const unsigned int, const unsigned int, const bool)> &)> &loop) {
1085  bool all_hermite = true;
1086  for (unsigned int c = 0; c < n_base_elements; ++c)
1087  if (shape_info(global_base_element_offset + c, 0).element_type !=
1089  all_hermite = false;
1090  if (all_hermite == false ||
1091  vector_partitoner_values.get() == vector_partitioner.get())
1092  vector_partitioner_gradients = vector_partitioner;
1093  else
1094  {
1095  loop([&](const unsigned int cell_no,
1096  const unsigned int face_no,
1097  const bool flag) {
1098  const unsigned int index =
1099  dof_indices_contiguous[dof_access_cell][cell_no];
1100  if (flag || (index != numbers::invalid_unsigned_int &&
1101  index >= part.local_size()))
1102  {
1103  const unsigned int stride =
1104  dof_indices_interleave_strides[dof_access_cell][cell_no];
1105  unsigned int i = 0;
1106  for (unsigned int e = 0; e < n_base_elements; ++e)
1107  for (unsigned int c = 0; c < n_components[e]; ++c)
1108  {
1109  const ShapeInfo<double> &shape =
1110  shape_info(global_base_element_offset + e, 0);
1111  for (unsigned int j = 0;
1112  j < 2 * shape.dofs_per_component_on_face;
1113  ++j)
1114  ghost_indices.push_back(part.local_to_global(
1115  index + i +
1116  shape.face_to_cell_index_hermite(face_no, j) *
1117  stride));
1118  i += shape.dofs_per_component_on_cell * stride;
1119  }
1120  AssertDimension(i, dofs_per_cell[0] * stride);
1121  }
1122  });
1123  std::sort(ghost_indices.begin(), ghost_indices.end());
1124  ghost_indices.erase(std::unique(ghost_indices.begin(),
1125  ghost_indices.end()),
1126  ghost_indices.end());
1127  IndexSet compressed_set(part.size());
1128  compressed_set.add_indices(ghost_indices.begin(),
1129  ghost_indices.end());
1130  compressed_set.subtract_set(part.locally_owned_range());
1131  const bool all_ghosts_equal =
1132  Utilities::MPI::min<int>(static_cast<int>(
1133  compressed_set.n_elements() ==
1134  part.ghost_indices().n_elements()),
1135  part.get_mpi_communicator()) != 0;
1136  if (all_ghosts_equal)
1137  vector_partitioner_gradients = vector_partitioner;
1138  else
1139  {
1140  vector_partitioner_gradients =
1141  std::make_shared<Utilities::MPI::Partitioner>(
1142  part.locally_owned_range(), part.get_mpi_communicator());
1143  const_cast<Utilities::MPI::Partitioner *>(
1144  vector_partitioner_gradients.get())
1145  ->set_ghost_indices(compressed_set, part.ghost_indices());
1146  }
1147  }
1148  };
1149 
1150  std::shared_ptr<const Utilities::MPI::Partitioner> temp_1, temp_2, temp_3,
1151  temp_4;
1152 
1153  // partitioner 1: values on faces
1154  process_values(temp_1, loop_over_faces);
1155 
1156  // partitioner 2: values and gradients on faces
1157  process_gradients(temp_1, temp_2, loop_over_faces);
1158 
1159  if (fill_cell_centric)
1160  {
1161  ghost_indices.clear();
1162  // partitioner 3: values on all faces
1163  process_values(temp_3, loop_over_all_faces);
1164  // partitioner 4: values and gradients on faces
1165  process_gradients(temp_3, temp_4, loop_over_all_faces);
1166  }
1167  else
1168  {
1169  temp_3 = std::make_shared<Utilities::MPI::Partitioner>(
1170  part.locally_owned_range(), part.get_mpi_communicator());
1171  temp_4 = std::make_shared<Utilities::MPI::Partitioner>(
1172  part.locally_owned_range(), part.get_mpi_communicator());
1173  }
1174 
1175  if (use_vector_data_exchanger_full == false)
1176  {
1177  vector_exchanger_face_variants[1] = std::make_shared<
1178  MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
1179  temp_1);
1180  vector_exchanger_face_variants[2] = std::make_shared<
1181  MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
1182  temp_2);
1183  vector_exchanger_face_variants[3] = std::make_shared<
1184  MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
1185  temp_3);
1186  vector_exchanger_face_variants[4] = std::make_shared<
1187  MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
1188  temp_4);
1189  }
1190  else
1191  {
1192  vector_exchanger_face_variants[1] =
1193  std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
1194  temp_1, communicator_sm);
1195  vector_exchanger_face_variants[2] =
1196  std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
1197  temp_2, communicator_sm);
1198  vector_exchanger_face_variants[3] =
1199  std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
1200  temp_3, communicator_sm);
1201  vector_exchanger_face_variants[4] =
1202  std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
1203  temp_4, communicator_sm);
1204  }
1205  }
1206 
1207 
1208 
1209  void
1210  DoFInfo::compute_shared_memory_contiguous_indices(
1211  std::array<std::vector<std::pair<unsigned int, unsigned int>>, 3>
1212  &cell_indices_contiguous_sm)
1213  {
1214  AssertDimension(dofs_per_cell.size(), 1);
1215 
1216  for (unsigned int i = 0; i < 3; ++i)
1217  {
1218  dof_indices_contiguous_sm[i].resize(
1219  cell_indices_contiguous_sm[i].size());
1220 
1221  for (unsigned int j = 0; j < cell_indices_contiguous_sm[i].size();
1222  ++j)
1223  if (cell_indices_contiguous_sm[i][j].first !=
1225  dof_indices_contiguous_sm[i][j] = {
1226  cell_indices_contiguous_sm[i][j].first,
1227  cell_indices_contiguous_sm[i][j].second * dofs_per_cell[0]};
1228  else
1229  dof_indices_contiguous_sm[i][j] = {numbers::invalid_unsigned_int,
1231  }
1232  }
1233 
1234 
1235 
1236  namespace internal
1237  {
1238  // We construct the connectivity graph in parallel. we use one lock for
1239  // 256 degrees of freedom to keep the number of locks down to a
1240  // reasonable level and reduce the cost of locking to some extent.
1241  static constexpr unsigned int bucket_size_threading = 256;
1242 
1243 
1244 
1245  void
1246  compute_row_lengths(const unsigned int begin,
1247  const unsigned int end,
1248  const DoFInfo & dof_info,
1249  std::vector<std::mutex> & mutexes,
1250  std::vector<unsigned int> &row_lengths)
1251  {
1252  std::vector<unsigned int> scratch;
1253  const unsigned int n_components = dof_info.start_components.back();
1254  for (unsigned int block = begin; block < end; ++block)
1255  {
1256  scratch.clear();
1257  scratch.insert(
1258  scratch.end(),
1259  dof_info.dof_indices.data() +
1260  dof_info.row_starts[block * n_components].first,
1261  dof_info.dof_indices.data() +
1262  dof_info.row_starts[(block + 1) * n_components].first);
1263  std::sort(scratch.begin(), scratch.end());
1264  std::vector<unsigned int>::const_iterator end_unique =
1265  std::unique(scratch.begin(), scratch.end());
1266  std::vector<unsigned int>::const_iterator it = scratch.begin();
1267  while (it != end_unique)
1268  {
1269  // In this code, the procedure is that we insert all elements
1270  // that are within the range of one lock at once
1271  const unsigned int next_bucket =
1273  std::lock_guard<std::mutex> lock(
1274  mutexes[*it / bucket_size_threading]);
1275  for (; it != end_unique && *it < next_bucket; ++it)
1276  {
1277  AssertIndexRange(*it, row_lengths.size());
1278  row_lengths[*it]++;
1279  }
1280  }
1281  }
1282  }
1283 
1284  void
1285  fill_connectivity_dofs(const unsigned int begin,
1286  const unsigned int end,
1287  const DoFInfo & dof_info,
1288  const std::vector<unsigned int> &row_lengths,
1289  std::vector<std::mutex> & mutexes,
1290  ::SparsityPattern & connectivity_dof)
1291  {
1292  std::vector<unsigned int> scratch;
1293  const unsigned int n_components = dof_info.start_components.back();
1294  for (unsigned int block = begin; block < end; ++block)
1295  {
1296  scratch.clear();
1297  scratch.insert(
1298  scratch.end(),
1299  dof_info.dof_indices.data() +
1300  dof_info.row_starts[block * n_components].first,
1301  dof_info.dof_indices.data() +
1302  dof_info.row_starts[(block + 1) * n_components].first);
1303  std::sort(scratch.begin(), scratch.end());
1304  std::vector<unsigned int>::const_iterator end_unique =
1305  std::unique(scratch.begin(), scratch.end());
1306  std::vector<unsigned int>::const_iterator it = scratch.begin();
1307  while (it != end_unique)
1308  {
1309  const unsigned int next_bucket =
1311  std::lock_guard<std::mutex> lock(
1312  mutexes[*it / bucket_size_threading]);
1313  for (; it != end_unique && *it < next_bucket; ++it)
1314  if (row_lengths[*it] > 0)
1315  connectivity_dof.add(*it, block);
1316  }
1317  }
1318  }
1319 
1320 
1321 
1322  void
1323  fill_connectivity(const unsigned int begin,
1324  const unsigned int end,
1325  const DoFInfo & dof_info,
1326  const std::vector<unsigned int> &renumbering,
1327  const ::SparsityPattern & connectivity_dof,
1328  DynamicSparsityPattern & connectivity)
1329  {
1330  ordered_vector row_entries;
1331  const unsigned int n_components = dof_info.start_components.back();
1332  for (unsigned int block = begin; block < end; ++block)
1333  {
1334  row_entries.clear();
1335 
1336  const unsigned int
1337  *it = dof_info.dof_indices.data() +
1338  dof_info.row_starts[block * n_components].first,
1339  *end_cell = dof_info.dof_indices.data() +
1340  dof_info.row_starts[(block + 1) * n_components].first;
1341  for (; it != end_cell; ++it)
1342  {
1343  SparsityPattern::iterator sp = connectivity_dof.begin(*it);
1344  std::vector<types::global_dof_index>::iterator insert_pos =
1345  row_entries.begin();
1346  for (; sp != connectivity_dof.end(*it); ++sp)
1347  if (sp->column() != block)
1348  row_entries.insert(renumbering[sp->column()], insert_pos);
1349  }
1350  connectivity.add_entries(renumbering[block],
1351  row_entries.begin(),
1352  row_entries.end());
1353  }
1354  }
1355 
1356  } // namespace internal
1357 
1358  void
1359  DoFInfo::make_connectivity_graph(
1360  const TaskInfo & task_info,
1361  const std::vector<unsigned int> &renumbering,
1362  DynamicSparsityPattern & connectivity) const
1363  {
1364  unsigned int n_rows = (vector_partitioner->local_range().second -
1365  vector_partitioner->local_range().first) +
1366  vector_partitioner->ghost_indices().n_elements();
1367 
1368  // Avoid square sparsity patterns that allocate the diagonal entry
1369  if (n_rows == task_info.n_active_cells)
1370  ++n_rows;
1371 
1372  // first determine row lengths
1373  std::vector<unsigned int> row_lengths(n_rows);
1374  std::vector<std::mutex> mutexes(n_rows / internal::bucket_size_threading +
1375  1);
1377  0,
1378  task_info.n_active_cells,
1379  [this, &mutexes, &row_lengths](const unsigned int begin,
1380  const unsigned int end) {
1381  internal::compute_row_lengths(
1382  begin, end, *this, mutexes, row_lengths);
1383  },
1384  20);
1385 
1386  // disregard dofs that only sit on a single cell because they cannot
1387  // couple
1388  for (unsigned int row = 0; row < n_rows; ++row)
1389  if (row_lengths[row] <= 1)
1390  row_lengths[row] = 0;
1391 
1392  // Create a temporary sparsity pattern that holds to each degree of
1393  // freedom on which cells it appears, i.e., store the connectivity
1394  // between cells and dofs
1395  SparsityPattern connectivity_dof(n_rows,
1396  task_info.n_active_cells,
1397  row_lengths);
1399  0,
1400  task_info.n_active_cells,
1401  [this, &row_lengths, &mutexes, &connectivity_dof](
1402  const unsigned int begin, const unsigned int end) {
1403  internal::fill_connectivity_dofs(
1404  begin, end, *this, row_lengths, mutexes, connectivity_dof);
1405  },
1406  20);
1407  connectivity_dof.compress();
1408 
1409 
1410  // Invert renumbering for use in fill_connectivity.
1411  std::vector<unsigned int> reverse_numbering(task_info.n_active_cells);
1412  reverse_numbering = Utilities::invert_permutation(renumbering);
1413 
1414  // From the above connectivity between dofs and cells, we can finally
1415  // create a connectivity list between cells. The connectivity graph
1416  // should apply the renumbering, i.e., the entry for cell j is the entry
1417  // for cell renumbering[j] in the original ordering.
1419  0,
1420  task_info.n_active_cells,
1421  [this, &reverse_numbering, &connectivity_dof, &connectivity](
1422  const unsigned int begin, const unsigned int end) {
1423  internal::fill_connectivity(begin,
1424  end,
1425  *this,
1426  reverse_numbering,
1427  connectivity_dof,
1428  connectivity);
1429  },
1430  20);
1431  }
1432 
1433 
1434 
1435  void
1436  DoFInfo::compute_dof_renumbering(
1437  std::vector<types::global_dof_index> &renumbering)
1438  {
1439  const unsigned int locally_owned_size =
1440  vector_partitioner->locally_owned_size();
1441  renumbering.resize(0);
1442  renumbering.resize(locally_owned_size, numbers::invalid_dof_index);
1443 
1444  types::global_dof_index counter = 0;
1445  const unsigned int n_components = start_components.back();
1446  const unsigned int n_cell_batches =
1447  n_vectorization_lanes_filled[dof_access_cell].size();
1448  Assert(n_cell_batches <=
1449  (row_starts.size() - 1) / vectorization_length / n_components,
1450  ExcInternalError());
1451  for (unsigned int cell_no = 0; cell_no < n_cell_batches; ++cell_no)
1452  {
1453  // do not renumber in case we have constraints
1454  if (row_starts[cell_no * n_components * vectorization_length]
1455  .second ==
1456  row_starts[(cell_no + 1) * n_components * vectorization_length]
1457  .second)
1458  {
1459  const unsigned int ndofs =
1460  dofs_per_cell.size() == 1 ?
1461  dofs_per_cell[0] :
1462  (dofs_per_cell[cell_active_fe_index.size() > 0 ?
1463  cell_active_fe_index[cell_no] :
1464  0]);
1465  const unsigned int *dof_ind =
1466  dof_indices.data() +
1467  row_starts[cell_no * n_components * vectorization_length].first;
1468  for (unsigned int i = 0; i < ndofs; ++i)
1469  for (unsigned int j = 0;
1470  j < n_vectorization_lanes_filled[dof_access_cell][cell_no];
1471  ++j)
1472  if (dof_ind[j * ndofs + i] < locally_owned_size)
1473  if (renumbering[dof_ind[j * ndofs + i]] ==
1475  renumbering[dof_ind[j * ndofs + i]] = counter++;
1476  }
1477  }
1478 
1479  AssertIndexRange(counter, locally_owned_size + 1);
1480  for (types::global_dof_index &dof_index : renumbering)
1481  if (dof_index == numbers::invalid_dof_index)
1482  dof_index = counter++;
1483 
1484  // transform indices to global index space
1485  for (types::global_dof_index &dof_index : renumbering)
1486  dof_index = vector_partitioner->local_to_global(dof_index);
1487 
1488  AssertDimension(counter, renumbering.size());
1489  }
1490 
1491 
1492 
1493  std::size_t
1495  {
1496  std::size_t memory = sizeof(*this);
1497  memory +=
1498  (row_starts.capacity() * sizeof(std::pair<unsigned int, unsigned int>));
1499  memory += MemoryConsumption::memory_consumption(dof_indices);
1500  memory +=
1501  MemoryConsumption::memory_consumption(hanging_node_constraint_masks);
1502  memory += MemoryConsumption::memory_consumption(row_starts_plain_indices);
1503  memory += MemoryConsumption::memory_consumption(plain_dof_indices);
1504  memory += MemoryConsumption::memory_consumption(constraint_indicator);
1505  memory += MemoryConsumption::memory_consumption(*vector_partitioner);
1506  return memory;
1507  }
1508  } // namespace MatrixFreeFunctions
1509 } // namespace internal
1510 
1511 namespace internal
1512 {
1513  namespace MatrixFreeFunctions
1514  {
1515  template struct ConstraintValues<double>;
1516 
1517  template unsigned short
1519  const std::vector<std::pair<types::global_dof_index, float>> &);
1520  template unsigned short
1522  const std::vector<std::pair<types::global_dof_index, double>> &);
1523 
1524 
1525  template void
1526  DoFInfo::read_dof_indices<double>(
1527  const std::vector<types::global_dof_index> &,
1528  const std::vector<types::global_dof_index> &,
1529  const bool,
1530  const ::AffineConstraints<double> &,
1531  const unsigned int,
1532  ConstraintValues<double> &,
1533  bool &);
1534 
1535  template void
1536  DoFInfo::read_dof_indices<float>(
1537  const std::vector<types::global_dof_index> &,
1538  const std::vector<types::global_dof_index> &,
1539  const bool,
1540  const ::AffineConstraints<float> &,
1541  const unsigned int,
1542  ConstraintValues<double> &,
1543  bool &);
1544 
1545  template bool
1546  DoFInfo::process_hanging_node_constraints<1>(
1547  const HangingNodes<1> &,
1548  const std::vector<std::vector<unsigned int>> &,
1549  const unsigned int,
1551  std::vector<types::global_dof_index> &);
1552  template bool
1553  DoFInfo::process_hanging_node_constraints<2>(
1554  const HangingNodes<2> &,
1555  const std::vector<std::vector<unsigned int>> &,
1556  const unsigned int,
1558  std::vector<types::global_dof_index> &);
1559  template bool
1560  DoFInfo::process_hanging_node_constraints<3>(
1561  const HangingNodes<3> &,
1562  const std::vector<std::vector<unsigned int>> &,
1563  const unsigned int,
1565  std::vector<types::global_dof_index> &);
1566 
1567  template void
1568  DoFInfo::compute_face_index_compression<1>(
1569  const std::vector<FaceToCellTopology<1>> &);
1570  template void
1571  DoFInfo::compute_face_index_compression<2>(
1572  const std::vector<FaceToCellTopology<2>> &);
1573  template void
1574  DoFInfo::compute_face_index_compression<4>(
1575  const std::vector<FaceToCellTopology<4>> &);
1576  template void
1577  DoFInfo::compute_face_index_compression<8>(
1578  const std::vector<FaceToCellTopology<8>> &);
1579  template void
1580  DoFInfo::compute_face_index_compression<16>(
1581  const std::vector<FaceToCellTopology<16>> &);
1582 
1583  template void
1584  DoFInfo::compute_vector_zero_access_pattern<1>(
1585  const TaskInfo &,
1586  const std::vector<FaceToCellTopology<1>> &);
1587  template void
1588  DoFInfo::compute_vector_zero_access_pattern<2>(
1589  const TaskInfo &,
1590  const std::vector<FaceToCellTopology<2>> &);
1591  template void
1592  DoFInfo::compute_vector_zero_access_pattern<4>(
1593  const TaskInfo &,
1594  const std::vector<FaceToCellTopology<4>> &);
1595  template void
1596  DoFInfo::compute_vector_zero_access_pattern<8>(
1597  const TaskInfo &,
1598  const std::vector<FaceToCellTopology<8>> &);
1599  template void
1600  DoFInfo::compute_vector_zero_access_pattern<16>(
1601  const TaskInfo &,
1602  const std::vector<FaceToCellTopology<16>> &);
1603 
1604  template void
1605  DoFInfo::print_memory_consumption<std::ostream>(std::ostream &,
1606  const TaskInfo &) const;
1607  template void
1608  DoFInfo::print_memory_consumption<ConditionalOStream>(
1610  const TaskInfo &) const;
1611 
1612  template void
1613  DoFInfo::print<double>(const std::vector<double> &,
1614  const std::vector<unsigned int> &,
1615  std::ostream &) const;
1616 
1617  template void
1618  DoFInfo::print<float>(const std::vector<float> &,
1619  const std::vector<unsigned int> &,
1620  std::ostream &) const;
1621  } // namespace MatrixFreeFunctions
1622 } // namespace internal
1623 
size_type index_within_set(const size_type global_index) const
Definition: index_set.h:1923
size_type n_elements() const
Definition: index_set.h:1834
void subtract_set(const IndexSet &other)
Definition: index_set.cc:266
void add_range(const size_type begin, const size_type end)
Definition: index_set.h:1675
void compress() const
Definition: index_set.h:1644
void add_indices(const ForwardIterator &begin, const ForwardIterator &end)
Definition: index_set.h:1705
void add(const size_type i, const size_type j)
unsigned int local_size() const
types::global_dof_index local_to_global(const unsigned int local_index) const
void set_ghost_indices(const IndexSet &ghost_indices, const IndexSet &larger_ghost_index_set=IndexSet())
virtual const MPI_Comm & get_mpi_communicator() const override
types::global_dof_index size() const
const IndexSet & locally_owned_range() const
const IndexSet & ghost_indices() const
#define DEAL_II_NAMESPACE_OPEN
Definition: config.h:442
#define DEAL_II_NAMESPACE_CLOSE
Definition: config.h:443
Point< 2 > second
Definition: grid_out.cc:4604
Point< 2 > first
Definition: grid_out.cc:4603
static ::ExceptionBase & ExcInternalError()
static ::ExceptionBase & ExcNotInitialized()
#define Assert(cond, exc)
Definition: exceptions.h:1473
#define AssertDimension(dim1, dim2)
Definition: exceptions.h:1667
#define AssertIndexRange(index, range)
Definition: exceptions.h:1732
void loop(ITERATOR begin, typename identity< ITERATOR >::type end, DOFINFO &dinfo, INFOBOX &info, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &cell_worker, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &boundary_worker, const std::function< void(DOFINFO &, DOFINFO &, typename INFOBOX::CellInfo &, typename INFOBOX::CellInfo &)> &face_worker, ASSEMBLER &assembler, const LoopControl &lctrl=LoopControl())
Definition: loop.h:439
void add_entries(const size_type row, ForwardIterator begin, ForwardIterator end, const bool indices_are_unique_and_sorted=false)
void shift(const Tensor< 1, spacedim > &shift_vector, Triangulation< dim, spacedim > &triangulation)
Definition: grid_tools.cc:2050
std::enable_if< std::is_fundamental< T >::value, std::size_t >::type memory_consumption(const T &t)
void swap(MemorySpaceData< Number, MemorySpace > &, MemorySpaceData< Number, MemorySpace > &)
SymmetricTensor< 2, dim, Number > d(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
Tensor< 2, dim, Number > l(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
VectorType::value_type * begin(VectorType &V)
VectorType::value_type * end(VectorType &V)
std::vector< Integer > invert_permutation(const std::vector< Integer > &permutation)
Definition: utilities.h:1813
void fill_connectivity_dofs(const unsigned int begin, const unsigned int end, const DoFInfo &dof_info, const std::vector< unsigned int > &row_lengths, std::vector< std::mutex > &mutexes, ::SparsityPattern &connectivity_dof)
Definition: dof_info.cc:1285
void fill_connectivity(const unsigned int begin, const unsigned int end, const DoFInfo &dof_info, const std::vector< unsigned int > &renumbering, const ::SparsityPattern &connectivity_dof, DynamicSparsityPattern &connectivity)
Definition: dof_info.cc:1323
void compute_row_lengths(const unsigned int begin, const unsigned int end, const DoFInfo &dof_info, std::vector< std::mutex > &mutexes, std::vector< unsigned int > &row_lengths)
Definition: dof_info.cc:1246
static constexpr unsigned int bucket_size_threading
Definition: dof_info.cc:1241
constexpr compressed_constraint_kind unconstrained_compressed_constraint_kind
unsigned int n_active_cells(const internal::TriangulationImplementation::NumberCache< 1 > &c)
Definition: tria.cc:13741
void copy(const T *begin, const T *end, U *dest)
static const unsigned int invalid_unsigned_int
Definition: types.h:201
const types::global_dof_index invalid_dof_index
Definition: types.h:216
void apply_to_subranges(const tbb::blocked_range< RangeType > &range, const Function &f)
Definition: parallel.h:302
unsigned int global_dof_index
Definition: types.h:76
unsigned short insert_entries(const std::vector< std::pair< types::global_dof_index, number2 >> &entries)
void reorder_cells(const TaskInfo &task_info, const std::vector< unsigned int > &renumbering, const std::vector< unsigned int > &constraint_pool_row_index, const std::vector< unsigned char > &irregular_cells)
Definition: dof_info.cc:296
std::vector< std::pair< unsigned short, unsigned short > > constraint_indicator
Definition: dof_info.h:566
std::vector< std::pair< unsigned int, unsigned int > > row_starts
Definition: dof_info.h:525
std::vector< std::vector< bool > > hanging_node_constraint_masks_comp
Definition: dof_info.h:548
void get_dof_indices_on_cell_batch(std::vector< unsigned int > &local_indices, const unsigned int cell_batch, const bool with_constraints=true) const
Definition: dof_info.cc:80
void compute_cell_index_compression(const std::vector< unsigned char > &irregular_cells)
Definition: dof_info.cc:528
std::vector< unsigned int > dofs_per_cell
Definition: dof_info.h:723
std::shared_ptr< const internal::MatrixFreeFunctions::VectorDataExchange::Base > vector_exchanger
Definition: dof_info.h:628
std::vector< std::vector< unsigned int > > fe_index_conversion
Definition: dof_info.h:751
std::vector< unsigned int > dof_indices
Definition: dof_info.h:542
std::shared_ptr< const Utilities::MPI::Partitioner > vector_partitioner
Definition: dof_info.h:621
std::vector< compressed_constraint_kind > hanging_node_constraint_masks
Definition: dof_info.h:554
std::array< std::vector< unsigned int >, 3 > dof_indices_interleave_strides
Definition: dof_info.h:602
std::vector< unsigned int > row_starts_plain_indices
Definition: dof_info.h:665
std::vector< unsigned int > dofs_per_face
Definition: dof_info.h:728
std::vector< unsigned int > n_components
Definition: dof_info.h:693
void assign_ghosts(const std::vector< unsigned int > &boundary_cells, const MPI_Comm &communicator_sm, const bool use_vector_data_exchanger_full)
Definition: dof_info.cc:153
std::vector< types::global_dof_index > ghost_dofs
Definition: dof_info.h:758
std::array< std::vector< unsigned int >, 3 > dof_indices_contiguous
Definition: dof_info.h:581
void compute_tight_partitioners(const Table< 2, ShapeInfo< double >> &shape_info, const unsigned int n_owned_cells, const unsigned int n_lanes, const std::vector< FaceToCellTopology< 1 >> &inner_faces, const std::vector< FaceToCellTopology< 1 >> &ghosted_faces, const bool fill_cell_centric, const MPI_Comm &communicator_sm, const bool use_vector_data_exchanger_full)
Definition: dof_info.cc:860
std::vector< unsigned int > cell_active_fe_index
Definition: dof_info.h:738
std::array< std::shared_ptr< const internal::MatrixFreeFunctions::VectorDataExchange::Base >, 5 > vector_exchanger_face_variants
Definition: dof_info.h:653
std::vector< unsigned int > plain_dof_indices
Definition: dof_info.h:675
std::array< std::vector< unsigned char >, 3 > n_vectorization_lanes_filled
Definition: dof_info.h:613
std::vector< unsigned int > start_components
Definition: dof_info.h:699
std::vector< unsigned int > dof_indices_interleaved
Definition: dof_info.h:571
std::array< std::vector< IndexStorageVariants >, 3 > index_storage_variants
Definition: dof_info.h:517
std::vector< unsigned int > cell_partition_data
Definition: task_info.h:477