Reference documentation for deal.II version 9.4.0
\(\newcommand{\dealvcentcolon}{\mathrel{\mathop{:}}}\) \(\newcommand{\dealcoloneq}{\dealvcentcolon\mathrel{\mkern-1.2mu}=}\) \(\newcommand{\jump}[1]{\left[\!\left[ #1 \right]\!\right]}\) \(\newcommand{\average}[1]{\left\{\!\left\{ #1 \right\}\!\right\}}\)
task_info.cc
Go to the documentation of this file.
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2018 - 2022 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15 
16 
19 #include <deal.II/base/mpi.h>
21 #include <deal.II/base/parallel.h>
22 #include <deal.II/base/utilities.h>
23 
25 
27 
28 
29 #ifdef DEAL_II_WITH_TBB
30 # include <tbb/blocked_range.h>
31 # include <tbb/parallel_for.h>
32 # include <tbb/task.h>
33 # ifndef DEAL_II_TBB_WITH_ONEAPI
34 # include <tbb/task_scheduler_init.h>
35 # endif
36 #endif
37 
38 #include <iostream>
39 #include <set>
40 
41 //
42 // TBB with oneAPI API has deprecated and removed the
43 // <code>tbb::tasks</code> backend. With this it is no longer possible to
44 // compile the following code that builds a directed acyclic graph (DAG) of
45 // (thread parallel) tasks without a major porting effort. It turned out
46 // that such a dynamic handling of dependencies and structures is not as
47 // competitive as initially assumed. Consequently, this part of the matrix
48 // free infrastructure has seen less attention than the rest over the last
49 // years and is (presumably) not used that often.
50 //
51 // In case of detected oneAPI backend we simply disable threading in the
52 // matrix free backend for now.
53 //
54 // Matthias Maier, Martin Kronbichler, 2021
55 //
56 
58 
59 
60 
61 /*-------------------- Implementation of the matrix-free loop --------------*/
62 namespace internal
63 {
64  namespace MatrixFreeFunctions
65  {
66 #if defined(DEAL_II_WITH_TBB) && !defined(DEAL_II_TBB_WITH_ONEAPI)
67 
68  // This defines the TBB data structures that are needed to schedule the
69  // partition-partition variant
70 
71  namespace partition
72  {
73  class ActualCellWork
74  {
75  public:
76  ActualCellWork(MFWorkerInterface **worker_pointer,
77  const unsigned int partition,
78  const TaskInfo & task_info)
79  : worker(nullptr)
80  , worker_pointer(worker_pointer)
82  , task_info(task_info)
83  {}
84 
85  ActualCellWork(MFWorkerInterface &worker,
86  const unsigned int partition,
87  const TaskInfo & task_info)
88  : worker(&worker)
89  , worker_pointer(nullptr)
91  , task_info(task_info)
92  {}
93 
94  void
95  operator()() const
96  {
97  MFWorkerInterface *used_worker =
98  worker != nullptr ? worker : *worker_pointer;
99  Assert(used_worker != nullptr, ExcInternalError());
100  used_worker->cell(partition);
101 
102  if (task_info.face_partition_data.empty() == false)
103  {
104  used_worker->face(partition);
105  used_worker->boundary(partition);
106  }
107  }
108 
109  private:
110  MFWorkerInterface * worker;
111  MFWorkerInterface **worker_pointer;
112  const unsigned int partition;
113  const TaskInfo & task_info;
114  };
115 
116  class CellWork : public tbb::task
117  {
118  public:
119  CellWork(MFWorkerInterface &worker,
120  const unsigned int partition,
121  const TaskInfo & task_info,
122  const bool is_blocked)
123  : dummy(nullptr)
124  , work(worker, partition, task_info)
125  , is_blocked(is_blocked)
126  {}
127 
128  tbb::task *
129  execute() override
130  {
131  work();
132 
133  if (is_blocked == true)
134  tbb::empty_task::spawn(*dummy);
135  return nullptr;
136  }
137 
138  tbb::empty_task *dummy;
139 
140  private:
141  ActualCellWork work;
142  const bool is_blocked;
143  };
144 
145 
146 
147  class PartitionWork : public tbb::task
148  {
149  public:
150  PartitionWork(MFWorkerInterface &function_in,
151  const unsigned int partition_in,
152  const TaskInfo & task_info_in,
153  const bool is_blocked_in = false)
154  : dummy(nullptr)
155  , function(function_in)
156  , partition(partition_in)
157  , task_info(task_info_in)
158  , is_blocked(is_blocked_in)
159  {}
160 
161  tbb::task *
162  execute() override
163  {
164  tbb::empty_task *root =
165  new (tbb::task::allocate_root()) tbb::empty_task;
166  const unsigned int evens = task_info.partition_evens[partition];
167  const unsigned int odds = task_info.partition_odds[partition];
168  const unsigned int n_blocked_workers =
169  task_info.partition_n_blocked_workers[partition];
170  const unsigned int n_workers =
171  task_info.partition_n_workers[partition];
172  std::vector<CellWork *> worker(n_workers);
173  std::vector<CellWork *> blocked_worker(n_blocked_workers);
174 
175  root->set_ref_count(evens + 1);
176  for (unsigned int j = 0; j < evens; ++j)
177  {
178  worker[j] = new (root->allocate_child())
179  CellWork(function,
180  task_info.partition_row_index[partition] + 2 * j,
181  task_info,
182  false);
183  if (j > 0)
184  {
185  worker[j]->set_ref_count(2);
186  blocked_worker[j - 1]->dummy =
187  new (worker[j]->allocate_child()) tbb::empty_task;
188  tbb::task::spawn(*blocked_worker[j - 1]);
189  }
190  else
191  worker[j]->set_ref_count(1);
192  if (j < evens - 1)
193  {
194  blocked_worker[j] = new (worker[j]->allocate_child())
195  CellWork(function,
196  task_info.partition_row_index[partition] + 2 * j +
197  1,
198  task_info,
199  true);
200  }
201  else
202  {
203  if (odds == evens)
204  {
205  worker[evens] = new (worker[j]->allocate_child())
206  CellWork(function,
207  task_info.partition_row_index[partition] +
208  2 * j + 1,
209  task_info,
210  false);
211  tbb::task::spawn(*worker[evens]);
212  }
213  else
214  {
215  tbb::empty_task *child =
216  new (worker[j]->allocate_child()) tbb::empty_task();
217  tbb::task::spawn(*child);
218  }
219  }
220  }
221 
222  root->wait_for_all();
223  root->destroy(*root);
224  if (is_blocked == true)
225  tbb::empty_task::spawn(*dummy);
226  return nullptr;
227  }
228 
229  tbb::empty_task *dummy;
230 
231  private:
232  MFWorkerInterface &function;
233  const unsigned int partition;
234  const TaskInfo & task_info;
235  const bool is_blocked;
236  };
237 
238  } // end of namespace partition
239 
240 
241 
242  namespace color
243  {
244  class CellWork
245  {
246  public:
247  CellWork(MFWorkerInterface &worker_in,
248  const TaskInfo & task_info_in,
249  const unsigned int partition_in)
250  : worker(worker_in)
251  , task_info(task_info_in)
252  , partition(partition_in)
253  {}
254 
255  void
256  operator()(const tbb::blocked_range<unsigned int> &r) const
257  {
258  const unsigned int start_index =
259  task_info.cell_partition_data[partition] +
260  task_info.block_size * r.begin();
261  const unsigned int end_index =
262  std::min(start_index + task_info.block_size * (r.end() - r.begin()),
263  task_info.cell_partition_data[partition + 1]);
264  worker.cell(std::make_pair(start_index, end_index));
265 
266  if (task_info.face_partition_data.empty() == false)
267  {
268  AssertThrow(false, ExcNotImplemented());
269  }
270  }
271 
272  private:
273  MFWorkerInterface &worker;
274  const TaskInfo & task_info;
275  const unsigned int partition;
276  };
277 
278 
279 
280  class PartitionWork : public tbb::task
281  {
282  public:
283  PartitionWork(MFWorkerInterface &worker_in,
284  const unsigned int partition_in,
285  const TaskInfo & task_info_in,
286  const bool is_blocked_in)
287  : dummy(nullptr)
288  , worker(worker_in)
289  , partition(partition_in)
290  , task_info(task_info_in)
291  , is_blocked(is_blocked_in)
292  {}
293 
294  tbb::task *
295  execute() override
296  {
297  const unsigned int n_chunks =
298  (task_info.cell_partition_data[partition + 1] -
299  task_info.cell_partition_data[partition] + task_info.block_size -
300  1) /
301  task_info.block_size;
302  parallel_for(tbb::blocked_range<unsigned int>(0, n_chunks, 1),
303  CellWork(worker, task_info, partition));
304  if (is_blocked == true)
305  tbb::empty_task::spawn(*dummy);
306  return nullptr;
307  }
308 
309  tbb::empty_task *dummy;
310 
311  private:
312  MFWorkerInterface &worker;
313  const unsigned int partition;
314  const TaskInfo & task_info;
315  const bool is_blocked;
316  };
317 
318  } // end of namespace color
319 
320 
321 
322  class MPICommunication : public tbb::task
323  {
324  public:
325  MPICommunication(MFWorkerInterface &worker_in, const bool do_compress)
326  : worker(worker_in)
327  , do_compress(do_compress)
328  {}
329 
330  tbb::task *
331  execute() override
332  {
333  if (do_compress == false)
334  worker.vector_update_ghosts_finish();
335  else
336  worker.vector_compress_start();
337  return nullptr;
338  }
339 
340  private:
341  MFWorkerInterface &worker;
342  const bool do_compress;
343  };
344 
345 #endif // DEAL_II_WITH_TBB
346 
347 
348 
349  void
351  {
352  // If we use thread parallelism, we do not currently support to schedule
353  // pieces of updates within the loop, so this index will collect all
354  // calls in that case and work like a single complete loop over all
355  // cells
356  if (scheme != none)
358  else
359  funct.cell_loop_pre_range(
361 
363 
364 #if defined(DEAL_II_WITH_TBB) && !defined(DEAL_II_TBB_WITH_ONEAPI)
365 
366  if (scheme != none)
367  {
369  if (scheme == partition_partition && evens > 0)
370  {
371  tbb::empty_task *root =
372  new (tbb::task::allocate_root()) tbb::empty_task;
373  root->set_ref_count(evens + 1);
374  std::vector<partition::PartitionWork *> worker(n_workers);
375  std::vector<partition::PartitionWork *> blocked_worker(
377  MPICommunication *worker_compr =
378  new (root->allocate_child()) MPICommunication(funct, true);
379  worker_compr->set_ref_count(1);
380  for (unsigned int j = 0; j < evens; ++j)
381  {
382  if (j > 0)
383  {
384  worker[j] = new (root->allocate_child())
385  partition::PartitionWork(funct, 2 * j, *this, false);
386  worker[j]->set_ref_count(2);
387  blocked_worker[j - 1]->dummy =
388  new (worker[j]->allocate_child()) tbb::empty_task;
389  tbb::task::spawn(*blocked_worker[j - 1]);
390  }
391  else
392  {
393  worker[j] = new (worker_compr->allocate_child())
394  partition::PartitionWork(funct, 2 * j, *this, false);
395  worker[j]->set_ref_count(2);
396  MPICommunication *worker_dist =
397  new (worker[j]->allocate_child())
398  MPICommunication(funct, false);
399  tbb::task::spawn(*worker_dist);
400  }
401  if (j < evens - 1)
402  {
403  blocked_worker[j] = new (worker[j]->allocate_child())
404  partition::PartitionWork(funct, 2 * j + 1, *this, true);
405  }
406  else
407  {
408  if (odds == evens)
409  {
410  worker[evens] = new (worker[j]->allocate_child())
411  partition::PartitionWork(funct,
412  2 * j + 1,
413  *this,
414  false);
415  tbb::task::spawn(*worker[evens]);
416  }
417  else
418  {
419  tbb::empty_task *child =
420  new (worker[j]->allocate_child()) tbb::empty_task();
421  tbb::task::spawn(*child);
422  }
423  }
424  }
425 
426  root->wait_for_all();
427  root->destroy(*root);
428  }
429  else if (scheme == partition_partition)
430  {
431  // catch the case of empty partition list: we still need to call
432  // the vector communication routines to clean up and initiate
433  // things
435  funct.vector_compress_start();
436  }
437  else // end of partition-partition, start of partition-color
438  {
439  // check whether there is only one partition. if not, build up the
440  // tree of partitions
441  if (odds > 0)
442  {
443  tbb::empty_task *root =
444  new (tbb::task::allocate_root()) tbb::empty_task;
445  root->set_ref_count(evens + 1);
446  const unsigned int n_blocked_workers =
447  odds - (odds + evens + 1) % 2;
448  const unsigned int n_workers =
450  std::vector<color::PartitionWork *> worker(n_workers);
451  std::vector<color::PartitionWork *> blocked_worker(
453  unsigned int worker_index = 0, slice_index = 0;
454  int spawn_index_child = -2;
455  MPICommunication *worker_compr =
456  new (root->allocate_child()) MPICommunication(funct, true);
457  worker_compr->set_ref_count(1);
458  for (unsigned int part = 0;
459  part < partition_row_index.size() - 1;
460  part++)
461  {
462  if (part == 0)
463  worker[worker_index] =
464  new (worker_compr->allocate_child())
465  color::PartitionWork(funct,
466  slice_index,
467  *this,
468  false);
469  else
470  worker[worker_index] = new (root->allocate_child())
471  color::PartitionWork(funct,
472  slice_index,
473  *this,
474  false);
475  slice_index++;
476  for (; slice_index < partition_row_index[part + 1];
477  slice_index++)
478  {
479  worker[worker_index]->set_ref_count(1);
480  worker_index++;
481  worker[worker_index] =
482  new (worker[worker_index - 1]->allocate_child())
483  color::PartitionWork(funct,
484  slice_index,
485  *this,
486  false);
487  }
488  worker[worker_index]->set_ref_count(2);
489  if (part > 0)
490  {
491  blocked_worker[(part - 1) / 2]->dummy =
492  new (worker[worker_index]->allocate_child())
493  tbb::empty_task;
494  worker_index++;
495  if (spawn_index_child == -1)
496  tbb::task::spawn(*blocked_worker[(part - 1) / 2]);
497  else
498  {
499  Assert(spawn_index_child >= 0,
500  ExcInternalError());
501  tbb::task::spawn(*worker[spawn_index_child]);
502  }
503  spawn_index_child = -2;
504  }
505  else
506  {
507  MPICommunication *worker_dist =
508  new (worker[worker_index]->allocate_child())
509  MPICommunication(funct, false);
510  tbb::task::spawn(*worker_dist);
511  worker_index++;
512  }
513  part += 1;
514  if (part < partition_row_index.size() - 1)
515  {
516  if (part < partition_row_index.size() - 2)
517  {
518  blocked_worker[part / 2] =
519  new (worker[worker_index - 1]->allocate_child())
520  color::PartitionWork(funct,
521  slice_index,
522  *this,
523  true);
524  slice_index++;
525  if (slice_index < partition_row_index[part + 1])
526  {
527  blocked_worker[part / 2]->set_ref_count(1);
528  worker[worker_index] = new (
529  blocked_worker[part / 2]->allocate_child())
530  color::PartitionWork(funct,
531  slice_index,
532  *this,
533  false);
534  slice_index++;
535  }
536  else
537  {
538  spawn_index_child = -1;
539  continue;
540  }
541  }
542  for (; slice_index < partition_row_index[part + 1];
543  slice_index++)
544  {
545  if (slice_index > partition_row_index[part])
546  {
547  worker[worker_index]->set_ref_count(1);
548  worker_index++;
549  }
550  worker[worker_index] =
551  new (worker[worker_index - 1]->allocate_child())
552  color::PartitionWork(funct,
553  slice_index,
554  *this,
555  false);
556  }
557  spawn_index_child = worker_index;
558  worker_index++;
559  }
560  else
561  {
562  tbb::empty_task *final =
563  new (worker[worker_index - 1]->allocate_child())
564  tbb::empty_task;
565  tbb::task::spawn(*final);
566  spawn_index_child = worker_index - 1;
567  }
568  }
569  if (evens == odds)
570  {
571  Assert(spawn_index_child >= 0, ExcInternalError());
572  tbb::task::spawn(*worker[spawn_index_child]);
573  }
574  root->wait_for_all();
575  root->destroy(*root);
576  }
577  // case when we only have one partition: this is the usual
578  // coloring scheme, and we just schedule a parallel for loop for
579  // each color
580  else
581  {
582  Assert(evens <= 1, ExcInternalError());
584 
585  for (unsigned int color = 0; color < partition_row_index[1];
586  ++color)
587  {
588  tbb::empty_task *root =
589  new (tbb::task::allocate_root()) tbb::empty_task;
590  root->set_ref_count(2);
591  color::PartitionWork *worker =
592  new (root->allocate_child())
593  color::PartitionWork(funct, color, *this, false);
594  tbb::empty_task::spawn(*worker);
595  root->wait_for_all();
596  root->destroy(*root);
597  }
598 
599  funct.vector_compress_start();
600  }
601  }
602  }
603  else
604 #endif
605  // serial loop, go through up to three times and do the MPI transfer at
606  // the beginning/end of the second part
607  {
608  for (unsigned int part = 0; part < partition_row_index.size() - 2;
609  ++part)
610  {
611  if (part == 1)
613 
614  for (unsigned int i = partition_row_index[part];
615  i < partition_row_index[part + 1];
616  ++i)
617  {
618  funct.cell_loop_pre_range(i);
619  funct.zero_dst_vector_range(i);
620  AssertIndexRange(i + 1, cell_partition_data.size());
622  {
623  funct.cell(i);
624  }
625 
626  if (face_partition_data.empty() == false)
627  {
629  funct.face(i);
630  if (boundary_partition_data[i + 1] >
632  funct.boundary(i);
633  }
634  funct.cell_loop_post_range(i);
635  }
636 
637  if (part == 1)
638  funct.vector_compress_start();
639  }
640  }
641  funct.vector_compress_finish();
642 
643  if (scheme != none)
645  else
646  funct.cell_loop_post_range(
648  }
649 
650 
651 
653  {
654  clear();
655  }
656 
657 
658 
659  void
661  {
662  n_active_cells = 0;
663  n_ghost_cells = 0;
665  block_size = 0;
666  n_blocks = 0;
667  scheme = none;
668  partition_row_index.clear();
669  partition_row_index.resize(2);
670  cell_partition_data.clear();
671  face_partition_data.clear();
672  boundary_partition_data.clear();
673  evens = 0;
674  odds = 0;
675  n_blocked_workers = 0;
676  n_workers = 0;
677  partition_evens.clear();
678  partition_odds.clear();
680  partition_n_workers.clear();
681  communicator = MPI_COMM_SELF;
682  my_pid = 0;
683  n_procs = 1;
684  }
685 
686 
687 
688  template <typename StreamType>
689  void
691  const std::size_t data_length) const
692  {
693  Utilities::MPI::MinMaxAvg memory_c =
694  Utilities::MPI::min_max_avg(1e-6 * data_length, communicator);
695  if (n_procs < 2)
696  out << memory_c.min;
697  else
698  out << memory_c.min << "/" << memory_c.avg << "/" << memory_c.max;
699  out << " MB" << std::endl;
700  }
701 
702 
703 
704  std::size_t
706  {
707  return (
708  sizeof(*this) +
717  }
718 
719 
720 
721  void
723  std::vector<unsigned int> &boundary_cells)
724  {
725  // try to make the number of boundary cells divisible by the number of
726  // vectors in vectorization
727  unsigned int fillup_needed =
728  (vectorization_length - boundary_cells.size() % vectorization_length) %
730  if (fillup_needed > 0 && boundary_cells.size() < n_active_cells)
731  {
732  // fill additional cells into the list of boundary cells to get a
733  // balanced number. Go through the indices successively until we
734  // found enough indices
735  std::vector<unsigned int> new_boundary_cells;
736  new_boundary_cells.reserve(boundary_cells.size());
737 
738  unsigned int next_free_slot = 0, bound_index = 0;
739  while (fillup_needed > 0 && bound_index < boundary_cells.size())
740  {
741  if (next_free_slot < boundary_cells[bound_index])
742  {
743  // check if there are enough cells to fill with in the
744  // current slot
745  if (next_free_slot + fillup_needed <=
746  boundary_cells[bound_index])
747  {
748  for (unsigned int j =
749  boundary_cells[bound_index] - fillup_needed;
750  j < boundary_cells[bound_index];
751  ++j)
752  new_boundary_cells.push_back(j);
753  fillup_needed = 0;
754  }
755  // ok, not enough indices, so just take them all up to the
756  // next boundary cell
757  else
758  {
759  for (unsigned int j = next_free_slot;
760  j < boundary_cells[bound_index];
761  ++j)
762  new_boundary_cells.push_back(j);
763  fillup_needed -=
764  boundary_cells[bound_index] - next_free_slot;
765  }
766  }
767  new_boundary_cells.push_back(boundary_cells[bound_index]);
768  next_free_slot = boundary_cells[bound_index] + 1;
769  ++bound_index;
770  }
771  while (fillup_needed > 0 &&
772  (new_boundary_cells.size() == 0 ||
773  new_boundary_cells.back() < n_active_cells - 1))
774  new_boundary_cells.push_back(new_boundary_cells.back() + 1);
775  while (bound_index < boundary_cells.size())
776  new_boundary_cells.push_back(boundary_cells[bound_index++]);
777 
778  boundary_cells.swap(new_boundary_cells);
779  }
780 
781  // set the number of cells
782  std::sort(boundary_cells.begin(), boundary_cells.end());
783 
784  // check that number of boundary cells is divisible by
785  // vectorization_length or that it contains all cells
786  Assert(boundary_cells.size() % vectorization_length == 0 ||
787  boundary_cells.size() == n_active_cells,
788  ExcInternalError());
789  }
790 
791 
792 
793  void
795  const std::vector<unsigned int> &cells_with_comm,
796  const unsigned int dofs_per_cell,
797  const bool categories_are_hp,
798  const std::vector<unsigned int> &cell_vectorization_categories,
799  const bool cell_vectorization_categories_strict,
800  const std::vector<unsigned int> &parent_relation,
801  std::vector<unsigned int> & renumbering,
802  std::vector<unsigned char> & incompletely_filled_vectorization)
803  {
804  Assert(dofs_per_cell > 0, ExcInternalError());
805  // This function is decomposed into several steps to determine a good
806  // ordering that satisfies the following constraints:
807  // a. Only cells belonging to the same category (or next higher if the
808  // cell_vectorization_categories_strict is false) can be grouped into
809  // the same SIMD batch
810  // b. hp-adaptive computations must form contiguous ranges for the same
811  // degree (category) in cell_partition_data
812  // c. We want to group the cells with the same parent in the same SIMD
813  // lane if possible
814  // d. The cell order should be similar to the initial one
815  // e. Form sets without MPI communication and those with to overlap
816  // communication with computation
817  //
818  // These constraints are satisfied by first grouping by the categories
819  // and, within the groups, to distinguish between cells with a parent
820  // and those without. All of this is set up with batches of cells (with
821  // padding if the size does not match). Then we define a vector of
822  // arrays where we define sorting criteria for the cell batches to
823  // satisfy the items b and d together, split by different parts to
824  // satisfy item e.
825 
826  // Give the compiler a chance to detect that vectorization_length is a
827  // power of two, which allows it to replace integer divisions by shifts
828  unsigned int vectorization_length_bits = 0;
829  unsigned int my_length = vectorization_length;
830  while ((my_length >>= 1) != 0u)
831  ++vectorization_length_bits;
832  const unsigned int n_lanes = 1 << vectorization_length_bits;
833 
834  // Step 1: find tight map of categories for not taking exceeding amounts
835  // of memory below. Sort the new categories by the numbers in the
836  // old one to ensure we respect the given rules
837  unsigned int n_categories = 1;
838  std::vector<unsigned int> tight_category_map(n_active_cells, 0);
839  if (cell_vectorization_categories.empty() == false)
840  {
841  AssertDimension(cell_vectorization_categories.size(),
843 
844  std::set<unsigned int> used_categories;
845  for (unsigned int i = 0; i < n_active_cells; ++i)
846  used_categories.insert(cell_vectorization_categories[i]);
847  std::vector<unsigned int> used_categories_vector(
848  used_categories.size());
849  n_categories = 0;
850  for (const auto &it : used_categories)
851  used_categories_vector[n_categories++] = it;
852  for (unsigned int i = 0; i < n_active_cells; ++i)
853  {
854  const unsigned int index =
855  std::lower_bound(used_categories_vector.begin(),
856  used_categories_vector.end(),
857  cell_vectorization_categories[i]) -
858  used_categories_vector.begin();
859  AssertIndexRange(index, used_categories_vector.size());
860  tight_category_map[i] = index;
861  }
862  }
863 
864  // Step 2: Sort the cells by the category. If we want to fill up the
865  // ranges in vectorization, promote some of the cells to a higher
866  // category
867  std::vector<std::vector<unsigned int>> renumbering_category(n_categories);
868  for (unsigned int i = 0; i < n_active_cells; ++i)
869  renumbering_category[tight_category_map[i]].push_back(i);
870 
871  if (cell_vectorization_categories_strict == false && n_categories > 1)
872  for (unsigned int j = n_categories - 1; j > 0; --j)
873  {
874  unsigned int lower_index = j - 1;
875  while ((renumbering_category[j].size() % n_lanes) != 0u)
876  {
877  while (((renumbering_category[j].size() % n_lanes) != 0u) &&
878  !renumbering_category[lower_index].empty())
879  {
880  renumbering_category[j].push_back(
881  renumbering_category[lower_index].back());
882  renumbering_category[lower_index].pop_back();
883  }
884  if (lower_index == 0)
885  break;
886  else
887  --lower_index;
888  }
889  }
890 
891  // Step 3: Use the parent relation to find a good grouping of cells. To
892  // do this, we first put cells of each category defined above into two
893  // bins, those which we know can be grouped together by the given parent
894  // relation and those which cannot
895  std::vector<unsigned int> temporary_numbering;
896  temporary_numbering.reserve(n_active_cells +
897  (n_lanes - 1) * n_categories);
898  const unsigned int n_cells_per_parent =
899  std::count(parent_relation.begin(), parent_relation.end(), 0);
900  std::vector<unsigned int> category_size;
901  for (unsigned int j = 0; j < n_categories; ++j)
902  {
903  std::vector<std::pair<unsigned int, unsigned int>> grouped_cells;
904  std::vector<unsigned int> other_cells;
905  for (const unsigned int cell : renumbering_category[j])
906  if (parent_relation.empty() ||
907  parent_relation[cell] == numbers::invalid_unsigned_int)
908  other_cells.push_back(cell);
909  else
910  grouped_cells.emplace_back(parent_relation[cell], cell);
911 
912  // Compute the number of cells per group
913  std::sort(grouped_cells.begin(), grouped_cells.end());
914  std::vector<unsigned int> n_cells_per_group;
915  unsigned int length = 0;
916  for (unsigned int i = 0; i < grouped_cells.size(); ++i, ++length)
917  if (i > 0 && grouped_cells[i].first != grouped_cells[i - 1].first)
918  {
919  n_cells_per_group.push_back(length);
920  length = 0;
921  }
922  if (length > 0)
923  n_cells_per_group.push_back(length);
924 
925  // Move groups that do not have the complete size (due to
926  // categories) to the 'other_cells'. The cells with correct group
927  // size are immediately appended to the temporary cell numbering
928  auto group_it = grouped_cells.begin();
929  for (unsigned int length : n_cells_per_group)
930  if (length < n_cells_per_parent)
931  for (unsigned int j = 0; j < length; ++j)
932  other_cells.push_back((group_it++)->second);
933  else
934  {
935  // we should not have more cells in a group than in the first
936  // check we did above
937  AssertDimension(length, n_cells_per_parent);
938  for (unsigned int j = 0; j < length; ++j)
939  temporary_numbering.push_back((group_it++)->second);
940  }
941 
942  // Sort the remaining cells and append them as well
943  std::sort(other_cells.begin(), other_cells.end());
944  temporary_numbering.insert(temporary_numbering.end(),
945  other_cells.begin(),
946  other_cells.end());
947 
948  while (temporary_numbering.size() % n_lanes != 0)
949  temporary_numbering.push_back(numbers::invalid_unsigned_int);
950 
951  category_size.push_back(temporary_numbering.size());
952  }
953 
954  // Step 4: Identify the batches with cells marked as "comm"
955  std::vector<bool> batch_with_comm(temporary_numbering.size() / n_lanes,
956  false);
957  std::vector<unsigned int> temporary_numbering_inverse(n_active_cells);
958  for (unsigned int i = 0; i < temporary_numbering.size(); ++i)
959  if (temporary_numbering[i] != numbers::invalid_unsigned_int)
960  temporary_numbering_inverse[temporary_numbering[i]] = i;
961  for (const unsigned int cell : cells_with_comm)
962  batch_with_comm[temporary_numbering_inverse[cell] / n_lanes] = true;
963 
964  // Step 5: Sort the batches of cells by their last cell index to get
965  // good locality, assuming that the initial cell order is of good
966  // locality. In case we have hp-calculations with categories, we need to
967  // sort also by the category.
968  std::vector<std::array<unsigned int, 3>> batch_order;
969  std::vector<std::array<unsigned int, 3>> batch_order_comm;
970  for (unsigned int i = 0; i < temporary_numbering.size(); i += n_lanes)
971  {
972  unsigned int max_index = 0;
973  for (unsigned int j = 0; j < n_lanes; ++j)
974  if (temporary_numbering[i + j] < numbers::invalid_unsigned_int)
975  max_index = std::max(temporary_numbering[i + j], max_index);
976  const unsigned int category_hp =
977  categories_are_hp ?
978  std::upper_bound(category_size.begin(), category_size.end(), i) -
979  category_size.begin() :
980  0;
981  const std::array<unsigned int, 3> next{{category_hp, max_index, i}};
982  if (batch_with_comm[i / n_lanes])
983  batch_order_comm.emplace_back(next);
984  else
985  batch_order.emplace_back(next);
986  }
987 
988  std::sort(batch_order.begin(), batch_order.end());
989  std::sort(batch_order_comm.begin(), batch_order_comm.end());
990 
991  // Step 6: Put the cells with communication in the middle of the cell
992  // range. For the MPI case, we need three groups to enable overlap for
993  // communication and computation (part before comm, part with comm, part
994  // after comm), whereas we need one for the other case. And in each
995  // case, we allow for a slot of "ghosted" cells.
996  std::vector<unsigned int> blocks;
997  if (n_procs == 1)
998  {
999  if (batch_order.empty())
1000  std::swap(batch_order_comm, batch_order);
1001  Assert(batch_order_comm.empty(), ExcInternalError());
1002  partition_row_index.resize(3);
1003  blocks = {0, static_cast<unsigned int>(batch_order.size())};
1004  }
1005  else
1006  {
1007  partition_row_index.resize(5);
1008  const unsigned int comm_begin = batch_order.size() / 2;
1009  batch_order.insert(batch_order.begin() + comm_begin,
1010  batch_order_comm.begin(),
1011  batch_order_comm.end());
1012  const unsigned int comm_end = comm_begin + batch_order_comm.size();
1013  const unsigned int end = batch_order.size();
1014  blocks = {0, comm_begin, comm_end, end};
1015  }
1016 
1017  // Step 7: Fill in the data by batches for the locally owned cells.
1018  const unsigned int n_cell_batches = batch_order.size();
1019  const unsigned int n_ghost_batches =
1020  (n_ghost_cells + n_lanes - 1) / n_lanes;
1021  incompletely_filled_vectorization.resize(n_cell_batches +
1022  n_ghost_batches);
1023 
1024  cell_partition_data.clear();
1025  cell_partition_data.resize(1, 0);
1026 
1027  renumbering.clear();
1028  renumbering.resize(n_active_cells + n_ghost_cells,
1030 
1031  unsigned int counter = 0;
1032  for (unsigned int block = 0; block < blocks.size() - 1; ++block)
1033  {
1034  const unsigned int grain_size =
1035  std::max((2048U / dofs_per_cell) / 8 * 4, 2U);
1036  for (unsigned int k = blocks[block]; k < blocks[block + 1];
1037  k += grain_size)
1038  cell_partition_data.push_back(
1039  std::min(k + grain_size, blocks[block + 1]));
1040  partition_row_index[block + 1] = cell_partition_data.size() - 1;
1041 
1042  // Set the numbering according to the reordered temporary one
1043  for (unsigned int k = blocks[block]; k < blocks[block + 1]; ++k)
1044  {
1045  const unsigned int pos = batch_order[k][2];
1046  unsigned int j = 0;
1047  for (; j < n_lanes && temporary_numbering[pos + j] !=
1049  ++j)
1050  renumbering[counter++] = temporary_numbering[pos + j];
1051  if (j < n_lanes)
1052  incompletely_filled_vectorization[k] = j;
1053  }
1054  }
1055  AssertDimension(counter, n_active_cells);
1056 
1057  // Step 8: Treat the ghost cells
1058  for (unsigned int cell = n_active_cells;
1059  cell < n_active_cells + n_ghost_cells;
1060  ++cell)
1061  {
1062  if (!cell_vectorization_categories.empty())
1063  AssertDimension(cell_vectorization_categories[cell],
1064  cell_vectorization_categories[n_active_cells]);
1065  renumbering[cell] = cell;
1066  }
1067  if ((n_ghost_cells % n_lanes) != 0u)
1068  incompletely_filled_vectorization.back() = n_ghost_cells % n_lanes;
1069  cell_partition_data.push_back(n_cell_batches + n_ghost_batches);
1070  partition_row_index.back() = cell_partition_data.size() - 1;
1071 
1072 #ifdef DEBUG
1073  std::vector<unsigned int> renumber_cpy(renumbering);
1074  std::sort(renumber_cpy.begin(), renumber_cpy.end());
1075  for (unsigned int i = 0; i < renumber_cpy.size(); ++i)
1076  AssertDimension(i, renumber_cpy[i]);
1077 #endif
1078  }
1079 
1080 
1081 
1082  void
1084  const std::vector<unsigned int> &boundary_cells,
1085  std::vector<unsigned int> & renumbering,
1086  std::vector<unsigned char> & incompletely_filled_vectorization)
1087  {
1088  const unsigned int n_cell_batches =
1090  const unsigned int n_ghost_slots =
1092  incompletely_filled_vectorization.resize(n_cell_batches + n_ghost_slots);
1093  if (n_cell_batches * vectorization_length > n_active_cells)
1094  incompletely_filled_vectorization[n_cell_batches - 1] =
1096  (n_cell_batches * vectorization_length - n_active_cells);
1097  if (n_ghost_slots * vectorization_length > n_ghost_cells)
1098  incompletely_filled_vectorization[n_cell_batches + n_ghost_slots - 1] =
1100  (n_ghost_slots * vectorization_length - n_ghost_cells);
1101 
1102  std::vector<unsigned int> reverse_numbering(
1104  for (unsigned int j = 0; j < boundary_cells.size(); ++j)
1105  reverse_numbering[boundary_cells[j]] = j;
1106  unsigned int counter = boundary_cells.size();
1107  for (unsigned int j = 0; j < n_active_cells; ++j)
1108  if (reverse_numbering[j] == numbers::invalid_unsigned_int)
1109  reverse_numbering[j] = counter++;
1110 
1111  AssertDimension(counter, n_active_cells);
1112  renumbering = Utilities::invert_permutation(reverse_numbering);
1113 
1114  for (unsigned int j = n_active_cells; j < n_active_cells + n_ghost_cells;
1115  ++j)
1116  renumbering.push_back(j);
1117 
1118  // TODO: might be able to simplify this code by not relying on the cell
1119  // partition data while computing the thread graph
1120  cell_partition_data.clear();
1121  cell_partition_data.push_back(0);
1122  if (n_procs > 1)
1123  {
1124  const unsigned int n_macro_boundary_cells =
1125  (boundary_cells.size() + vectorization_length - 1) /
1127  cell_partition_data.push_back(
1128  (n_cell_batches - n_macro_boundary_cells) / 2);
1130  n_macro_boundary_cells);
1131  }
1132  else
1133  AssertDimension(boundary_cells.size(), 0);
1134  cell_partition_data.push_back(n_cell_batches);
1135  cell_partition_data.push_back(cell_partition_data.back() + n_ghost_slots);
1136  partition_row_index.resize(n_procs > 1 ? 4 : 2);
1137  partition_row_index[0] = 0;
1138  partition_row_index[1] = 1;
1139  if (n_procs > 1)
1140  {
1141  partition_row_index[2] = 2;
1142  partition_row_index[3] = 3;
1143  }
1144  }
1145 
1146 
1147 
1148  void
1149  TaskInfo::guess_block_size(const unsigned int dofs_per_cell)
1150  {
1151  // user did not say a positive number, so we have to guess
1152  if (block_size == 0)
1153  {
1154  // we would like to have enough work to do, so as first guess, try
1155  // to get 16 times as many chunks as we have threads on the system.
1158 
1159  // if there are too few degrees of freedom per cell, need to
1160  // increase the block size
1161  const unsigned int minimum_parallel_grain_size = 200;
1162  if (dofs_per_cell * block_size < minimum_parallel_grain_size)
1163  block_size = (minimum_parallel_grain_size / dofs_per_cell + 1);
1164  if (dofs_per_cell * block_size > 10000)
1165  block_size /= 4;
1166 
1167  block_size =
1168  1 << static_cast<unsigned int>(std::log2(block_size + 1));
1169  }
1170  if (block_size > n_active_cells)
1172  }
1173 
1174 
1175 
1176  void
1178  DynamicSparsityPattern & connectivity_large,
1179  std::vector<unsigned int> & renumbering,
1180  std::vector<unsigned char> &irregular_cells,
1181  const bool)
1182  {
1183  const unsigned int n_cell_batches = *(cell_partition_data.end() - 2);
1184  if (n_cell_batches == 0)
1185  return;
1186 
1188 
1189  unsigned int partition = 0, counter = 0;
1190 
1191  // Create connectivity graph for blocks based on connectivity graph for
1192  // cells.
1193  DynamicSparsityPattern connectivity(n_blocks, n_blocks);
1194  make_connectivity_cells_to_blocks(irregular_cells,
1195  connectivity_large,
1196  connectivity);
1197 
1198  // Create cell-block partitioning.
1199 
1200  // For each block of cells, this variable saves to which partitions the
1201  // block belongs. Initialize all to -1 to mark them as not yet assigned
1202  // a partition.
1203  std::vector<unsigned int> cell_partition(n_blocks,
1205 
1206  // In element j of this variable, one puts the old number of the block
1207  // that should be the jth block in the new numeration.
1208  std::vector<unsigned int> partition_list(n_blocks, 0);
1209  std::vector<unsigned int> partition_color_list(n_blocks, 0);
1210 
1211  // This vector points to the start of each partition.
1212  std::vector<unsigned int> partition_size(2, 0);
1213 
1214  // blocking_connectivity = true;
1215 
1216  // The cluster_size in make_partitioning defines that the no. of cells
1217  // in each partition should be a multiple of cluster_size.
1218  unsigned int cluster_size = 1;
1219 
1220  // Make the partitioning of the first layer of the blocks of cells.
1221  make_partitioning(connectivity,
1222  cluster_size,
1223  cell_partition,
1224  partition_list,
1225  partition_size,
1226  partition);
1227 
1228  // Color the cells within each partition
1230  partition,
1231  cell_partition,
1232  partition_list,
1233  partition_size,
1234  partition_color_list);
1235 
1236  partition_list = renumbering;
1237 
1238 #ifdef DEBUG
1239  // in debug mode, check that the partition color list is one-to-one
1240  {
1241  std::vector<unsigned int> sorted_pc_list(partition_color_list);
1242  std::sort(sorted_pc_list.begin(), sorted_pc_list.end());
1243  for (unsigned int i = 0; i < sorted_pc_list.size(); ++i)
1244  Assert(sorted_pc_list[i] == i, ExcInternalError());
1245  }
1246 #endif
1247 
1248  // set the start list for each block and compute the renumbering of
1249  // cells
1250  std::vector<unsigned int> block_start(n_cell_batches + 1);
1251  std::vector<unsigned char> irregular(n_cell_batches);
1252 
1253  unsigned int mcell_start = 0;
1254  block_start[0] = 0;
1255  for (unsigned int block = 0; block < n_blocks; ++block)
1256  {
1257  block_start[block + 1] = block_start[block];
1258  for (unsigned int mcell = mcell_start;
1259  mcell < std::min(mcell_start + block_size, n_cell_batches);
1260  ++mcell)
1261  {
1262  unsigned int n_comp = (irregular_cells[mcell] > 0) ?
1263  irregular_cells[mcell] :
1265  block_start[block + 1] += n_comp;
1266  ++counter;
1267  }
1268  mcell_start += block_size;
1269  }
1270  counter = 0;
1271  unsigned int counter_macro = 0;
1272  unsigned int block_size_last =
1273  n_cell_batches - block_size * (n_blocks - 1);
1274  if (block_size_last == 0)
1275  block_size_last = block_size;
1276 
1277  unsigned int tick = 0;
1278  for (unsigned int block = 0; block < n_blocks; ++block)
1279  {
1280  unsigned int present_block = partition_color_list[block];
1281  for (unsigned int cell = block_start[present_block];
1282  cell < block_start[present_block + 1];
1283  ++cell)
1284  renumbering[counter++] = partition_list[cell];
1285  unsigned int this_block_size =
1286  (present_block == n_blocks - 1) ? block_size_last : block_size;
1287 
1288  // Also re-compute the content of cell_partition_data to
1289  // contain the numbers of cells, not blocks
1290  if (cell_partition_data[tick] == block)
1291  cell_partition_data[tick++] = counter_macro;
1292 
1293  for (unsigned int j = 0; j < this_block_size; ++j)
1294  irregular[counter_macro++] =
1295  irregular_cells[present_block * block_size + j];
1296  }
1297  AssertDimension(tick + 1, cell_partition_data.size());
1298  cell_partition_data.back() = counter_macro;
1299 
1300  irregular_cells.swap(irregular);
1301  AssertDimension(counter, n_active_cells);
1302  AssertDimension(counter_macro, n_cell_batches);
1303 
1304  // check that the renumbering is one-to-one
1305 #ifdef DEBUG
1306  {
1307  std::vector<unsigned int> sorted_renumbering(renumbering);
1308  std::sort(sorted_renumbering.begin(), sorted_renumbering.end());
1309  for (unsigned int i = 0; i < sorted_renumbering.size(); ++i)
1310  Assert(sorted_renumbering[i] == i, ExcInternalError());
1311  }
1312 #endif
1313 
1314 
1316  partition); // Actually sets too much for partition color case
1317 
1318  AssertDimension(cell_partition_data.back(), n_cell_batches);
1319  }
1320 
1321 
1322 
1323  void
1325  const std::vector<unsigned int> &cell_active_fe_index,
1326  DynamicSparsityPattern & connectivity,
1327  std::vector<unsigned int> & renumbering,
1328  std::vector<unsigned char> & irregular_cells,
1329  const bool hp_bool)
1330  {
1331  const unsigned int n_cell_batches = *(cell_partition_data.end() - 2);
1332  if (n_cell_batches == 0)
1333  return;
1334 
1336 
1337  // if we want to block before partitioning, create connectivity graph
1338  // for blocks based on connectivity graph for cells.
1339  DynamicSparsityPattern connectivity_blocks(n_blocks, n_blocks);
1340  make_connectivity_cells_to_blocks(irregular_cells,
1341  connectivity,
1342  connectivity_blocks);
1343 
1344  unsigned int n_blocks = 0;
1345  if (scheme == partition_color ||
1346  scheme == color) // blocking_connectivity == true
1347  n_blocks = this->n_blocks;
1348  else
1349  n_blocks = n_active_cells;
1350 
1351  // For each block of cells, this variable saves to which partitions the
1352  // block belongs. Initialize all to -1 to mark them as not yet assigned
1353  // a partition.
1354  std::vector<unsigned int> cell_partition(n_blocks,
1356 
1357  // In element j of this variable, one puts the old number (but after
1358  // renumbering according to the input renumbering) of the block that
1359  // should be the jth block in the new numeration.
1360  std::vector<unsigned int> partition_list(n_blocks, 0);
1361  std::vector<unsigned int> partition_2layers_list(n_blocks, 0);
1362 
1363  // This vector points to the start of each partition.
1364  std::vector<unsigned int> partition_size(2, 0);
1365 
1366  unsigned int partition = 0;
1367 
1368  // Within the partitions we want to be able to block for the case that
1369  // we do not block already in the connectivity. The cluster_size in
1370  // make_partitioning defines that the no. of cells in each partition
1371  // should be a multiple of cluster_size.
1372  unsigned int cluster_size = 1;
1373  if (scheme == partition_partition)
1374  cluster_size = block_size * vectorization_length;
1375 
1376  // Make the partitioning of the first layer of the blocks of cells.
1377  if (scheme == partition_color || scheme == color)
1378  make_partitioning(connectivity_blocks,
1379  cluster_size,
1380  cell_partition,
1381  partition_list,
1382  partition_size,
1383  partition);
1384  else
1385  make_partitioning(connectivity,
1386  cluster_size,
1387  cell_partition,
1388  partition_list,
1389  partition_size,
1390  partition);
1391 
1392  // Partition or color second layer
1393  if (scheme == partition_partition)
1394 
1395  {
1396  // Partition within partitions.
1398  connectivity,
1399  cell_active_fe_index,
1400  partition,
1401  cluster_size,
1402  hp_bool,
1403  cell_partition,
1404  partition_list,
1405  partition_size,
1406  partition_2layers_list,
1407  irregular_cells);
1408  }
1409  else if (scheme == partition_color || scheme == color)
1410  {
1411  make_coloring_within_partitions_pre_blocked(connectivity_blocks,
1412  partition,
1413  cell_partition,
1414  partition_list,
1415  partition_size,
1416  partition_2layers_list);
1417  }
1418 
1419  // in debug mode, check that the partition_2layers_list is one-to-one
1420 #ifdef DEBUG
1421  {
1422  std::vector<unsigned int> sorted_pc_list(partition_2layers_list);
1423  std::sort(sorted_pc_list.begin(), sorted_pc_list.end());
1424  for (unsigned int i = 0; i < sorted_pc_list.size(); ++i)
1425  Assert(sorted_pc_list[i] == i, ExcInternalError());
1426  }
1427 #endif
1428 
1429  // Set the new renumbering
1430  std::vector<unsigned int> renumbering_in(n_active_cells, 0);
1431  renumbering_in.swap(renumbering);
1432  if (scheme == partition_partition) // blocking_connectivity == false
1433  {
1434  // This is the simple case. The renumbering is just a combination of
1435  // the renumbering that we were given as an input and the
1436  // renumbering of partition/coloring given in partition_2layers_list
1437  for (unsigned int j = 0; j < renumbering.size(); ++j)
1438  renumbering[j] = renumbering_in[partition_2layers_list[j]];
1439  // Account for the ghost cells, finally.
1440  for (unsigned int i = 0; i < n_ghost_cells; ++i)
1441  renumbering.push_back(i + n_active_cells);
1442  }
1443  else
1444  {
1445  // set the start list for each block and compute the renumbering of
1446  // cells
1447  std::vector<unsigned int> block_start(n_cell_batches + 1);
1448  std::vector<unsigned char> irregular(n_cell_batches);
1449 
1450  unsigned int counter = 0;
1451  unsigned int mcell_start = 0;
1452  block_start[0] = 0;
1453  for (unsigned int block = 0; block < n_blocks; ++block)
1454  {
1455  block_start[block + 1] = block_start[block];
1456  for (unsigned int mcell = mcell_start;
1457  mcell < std::min(mcell_start + block_size, n_cell_batches);
1458  ++mcell)
1459  {
1460  unsigned int n_comp = (irregular_cells[mcell] > 0) ?
1461  irregular_cells[mcell] :
1463  block_start[block + 1] += n_comp;
1464  ++counter;
1465  }
1466  mcell_start += block_size;
1467  }
1468  counter = 0;
1469  unsigned int counter_macro = 0;
1470  unsigned int block_size_last =
1471  n_cell_batches - block_size * (n_blocks - 1);
1472  if (block_size_last == 0)
1473  block_size_last = block_size;
1474 
1475  unsigned int tick = 0;
1476  for (unsigned int block = 0; block < n_blocks; ++block)
1477  {
1478  unsigned int present_block = partition_2layers_list[block];
1479  for (unsigned int cell = block_start[present_block];
1480  cell < block_start[present_block + 1];
1481  ++cell)
1482  renumbering[counter++] = renumbering_in[cell];
1483  unsigned int this_block_size =
1484  (present_block == n_blocks - 1) ? block_size_last : block_size;
1485 
1486  // Also re-compute the content of cell_partition_data to
1487  // contain the numbers of cells, not blocks
1488  if (cell_partition_data[tick] == block)
1489  cell_partition_data[tick++] = counter_macro;
1490 
1491  for (unsigned int j = 0; j < this_block_size; ++j)
1492  irregular[counter_macro++] =
1493  irregular_cells[present_block * block_size + j];
1494  }
1495  AssertDimension(tick + 1, cell_partition_data.size());
1496  cell_partition_data.back() = counter_macro;
1497 
1498  irregular_cells.swap(irregular);
1499  AssertDimension(counter, n_active_cells);
1500  AssertDimension(counter_macro, n_cell_batches);
1501  // check that the renumbering is one-to-one
1502 #ifdef DEBUG
1503  {
1504  std::vector<unsigned int> sorted_renumbering(renumbering);
1505  std::sort(sorted_renumbering.begin(), sorted_renumbering.end());
1506  for (unsigned int i = 0; i < sorted_renumbering.size(); ++i)
1507  Assert(sorted_renumbering[i] == i, ExcInternalError());
1508  }
1509 #endif
1510  }
1511 
1512  // Update the task_info with the more information for the thread graph.
1514  }
1515 
1516 
1517 
1518  void
1520  const std::vector<unsigned int> &cell_active_fe_index,
1521  DynamicSparsityPattern & connectivity,
1522  std::vector<unsigned int> & renumbering,
1523  std::vector<unsigned char> & irregular_cells,
1524  const bool hp_bool)
1525  {
1526  const unsigned int n_cell_batches = *(cell_partition_data.end() - 2);
1527  if (n_cell_batches == 0)
1528  return;
1529 
1530  const unsigned int cluster_size = block_size * vectorization_length;
1531 
1532  // Create cell-block partitioning.
1533 
1534  // For each block of cells, this variable saves to which partitions the
1535  // block belongs. Initialize all to n_cell_batches to mark them as not
1536  // yet assigned a partition.
1537  std::vector<unsigned int> cell_partition(n_active_cells,
1539 
1540 
1541  // In element j of this variable, one puts the old number of the block
1542  // that should be the jth block in the new numeration.
1543  std::vector<unsigned int> partition_list(n_active_cells, 0);
1544  std::vector<unsigned int> partition_partition_list(n_active_cells, 0);
1545 
1546  // This vector points to the start of each partition.
1547  std::vector<unsigned int> partition_size(2, 0);
1548 
1549  unsigned int partition = 0;
1550  // Here, we do not block inside the connectivity graph
1551  // blocking_connectivity = false;
1552 
1553  // Make the partitioning of the first layer of the blocks of cells.
1554  make_partitioning(connectivity,
1555  cluster_size,
1556  cell_partition,
1557  partition_list,
1558  partition_size,
1559  partition);
1560 
1561  // Partition within partitions.
1563  cell_active_fe_index,
1564  partition,
1565  cluster_size,
1566  hp_bool,
1567  cell_partition,
1568  partition_list,
1569  partition_size,
1570  partition_partition_list,
1571  irregular_cells);
1572 
1573  partition_list.swap(renumbering);
1574 
1575  for (unsigned int j = 0; j < renumbering.size(); ++j)
1576  renumbering[j] = partition_list[partition_partition_list[j]];
1577 
1578  for (unsigned int i = 0; i < n_ghost_cells; ++i)
1579  renumbering.push_back(i + n_active_cells);
1580 
1582  }
1583 
1584 
1585 
1586  void
1588  const std::vector<unsigned char> &irregular_cells,
1589  const DynamicSparsityPattern & connectivity_cells,
1590  DynamicSparsityPattern & connectivity_blocks) const
1591  {
1592  std::vector<std::vector<unsigned int>> cell_blocks(n_blocks);
1593  std::vector<unsigned int> touched_cells(n_active_cells);
1594  unsigned int cell = 0;
1595  for (unsigned int i = 0, mcell = 0; i < n_blocks; ++i)
1596  {
1597  for (unsigned int c = 0;
1598  c < block_size && mcell < *(cell_partition_data.end() - 2);
1599  ++c, ++mcell)
1600  {
1601  unsigned int ncomp = (irregular_cells[mcell] > 0) ?
1602  irregular_cells[mcell] :
1604  for (unsigned int c = 0; c < ncomp; ++c, ++cell)
1605  {
1606  cell_blocks[i].push_back(cell);
1607  touched_cells[cell] = i;
1608  }
1609  }
1610  }
1612  for (unsigned int i = 0; i < cell_blocks.size(); ++i)
1613  for (unsigned int col = 0; col < cell_blocks[i].size(); ++col)
1614  {
1616  connectivity_cells.begin(cell_blocks[i][col]);
1617  it != connectivity_cells.end(cell_blocks[i][col]);
1618  ++it)
1619  {
1620  if (touched_cells[it->column()] != i)
1621  connectivity_blocks.add(i, touched_cells[it->column()]);
1622  }
1623  }
1624  }
1625 
1626 
1627 
1628  // Function to create partitioning on the second layer within each
1629  // partition. Version without preblocking.
1630  void
1632  const DynamicSparsityPattern & connectivity,
1633  const std::vector<unsigned int> &cell_active_fe_index,
1634  const unsigned int partition,
1635  const unsigned int cluster_size,
1636  const bool hp_bool,
1637  const std::vector<unsigned int> &cell_partition,
1638  const std::vector<unsigned int> &partition_list,
1639  const std::vector<unsigned int> &partition_size,
1640  std::vector<unsigned int> & partition_partition_list,
1641  std::vector<unsigned char> & irregular_cells)
1642  {
1643  const unsigned int n_cell_batches = *(cell_partition_data.end() - 2);
1644  const unsigned int n_ghost_slots =
1645  *(cell_partition_data.end() - 1) - n_cell_batches;
1646 
1647  // List of cells in previous partition
1648  std::vector<unsigned int> neighbor_list;
1649  // List of cells in current partition for use as neighbors in next
1650  // partition
1651  std::vector<unsigned int> neighbor_neighbor_list;
1652 
1653  std::vector<unsigned int> renumbering(n_active_cells);
1654 
1655  irregular_cells.back() = 0;
1656  irregular_cells.resize(n_active_cells + n_ghost_slots);
1657 
1658  unsigned int max_fe_index = 0;
1659  for (const unsigned int fe_index : cell_active_fe_index)
1660  max_fe_index = std::max(fe_index, max_fe_index);
1661 
1662  Assert(!hp_bool || cell_active_fe_index.size() == n_active_cells,
1663  ExcInternalError());
1664 
1665  {
1666  unsigned int n_cell_batches_before = 0;
1667  // Create partitioning within partitions.
1668 
1669  // For each block of cells, this variable saves to which partitions
1670  // the block belongs. Initialize all to n_cell_batches to mark them as
1671  // not yet assigned a partition.
1672  std::vector<unsigned int> cell_partition_l2(
1674  partition_row_index.clear();
1675  partition_row_index.resize(partition + 1, 0);
1676  cell_partition_data.resize(1, 0);
1677 
1678  unsigned int counter = 0;
1679  unsigned int missing_macros;
1680  for (unsigned int part = 0; part < partition; ++part)
1681  {
1682  neighbor_neighbor_list.resize(0);
1683  neighbor_list.resize(0);
1684  bool work = true;
1685  unsigned int partition_l2 = 0;
1686  unsigned int start_up = partition_size[part];
1687  unsigned int partition_counter = 0;
1688  while (work)
1689  {
1690  if (neighbor_list.size() == 0)
1691  {
1692  work = false;
1693  partition_counter = 0;
1694  for (unsigned int j = start_up;
1695  j < partition_size[part + 1];
1696  ++j)
1697  if (cell_partition[partition_list[j]] == part &&
1698  cell_partition_l2[partition_list[j]] ==
1700  {
1701  start_up = j;
1702  work = true;
1703  partition_counter = 1;
1704  // To start up, set the start_up cell to partition
1705  // and list all its neighbors.
1706  AssertIndexRange(start_up, partition_size[part + 1]);
1707  cell_partition_l2[partition_list[start_up]] =
1708  partition_l2;
1709  neighbor_neighbor_list.push_back(
1710  partition_list[start_up]);
1711  partition_partition_list[counter++] =
1712  partition_list[start_up];
1713  start_up++;
1714  break;
1715  }
1716  }
1717  else
1718  {
1719  partition_counter = 0;
1720  for (const unsigned int neighbor : neighbor_list)
1721  {
1722  Assert(cell_partition[neighbor] == part,
1723  ExcInternalError());
1724  Assert(cell_partition_l2[neighbor] == partition_l2 - 1,
1725  ExcInternalError());
1726  auto neighbor_it = connectivity.begin(neighbor);
1727  const auto end_it = connectivity.end(neighbor);
1728  for (; neighbor_it != end_it; ++neighbor_it)
1729  {
1730  if (cell_partition[neighbor_it->column()] == part &&
1731  cell_partition_l2[neighbor_it->column()] ==
1733  {
1734  cell_partition_l2[neighbor_it->column()] =
1735  partition_l2;
1736  neighbor_neighbor_list.push_back(
1737  neighbor_it->column());
1738  partition_partition_list[counter++] =
1739  neighbor_it->column();
1740  partition_counter++;
1741  }
1742  }
1743  }
1744  }
1745  if (partition_counter > 0)
1746  {
1747  int index_before = neighbor_neighbor_list.size(),
1748  index = index_before;
1749  {
1750  // put the cells into separate lists for each FE index
1751  // within one partition-partition
1752  missing_macros = 0;
1753  std::vector<unsigned int> remaining_per_cell_batch(
1754  max_fe_index + 1);
1755  std::vector<std::vector<unsigned int>>
1756  renumbering_fe_index;
1757  unsigned int cell;
1758  bool filled = true;
1759  if (hp_bool == true)
1760  {
1761  renumbering_fe_index.resize(max_fe_index + 1);
1762  for (cell = counter - partition_counter;
1763  cell < counter;
1764  ++cell)
1765  {
1766  renumbering_fe_index
1767  [cell_active_fe_index.empty() ?
1768  0 :
1769  cell_active_fe_index
1770  [partition_partition_list[cell]]]
1771  .push_back(partition_partition_list[cell]);
1772  }
1773  // check how many more cells are needed in the lists
1774  for (unsigned int j = 0; j < max_fe_index + 1; ++j)
1775  {
1776  remaining_per_cell_batch[j] =
1777  renumbering_fe_index[j].size() %
1779  if (remaining_per_cell_batch[j] != 0)
1780  filled = false;
1781  missing_macros +=
1782  ((renumbering_fe_index[j].size() +
1783  vectorization_length - 1) /
1785  }
1786  }
1787  else
1788  {
1789  remaining_per_cell_batch.resize(1);
1790  remaining_per_cell_batch[0] =
1791  partition_counter % vectorization_length;
1792  missing_macros =
1793  partition_counter / vectorization_length;
1794  if (remaining_per_cell_batch[0] != 0)
1795  {
1796  filled = false;
1797  missing_macros++;
1798  }
1799  }
1800  missing_macros =
1801  cluster_size - (missing_macros % cluster_size);
1802 
1803  // now we realized that there are some cells missing.
1804  while (missing_macros > 0 || filled == false)
1805  {
1806  if (index == 0)
1807  {
1808  index = neighbor_neighbor_list.size();
1809  if (index == index_before)
1810  {
1811  if (missing_macros != 0)
1812  {
1813  neighbor_neighbor_list.resize(0);
1814  }
1815  start_up--;
1816  break; // not connected - start again
1817  }
1818  index_before = index;
1819  }
1820  index--;
1821  unsigned int additional =
1822  neighbor_neighbor_list[index];
1823 
1824  // go through the neighbors of the last cell in the
1825  // current partition and check if we find some to
1826  // fill up with.
1828  connectivity.begin(
1829  additional),
1830  end =
1831  connectivity.end(
1832  additional);
1833  for (; neighbor != end; ++neighbor)
1834  {
1835  if (cell_partition[neighbor->column()] == part &&
1836  cell_partition_l2[neighbor->column()] ==
1838  {
1839  unsigned int this_index = 0;
1840  if (hp_bool == true)
1841  this_index =
1842  cell_active_fe_index.empty() ?
1843  0 :
1844  cell_active_fe_index[neighbor
1845  ->column()];
1846 
1847  // Only add this cell if we need more macro
1848  // cells in the current block or if there is
1849  // a macro cell with the FE index that is
1850  // not yet fully populated
1851  if (missing_macros > 0 ||
1852  remaining_per_cell_batch[this_index] > 0)
1853  {
1854  cell_partition_l2[neighbor->column()] =
1855  partition_l2;
1856  neighbor_neighbor_list.push_back(
1857  neighbor->column());
1858  if (hp_bool == true)
1859  renumbering_fe_index[this_index]
1860  .push_back(neighbor->column());
1861  partition_partition_list[counter] =
1862  neighbor->column();
1863  counter++;
1864  partition_counter++;
1865  if (remaining_per_cell_batch
1866  [this_index] == 0 &&
1867  missing_macros > 0)
1868  missing_macros--;
1869  remaining_per_cell_batch[this_index]++;
1870  if (remaining_per_cell_batch
1871  [this_index] ==
1873  {
1874  remaining_per_cell_batch[this_index] =
1875  0;
1876  }
1877  if (missing_macros == 0)
1878  {
1879  filled = true;
1880  for (unsigned int fe_ind = 0;
1881  fe_ind < max_fe_index + 1;
1882  ++fe_ind)
1883  if (remaining_per_cell_batch
1884  [fe_ind] != 0)
1885  filled = false;
1886  }
1887  if (filled == true)
1888  break;
1889  }
1890  }
1891  }
1892  }
1893  if (hp_bool == true)
1894  {
1895  // set the renumbering according to their active FE
1896  // index within one partition-partition which was
1897  // implicitly assumed above
1898  cell = counter - partition_counter;
1899  for (unsigned int j = 0; j < max_fe_index + 1; ++j)
1900  {
1901  for (const unsigned int jj :
1902  renumbering_fe_index[j])
1903  renumbering[cell++] = jj;
1904  if (renumbering_fe_index[j].size() %
1906  0)
1907  irregular_cells[renumbering_fe_index[j].size() /
1909  n_cell_batches_before] =
1910  renumbering_fe_index[j].size() %
1912  n_cell_batches_before +=
1913  (renumbering_fe_index[j].size() +
1914  vectorization_length - 1) /
1916  renumbering_fe_index[j].resize(0);
1917  }
1918  }
1919  else
1920  {
1921  n_cell_batches_before +=
1922  partition_counter / vectorization_length;
1923  if (partition_counter % vectorization_length != 0)
1924  {
1925  irregular_cells[n_cell_batches_before] =
1926  partition_counter % vectorization_length;
1927  n_cell_batches_before++;
1928  }
1929  }
1930  }
1931  cell_partition_data.push_back(n_cell_batches_before);
1932  partition_l2++;
1933  }
1934  neighbor_list = neighbor_neighbor_list;
1935  neighbor_neighbor_list.resize(0);
1936  }
1937  partition_row_index[part + 1] =
1938  partition_row_index[part] + partition_l2;
1939  }
1940  }
1941  if (hp_bool == true)
1942  {
1943  partition_partition_list.swap(renumbering);
1944  }
1945  }
1946 
1947 
1948 
1949  // Function to create coloring on the second layer within each partition.
1950  // Version assumes preblocking.
1951  void
1953  const DynamicSparsityPattern & connectivity,
1954  const unsigned int partition,
1955  const std::vector<unsigned int> &cell_partition,
1956  const std::vector<unsigned int> &partition_list,
1957  const std::vector<unsigned int> &partition_size,
1958  std::vector<unsigned int> & partition_color_list)
1959  {
1960  const unsigned int n_cell_batches = *(cell_partition_data.end() - 2);
1961  std::vector<unsigned int> cell_color(n_blocks, n_cell_batches);
1962  std::vector<bool> color_finder;
1963 
1964  partition_row_index.resize(partition + 1);
1965  cell_partition_data.clear();
1966  unsigned int color_counter = 0, index_counter = 0;
1967  for (unsigned int part = 0; part < partition; ++part)
1968  {
1969  partition_row_index[part] = index_counter;
1970  unsigned int max_color = 0;
1971  for (unsigned int k = partition_size[part];
1972  k < partition_size[part + 1];
1973  k++)
1974  {
1975  unsigned int cell = partition_list[k];
1976  unsigned int n_neighbors = connectivity.row_length(cell);
1977 
1978  // In the worst case, each neighbor has a different color. So we
1979  // find at least one available color between 0 and n_neighbors.
1980  color_finder.resize(n_neighbors + 1);
1981  for (unsigned int j = 0; j <= n_neighbors; ++j)
1982  color_finder[j] = true;
1984  connectivity.begin(cell),
1985  end = connectivity.end(cell);
1986  for (; neighbor != end; ++neighbor)
1987  {
1988  // Mark the color that a neighbor within the partition has
1989  // as taken
1990  if (cell_partition[neighbor->column()] == part &&
1991  cell_color[neighbor->column()] <= n_neighbors)
1992  color_finder[cell_color[neighbor->column()]] = false;
1993  }
1994  // Choose the smallest color that is not taken for the block
1995  cell_color[cell] = 0;
1996  while (color_finder[cell_color[cell]] == false)
1997  cell_color[cell]++;
1998  if (cell_color[cell] > max_color)
1999  max_color = cell_color[cell];
2000  }
2001  // Reorder within partition: First, all blocks that belong the 0 and
2002  // then so on until those with color max (Note that the smaller the
2003  // number the larger the partition)
2004  for (unsigned int color = 0; color <= max_color; ++color)
2005  {
2006  cell_partition_data.push_back(color_counter);
2007  index_counter++;
2008  for (unsigned int k = partition_size[part];
2009  k < partition_size[part + 1];
2010  k++)
2011  {
2012  unsigned int cell = partition_list[k];
2013  if (cell_color[cell] == color)
2014  {
2015  partition_color_list[color_counter++] = cell;
2016  }
2017  }
2018  }
2019  }
2020  cell_partition_data.push_back(n_blocks);
2021  partition_row_index[partition] = index_counter;
2022  AssertDimension(color_counter, n_blocks);
2023  }
2024 
2025 
2026  // Function to create partitioning on the first layer.
2027  void
2029  const unsigned int cluster_size,
2030  std::vector<unsigned int> & cell_partition,
2031  std::vector<unsigned int> & partition_list,
2032  std::vector<unsigned int> & partition_size,
2033  unsigned int & partition) const
2034 
2035  {
2036  // For each block of cells, this variable saves to which partitions the
2037  // block belongs. Initialize all to n_cell_batches to mark them as not
2038  // yet assigned a partition.
2039  // std::vector<unsigned int> cell_partition (n_active_cells,
2040  // numbers::invalid_unsigned_int);
2041  // List of cells in previous partition
2042  std::vector<unsigned int> neighbor_list;
2043  // List of cells in current partition for use as neighbors in next
2044  // partition
2045  std::vector<unsigned int> neighbor_neighbor_list;
2046 
2047  // In element j of this variable, one puts the old number of the block
2048  // that should be the jth block in the new numeration.
2049  // std::vector<unsigned int> partition_list(n_active_cells,0);
2050 
2051  // This vector points to the start of each partition.
2052  // std::vector<unsigned int> partition_size(2,0);
2053 
2054  partition = 0;
2055  unsigned int counter = 0;
2056  unsigned int start_nonboundary =
2057  cell_partition_data.size() == 5 ?
2060  0;
2061 
2062  const unsigned int n_cell_batches = *(cell_partition_data.end() - 2);
2063  if (n_cell_batches == 0)
2064  return;
2065  if (scheme == color)
2066  start_nonboundary = n_cell_batches;
2067  if (scheme == partition_color ||
2068  scheme == color) // blocking_connectivity == true
2069  start_nonboundary = ((start_nonboundary + block_size - 1) / block_size);
2070  unsigned int n_blocks;
2071  if (scheme == partition_color ||
2072  scheme == color) // blocking_connectivity == true
2073  n_blocks = this->n_blocks;
2074  else
2075  n_blocks = n_active_cells;
2076 
2077  if (start_nonboundary > n_blocks)
2078  start_nonboundary = n_blocks;
2079 
2080 
2081  unsigned int start_up = 0;
2082  bool work = true;
2083  unsigned int remainder = cluster_size;
2084 
2085  // this performs a classical breath-first search in the connectivity
2086  // graph of the cells under the restriction that the size of the
2087  // partitions should be a multiple of the given block size
2088  while (work)
2089  {
2090  // put the cells with neighbors on remote MPI processes up front
2091  if (start_nonboundary > 0)
2092  {
2093  for (unsigned int cell = 0; cell < start_nonboundary; ++cell)
2094  {
2095  const unsigned int cell_nn = cell;
2096  cell_partition[cell_nn] = partition;
2097  neighbor_list.push_back(cell_nn);
2098  partition_list[counter++] = cell_nn;
2099  partition_size.back()++;
2100  }
2101  start_nonboundary = 0;
2102  remainder -= (start_nonboundary % cluster_size);
2103  if (remainder == cluster_size)
2104  remainder = 0;
2105  }
2106  else
2107  {
2108  // To start up, set the start_up cell to partition and list all
2109  // its neighbors.
2110  cell_partition[start_up] = partition;
2111  neighbor_list.push_back(start_up);
2112  partition_list[counter++] = start_up;
2113  partition_size.back()++;
2114  start_up++;
2115  remainder--;
2116  if (remainder == cluster_size)
2117  remainder = 0;
2118  }
2119  int index_before = neighbor_list.size(), index = index_before,
2120  index_stop = 0;
2121  while (remainder > 0)
2122  {
2123  if (index == index_stop)
2124  {
2125  index = neighbor_list.size();
2126  if (index == index_before)
2127  {
2128  neighbor_list.resize(0);
2129  goto not_connect;
2130  }
2131  index_stop = index_before;
2132  index_before = index;
2133  }
2134  index--;
2135  unsigned int additional = neighbor_list[index];
2137  connectivity.begin(additional),
2138  end =
2139  connectivity.end(additional);
2140  for (; neighbor != end; ++neighbor)
2141  {
2142  if (cell_partition[neighbor->column()] ==
2144  {
2145  partition_size.back()++;
2146  cell_partition[neighbor->column()] = partition;
2147  neighbor_list.push_back(neighbor->column());
2148  partition_list[counter++] = neighbor->column();
2149  remainder--;
2150  if (remainder == 0)
2151  break;
2152  }
2153  }
2154  }
2155 
2156  while (neighbor_list.size() > 0)
2157  {
2158  partition++;
2159 
2160  // counter for number of cells so far in current partition
2161  unsigned int partition_counter = 0;
2162 
2163  // Mark the start of the new partition
2164  partition_size.push_back(partition_size.back());
2165 
2166  // Loop through the list of cells in previous partition and put
2167  // all their neighbors in current partition
2168  for (const unsigned int cell : neighbor_list)
2169  {
2170  Assert(cell_partition[cell] == partition - 1,
2171  ExcInternalError());
2172  auto neighbor = connectivity.begin(cell);
2173  const auto end = connectivity.end(cell);
2174  for (; neighbor != end; ++neighbor)
2175  {
2176  if (cell_partition[neighbor->column()] ==
2178  {
2179  partition_size.back()++;
2180  cell_partition[neighbor->column()] = partition;
2181 
2182  // collect the cells of the current partition for
2183  // use as neighbors in next partition
2184  neighbor_neighbor_list.push_back(neighbor->column());
2185  partition_list[counter++] = neighbor->column();
2186  partition_counter++;
2187  }
2188  }
2189  }
2190  remainder = cluster_size - (partition_counter % cluster_size);
2191  if (remainder == cluster_size)
2192  remainder = 0;
2193  int index_stop = 0;
2194  int index_before = neighbor_neighbor_list.size(),
2195  index = index_before;
2196  while (remainder > 0)
2197  {
2198  if (index == index_stop)
2199  {
2200  index = neighbor_neighbor_list.size();
2201  if (index == index_before)
2202  {
2203  neighbor_neighbor_list.resize(0);
2204  break;
2205  }
2206  index_stop = index_before;
2207  index_before = index;
2208  }
2209  index--;
2210  unsigned int additional = neighbor_neighbor_list[index];
2212  connectivity.begin(
2213  additional),
2214  end = connectivity.end(
2215  additional);
2216  for (; neighbor != end; ++neighbor)
2217  {
2218  if (cell_partition[neighbor->column()] ==
2220  {
2221  partition_size.back()++;
2222  cell_partition[neighbor->column()] = partition;
2223  neighbor_neighbor_list.push_back(neighbor->column());
2224  partition_list[counter++] = neighbor->column();
2225  remainder--;
2226  if (remainder == 0)
2227  break;
2228  }
2229  }
2230  }
2231 
2232  neighbor_list = neighbor_neighbor_list;
2233  neighbor_neighbor_list.resize(0);
2234  }
2235  not_connect:
2236  // One has to check if the graph is not connected so we have to find
2237  // another partition.
2238  work = false;
2239  for (unsigned int j = start_up; j < n_blocks; ++j)
2240  if (cell_partition[j] == numbers::invalid_unsigned_int)
2241  {
2242  start_up = j;
2243  work = true;
2244  if (remainder == 0)
2245  remainder = cluster_size;
2246  break;
2247  }
2248  }
2249  if (remainder != 0)
2250  partition++;
2251 
2252  AssertDimension(partition_size[partition], n_blocks);
2253  }
2254 
2255 
2256  void
2258  {
2259  evens = (partition + 1) / 2;
2260  odds = partition / 2;
2261  n_blocked_workers = odds - (odds + evens + 1) % 2;
2263  // From here only used for partition partition option.
2264  partition_evens.resize(partition);
2265  partition_odds.resize(partition);
2268  for (unsigned int part = 0; part < partition; ++part)
2269  {
2270  partition_evens[part] =
2271  (partition_row_index[part + 1] - partition_row_index[part] + 1) / 2;
2272  partition_odds[part] =
2273  (partition_row_index[part + 1] - partition_row_index[part]) / 2;
2275  partition_odds[part] -
2276  (partition_odds[part] + partition_evens[part] + 1) % 2;
2277  partition_n_workers[part] = partition_evens[part] +
2278  partition_odds[part] -
2280  }
2281  }
2282  } // namespace MatrixFreeFunctions
2283 } // namespace internal
2284 
2285 
2286 
2287 // explicit instantiations of template functions
2288 template void
2289 internal::MatrixFreeFunctions::TaskInfo::print_memory_statistics<std::ostream>(
2290  std::ostream &,
2291  const std::size_t) const;
2292 template void
2294  ConditionalOStream>(ConditionalOStream &, const std::size_t) const;
2295 
2296 
static unsigned int n_threads()
#define DEAL_II_NAMESPACE_OPEN
Definition: config.h:442
#define DEAL_II_NAMESPACE_CLOSE
Definition: config.h:443
Point< 2 > first
Definition: grid_out.cc:4603
static ::ExceptionBase & ExcInternalError()
#define Assert(cond, exc)
Definition: exceptions.h:1473
static ::ExceptionBase & ExcNotImplemented()
#define AssertDimension(dim1, dim2)
Definition: exceptions.h:1667
#define AssertIndexRange(index, range)
Definition: exceptions.h:1732
#define AssertThrow(cond, exc)
Definition: exceptions.h:1583
size_type row_length(const size_type row) const
void add(const size_type i, const size_type j)
std::enable_if< std::is_fundamental< T >::value, std::size_t >::type memory_consumption(const T &t)
void swap(MemorySpaceData< Number, MemorySpace > &, MemorySpaceData< Number, MemorySpace > &)
SymmetricTensor< 2, dim, Number > e(const Tensor< 2, dim, Number > &F)
void partition(const SparsityPattern &sparsity_pattern, const unsigned int n_partitions, std::vector< unsigned int > &partition_indices, const Partitioner partitioner=Partitioner::metis)
VectorType::value_type * end(VectorType &V)
MinMaxAvg min_max_avg(const double my_value, const MPI_Comm &mpi_communicator)
Definition: mpi.cc:114
Iterator lower_bound(Iterator first, Iterator last, const T &val)
Definition: utilities.h:1153
std::vector< Integer > invert_permutation(const std::vector< Integer > &permutation)
Definition: utilities.h:1813
unsigned int minimum_parallel_grain_size
Definition: parallel.cc:34
static const unsigned int invalid_unsigned_int
Definition: types.h:201
void parallel_for(Iterator x_begin, Iterator x_end, const Functor &functor, const unsigned int grainsize)
Definition: parallel.h:80
virtual void face(const unsigned int range_index)=0
virtual void zero_dst_vector_range(const unsigned int range_index)=0
virtual void cell(const std::pair< unsigned int, unsigned int > &cell_range)=0
virtual void boundary(const unsigned int range_index)=0
virtual void vector_compress_start()=0
Starts the communication for the vector compress operation.
virtual void cell_loop_post_range(const unsigned int range_index)=0
virtual void vector_update_ghosts_start()=0
Starts the communication for the update ghost values operation.
virtual void cell_loop_pre_range(const unsigned int range_index)=0
virtual void vector_update_ghosts_finish()=0
Finishes the communication for the update ghost values operation.
virtual void vector_compress_finish()=0
Finishes the communication for the vector compress operation.
std::size_t memory_consumption() const
Definition: task_info.cc:705
std::vector< unsigned int > boundary_partition_data
Definition: task_info.h:521
void loop(MFWorkerInterface &worker) const
Definition: task_info.cc:350
void make_thread_graph_partition_color(DynamicSparsityPattern &connectivity, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &irregular_cells, const bool hp_bool)
Definition: task_info.cc:1177
std::vector< unsigned int > partition_n_workers
Definition: task_info.h:579
void create_blocks_serial(const std::vector< unsigned int > &cells_with_comm, const unsigned int dofs_per_cell, const bool categories_are_hp, const std::vector< unsigned int > &cell_vectorization_categories, const bool cell_vectorization_categories_strict, const std::vector< unsigned int > &parent_relation, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &incompletely_filled_vectorization)
Definition: task_info.cc:794
void make_connectivity_cells_to_blocks(const std::vector< unsigned char > &irregular_cells, const DynamicSparsityPattern &connectivity_cells, DynamicSparsityPattern &connectivity_blocks) const
Definition: task_info.cc:1587
void make_partitioning_within_partitions_post_blocked(const DynamicSparsityPattern &connectivity, const std::vector< unsigned int > &cell_active_fe_index, const unsigned int partition, const unsigned int cluster_size, const bool hp_bool, const std::vector< unsigned int > &cell_partition, const std::vector< unsigned int > &partition_list, const std::vector< unsigned int > &partition_size, std::vector< unsigned int > &partition_partition_list, std::vector< unsigned char > &irregular_cells)
Definition: task_info.cc:1631
void initial_setup_blocks_tasks(const std::vector< unsigned int > &boundary_cells, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &incompletely_filled_vectorization)
Definition: task_info.cc:1083
void print_memory_statistics(StreamType &out, std::size_t data_length) const
Definition: task_info.cc:690
void make_coloring_within_partitions_pre_blocked(const DynamicSparsityPattern &connectivity, const unsigned int partition, const std::vector< unsigned int > &cell_partition, const std::vector< unsigned int > &partition_list, const std::vector< unsigned int > &partition_size, std::vector< unsigned int > &partition_color_list)
Definition: task_info.cc:1952
std::vector< unsigned int > partition_row_index
Definition: task_info.h:469
void make_thread_graph_partition_partition(const std::vector< unsigned int > &cell_active_fe_index, DynamicSparsityPattern &connectivity, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &irregular_cells, const bool hp_bool)
Definition: task_info.cc:1519
std::vector< unsigned int > partition_evens
Definition: task_info.h:561
void make_thread_graph(const std::vector< unsigned int > &cell_active_fe_index, DynamicSparsityPattern &connectivity, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &irregular_cells, const bool hp_bool)
Definition: task_info.cc:1324
std::vector< unsigned int > partition_n_blocked_workers
Definition: task_info.h:573
std::vector< unsigned int > cell_partition_data
Definition: task_info.h:477
void make_partitioning(const DynamicSparsityPattern &connectivity, const unsigned int cluster_size, std::vector< unsigned int > &cell_partition, std::vector< unsigned int > &partition_list, std::vector< unsigned int > &partition_size, unsigned int &partition) const
Definition: task_info.cc:2028
void update_task_info(const unsigned int partition)
Definition: task_info.cc:2257
void make_boundary_cells_divisible(std::vector< unsigned int > &boundary_cells)
Definition: task_info.cc:722
void guess_block_size(const unsigned int dofs_per_cell)
Definition: task_info.cc:1149
std::vector< unsigned int > partition_odds
Definition: task_info.h:567
std::vector< unsigned int > face_partition_data
Definition: task_info.h:499