Reference documentation for deal.II version 9.4.0
\(\newcommand{\dealvcentcolon}{\mathrel{\mathop{:}}}\) \(\newcommand{\dealcoloneq}{\dealvcentcolon\mathrel{\mkern-1.2mu}=}\) \(\newcommand{\jump}[1]{\left[\!\left[ #1 \right]\!\right]}\) \(\newcommand{\average}[1]{\left\{\!\left\{ #1 \right\}\!\right\}}\)
mpi.cc
Go to the documentation of this file.
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2005 - 2022 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15 
16 
18 #include <deal.II/base/index_set.h>
19 #include <deal.II/base/mpi.h>
20 #include <deal.II/base/mpi.templates.h>
23 #include <deal.II/base/mpi_tags.h>
25 #include <deal.II/base/utilities.h>
26 
30 
31 #include <boost/serialization/utility.hpp>
32 
33 #include <iostream>
34 #include <numeric>
35 #include <set>
36 #include <vector>
37 
38 #ifdef DEAL_II_WITH_TRILINOS
39 # ifdef DEAL_II_WITH_MPI
42 
43 # include <Epetra_MpiComm.h>
44 # endif
45 #endif
46 
47 #ifdef DEAL_II_WITH_PETSC
50 
51 # include <petscsys.h>
52 #endif
53 
54 #ifdef DEAL_II_WITH_SLEPC
56 
57 # include <slepcsys.h>
58 #endif
59 
60 #ifdef DEAL_II_WITH_P4EST
61 # include <p4est_bits.h>
62 #endif
63 
64 #ifdef DEAL_II_TRILINOS_WITH_ZOLTAN
65 # include <zoltan_cpp.h>
66 #endif
67 
69 
70 
71 namespace Utilities
72 {
73  IndexSet
74  create_evenly_distributed_partitioning(const unsigned int my_partition_id,
75  const unsigned int n_partitions,
76  const IndexSet::size_type total_size)
77  {
78  const unsigned int remain = total_size % n_partitions;
79 
80  const IndexSet::size_type min_size = total_size / n_partitions;
81 
83  min_size * my_partition_id + std::min(my_partition_id, remain);
84  const IndexSet::size_type end =
85  min_size * (my_partition_id + 1) + std::min(my_partition_id + 1, remain);
86  IndexSet result(total_size);
87  result.add_range(begin, end);
88  return result;
89  }
90 
91  namespace MPI
92  {
93 #ifdef DEAL_II_WITH_MPI
94  // Provide definitions of template variables for all valid instantiations.
95  template const MPI_Datatype mpi_type_id_for_type<bool>;
96  template const MPI_Datatype mpi_type_id_for_type<char>;
97  template const MPI_Datatype mpi_type_id_for_type<signed char>;
98  template const MPI_Datatype mpi_type_id_for_type<short>;
99  template const MPI_Datatype mpi_type_id_for_type<int>;
100  template const MPI_Datatype mpi_type_id_for_type<long int>;
101  template const MPI_Datatype mpi_type_id_for_type<unsigned char>;
102  template const MPI_Datatype mpi_type_id_for_type<unsigned short>;
103  template const MPI_Datatype mpi_type_id_for_type<unsigned long int>;
104  template const MPI_Datatype mpi_type_id_for_type<unsigned long long int>;
105  template const MPI_Datatype mpi_type_id_for_type<float>;
106  template const MPI_Datatype mpi_type_id_for_type<double>;
107  template const MPI_Datatype mpi_type_id_for_type<long double>;
108  template const MPI_Datatype mpi_type_id_for_type<std::complex<float>>;
109  template const MPI_Datatype mpi_type_id_for_type<std::complex<double>>;
110 #endif
111 
112 
113  MinMaxAvg
114  min_max_avg(const double my_value, const MPI_Comm &mpi_communicator)
115  {
116  MinMaxAvg result;
118  ArrayView<MinMaxAvg>(result),
119  mpi_communicator);
120 
121  return result;
122  }
123 
124 
125 
126  std::vector<MinMaxAvg>
127  min_max_avg(const std::vector<double> &my_values,
128  const MPI_Comm & mpi_communicator)
129  {
130  std::vector<MinMaxAvg> results(my_values.size());
131  min_max_avg(my_values, results, mpi_communicator);
132 
133  return results;
134  }
135 
136 
137 
138 #ifdef DEAL_II_WITH_MPI
139  unsigned int
140  n_mpi_processes(const MPI_Comm &mpi_communicator)
141  {
142  int n_jobs = 1;
143  const int ierr = MPI_Comm_size(mpi_communicator, &n_jobs);
144  AssertThrowMPI(ierr);
145 
146  return n_jobs;
147  }
148 
149 
150  unsigned int
151  this_mpi_process(const MPI_Comm &mpi_communicator)
152  {
153  int rank = 0;
154  const int ierr = MPI_Comm_rank(mpi_communicator, &rank);
155  AssertThrowMPI(ierr);
156 
157  return rank;
158  }
159 
160 
161 
162  const std::vector<unsigned int>
163  mpi_processes_within_communicator(const MPI_Comm &comm_large,
164  const MPI_Comm &comm_small)
165  {
166  if (Utilities::MPI::job_supports_mpi() == false)
167  return std::vector<unsigned int>{0};
168 
169  const unsigned int rank = Utilities::MPI::this_mpi_process(comm_large);
170  const unsigned int size = Utilities::MPI::n_mpi_processes(comm_small);
171 
172  std::vector<unsigned int> ranks(size);
173  const int ierr = MPI_Allgather(
174  &rank, 1, MPI_UNSIGNED, ranks.data(), 1, MPI_UNSIGNED, comm_small);
175  AssertThrowMPI(ierr);
176 
177  return ranks;
178  }
179 
180 
181 
182  MPI_Comm
183  duplicate_communicator(const MPI_Comm &mpi_communicator)
184  {
185  MPI_Comm new_communicator;
186  const int ierr = MPI_Comm_dup(mpi_communicator, &new_communicator);
187  AssertThrowMPI(ierr);
188  return new_communicator;
189  }
190 
191 
192 
193  void
194  free_communicator(MPI_Comm &mpi_communicator)
195  {
196  // MPI_Comm_free will set the argument to MPI_COMM_NULL automatically.
197  const int ierr = MPI_Comm_free(&mpi_communicator);
198  AssertThrowMPI(ierr);
199  }
200 
201 
202 
203  int
204  create_group(const MPI_Comm & comm,
205  const MPI_Group &group,
206  const int tag,
207  MPI_Comm * new_comm)
208  {
209  const int ierr = MPI_Comm_create_group(comm, group, tag, new_comm);
210  AssertThrowMPI(ierr);
211  return ierr;
212  }
213 
214 
215 
216  std::vector<IndexSet>
218  const IndexSet::size_type locally_owned_size)
219  {
220  const unsigned int n_proc = n_mpi_processes(comm);
221  const std::vector<IndexSet::size_type> sizes =
222  all_gather(comm, locally_owned_size);
223  const auto total_size =
224  std::accumulate(sizes.begin(), sizes.end(), IndexSet::size_type(0));
225 
226  std::vector<IndexSet> res(n_proc, IndexSet(total_size));
227 
229  for (unsigned int i = 0; i < n_proc; ++i)
230  {
231  res[i].add_range(begin, begin + sizes[i]);
232  begin = begin + sizes[i];
233  }
234 
235  return res;
236  }
237 
238 
239 
240  IndexSet
242  const IndexSet::size_type total_size)
243  {
244  const unsigned int this_proc = this_mpi_process(comm);
245  const unsigned int n_proc = n_mpi_processes(comm);
246 
248  n_proc,
249  total_size);
250  }
251 
252 
253 
254  std::unique_ptr<MPI_Datatype, void (*)(MPI_Datatype *)>
255  create_mpi_data_type_n_bytes(const std::size_t n_bytes)
256  {
257  MPI_Datatype result;
258  int ierr = LargeCount::Type_contiguous_c(n_bytes, MPI_BYTE, &result);
259  AssertThrowMPI(ierr);
260  ierr = MPI_Type_commit(&result);
261  AssertThrowMPI(ierr);
262 
263 # ifdef DEBUG
264  MPI_Count size64;
265  ierr = MPI_Type_size_x(result, &size64);
266  AssertThrowMPI(ierr);
267 
268  Assert(size64 == static_cast<MPI_Count>(n_bytes), ExcInternalError());
269 # endif
270 
271  // Now put the new data type into a std::unique_ptr with a custom
272  // deleter. We call the std::unique_ptr constructor that as first
273  // argument takes a pointer (here, a pointer to a copy of the `result`
274  // object, and as second argument a pointer-to-function, for which
275  // we here use a lambda function without captures that acts as the
276  // 'deleter' object: it calls `MPI_Type_free` and then deletes the
277  // pointer. To avoid a compiler warning about a null this pointer
278  // in the lambda (which don't make sense: the lambda doesn't store
279  // anything), we create the deleter first.
280  auto deleter = [](MPI_Datatype *p) {
281  if (p != nullptr)
282  {
283  const int ierr = MPI_Type_free(p);
284  (void)ierr;
285  AssertNothrow(ierr == MPI_SUCCESS, ExcMPI(ierr));
286 
287  delete p;
288  }
289  };
290 
291  return std::unique_ptr<MPI_Datatype, void (*)(MPI_Datatype *)>(
292  new MPI_Datatype(result), deleter);
293  }
294 
295 
296 
297  std::vector<unsigned int>
299  const MPI_Comm & mpi_comm,
300  const std::vector<unsigned int> &destinations)
301  {
302  const unsigned int myid = Utilities::MPI::this_mpi_process(mpi_comm);
303  const unsigned int n_procs = Utilities::MPI::n_mpi_processes(mpi_comm);
304  (void)myid;
305  (void)n_procs;
306 
307  for (const unsigned int destination : destinations)
308  {
309  (void)destination;
310  AssertIndexRange(destination, n_procs);
311  }
312 
313 
314  // Have a little function that checks if destinations provided
315  // to the current process are unique. The way it does this is
316  // to create a sorted list of destinations and then walk through
317  // the list and look at successive elements -- if we find the
318  // same number twice, we know that the destinations were not
319  // unique
320  const bool my_destinations_are_unique = [destinations]() {
321  if (destinations.size() == 0)
322  return true;
323  else
324  {
325  std::vector<unsigned int> my_destinations = destinations;
326  std::sort(my_destinations.begin(), my_destinations.end());
327  return (std::adjacent_find(my_destinations.begin(),
328  my_destinations.end()) ==
329  my_destinations.end());
330  }
331  }();
332 
333  // If all processes report that they have unique destinations,
334  // then we can short-cut the process using a consensus algorithm (which
335  // is implemented only for the case of unique destinations):
336  if (Utilities::MPI::min((my_destinations_are_unique ? 1 : 0), mpi_comm) ==
337  1)
338  {
339  return ConsensusAlgorithms::nbx<char, char>(
340  destinations, {}, {}, {}, mpi_comm);
341  }
342 
343  // So we need to run a different algorithm, specifically one that
344  // requires more memory -- MPI_Reduce_scatter_block will require memory
345  // proportional to the number of processes involved; that function is
346  // available for MPI 2.2 or later:
347  static CollectiveMutex mutex;
348  CollectiveMutex::ScopedLock lock(mutex, mpi_comm);
349 
350  const int mpi_tag =
352 
353  // Calculate the number of messages to send to each process
354  std::vector<unsigned int> dest_vector(n_procs);
355  for (const auto &el : destinations)
356  ++dest_vector[el];
357 
358  // Find how many processes will send to this one
359  // by reducing with sum and then scattering the
360  // results over all processes
361  unsigned int n_recv_from;
362  const int ierr = MPI_Reduce_scatter_block(
363  dest_vector.data(), &n_recv_from, 1, MPI_UNSIGNED, MPI_SUM, mpi_comm);
364 
365  AssertThrowMPI(ierr);
366 
367  // Send myid to every process in `destinations` vector...
368  std::vector<MPI_Request> send_requests(destinations.size());
369  for (const auto &el : destinations)
370  {
371  const int ierr =
372  MPI_Isend(&myid,
373  1,
374  MPI_UNSIGNED,
375  el,
376  mpi_tag,
377  mpi_comm,
378  send_requests.data() + (&el - destinations.data()));
379  AssertThrowMPI(ierr);
380  }
381 
382 
383  // Receive `n_recv_from` times from the processes
384  // who communicate with this one. Store the obtained id's
385  // in the resulting vector
386  std::vector<unsigned int> origins(n_recv_from);
387  for (auto &el : origins)
388  {
389  const int ierr = MPI_Recv(&el,
390  1,
391  MPI_UNSIGNED,
392  MPI_ANY_SOURCE,
393  mpi_tag,
394  mpi_comm,
395  MPI_STATUS_IGNORE);
396  AssertThrowMPI(ierr);
397  }
398 
399  if (destinations.size() > 0)
400  {
401  const int ierr = MPI_Waitall(destinations.size(),
402  send_requests.data(),
403  MPI_STATUSES_IGNORE);
404  AssertThrowMPI(ierr);
405  }
406 
407  return origins;
408  }
409 
410 
411 
412  unsigned int
414  const MPI_Comm & mpi_comm,
415  const std::vector<unsigned int> &destinations)
416  {
417  // Have a little function that checks if destinations provided
418  // to the current process are unique:
419  const bool my_destinations_are_unique = [destinations]() {
420  std::vector<unsigned int> my_destinations = destinations;
421  const unsigned int n_destinations = my_destinations.size();
422  std::sort(my_destinations.begin(), my_destinations.end());
423  my_destinations.erase(std::unique(my_destinations.begin(),
424  my_destinations.end()),
425  my_destinations.end());
426  return (my_destinations.size() == n_destinations);
427  }();
428 
429  // If all processes report that they have unique destinations,
430  // then we can short-cut the process using a consensus algorithm:
431 
432  if (Utilities::MPI::min((my_destinations_are_unique ? 1 : 0), mpi_comm) ==
433  1)
434  {
435  return ConsensusAlgorithms::nbx<char, char>(
436  destinations, {}, {}, {}, mpi_comm)
437  .size();
438  }
439  else
440  {
441  const unsigned int n_procs =
443 
444  for (const unsigned int destination : destinations)
445  {
446  (void)destination;
447  AssertIndexRange(destination, n_procs);
448  Assert(destination != Utilities::MPI::this_mpi_process(mpi_comm),
449  ExcMessage(
450  "There is no point in communicating with ourselves."));
451  }
452 
453  // Calculate the number of messages to send to each process
454  std::vector<unsigned int> dest_vector(n_procs);
455  for (const auto &el : destinations)
456  ++dest_vector[el];
457 
458  // Find out how many processes will send to this one
459  // MPI_Reduce_scatter(_block) does exactly this
460  unsigned int n_recv_from = 0;
461 
462  const int ierr = MPI_Reduce_scatter_block(dest_vector.data(),
463  &n_recv_from,
464  1,
465  MPI_UNSIGNED,
466  MPI_SUM,
467  mpi_comm);
468 
469  AssertThrowMPI(ierr);
470 
471  return n_recv_from;
472  }
473  }
474 
475 
476 
477  namespace
478  {
479  // custom MIP_Op for calculate_collective_mpi_min_max_avg
480  void
481  max_reduce(const void *in_lhs_,
482  void * inout_rhs_,
483  int * len,
484  MPI_Datatype *)
485  {
486  const MinMaxAvg *in_lhs = static_cast<const MinMaxAvg *>(in_lhs_);
487  MinMaxAvg * inout_rhs = static_cast<MinMaxAvg *>(inout_rhs_);
488 
489  for (int i = 0; i < *len; ++i)
490  {
491  inout_rhs[i].sum += in_lhs[i].sum;
492  if (inout_rhs[i].min > in_lhs[i].min)
493  {
494  inout_rhs[i].min = in_lhs[i].min;
495  inout_rhs[i].min_index = in_lhs[i].min_index;
496  }
497  else if (inout_rhs[i].min == in_lhs[i].min)
498  {
499  // choose lower cpu index when tied to make operator commutative
500  if (inout_rhs[i].min_index > in_lhs[i].min_index)
501  inout_rhs[i].min_index = in_lhs[i].min_index;
502  }
503 
504  if (inout_rhs[i].max < in_lhs[i].max)
505  {
506  inout_rhs[i].max = in_lhs[i].max;
507  inout_rhs[i].max_index = in_lhs[i].max_index;
508  }
509  else if (inout_rhs[i].max == in_lhs[i].max)
510  {
511  // choose lower cpu index when tied to make operator commutative
512  if (inout_rhs[i].max_index > in_lhs[i].max_index)
513  inout_rhs[i].max_index = in_lhs[i].max_index;
514  }
515  }
516  }
517  } // namespace
518 
519 
520 
521  void
523  const ArrayView<MinMaxAvg> & result,
524  const MPI_Comm & mpi_communicator)
525  {
526  // If MPI was not started, we have a serial computation and cannot run
527  // the other MPI commands
528  if (job_supports_mpi() == false ||
529  Utilities::MPI::n_mpi_processes(mpi_communicator) <= 1)
530  {
531  for (unsigned int i = 0; i < my_values.size(); ++i)
532  {
533  result[i].sum = my_values[i];
534  result[i].avg = my_values[i];
535  result[i].min = my_values[i];
536  result[i].max = my_values[i];
537  result[i].min_index = 0;
538  result[i].max_index = 0;
539  }
540  return;
541  }
542 
543  /*
544  * A custom MPI datatype handle describing the memory layout of the
545  * MinMaxAvg struct. Initialized on first pass control reaches the
546  * static variable. So hopefully not initialized too early.
547  */
548  static MPI_Datatype type = []() {
549  MPI_Datatype type;
550 
551  int lengths[] = {3, 2, 1};
552 
553  MPI_Aint displacements[] = {0,
554  offsetof(MinMaxAvg, min_index),
555  offsetof(MinMaxAvg, avg)};
556 
557  MPI_Datatype types[] = {MPI_DOUBLE, MPI_INT, MPI_DOUBLE};
558 
559  int ierr =
560  MPI_Type_create_struct(3, lengths, displacements, types, &type);
561  AssertThrowMPI(ierr);
562 
563  ierr = MPI_Type_commit(&type);
564  AssertThrowMPI(ierr);
565 
566  /* Ensure that we free the allocated datatype again at the end of
567  * the program run just before we call MPI_Finalize():*/
568  MPI_InitFinalize::signals.at_mpi_finalize.connect([type]() mutable {
569  int ierr = MPI_Type_free(&type);
570  AssertThrowMPI(ierr);
571  });
572 
573  return type;
574  }();
575 
576  /*
577  * A custom MPI op handle for our max_reduce function.
578  * Initialized on first pass control reaches the static variable. So
579  * hopefully not initialized too early.
580  */
581  static MPI_Op op = []() {
582  MPI_Op op;
583 
584  int ierr =
585  MPI_Op_create(reinterpret_cast<MPI_User_function *>(&max_reduce),
586  static_cast<int>(true),
587  &op);
588  AssertThrowMPI(ierr);
589 
590  /* Ensure that we free the allocated op again at the end of the
591  * program run just before we call MPI_Finalize():*/
592  MPI_InitFinalize::signals.at_mpi_finalize.connect([op]() mutable {
593  int ierr = MPI_Op_free(&op);
594  AssertThrowMPI(ierr);
595  });
596 
597  return op;
598  }();
599 
600  AssertDimension(Utilities::MPI::min(my_values.size(), mpi_communicator),
601  Utilities::MPI::max(my_values.size(), mpi_communicator));
602 
603  AssertDimension(my_values.size(), result.size());
604 
605  // To avoid uninitialized values on some MPI implementations, provide
606  // result with a default value already...
607  MinMaxAvg dummy = {0.,
609  std::numeric_limits<double>::lowest(),
610  0,
611  0,
612  0.};
613 
614  for (auto &i : result)
615  i = dummy;
616 
617  const unsigned int my_id =
618  ::Utilities::MPI::this_mpi_process(mpi_communicator);
619  const unsigned int numproc =
620  ::Utilities::MPI::n_mpi_processes(mpi_communicator);
621 
622  std::vector<MinMaxAvg> in(my_values.size());
623 
624  for (unsigned int i = 0; i < my_values.size(); ++i)
625  {
626  in[i].sum = in[i].min = in[i].max = my_values[i];
627  in[i].min_index = in[i].max_index = my_id;
628  }
629 
630  int ierr = MPI_Allreduce(
631  in.data(), result.data(), my_values.size(), type, op, mpi_communicator);
632  AssertThrowMPI(ierr);
633 
634  for (auto &r : result)
635  r.avg = r.sum / numproc;
636  }
637 
638 
639 #else
640 
641  unsigned int
642  n_mpi_processes(const MPI_Comm &)
643  {
644  return 1;
645  }
646 
647 
648 
649  unsigned int
650  this_mpi_process(const MPI_Comm &)
651  {
652  return 0;
653  }
654 
655 
656 
657  const std::vector<unsigned int>
658  mpi_processes_within_communicator(const MPI_Comm &, const MPI_Comm &)
659  {
660  return std::vector<unsigned int>{0};
661  }
662 
663 
664 
665  std::vector<IndexSet>
666  create_ascending_partitioning(const MPI_Comm & /*comm*/,
667  const IndexSet::size_type locally_owned_size)
668  {
669  return std::vector<IndexSet>(1, complete_index_set(locally_owned_size));
670  }
671 
672  IndexSet
673  create_evenly_distributed_partitioning(const MPI_Comm & /*comm*/,
674  const IndexSet::size_type total_size)
675  {
676  return complete_index_set(total_size);
677  }
678 
679 
680 
681  MPI_Comm
682  duplicate_communicator(const MPI_Comm &mpi_communicator)
683  {
684  return mpi_communicator;
685  }
686 
687 
688 
689  void
690  free_communicator(MPI_Comm & /*mpi_communicator*/)
691  {}
692 
693 
694 
695  void
696  min_max_avg(const ArrayView<const double> &my_values,
697  const ArrayView<MinMaxAvg> & result,
698  const MPI_Comm &)
699  {
700  AssertDimension(my_values.size(), result.size());
701 
702  for (unsigned int i = 0; i < my_values.size(); ++i)
703  {
704  result[i].sum = my_values[i];
705  result[i].avg = my_values[i];
706  result[i].min = my_values[i];
707  result[i].max = my_values[i];
708  result[i].min_index = 0;
709  result[i].max_index = 0;
710  }
711  }
712 
713 #endif
714 
715  /* Force initialization of static struct: */
716  MPI_InitFinalize::Signals MPI_InitFinalize::signals =
717  MPI_InitFinalize::Signals();
718 
719 
721  char **& argv,
722  const unsigned int max_num_threads)
723  {
724  static bool constructor_has_already_run = false;
725  (void)constructor_has_already_run;
726  Assert(constructor_has_already_run == false,
727  ExcMessage("You can only create a single object of this class "
728  "in a program since it initializes the MPI system."));
729 
730 
731  int ierr = 0;
732 #ifdef DEAL_II_WITH_MPI
733  // if we have PETSc, we will initialize it and let it handle MPI.
734  // Otherwise, we will do it.
735  int MPI_has_been_started = 0;
736  ierr = MPI_Initialized(&MPI_has_been_started);
737  AssertThrowMPI(ierr);
738  AssertThrow(MPI_has_been_started == 0,
739  ExcMessage("MPI error. You can only start MPI once!"));
740 
741  int provided;
742  // this works like ierr = MPI_Init (&argc, &argv); but tells MPI that
743  // we might use several threads but never call two MPI functions at the
744  // same time. For an explanation see on why we do this see
745  // http://www.open-mpi.org/community/lists/users/2010/03/12244.php
746  int wanted = MPI_THREAD_SERIALIZED;
747  ierr = MPI_Init_thread(&argc, &argv, wanted, &provided);
748  AssertThrowMPI(ierr);
749 
750  // disable for now because at least some implementations always return
751  // MPI_THREAD_SINGLE.
752  // Assert(max_num_threads==1 || provided != MPI_THREAD_SINGLE,
753  // ExcMessage("MPI reports that we are not allowed to use multiple
754  // threads."));
755 #else
756  // make sure the compiler doesn't warn about these variables
757  (void)argc;
758  (void)argv;
759  (void)ierr;
760 #endif
761 
762  // we are allowed to call MPI_Init ourselves and PETScInitialize will
763  // detect this. This allows us to use MPI_Init_thread instead.
764 #ifdef DEAL_II_WITH_PETSC
765 # ifdef DEAL_II_WITH_SLEPC
766  // Initialize SLEPc (with PETSc):
767  ierr = SlepcInitialize(&argc, &argv, nullptr, nullptr);
769 # else
770  // or just initialize PETSc alone:
771  ierr = PetscInitialize(&argc, &argv, nullptr, nullptr);
772  AssertThrow(ierr == 0, ExcPETScError(ierr));
773 # endif
774 
775  // Disable PETSc exception handling. This just prints a large wall
776  // of text that is not particularly helpful for what we do:
777  PetscPopSignalHandler();
778 #endif
779 
780  // Initialize zoltan
781 #ifdef DEAL_II_TRILINOS_WITH_ZOLTAN
782  float version;
783  Zoltan_Initialize(argc, argv, &version);
784 #endif
785 
786 #ifdef DEAL_II_WITH_P4EST
787  // Initialize p4est and libsc components
788 # if DEAL_II_P4EST_VERSION_GTE(2, 5, 0, 0)
789  // This feature is broken in version 2.0.0 for calls to
790  // MPI_Comm_create_group (see cburstedde/p4est#30).
791  // Disabling it leads to more verbose p4est error messages
792  // which should be fine.
793  sc_init(MPI_COMM_WORLD, 0, 0, nullptr, SC_LP_SILENT);
794 # endif
795  p4est_init(nullptr, SC_LP_SILENT);
796 #endif
797 
798  constructor_has_already_run = true;
799 
800 
801  // Now also see how many threads we'd like to run
802  if (max_num_threads != numbers::invalid_unsigned_int)
803  {
804  // set maximum number of threads (also respecting the environment
805  // variable that the called function evaluates) based on what the
806  // user asked
807  MultithreadInfo::set_thread_limit(max_num_threads);
808  }
809  else
810  // user wants automatic choice
811  {
812 #ifdef DEAL_II_WITH_MPI
813  // we need to figure out how many MPI processes there are on the
814  // current node, as well as how many CPU cores we have. for the
815  // first task, check what get_hostname() returns and then do an
816  // allgather so each processor gets the answer
817  //
818  // in calculating the length of the string, don't forget the
819  // terminating \0 on C-style strings
820  const std::string hostname = Utilities::System::get_hostname();
821  const unsigned int max_hostname_size =
822  Utilities::MPI::max(hostname.size() + 1, MPI_COMM_WORLD);
823  std::vector<char> hostname_array(max_hostname_size);
824  std::copy(hostname.c_str(),
825  hostname.c_str() + hostname.size() + 1,
826  hostname_array.begin());
827 
828  std::vector<char> all_hostnames(max_hostname_size *
829  MPI::n_mpi_processes(MPI_COMM_WORLD));
830  const int ierr = MPI_Allgather(hostname_array.data(),
831  max_hostname_size,
832  MPI_CHAR,
833  all_hostnames.data(),
834  max_hostname_size,
835  MPI_CHAR,
836  MPI_COMM_WORLD);
837  AssertThrowMPI(ierr);
838 
839  // search how often our own hostname appears and the how-manyth
840  // instance the current process represents
841  unsigned int n_local_processes = 0;
842  unsigned int nth_process_on_host = 0;
843  for (unsigned int i = 0; i < MPI::n_mpi_processes(MPI_COMM_WORLD);
844  ++i)
845  if (std::string(all_hostnames.data() + i * max_hostname_size) ==
846  hostname)
847  {
848  ++n_local_processes;
849  if (i <= MPI::this_mpi_process(MPI_COMM_WORLD))
850  ++nth_process_on_host;
851  }
852  Assert(nth_process_on_host > 0, ExcInternalError());
853 
854 
855  // compute how many cores each process gets. if the number does not
856  // divide evenly, then we get one more core if we are among the
857  // first few processes
858  //
859  // if the number would be zero, round up to one since every process
860  // needs to have at least one thread
861  const unsigned int n_threads =
862  std::max(MultithreadInfo::n_cores() / n_local_processes +
863  (nth_process_on_host <=
864  MultithreadInfo::n_cores() % n_local_processes ?
865  1 :
866  0),
867  1U);
868 #else
869  const unsigned int n_threads = MultithreadInfo::n_cores();
870 #endif
871 
872  // finally set this number of threads
874  }
875 
876  // As a final step call the at_mpi_init() signal handler.
878  }
879 
880 
881 
882  void
884  {
885  // insert if it is not in the set already:
886  requests.insert(&request);
887  }
888 
889 
890 
891  void
893  {
894  Assert(
895  requests.find(&request) != requests.end(),
896  ExcMessage(
897  "You tried to call unregister_request() with an invalid request."));
898 
899  requests.erase(&request);
900  }
901 
902 
903 
904  std::set<MPI_Request *> MPI_InitFinalize::requests;
905 
906 
907 
909  {
910  // First, call the at_mpi_finalize() signal handler.
912 
913  // make memory pool release all PETSc/Trilinos/MPI-based vectors that
914  // are no longer used at this point. this is relevant because the static
915  // object destructors run for these vectors at the end of the program
916  // would run after MPI_Finalize is called, leading to errors
917 
918 #ifdef DEAL_II_WITH_MPI
919  // Before exiting, wait for nonblocking communication to complete:
920  for (auto request : requests)
921  {
922  const int ierr = MPI_Wait(request, MPI_STATUS_IGNORE);
923  AssertThrowMPI(ierr);
924  }
925 
926  // Start with deal.II MPI vectors and delete vectors from the pools:
928  LinearAlgebra::distributed::Vector<double>>::release_unused_memory();
930  release_unused_memory();
932  LinearAlgebra::distributed::Vector<float>>::release_unused_memory();
934  release_unused_memory();
935 
936  // Next with Trilinos:
937 # ifdef DEAL_II_WITH_TRILINOS
939  TrilinosWrappers::MPI::Vector>::release_unused_memory();
941  TrilinosWrappers::MPI::BlockVector>::release_unused_memory();
942 # endif
943 #endif
944 
945 
946  // Now deal with PETSc (with or without MPI). Only delete the vectors if
947  // finalize hasn't been called yet, otherwise this will lead to errors.
948 #ifdef DEAL_II_WITH_PETSC
949  if ((PetscInitializeCalled == PETSC_TRUE) &&
950  (PetscFinalizeCalled == PETSC_FALSE))
951  {
953  PETScWrappers::MPI::Vector>::release_unused_memory();
955  PETScWrappers::MPI::BlockVector>::release_unused_memory();
956 
957 # ifdef DEAL_II_WITH_SLEPC
958  // and now end SLEPc (with PETSc)
959  SlepcFinalize();
960 # else
961  // or just end PETSc.
962  PetscFinalize();
963 # endif
964  }
965 #endif
966 
967 // There is a similar issue with CUDA: The destructor of static objects might
968 // run after the CUDA driver is unloaded. Hence, also release all memory
969 // related to CUDA vectors.
970 #ifdef DEAL_II_WITH_CUDA
973  release_unused_memory();
976  release_unused_memory();
977 #endif
978 
979 #ifdef DEAL_II_WITH_P4EST
980  // now end p4est and libsc
981  // Note: p4est has no finalize function
982  sc_finalize();
983 #endif
984 
985 
986  // only MPI_Finalize if we are running with MPI. We also need to do this
987  // when running PETSc, because we initialize MPI ourselves before
988  // calling PetscInitialize
989 #ifdef DEAL_II_WITH_MPI
990  if (job_supports_mpi() == true)
991  {
992 # if __cpp_lib_uncaught_exceptions >= 201411
993  // std::uncaught_exception() is deprecated in c++17
994  if (std::uncaught_exceptions() > 0)
995 # else
996  if (std::uncaught_exception() == true)
997 # endif
998  {
999  // do not try to call MPI_Finalize to avoid a deadlock.
1000  }
1001  else
1002  {
1003  const int ierr = MPI_Finalize();
1004  (void)ierr;
1005  AssertNothrow(ierr == MPI_SUCCESS, ::ExcMPI(ierr));
1006  }
1007  }
1008 #endif
1009  }
1010 
1011 
1012 
1013  bool
1015  {
1016 #ifdef DEAL_II_WITH_MPI
1017  int MPI_has_been_started = 0;
1018  const int ierr = MPI_Initialized(&MPI_has_been_started);
1019  AssertThrowMPI(ierr);
1020 
1021  return (MPI_has_been_started > 0);
1022 #else
1023  return false;
1024 #endif
1025  }
1026 
1027 
1028 
1029  std::vector<unsigned int>
1030  compute_index_owner(const IndexSet &owned_indices,
1031  const IndexSet &indices_to_look_up,
1032  const MPI_Comm &comm)
1033  {
1034  Assert(owned_indices.size() == indices_to_look_up.size(),
1035  ExcMessage("IndexSets have to have the same sizes."));
1036 
1037  Assert(
1038  owned_indices.size() == Utilities::MPI::max(owned_indices.size(), comm),
1039  ExcMessage("IndexSets have to have the same size on all processes."));
1040 
1041  std::vector<unsigned int> owning_ranks(indices_to_look_up.n_elements());
1042 
1043  // Step 1: setup dictionary
1044  // The input owned_indices can be partitioned arbitrarily. In the
1045  // dictionary, the index set is statically repartitioned among the
1046  // processes again and extended with information with the actual owner
1047  // of that the index.
1049  owned_indices, indices_to_look_up, comm, owning_ranks);
1050 
1051  // Step 2: read dictionary
1052  // Communicate with the process who owns the index in the static
1053  // partition (i.e. in the dictionary). This process returns the actual
1054  // owner of the index.
1056  std::vector<
1057  std::pair<types::global_dof_index, types::global_dof_index>>,
1058  std::vector<unsigned int>>
1059  consensus_algorithm(process, comm);
1060  consensus_algorithm.run();
1061 
1062  return owning_ranks;
1063  }
1064 
1065 
1066 
1068  : locked(false)
1069  , request(MPI_REQUEST_NULL)
1070  {
1072  }
1073 
1074 
1075 
1077  {
1078  Assert(
1079  !locked,
1080  ExcMessage(
1081  "Error: MPI::CollectiveMutex is still locked while being destroyed!"));
1082 
1084  }
1085 
1086 
1087 
1088  void
1089  CollectiveMutex::lock(const MPI_Comm &comm)
1090  {
1091  (void)comm;
1092 
1093  Assert(
1094  !locked,
1095  ExcMessage(
1096  "Error: MPI::CollectiveMutex needs to be unlocked before lock()"));
1097 
1098 #ifdef DEAL_II_WITH_MPI
1099 
1100  // TODO: For now, we implement this mutex with a blocking barrier
1101  // in the lock and unlock. It needs to be tested, if we can move
1102  // to a nonblocking barrier (code disabled below).
1103 
1104  const int ierr = MPI_Barrier(comm);
1105  AssertThrowMPI(ierr);
1106 
1107 # if 0
1108  // wait for non-blocking barrier to finish. This is a noop the
1109  // first time we lock().
1110  const int ierr = MPI_Wait(&request, MPI_STATUS_IGNORE);
1111  AssertThrowMPI(ierr);
1112 # else
1113  // nothing to do as blocking barrier already completed
1114 # endif
1115 #endif
1116 
1117  locked = true;
1118  }
1119 
1120 
1121 
1122  void
1124  {
1125  (void)comm;
1126 
1127  Assert(
1128  locked,
1129  ExcMessage(
1130  "Error: MPI::CollectiveMutex needs to be locked before unlock()"));
1131 
1132 #ifdef DEAL_II_WITH_MPI
1133 
1134  // TODO: For now, we implement this mutex with a blocking barrier
1135  // in the lock and unlock. It needs to be tested, if we can move
1136  // to a nonblocking barrier (code disabled below):
1137 # if 0
1138  const int ierr = MPI_Ibarrier(comm, &request);
1139  AssertThrowMPI(ierr);
1140 # else
1141  const int ierr = MPI_Barrier(comm);
1142  AssertThrowMPI(ierr);
1143 # endif
1144 #endif
1145 
1146  locked = false;
1147  }
1148 
1149 
1150 #ifndef DOXYGEN
1151  // explicit instantiations
1152 
1153  // booleans aren't in MPI_SCALARS
1154  template bool
1155  reduce(const bool &,
1156  const MPI_Comm &,
1157  const std::function<bool(const bool &, const bool &)> &,
1158  const unsigned int);
1159 
1160  template std::vector<bool>
1161  reduce(const std::vector<bool> &,
1162  const MPI_Comm &,
1163  const std::function<std::vector<bool>(const std::vector<bool> &,
1164  const std::vector<bool> &)> &,
1165  const unsigned int);
1166 
1167  template bool
1168  all_reduce(const bool &,
1169  const MPI_Comm &,
1170  const std::function<bool(const bool &, const bool &)> &);
1171 
1172  template std::vector<bool>
1173  all_reduce(
1174  const std::vector<bool> &,
1175  const MPI_Comm &,
1176  const std::function<std::vector<bool>(const std::vector<bool> &,
1177  const std::vector<bool> &)> &);
1178 
1179  // We need an explicit instantiation of this for the same reason as the
1180  // other types described in mpi.inst.in
1181  template void
1182  internal::all_reduce<bool>(const MPI_Op &,
1183  const ArrayView<const bool> &,
1184  const MPI_Comm &,
1185  const ArrayView<bool> &);
1186 
1187 
1188  template bool
1189  logical_or<bool>(const bool &, const MPI_Comm &);
1190 
1191 
1192  template void
1193  logical_or<bool>(const ArrayView<const bool> &,
1194  const MPI_Comm &,
1195  const ArrayView<bool> &);
1196 
1197 
1198  template std::vector<unsigned int>
1199  compute_set_union(const std::vector<unsigned int> &vec,
1200  const MPI_Comm & comm);
1201 
1202 
1203  template std::set<unsigned int>
1204  compute_set_union(const std::set<unsigned int> &set, const MPI_Comm &comm);
1205 #endif
1206 
1207 #include "mpi.inst"
1208  } // end of namespace MPI
1209 } // end of namespace Utilities
1210 
value_type * data() const noexcept
Definition: array_view.h:553
std::size_t size() const
Definition: array_view.h:576
size_type size() const
Definition: index_set.h:1636
size_type n_elements() const
Definition: index_set.h:1834
void add_range(const size_type begin, const size_type end)
Definition: index_set.h:1675
types::global_dof_index size_type
Definition: index_set.h:80
static unsigned int n_cores()
static void set_thread_limit(const unsigned int max_threads=numbers::invalid_unsigned_int)
void unlock(const MPI_Comm &comm)
Definition: mpi.cc:1123
void lock(const MPI_Comm &comm)
Definition: mpi.cc:1089
static void unregister_request(MPI_Request &request)
Definition: mpi.cc:892
static std::set< MPI_Request * > requests
Definition: mpi.h:1103
static Signals signals
Definition: mpi.h:1097
MPI_InitFinalize(int &argc, char **&argv, const unsigned int max_num_threads=numbers::invalid_unsigned_int)
Definition: mpi.cc:720
static void register_request(MPI_Request &request)
Definition: mpi.cc:883
#define DEAL_II_NAMESPACE_OPEN
Definition: config.h:442
#define DEAL_II_NAMESPACE_CLOSE
Definition: config.h:443
__global__ void set(Number *val, const Number s, const size_type N)
static ::ExceptionBase & ExcInternalError()
static ::ExceptionBase & ExcSLEPcError(int arg1)
#define Assert(cond, exc)
Definition: exceptions.h:1473
#define AssertDimension(dim1, dim2)
Definition: exceptions.h:1667
#define AssertThrowMPI(error_code)
Definition: exceptions.h:1790
#define AssertNothrow(cond, exc)
Definition: exceptions.h:1536
#define AssertIndexRange(index, range)
Definition: exceptions.h:1732
static ::ExceptionBase & ExcMessage(std::string arg1)
#define AssertThrow(cond, exc)
Definition: exceptions.h:1583
IndexSet complete_index_set(const IndexSet::size_type N)
Definition: index_set.h:1014
VectorType::value_type * begin(VectorType &V)
VectorType::value_type * end(VectorType &V)
int Type_contiguous_c(MPI_Count count, MPI_Datatype oldtype, MPI_Datatype *newtype)
@ compute_point_to_point_communication_pattern
Utilities::MPI::compute_point_to_point_communication_pattern()
Definition: mpi_tags.h:57
template const MPI_Datatype mpi_type_id_for_type< int >
Definition: mpi.cc:99
void free_communicator(MPI_Comm &mpi_communicator)
Definition: mpi.cc:194
std::vector< unsigned int > compute_index_owner(const IndexSet &owned_indices, const IndexSet &indices_to_look_up, const MPI_Comm &comm)
Definition: mpi.cc:1030
template const MPI_Datatype mpi_type_id_for_type< unsigned char >
Definition: mpi.cc:101
std::unique_ptr< MPI_Datatype, void(*)(MPI_Datatype *)> create_mpi_data_type_n_bytes(const std::size_t n_bytes)
Definition: mpi.cc:255
template const MPI_Datatype mpi_type_id_for_type< signed char >
Definition: mpi.cc:97
template const MPI_Datatype mpi_type_id_for_type< bool >
Definition: mpi.cc:95
template const MPI_Datatype mpi_type_id_for_type< unsigned short >
Definition: mpi.cc:102
std::vector< T > compute_set_union(const std::vector< T > &vec, const MPI_Comm &comm)
template const MPI_Datatype mpi_type_id_for_type< short >
Definition: mpi.cc:98
unsigned int compute_n_point_to_point_communications(const MPI_Comm &mpi_comm, const std::vector< unsigned int > &destinations)
Definition: mpi.cc:413
T all_reduce(const T &local_value, const MPI_Comm &comm, const std::function< T(const T &, const T &)> &combiner)
std::vector< IndexSet > create_ascending_partitioning(const MPI_Comm &comm, const IndexSet::size_type locally_owned_size)
Definition: mpi.cc:217
IndexSet create_evenly_distributed_partitioning(const MPI_Comm &comm, const IndexSet::size_type total_size)
Definition: mpi.cc:241
template const MPI_Datatype mpi_type_id_for_type< double >
Definition: mpi.cc:106
unsigned int this_mpi_process(const MPI_Comm &mpi_communicator)
Definition: mpi.cc:151
std::vector< unsigned int > compute_point_to_point_communication_pattern(const MPI_Comm &mpi_comm, const std::vector< unsigned int > &destinations)
Definition: mpi.cc:298
template const MPI_Datatype mpi_type_id_for_type< char >
Definition: mpi.cc:96
template const MPI_Datatype mpi_type_id_for_type< long double >
Definition: mpi.cc:107
template const MPI_Datatype mpi_type_id_for_type< unsigned long long int >
Definition: mpi.cc:104
template const MPI_Datatype mpi_type_id_for_type< unsigned long int >
Definition: mpi.cc:103
T min(const T &t, const MPI_Comm &mpi_communicator)
T reduce(const T &local_value, const MPI_Comm &comm, const std::function< T(const T &, const T &)> &combiner, const unsigned int root_process=0)
bool job_supports_mpi()
Definition: mpi.cc:1014
MPI_Comm duplicate_communicator(const MPI_Comm &mpi_communicator)
Definition: mpi.cc:183
const std::vector< unsigned int > mpi_processes_within_communicator(const MPI_Comm &comm_large, const MPI_Comm &comm_small)
Definition: mpi.cc:163
unsigned int n_mpi_processes(const MPI_Comm &mpi_communicator)
Definition: mpi.cc:140
std::vector< T > all_gather(const MPI_Comm &comm, const T &object_to_send)
MinMaxAvg min_max_avg(const double my_value, const MPI_Comm &mpi_communicator)
Definition: mpi.cc:114
template const MPI_Datatype mpi_type_id_for_type< long int >
Definition: mpi.cc:100
T max(const T &t, const MPI_Comm &mpi_communicator)
template const MPI_Datatype mpi_type_id_for_type< float >
Definition: mpi.cc:105
int create_group(const MPI_Comm &comm, const MPI_Group &group, const int tag, MPI_Comm *new_comm)
Definition: mpi.cc:204
std::string get_hostname()
Definition: utilities.cc:1001
IndexSet create_evenly_distributed_partitioning(const unsigned int my_partition_id, const unsigned int n_partitions, const IndexSet::size_type total_size)
Definition: mpi.cc:74
void copy(const T *begin, const T *end, U *dest)
static const unsigned int invalid_unsigned_int
Definition: types.h:201
Definition: types.h:32
boost::signals2::signal< void()> at_mpi_init
Definition: mpi.h:1086
boost::signals2::signal< void()> at_mpi_finalize
Definition: mpi.h:1094
const MPI_Comm & comm