Reorder ranks to improve locality in communication

Merged Sebastian Ohlmann requested to merge reorder_ranks into develop
All threads resolved!
1 file changed, +50 −30
@@ -133,6 +133,8 @@ module multicomm_oct_m
    integer, public :: master_comm      !< The communicator without slaves.
    integer         :: master_comm_rank !< The rank in the communicator without slaves.
    integer, public :: slave_intercomm  !< the intercomm to communicate with slaves

    logical         :: reorder_ranks    !< do we reorder ranks in a more compact way?
  end type multicomm_t

  !> An all-pairs communication schedule for a given group.
@@ -183,6 +185,18 @@ contains
    call messages_print_stress(stdout, "Parallelization")

    !%Variable ReorderRanks
    !%Default no
    !%Type logical
    !%Section Execution::Parallelization
    !%Description
    !% This variable controls whether the ranks are reorganized to have a more
    !% compact distribution with respect to domain parallelization, which needs
    !% to communicate most often. Depending on the system, this can improve
    !% communication speeds.
    !%End
    call parse_variable(namespace, 'ReorderRanks', .false., mc%reorder_ranks)

    call messages_obsolete_variable(namespace, 'ParallelizationStrategy')
    call messages_obsolete_variable(namespace, 'ParallelizationGroupRanks')
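
Usage note (not part of the diff): once merged, the reordering would be switched on from the Octopus input file, assuming the standard yes/no syntax for logical input variables, e.g.

    ReorderRanks = yes

Since parse_variable falls back to .false., runs that do not set the variable keep the present rank ordering; enabling it also triggers the 'Reordering ranks' experimental warning added in the hunk below.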
@@ -596,40 +610,46 @@ contains
    mc%full_comm = MPI_COMM_NULL
    mc%slave_intercomm = MPI_COMM_NULL
    if(mc%par_strategy /= P_STRATEGY_SERIAL) then
      if(mc%reorder_ranks) then
        call messages_experimental('Reordering ranks')
        ! first, reorder the ranks
        ! this is done to get a column-major ordering of the ranks in the
        ! Cartesian communicator, since they are ordered row-major otherwise
        call MPI_Comm_group(base_grp%comm, base_group, mpi_err)
        if(mpi_err /= MPI_SUCCESS) then
          message(1) = "Error in getting MPI group!"
          call messages_fatal(1)
        end if
        ! now transpose the hypercube => get rank numbers in column-major order
        nn = 1
        do ii = 1, mc%group_sizes(1)
          do jj = 1, mc%group_sizes(2)
            do kk = 1, mc%group_sizes(3)
              do ll = 1, mc%group_sizes(4)
                ranks(nn) = (ll-1)*mc%group_sizes(3)*mc%group_sizes(2)*mc%group_sizes(1) &
                  + (kk-1)*mc%group_sizes(2)*mc%group_sizes(1) &
                  + (jj-1)*mc%group_sizes(1) + ii - 1
                nn = nn + 1
              end do
            end do
          end do
        end do
        call MPI_Group_incl(base_group, base_grp%size, ranks, reorder_group, mpi_err)
        if(mpi_err /= MPI_SUCCESS) then
          message(1) = "Error in creating MPI group!"
          call messages_fatal(1)
        end if
        ! now get the reordered communicator
        call MPI_Comm_create(base_grp%comm, reorder_group, reorder_comm, mpi_err)
        if(mpi_err /= MPI_SUCCESS) then
          message(1) = "Error in creating reordered communicator!"
          call messages_fatal(1)
        end if
        call mpi_grp_init(reorder_grp, reorder_comm)
      else
        call mpi_grp_copy(reorder_grp, base_grp)
      end if

      ! Multilevel parallelization is organized in a hypercube. We
      ! use an MPI Cartesian topology to generate the communicators
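
To make the effect of the transposition loop concrete, here is a minimal standalone Fortran sketch (not part of the diff) that reproduces only the index arithmetic for an assumed 2x3x1x1 process grid; the loop variables mirror the hunk above, everything else is illustrative:

    ! Standalone sketch, not Octopus code: reproduces the index arithmetic of
    ! the transposition loop for a small, made-up group layout, so the rank
    ! permutation can be inspected without MPI.
    program reorder_sketch
      implicit none
      integer, parameter :: group_sizes(4) = (/ 2, 3, 1, 1 /)  ! assumed example layout
      integer :: ranks(6)                                      ! = product(group_sizes)
      integer :: ii, jj, kk, ll, nn

      ! walk the process grid in row-major order (ii outermost, ll innermost)
      ! and store each grid point's column-major (Fortran-order) linear index
      nn = 1
      do ii = 1, group_sizes(1)
        do jj = 1, group_sizes(2)
          do kk = 1, group_sizes(3)
            do ll = 1, group_sizes(4)
              ranks(nn) = (ll-1)*group_sizes(3)*group_sizes(2)*group_sizes(1) &
                + (kk-1)*group_sizes(2)*group_sizes(1) &
                + (jj-1)*group_sizes(1) + ii - 1
              nn = nn + 1
            end do
          end do
        end do
      end do

      print '(6i3)', ranks   ! prints  0  2  4  1  3  5
    end program reorder_sketch

Compiled with, e.g., gfortran, this prints 0 2 4 1 3 5. With MPI_Group_incl semantics, the process that had rank 2 in the base group becomes rank 1 in the reordered group, rank 4 becomes rank 2, and so on, which is the column-major numbering the comments in the hunk refer to.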