!----------------------------------------------------------------------------------------------------------------
!- real version
SUBROUTINE tg_gather( dffts, v, tg_v )
  !
  USE fft_param
  USE fft_types,      ONLY : fft_type_descriptor

  IMPLICIT NONE

  TYPE(fft_type_descriptor), INTENT(in) :: dffts
  REAL(DP), INTENT(IN)  :: v(dffts%nnr)
  REAL(DP), INTENT(OUT) :: tg_v(dffts%nnr_tg)

  INTEGER :: nxyp, ir3, off, tg_off
  INTEGER :: i, nsiz, ierr

  nxyp   = dffts%nr1x*dffts%my_nr2p
  !
  !  The potential in v is distributed so that each Z-plane is shared among nproc2 processors.
  !  We collect the data of whole planes in tg_v to be used with task group distributed wfcs.
  !
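  !  For each of the my_nr3p local Z-planes this process owns my_nr2p rows of
  !  length nr1x, starting at row offset my_i0r2p: "off" locates the plane in
  !  the locally distributed v, "tg_off" locates this process' strip inside
  !  the full nr1x*nr2x plane of tg_v.
  !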
  tg_v(:) = 0.0_DP
  do ir3 =1, dffts%my_nr3p
     off    = dffts%nr1x*dffts%my_nr2p*(ir3-1)
     tg_off = dffts%nr1x*dffts%nr2x   *(ir3-1) + dffts%nr1x*dffts%my_i0r2p
     tg_v(tg_off+1:tg_off+nxyp) = v(off+1:off+nxyp)
  end do
  !write (6,*) ' tg_v ', dffts%my_i0r2p, dffts%my_nr2p
  !write (6,'(20f12.7)') (v(dffts%my_i0r2p+i+dffts%nr1x*(i-1)), i=1,dffts%my_nr2p)
  !write (6,'(20f12.7)') (tg_v(i+dffts%nr1x*(i-1)), i=1,dffts%nr2x)
#if defined(__MPI)
!used to be   CALL mp_sum(tg_v, dffts%comm2 )
  nsiz = dffts%nnr_tg
  CALL MPI_ALLREDUCE( MPI_IN_PLACE, tg_v, nsiz, MPI_DOUBLE_PRECISION, MPI_SUM, dffts%comm2, ierr )
  IF( ierr /= 0 ) CALL fftx_error__( ' tg_gather ', ' MPI_ALLREDUCE ', abs( ierr ) )
!- could be done (more efficiently?) with an MPI_Allgatherv, but the logic of the ALLREDUCE is simpler
!  CALL MPI_Allgatherv( v(1), nsiz, MPI_DOUBLE_PRECISION, &
!        tg_v(1), recv_cnt, recv_displ, MPI_DOUBLE_PRECISION, dffts%comm2, IERR)
!  IF( ierr /= 0 ) CALL fftx_error__( ' tg_gather ', ' MPI_Allgatherv ', abs( ierr ) )
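!- note: each process' contribution is contiguous only within a single Z-plane, so a
!- single Allgatherv over the whole buffer would need a derived datatype. A per-plane
!- sketch (untested, and assuming the descriptor also carries the per-process arrays
!- nr2p(:) and i0r2p(:) and the group size nproc2) could look like:
!  ALLOCATE( recv_cnt(dffts%nproc2), recv_displ(dffts%nproc2) )
!  recv_cnt(:)   = dffts%nr1x * dffts%nr2p(:)
!  recv_displ(:) = dffts%nr1x * dffts%i0r2p(:)
!  DO ir3 = 1, dffts%my_nr3p
!     CALL MPI_Allgatherv( v(nxyp*(ir3-1)+1), nxyp, MPI_DOUBLE_PRECISION,   &
!                          tg_v(dffts%nr1x*dffts%nr2x*(ir3-1)+1), recv_cnt, &
!                          recv_displ, MPI_DOUBLE_PRECISION, dffts%comm2, ierr )
!  END DO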
#endif
  !write (6,'(20f12.7)') (tg_v(i+dffts%nr1x*(i-1)), i=1,dffts%nr1x)
  RETURN
END SUBROUTINE tg_gather

!- complex version of the previous routine
SUBROUTINE tg_cgather( dffts, v, tg_v )
  !
  USE fft_param
  USE fft_types,      ONLY : fft_type_descriptor

  IMPLICIT NONE

  TYPE(fft_type_descriptor), INTENT(in) :: dffts
  COMPLEX(DP), INTENT(IN)  :: v(dffts%nnr)
  COMPLEX(DP), INTENT(OUT) :: tg_v(dffts%nnr_tg)

  INTEGER :: nxyp, ir3, off, tg_off
  INTEGER :: i, nsiz, ierr

  nxyp   = dffts%nr1x*dffts%my_nr2p
  !
  !  The potential in v is distributed so that each Z-plane is shared among nproc2 processors.
  !  We collect the data of whole planes in tg_v to be used with task group distributed wfcs.
  !
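  !  (same plane-by-plane copy and indexing as in tg_gather above)
  !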
  tg_v(:) = (0.0_DP, 0.0_DP)
  do ir3 =1, dffts%my_nr3p
     off    = dffts%nr1x*dffts%my_nr2p*(ir3-1)
     tg_off = dffts%nr1x*dffts%nr2x   *(ir3-1) + dffts%nr1x*dffts%my_i0r2p
     tg_v(tg_off+1:tg_off+nxyp) = v(off+1:off+nxyp)
  end do
  !write (6,*) ' tg_v ', dffts%my_i0r2p, dffts%my_nr2p
  !write (6,'(20f12.7)') (v(dffts%my_i0r2p+i+dffts%nr1x*(i-1)), i=1,dffts%my_nr2p)
  !write (6,'(20f12.7)') (tg_v(i+dffts%nr1x*(i-1)), i=1,dffts%nr2x)
#if defined(__MPI)
!used to be   CALL mp_sum(tg_v, dffts%comm2 )
  nsiz = 2 * dffts%nnr_tg   ! two MPI_DOUBLE_PRECISION values per complex element
  CALL MPI_ALLREDUCE( MPI_IN_PLACE, tg_v, nsiz, MPI_DOUBLE_PRECISION, MPI_SUM, dffts%comm2, ierr )
  IF( ierr /= 0 ) CALL fftx_error__( ' tg_cgather ', ' MPI_ALLREDUCE ', abs( ierr ) )
!- could be done (more efficiently?) with an MPI_Allgatherv, but the logic of the ALLREDUCE is simpler
!  CALL MPI_Allgatherv( v(1), nsiz, MPI_DOUBLE_PRECISION, &
!        tg_v(1), recv_cnt, recv_displ, MPI_DOUBLE_PRECISION, dffts%comm2, IERR)
!  IF( ierr /= 0 ) CALL fftx_error__( ' tg_cgather ', ' MPI_Allgatherv ', abs( ierr ) )
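!- the per-plane Allgatherv sketch shown in tg_gather would apply here as well, with
!- all counts and displacements doubled (two reals per complex element) or with the
!- MPI_DOUBLE_COMPLEX datatype used instead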
#endif
  !write (6,'(20f12.7)') (tg_v(i+dffts%nr1x*(i-1)), i=1,dffts%nr1x)
  RETURN
END SUBROUTINE tg_cgather