Commit 1e68dc9e authored by degironc's avatar degironc

linear algebra parallelization made independend of its parent communicator by

introducing ortho_parent_comm to be used when addressing the whole group.
linear algebra is now distributed (in PW) inside the pool group (in CPV is left unchanged... are there pools in CPV?). 
mp_global sets ortho_comm as a sub-communicator of intra_pool_comm (used to be intra_bgrp_comm). It can be reverted 
to previous choice by commenting/uncommenting one line

tested on PW/example02 co.rx.in case (both Gamma and K=(000)) with 
  -np 8 -nd 4 -nb 2
that is using 2 bgrp (procs 0123 and 4567) and diagonalizing on 4 procs (0246).
tested also on 
  -np 4 -nd 4 -nb 2
that is using 2 bgrp (procs 01 and 34) and diagonalizing on 4 procs (0123).

some bgrp parallelization added to a few routines. global variables (evc,..) are NOT distributed but some local ones
are and more could be done.




git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/[email protected] c92efa57-630b-4861-b058-cf58834340f0
parent 1589b096
......@@ -213,11 +213,9 @@ MODULE control_flags
REAL(DP), PUBLIC :: &
ethr ! the convergence threshold for eigenvalues
INTEGER, PUBLIC :: &
isolve, &! Davidson or CG or ParO diagonalization
david, &! max dimension of subspace in Davidson diagonalization
isolve, &! Davidson or CG or DIIS diagonalization
max_cg_iter, &! maximum number of iterations in a CG di
diis_buff, &! dimension of the buffer in diis
diis_ndim ! dimension of reduced basis in DIIS
max_cg_iter ! maximum number of iterations in a CG call
LOGICAL, PUBLIC :: &
diago_full_acc = .FALSE. ! if true, empty eigenvalues have the same
! accuracy of the occupied ones
......
......@@ -940,7 +940,7 @@ MODULE input_parameters
diis_temp, diis_achmix, diis_g0chmix, diis_g1chmix, &
diis_nchmix, diis_nrot, diis_rothr, diis_ethr, diis_chguess, &
mixing_mode, mixing_beta, mixing_ndim, mixing_fixed_ns, &
tqr, diago_cg_maxiter, diago_david_ndim, diagonalization , &
tqr, diago_cg_maxiter, diago_david_ndim, diagonalization, &
startingpot, startingwfc , conv_thr, &
adaptive_thr, conv_thr_init, conv_thr_multi, &
diago_thr_init, n_inner, fermi_energy, rotmass, occmass, &
......
......@@ -37,6 +37,7 @@ MODULE mp_diag
INTEGER :: ortho_row_comm = 0 ! communicator for the ortho row group
INTEGER :: ortho_col_comm = 0 ! communicator for the ortho col group
INTEGER :: ortho_comm_id= 0 ! id of the ortho_comm
INTEGER :: ortho_parent_comm = 0 ! parent communicator from which ortho group has been created
!
#if defined __SCALAPACK
INTEGER :: me_blacs = 0 ! BLACS processor index starting from 0
......@@ -182,6 +183,10 @@ CONTAINS
!
CALL mp_comm_split ( comm_all, color, key, ortho_comm )
!
! and remember where it comes from
!
ortho_parent_comm = comm_all
!
! Computes coordinates of the processors, in row maior order
!
me_ortho1 = mp_rank( ortho_comm )
......
......@@ -74,7 +74,12 @@ CONTAINS
!
CALL mp_start_pools ( npool_, intra_image_comm )
CALL mp_start_bands ( nband_, ntg_, intra_pool_comm )
CALL mp_start_diag ( ndiag_, intra_bgrp_comm )
!
! linear algebra parallelization. comment/uncomment as desired
! one diag group per pool ( individual k-point level )
CALL mp_start_diag ( ndiag_, intra_pool_comm )
! used to be one diag group per bgrp
!CALL mp_start_diag ( ndiag_, intra_bgrp_comm )
!
RETURN
!
......
......@@ -20,12 +20,14 @@ SUBROUTINE atomic_wfc (ik, wfcatom)
USE basis, ONLY : natomwfc
USE gvect, ONLY : mill, eigts1, eigts2, eigts3, g
USE klist, ONLY : xk
USE wvfct, ONLY : npwx, npw, nbnd, igk
USE wvfct, ONLY : npwx, npw, igk
USE us, ONLY : tab_at, dq
USE uspp_param, ONLY : upf
USE noncollin_module, ONLY : noncolin, npol, angle1, angle2
USE spin_orb, ONLY : lspinorb, rot_ylm, fcoef, lmaxx, domag, &
starting_spin_angle
USE mp_bands, ONLY : inter_bgrp_comm, set_bgrp_indices
USE mp, ONLY : mp_sum
!
implicit none
!
......@@ -38,6 +40,7 @@ SUBROUTINE atomic_wfc (ik, wfcatom)
complex(DP), allocatable :: sk (:), aux(:)
complex(DP) :: kphase, lphase
real(DP) :: arg, px, ux, vx, wx
integer :: ig_start, ig_end
call start_clock ('atomic_wfc')
......@@ -49,8 +52,7 @@ SUBROUTINE atomic_wfc (ik, wfcatom)
!
nwfcm = MAXVAL ( upf(1:ntyp)%nwfc )
!
allocate ( ylm (npw,(lmax_wfc+1)**2), chiq(npw,nwfcm,ntyp), &
sk(npw), gk(3,npw), qg(npw) )
allocate ( ylm (npw,(lmax_wfc+1)**2), chiq(npw,nwfcm,ntyp), sk(npw), gk(3,npw), qg(npw) )
!
do ig = 1, npw
gk (1,ig) = xk(1, ik) + g(1, igk(ig) )
......@@ -62,10 +64,13 @@ SUBROUTINE atomic_wfc (ik, wfcatom)
! ylm = spherical harmonics
!
call ylmr2 ((lmax_wfc+1)**2, npw, gk, qg, ylm)
! from now to the end of the routine the ig loops are distributed across bgrp
call set_bgrp_indices(npw,ig_start,ig_end)
!
! set now q=|k+G| in atomic units
!
do ig = 1, npw
do ig = ig_start, ig_end
qg(ig) = sqrt(qg(ig))*tpiba
enddo
!
......@@ -76,7 +81,7 @@ SUBROUTINE atomic_wfc (ik, wfcatom)
do nt = 1, ntyp
do nb = 1, upf(nt)%nwfc
if ( upf(nt)%oc (nb) >= 0.d0) then
do ig = 1, npw
do ig = ig_start, ig_end
px = qg (ig) / dq - int (qg (ig) / dq)
ux = 1.d0 - px
vx = 2.d0 - px
......@@ -106,7 +111,7 @@ SUBROUTINE atomic_wfc (ik, wfcatom)
!
! sk is the structure factor
!
do ig = 1, npw
do ig = ig_start, ig_end
iig = igk (ig)
sk (ig) = kphase * eigts1 (mill (1,iig), na) * &
eigts2 (mill (2,iig), na) * &
......@@ -155,6 +160,9 @@ SUBROUTINE atomic_wfc (ik, wfcatom)
deallocate(aux, sk, chiq, ylm)
! collect results across bgrp
call mp_sum(wfcatom, inter_bgrp_comm)
call stop_clock ('atomic_wfc')
return
......@@ -185,7 +193,7 @@ CONTAINS
if (abs(rot_ylm(ind,n1)) > 1.d-8) &
aux(:)=aux(:)+rot_ylm(ind,n1)*ylm(:,ind1)
ENDDO
DO ig=1,npw
do ig = ig_start, ig_end
wfcatom (ig,is,n_starting_wfc) = lphase*fact(is)*&
sk(ig)*aux(ig)*chiq (ig, nb, nt)
END DO
......@@ -249,14 +257,14 @@ CONTAINS
n_starting_wfc = n_starting_wfc + 1
if (n_starting_wfc + 2*l+1 > natomwfc) call errore &
('atomic_wfc_nc', 'internal error: too many wfcs', 1)
DO ig=1,npw
do ig = ig_start, ig_end
aux(ig) = sk(ig)*ylm(ig,lm)*chiaux(ig)
END DO
!
! now, rotate wfc as needed
! first : rotation with angle alpha around (OX)
!
DO ig=1,npw
do ig = ig_start, ig_end
fup = cos(0.5d0*alpha)*aux(ig)
fdown = (0.d0,1.d0)*sin(0.5d0*alpha)*aux(ig)
!
......@@ -304,14 +312,14 @@ CONTAINS
n_starting_wfc = n_starting_wfc + 1
if (n_starting_wfc + 2*l+1 > natomwfc) call errore &
('atomic_wfc_nc', 'internal error: too many wfcs', 1)
DO ig=1,npw
do ig = ig_start, ig_end
aux(ig) = sk(ig)*ylm(ig,lm)*chiq(ig,nb,nt)
END DO
!
! now, rotate wfc as needed
! first : rotation with angle alpha around (OX)
!
DO ig=1,npw
do ig = ig_start, ig_end
fup = cos(0.5d0*alpha)*aux(ig)
fdown = (0.d0,1.d0)*sin(0.5d0*alpha)*aux(ig)
!
......@@ -353,7 +361,7 @@ CONTAINS
if (n_starting_wfc > natomwfc) call errore &
('atomic_wfc___', 'internal error: too many wfcs', 1)
!
DO ig = 1, npw
do ig = ig_start, ig_end
wfcatom (ig, 1, n_starting_wfc) = lphase * &
sk (ig) * ylm (ig, lm) * chiq (ig, nb, nt)
ENDDO
......
!
! Copyright (C) 2001-2013 Quantum ESPRESSO group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
......@@ -179,8 +179,9 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
USE becmod, ONLY : bec_type, becp, calbec, &
allocate_bec_type, deallocate_bec_type
USE klist, ONLY : nks
USE mp_bands, ONLY : nproc_bgrp, intra_bgrp_comm
USE mp, ONLY : mp_sum
USE mp_bands, ONLY : nproc_bgrp, intra_bgrp_comm, inter_bgrp_comm, &
set_bgrp_indices, my_bgrp_id, root_bgrp, nbgrp
USE mp, ONLY : mp_sum, mp_bcast
!
IMPLICIT NONE
!
......@@ -194,7 +195,7 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
! number of iterations in Davidson
! number or repeated call to diagonalization in case of non convergence
! number of notconverged elements
INTEGER :: ierr, ipw
INTEGER :: ierr, ipw, ibnd, ibnd_start, ibnd_end
!
LOGICAL :: lrot
! .TRUE. if the wfc have already be rotated
......@@ -214,7 +215,7 @@ SUBROUTINE diag_bands( iter, ik, avg_iter )
IF ( nbndx > ipw ) &
CALL errore ( 'diag_bands', 'too many bands, or too few plane waves',1)
!
CALL allocate_bec_type ( nkb, nbnd, becp, intra_bgrp_comm )
CALL allocate_bec_type ( nkb, nbnd, becp, intra_bgrp_comm )
!
IF ( gamma_only ) THEN
!
......@@ -266,8 +267,7 @@ CONTAINS
!
FORALL( ig = 1 : npw )
!
h_diag(ig,1) = 1.D0 + g2kin(ig) + &
SQRT( 1.D0 + ( g2kin(ig) - 1.D0 )**2 )
h_diag(ig,1) = 1.D0 + g2kin(ig) + SQRT( 1.D0 + ( g2kin(ig) - 1.D0 )**2 )
!
END FORALL
!
......@@ -279,8 +279,7 @@ CONTAINS
!
IF ( .NOT. lrot ) THEN
!
CALL rotate_wfc ( npwx, npw, nbnd, gstart, nbnd, &
evc, npol, okvan, evc, et(1,ik) )
CALL rotate_wfc ( npwx, npw, nbnd, gstart, nbnd, evc, npol, okvan, evc, et(1,ik) )
!
avg_iter = avg_iter + 1.D0
!
......@@ -319,6 +318,8 @@ CONTAINS
!
IF ( use_para_diag ) then
!
! ! make sure that all processors have the same wfc
! IF ( nbgrp > 1 ) CALL mp_bcast(evc,root_bgrp,inter_bgrp_comm)
CALL pregterg( npw, npwx, nbnd, nbndx, evc, ethr, &
okvan, gstart, et(1,ik), btype(1,ik), &
notconv, lrot, dav_iter )
......@@ -356,7 +357,7 @@ CONTAINS
!
! ... here the local variables
!
INTEGER :: ipol, ierr
INTEGER :: ipol
REAL(dp) :: eps
! --- Define a small number ---
eps=0.000001d0
......@@ -403,8 +404,7 @@ CONTAINS
!
FORALL( ig = 1 : npwx )
!
h_diag(ig,:) = 1.D0 + g2kin(ig) + &
SQRT( 1.D0 + ( g2kin(ig) - 1.D0 )**2 )
h_diag(ig,:) = 1.D0 + g2kin(ig) + SQRT( 1.D0 + ( g2kin(ig) - 1.D0 )**2 )
!
END FORALL
!
......@@ -416,8 +416,7 @@ CONTAINS
!
IF ( .NOT. lrot ) THEN
!
CALL rotate_wfc ( npwx, npw, nbnd, gstart, nbnd, &
evc, npol, okvan, evc, et(1,ik) )
CALL rotate_wfc ( npwx, npw, nbnd, gstart, nbnd, evc, npol, okvan, evc, et(1,ik) )
!
avg_iter = avg_iter + 1.D0
!
......
......@@ -1089,7 +1089,7 @@ CONTAINS
vtmp(:,1:notcl) = vl(:,1:notcl)
END IF
CALL mp_bcast( vtmp(:,1:notcl), root, intra_bgrp_comm )
CALL mp_bcast( vtmp(:,1:notcl), root, ortho_comm )
!
IF ( uspp ) THEN
!
......@@ -1158,14 +1158,14 @@ CONTAINS
!
! this proc sends his block
!
CALL mp_bcast( vl(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vl(:,1:nc), root, ortho_comm )
CALL ZGEMM( 'N', 'N', kdim, nc, nr, ONE, &
psi(1,1,ir), kdmx, vl, nx, beta, evc(1,1,ic), kdmx )
ELSE
!
! all other procs receive
!
CALL mp_bcast( vtmp(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vtmp(:,1:nc), root, ortho_comm )
CALL ZGEMM( 'N', 'N', kdim, nc, nr, ONE, &
psi(1,1,ir), kdmx, vtmp, nx, beta, evc(1,1,ic), kdmx )
END IF
......@@ -1216,14 +1216,14 @@ CONTAINS
!
! this proc sends his block
!
CALL mp_bcast( vl(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vl(:,1:nc), root, ortho_comm )
CALL ZGEMM( 'N', 'N', kdim, nc, nr, ONE, &
spsi(1,1,ir), kdmx, vl, nx, beta, psi(1,1,nvec+ic), kdmx )
ELSE
!
! all other procs receive
!
CALL mp_bcast( vtmp(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vtmp(:,1:nc), root, ortho_comm )
CALL ZGEMM( 'N', 'N', kdim, nc, nr, ONE, &
spsi(1,1,ir), kdmx, vtmp, nx, beta, psi(1,1,nvec+ic), kdmx )
END IF
......@@ -1276,14 +1276,14 @@ CONTAINS
!
! this proc sends his block
!
CALL mp_bcast( vl(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vl(:,1:nc), root, ortho_comm )
CALL ZGEMM( 'N', 'N', kdim, nc, nr, ONE, &
hpsi(1,1,ir), kdmx, vl, nx, beta, psi(1,1,nvec+ic), kdmx )
ELSE
!
! all other procs receive
!
CALL mp_bcast( vtmp(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vtmp(:,1:nc), root, ortho_comm )
CALL ZGEMM( 'N', 'N', kdim, nc, nr, ONE, &
hpsi(1,1,ir), kdmx, vtmp, nx, beta, psi(1,1,nvec+ic), kdmx )
END IF
......@@ -1343,11 +1343,12 @@ CONTAINS
! accumulate result on dm of root proc.
CALL mp_root_sum( work, dm, root, intra_bgrp_comm )
CALL mp_root_sum( work, dm, root, ortho_comm )
END DO
!
END DO
if (nbgrp > 1) dm = dm/nbgrp
!
! The matrix is hermitianized using upper triangle
!
......@@ -1396,11 +1397,12 @@ CONTAINS
CALL ZGEMM( 'C', 'N', nr, nc, kdim, ONE, v( 1, 1, ir ), &
kdmx, w(1,1,ii), kdmx, ZERO, vtmp, nx )
if (nbgrp > 1) vtmp = vtmp/nbgrp
!
IF( (desc%active_node > 0) .AND. (ipr-1 == desc%myr) .AND. (ipc-1 == desc%myc) ) THEN
CALL mp_root_sum( vtmp(:,1:nc), dm(:,icc:icc+nc-1), root, intra_bgrp_comm )
CALL mp_root_sum( vtmp(:,1:nc), dm(:,icc:icc+nc-1), root, ortho_comm )
ELSE
CALL mp_root_sum( vtmp(:,1:nc), dm, root, intra_bgrp_comm )
CALL mp_root_sum( vtmp(:,1:nc), dm, root, ortho_comm )
END IF
END DO
......@@ -1427,7 +1429,7 @@ CONTAINS
e( i + ic - 1 ) = REAL( hl( i, i ) )
END DO
END IF
CALL mp_sum( e(1:nbase), intra_bgrp_comm )
CALL mp_sum( e(1:nbase), ortho_comm )
RETURN
END SUBROUTINE set_e_from_h
!
......
......@@ -5,7 +5,6 @@
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
!
!----------------------------------------------------------------------------
SUBROUTINE h_1psi( lda, n, psi, hpsi, spsi )
!----------------------------------------------------------------------------
......@@ -14,16 +13,18 @@ SUBROUTINE h_1psi( lda, n, psi, hpsi, spsi )
! ... to a vector psi and puts the result in hpsi and spsi
! ... Wrapper routine - calls h_psi and s_psi
!
USE kinds, ONLY: DP
USE bp, ONLY: lelfield
USE noncollin_module, ONLY: noncolin, npol
USE realus, ONLY : real_space, invfft_orbital_gamma, fwfft_orbital_gamma, &
calbec_rs_gamma, s_psir_gamma, initialisation_level
! ... No bgrp parallelization here !
!
USE kinds, ONLY: DP
USE bp, ONLY: lelfield
USE noncollin_module, &
ONLY: npol
USE realus, ONLY : real_space, invfft_orbital_gamma, fwfft_orbital_gamma, &
calbec_rs_gamma, s_psir_gamma, initialisation_level
!
IMPLICIT NONE
!
INTEGER :: lda, n
INTEGER, INTENT(IN) :: lda, n
COMPLEX (DP) :: psi(lda*npol,1), hpsi(n), spsi(n,1)
!
!
......@@ -37,8 +38,8 @@ SUBROUTINE h_1psi( lda, n, psi, hpsi, spsi )
call s_psir_gamma(1,1)
call fwfft_orbital_gamma(spsi,1,1)
else
CALL h_psi( lda, n, 1, psi, hpsi )
CALL s_psi( lda, n, 1, psi, spsi )
CALL h_psi( lda, n, 1, psi, hpsi ) ! apply H to a single wfc (no bgrp parallelization here)
CALL s_psi( lda, n, 1, psi, spsi ) ! apply S to a single wfc (no bgrp parallelization here)
endif
!
CALL stop_clock( 'h_1psi' )
......
......@@ -39,7 +39,11 @@ SUBROUTINE h_psi( lda, n, m, psi, hpsi )
!
CALL start_clock( 'h_psi_bgrp' )
if (tbgrp .and. .not. exx_is_active() ) then
! if exx_is_active bgrp parallelization is already used in exx routines that are part of Hpsi !
! if m <= 1 there is nothing to distribute so we can avoid the communication step.
! moreover if a band by band diagonalization (such as ParO for instance) is used it may
! be useful/necessary to operate on different vectors independently.
if (tbgrp .and. .not. exx_is_active() .and. m > 1) then
hpsi(:,:) = (0.d0,0.d0)
call set_bgrp_indices(m,m_start,m_end)
if (m_end >= m_start) & !! at least one band in this band group
......
......@@ -130,6 +130,8 @@ atomic_wfc.o : ../../Modules/cell_base.o
atomic_wfc.o : ../../Modules/constants.o
atomic_wfc.o : ../../Modules/ions_base.o
atomic_wfc.o : ../../Modules/kind.o
atomic_wfc.o : ../../Modules/mp.o
atomic_wfc.o : ../../Modules/mp_bands.o
atomic_wfc.o : ../../Modules/noncol.o
atomic_wfc.o : ../../Modules/recvec.o
atomic_wfc.o : ../../Modules/uspp.o
......@@ -1179,6 +1181,11 @@ paw_symmetry.o : ../../Modules/noncol.o
paw_symmetry.o : ../../Modules/uspp.o
paw_symmetry.o : pwcom.o
paw_symmetry.o : symm_base.o
pcg.o : ../../Modules/control_flags.o
pcg.o : ../../Modules/io_global.o
pcg.o : ../../Modules/kind.o
pcg.o : ../../Modules/mp.o
pcg.o : ../../Modules/mp_bands.o
plugin_clean.o : ../../Modules/plugin_flags.o
plugin_clock.o : ../../Modules/io_global.o
plugin_clock.o : ../../Modules/plugin_flags.o
......@@ -1623,6 +1630,7 @@ setup.o : ../../Modules/io_files.o
setup.o : ../../Modules/io_global.o
setup.o : ../../Modules/ions_base.o
setup.o : ../../Modules/kind.o
setup.o : ../../Modules/mp_bands.o
setup.o : ../../Modules/mp_diag.o
setup.o : ../../Modules/mp_pools.o
setup.o : ../../Modules/noncol.o
......@@ -2014,6 +2022,7 @@ wfcinit.o : ../../Modules/control_flags.o
wfcinit.o : ../../Modules/io_files.o
wfcinit.o : ../../Modules/io_global.o
wfcinit.o : ../../Modules/kind.o
wfcinit.o : ../../Modules/mp.o
wfcinit.o : ../../Modules/mp_bands.o
wfcinit.o : ../../Modules/noncol.o
wfcinit.o : ../../Modules/random_numbers.o
......
......@@ -136,6 +136,7 @@ SUBROUTINE print_clock_pw()
CALL print_clock( 'add_vuspsi' )
CALL print_clock( 'vhpsi' )
CALL print_clock( 'h_psi_meta' )
CALL print_clock( 'h_1psi' )
!
WRITE( stdout, '(/5X,"General routines")' )
!
......
......@@ -479,7 +479,8 @@ SUBROUTINE pregterg( npw, npwx, nvec, nvecx, evc, ethr, &
USE kinds, ONLY : DP
USE io_global, ONLY : stdout
USE mp_bands, ONLY : intra_bgrp_comm, inter_bgrp_comm, root_bgrp, nbgrp
USE mp_diag, ONLY : ortho_comm, np_ortho, me_ortho, ortho_comm_id, leg_ortho
USE mp_diag, ONLY : ortho_comm, np_ortho, me_ortho, ortho_comm_id, leg_ortho, &
ortho_parent_comm
USE descriptors, ONLY : la_descriptor, descla_init, descla_local_dims
USE parallel_toolkit, ONLY : dsqmdst, dsqmcll, dsqmred, dsqmsym
USE mp, ONLY : mp_bcast, mp_root_sum, mp_sum
......@@ -1057,7 +1058,7 @@ CONTAINS
vtmp(:,1:notcl) = vl(:,1:notcl)
END IF
CALL mp_bcast( vtmp(:,1:notcl), root, intra_bgrp_comm )
CALL mp_bcast( vtmp(:,1:notcl), root, ortho_parent_comm )
!
IF ( uspp ) THEN
!
......@@ -1127,14 +1128,14 @@ CONTAINS
!
! this proc sends his block
!
CALL mp_bcast( vl(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vl(:,1:nc), root, ortho_parent_comm )
CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
psi(1,ir), npwx2, vl, nx, beta, evc(1,ic), npwx2 )
ELSE
!
! all other procs receive
!
CALL mp_bcast( vtmp(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vtmp(:,1:nc), root, ortho_parent_comm )
CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
psi(1,ir), npwx2, vtmp, nx, beta, evc(1,ic), npwx2 )
END IF
......@@ -1185,14 +1186,14 @@ CONTAINS
!
! this proc sends his block
!
CALL mp_bcast( vl(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vl(:,1:nc), root, ortho_parent_comm )
CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
spsi(1,ir), npwx2, vl, nx, beta, psi(1,nvec+ic), npwx2 )
ELSE
!
! all other procs receive
!
CALL mp_bcast( vtmp(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vtmp(:,1:nc), root, ortho_parent_comm )
CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
spsi(1,ir), npwx2, vtmp, nx, beta, psi(1,nvec+ic), npwx2 )
END IF
......@@ -1245,14 +1246,14 @@ CONTAINS
!
! this proc sends his block
!
CALL mp_bcast( vl(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vl(:,1:nc), root, ortho_parent_comm )
CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
hpsi(1,ir), npwx2, vl, nx, beta, psi(1,nvec+ic), npwx2 )
ELSE
!
! all other procs receive
!
CALL mp_bcast( vtmp(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vtmp(:,1:nc), root, ortho_parent_comm )
CALL DGEMM( 'N', 'N', npw2, nc, nr, 1.D0, &
hpsi(1,ir), npwx2, vtmp, nx, beta, psi(1,nvec+ic), npwx2 )
END IF
......@@ -1313,11 +1314,12 @@ CONTAINS
! accumulate result on dm of root proc.
CALL mp_root_sum( work, dm, root, intra_bgrp_comm )
CALL mp_root_sum( work, dm, root, ortho_parent_comm )
END DO
!
END DO
IF (ortho_parent_comm.ne.intra_bgrp_comm .and. nbgrp > 1) dm = dm/nbgrp
!
CALL dsqmsym( nbase, dm, nx, desc )
!
......@@ -1367,11 +1369,12 @@ CONTAINS
!
IF ( gstart == 2 ) &
CALL DGER( nr, nc, -1.D0, v( 1, ir ), npwx2, w(1,ii), npwx2, vtmp, nx )
IF (ortho_parent_comm.ne.intra_bgrp_comm .and. nbgrp > 1) vtmp = vtmp/nbgrp
IF( (desc%active_node > 0) .AND. (ipr-1 == desc%myr) .AND. (ipc-1 == desc%myc) ) THEN
CALL mp_root_sum( vtmp(:,1:nc), dm(:,icc:icc+nc-1), root, intra_bgrp_comm )
CALL mp_root_sum( vtmp(:,1:nc), dm(:,icc:icc+nc-1), root, ortho_parent_comm )
ELSE
CALL mp_root_sum( vtmp(:,1:nc), dm, root, intra_bgrp_comm )
CALL mp_root_sum( vtmp(:,1:nc), dm, root, ortho_parent_comm )
END IF
......@@ -1399,7 +1402,7 @@ CONTAINS
e( i + ic - 1 ) = hl( i, i )
END DO
END IF
CALL mp_sum( e(1:nbase), intra_bgrp_comm )
CALL mp_sum( e(1:nbase), ortho_parent_comm )
RETURN
END SUBROUTINE set_e_from_h
!
......
......@@ -123,14 +123,13 @@ SUBROUTINE protate_wfc_gamma( npwx, npw, nstart, gstart, nbnd, psi, overlap, evc
!
USE kinds, ONLY : DP
USE control_flags, ONLY : gamma_only
USE mp_bands, ONLY : intra_bgrp_comm
USE mp_bands, ONLY : intra_bgrp_comm, nbgrp
USE mp_diag, ONLY : ortho_comm, np_ortho, me_ortho, ortho_comm_id,&
leg_ortho
leg_ortho, ortho_parent_comm
USE descriptors, ONLY : la_descriptor, descla_init
USE parallel_toolkit, ONLY : dsqmred, dsqmdst, dsqmsym
USE parallel_toolkit, ONLY : dsqmsym
USE mp, ONLY : mp_bcast, mp_root_sum, mp_sum, mp_barrier
!
IMPLICIT NONE
!
......@@ -292,11 +291,13 @@ CONTAINS
! accumulate result on dm of root proc.
CALL mp_root_sum( work, dm, root, intra_bgrp_comm )
CALL mp_root_sum( work, dm, root, ortho_parent_comm )
END DO
!
END DO
if (ortho_parent_comm.ne.intra_bgrp_comm .and. nbgrp > 1) dm = dm/nbgrp
!
CALL dsqmsym( nstart, dm, nx, desc )
!
......@@ -337,13 +338,13 @@ CONTAINS
!
! this proc sends his block
!
CALL mp_bcast( vr(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vr(:,1:nc), root, ortho_parent_comm )
CALL DGEMM( 'N', 'N', 2*npw, nc, nr, 1.D0, psi(1,ir), 2*npwx, vr, nx, beta, aux(1,ic), 2*npwx )
ELSE
!
! all other procs receive
!
CALL mp_bcast( vtmp(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vtmp(:,1:nc), root, ortho_parent_comm )
CALL DGEMM( 'N', 'N', 2*npw, nc, nr, 1.D0, psi(1,ir), 2*npwx, vtmp, nx, beta, aux(1,ic), 2*npwx )
END IF
!
......
......@@ -112,11 +112,11 @@ SUBROUTINE protate_wfc_k( npwx, npw, nstart, nbnd, npol, psi, overlap, evc, e )
! ... Subroutine with distributed matrices, written by Carlo Cavazzoni
!
USE kinds, ONLY : DP
USE mp_bands, ONLY : intra_bgrp_comm
USE mp_bands, ONLY : intra_bgrp_comm, nbgrp
USE mp_diag, ONLY : ortho_comm, np_ortho, me_ortho, ortho_comm_id,&
leg_ortho
USE descriptors, ONLY : descla_init , la_descriptor
USE parallel_toolkit, ONLY : zsqmred, zsqmher, zsqmdst
USE parallel_toolkit, ONLY : zsqmher
USE mp, ONLY : mp_bcast, mp_root_sum, mp_sum, mp_barrier
!
IMPLICIT NONE
......@@ -286,11 +286,12 @@ CONTAINS
CALL ZGEMM( 'C', 'N', nr, nc, kdim, ( 1.D0, 0.D0 ) , v(1,ir), kdmx, w(1,ic), kdmx, ( 0.D0, 0.D0 ), work, nx )
! accumulate result on dm of root proc.
CALL mp_root_sum( work, dm, root, intra_bgrp_comm )
CALL mp_root_sum( work, dm, root, ortho_comm )
END DO
!
END DO
if (nbgrp > 1) dm = dm/nbgrp
!
CALL zsqmher( nstart, dm, nx, desc )
!
......@@ -331,13 +332,13 @@ CONTAINS
!
! this proc sends his block
!
CALL mp_bcast( vc(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vc(:,1:nc), root, ortho_comm )
CALL ZGEMM( 'N', 'N', kdim, nc, nr, ( 1.D0, 0.D0 ), psi(1,ir), kdmx, vc, nx, beta, aux(1,ic), kdmx )
ELSE
!
! all other procs receive
!
CALL mp_bcast( vtmp(:,1:nc), root, intra_bgrp_comm )
CALL mp_bcast( vtmp(:,1:nc), root, ortho_comm )
CALL ZGEMM( 'N', 'N', kdim, nc, nr, ( 1.D0, 0.D0 ), psi(1,ir), kdmx, vtmp, nx, beta, aux(1,ic), kdmx )
END IF
!
......
......@@ -44,7 +44,11 @@ SUBROUTINE s_psi( lda, n, m, psi, spsi )
!
CALL start_clock( 's_psi_bgrp' )