Commit de2a01c4 authored by ccavazzoni

- Task group variables moved from the FFT type to a new data type
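
  At the call sites the change is mechanical: task-group quantities are no
  longer reached through the FFT descriptor dffts but through a separate
  descriptor dtgs, exported by fft_base and passed as an extra argument to
  the task-group FFT routines. Schematically:

      ! before:
      USE fft_base, ONLY: dffts
      ALLOCATE( psis( dffts%tg_nnr * dffts%nogrp ) )
      CALL mp_sum( tmp_rhos, gid = dffts%ogrp_comm )

      ! after:
      USE fft_base, ONLY: dffts, dtgs
      ALLOCATE( psis( dtgs%tg_nnr * dtgs%nogrp ) )
      CALL mp_sum( tmp_rhos, gid = dtgs%ogrp_comm )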


git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@12626 c92efa57-630b-4861-b058-cf58834340f0
parent 510bc358
@@ -117,7 +117,7 @@
USE cg_module, ONLY: tcg
USE cp_interfaces, ONLY: stress_kin, enkin
USE fft_interfaces, ONLY: fwfft, invfft
USE fft_base, ONLY: dffts, dfftp, dfft3d
USE fft_base, ONLY: dffts, dfftp, dfft3d, dtgs
USE cp_interfaces, ONLY: checkrho, ennl, calrhovan, dennl
USE cp_main_variables, ONLY: iprint_stdout, descla
USE wannier_base, ONLY: iwf
@@ -510,15 +510,15 @@
REAL(DP), ALLOCATABLE :: tmp_rhos(:,:)
COMPLEX(DP), ALLOCATABLE :: aux(:)
ALLOCATE( psis( dffts%tg_nnr * dffts%nogrp ) )
ALLOCATE( aux( dffts%tg_nnr * dffts%nogrp ) )
ALLOCATE( psis( dtgs%tg_nnr * dtgs%nogrp ) )
ALLOCATE( aux( dtgs%tg_nnr * dtgs%nogrp ) )
!
ALLOCATE( tmp_rhos ( dffts%nr1x * dffts%nr2x * dffts%tg_npp( me_bgrp + 1 ), nspin ) )
ALLOCATE( tmp_rhos ( dffts%nr1x * dffts%nr2x * dtgs%tg_npp( me_bgrp + 1 ), nspin ) )
!
tmp_rhos = 0_DP
do i = 1, nbsp_bgrp, 2*dffts%nogrp
do i = 1, nbsp_bgrp, 2*dtgs%nogrp
!
! Initialize wave-functions in Fourier space (to be FFTed)
! The size of psis is nnr, which is equal to the total number
@@ -541,7 +541,14 @@
!
eig_offset = 0
do eig_index = 1, 2*dffts%nogrp, 2
!!$omp parallel
!!$omp single
do eig_index = 1, 2*dtgs%nogrp, 2
!
!!$omp task default(none) &
!!$omp firstprivate( i, eig_offset, nbsp_bgrp, ngw, eig_index ) &
!!$omp shared( aux, c_bgrp, dffts )
!
! here we pack 2*nogrp electronic states in the psis array
! note that if nogrp == nproc_bgrp each proc performs a full 3D
@@ -551,14 +558,18 @@
!
! The eig_index loop is executed only ONCE when NOGRP=1.
!
CALL c2psi( aux(eig_offset*dffts%tg_nnr+1), dffts%tg_nnr, &
CALL c2psi( aux(eig_offset*dtgs%tg_nnr+1), dtgs%tg_nnr, &
c_bgrp( 1, i+eig_index-1 ), c_bgrp( 1, i+eig_index ), ngw, 2 )
!
eig_offset = eig_offset + 1
!
ENDIF
!!$omp end task
!
eig_offset = eig_offset + 1
!
end do
!!$omp end single
!!$omp end parallel
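!
! Illustrative sketch (not part of this commit): the commented directives
! above follow the single-producer tasking pattern. The one thread inside
! SINGLE walks the band pairs and spawns an independent packing task per
! pair; each task captures its own aux offset via FIRSTPRIVATE, which is
! why eig_offset is advanced outside the task body. With the tasking
! enabled it would read roughly:
!
!    !$omp parallel
!    !$omp single
!    eig_offset = 0
!    do eig_index = 1, 2*dtgs%nogrp, 2
!       !$omp task default(none) &
!       !$omp firstprivate( i, eig_offset, nbsp_bgrp, ngw, eig_index ) &
!       !$omp shared( aux, c_bgrp, dtgs )
!       IF ( i + eig_index - 1 <= nbsp_bgrp ) THEN
!          CALL c2psi( aux(eig_offset*dtgs%tg_nnr+1), dtgs%tg_nnr, &
!                      c_bgrp(1,i+eig_index-1), c_bgrp(1,i+eig_index), ngw, 2 )
!       END IF
!       !$omp end task
!       eig_offset = eig_offset + 1  ! advanced only by the producer thread
!    end do
!    !$omp end single
!    !$omp end parallel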
!
! 2*NOGRP bands are transformed at the same time
! psis: holds the Fourier coefficients of the current processor
@@ -567,18 +578,18 @@
! now redistribute data
!
!
IF( dffts%nogrp == dffts%nproc ) THEN
CALL pack_group_sticks( aux, psis, dffts )
CALL maps_sticks_to_3d( dffts, psis, SIZE(psis), aux, 2 )
IF( dtgs%nogrp == dtgs%nproc ) THEN
CALL pack_group_sticks( aux, psis, dtgs )
CALL maps_sticks_to_3d( dffts, dtgs, psis, SIZE(psis), aux, 2 )
CALL cfft3ds( aux, dfft3d%nr1, dfft3d%nr2, dfft3d%nr3, &
dfft3d%nr1x,dfft3d%nr2x,dfft3d%nr3x, 1, dfft3d%isind, dfft3d%iplw )
psis = aux
ELSE
!
CALL pack_group_sticks( aux, psis, dffts )
CALL fw_tg_cft3_z( psis, dffts, aux )
CALL fw_tg_cft3_scatter( psis, dffts, aux )
CALL fw_tg_cft3_xy( psis, dffts )
CALL pack_group_sticks( aux, psis, dtgs )
CALL fw_tg_cft3_z( psis, dffts, aux, dtgs )
CALL fw_tg_cft3_scatter( psis, dffts, aux, dtgs )
CALL fw_tg_cft3_xy( psis, dffts, dtgs )
END IF
#else
@@ -587,7 +598,7 @@
CALL c2psi( psis, dffts%nnr, c_bgrp( 1, i ), c_bgrp( 1, i+1 ), ngw, 2 )
CALL invfft('Wave',psis, dffts )
CALL invfft('Wave', psis, dffts )
#endif
!
@@ -598,8 +609,8 @@
!
! Compute the proper factor for each band
!
DO ii = 1, dffts%nogrp
IF( dffts%nolist( ii ) == me_bgrp ) EXIT
DO ii = 1, dtgs%nogrp
IF( dtgs%nolist( ii ) == me_bgrp ) EXIT
END DO
!
! Remember two bands are packed in a single array :
@@ -640,11 +651,11 @@
!code this should be equal to the total number of planes
!
ir = dffts%nr1x*dffts%nr2x*dffts%tg_npp( me_bgrp + 1 )
ir = dffts%nr1x*dffts%nr2x*dtgs%tg_npp( me_bgrp + 1 )
IF( ir > SIZE( psis ) ) &
CALL errore( ' rhoofr ', ' psis size too small ', ir )
do ir = 1, dffts%nr1x*dffts%nr2x*dffts%tg_npp( me_bgrp + 1 )
do ir = 1, dffts%nr1x*dffts%nr2x*dtgs%tg_npp( me_bgrp + 1 )
tmp_rhos(ir,iss1) = tmp_rhos(ir,iss1) + sa1*( real(psis(ir)))**2
tmp_rhos(ir,iss2) = tmp_rhos(ir,iss2) + sa2*(aimag(psis(ir)))**2
end do
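!
! Why REAL and AIMAG: two real orbitals are packed into one complex array
! (Gamma-point trick), psis(r) = phi_m(r) + i*phi_m+1(r), so after the
! inverse FFT the two bands separate exactly into real and imaginary parts.
! A self-contained check (illustrative only, not part of this commit):
!
!    PROGRAM gamma_trick_demo
!       IMPLICIT NONE
!       INTEGER, PARAMETER :: DP = selected_real_kind(14,200)
!       REAL(DP)    :: phi1, phi2
!       COMPLEX(DP) :: psis
!       phi1 = 0.3_DP ; phi2 = -1.2_DP
!       psis = CMPLX( phi1, phi2, KIND=DP )  ! packed real-space value
!       PRINT *, DBLE(psis)**2  - phi1**2    ! prints 0: |phi_m  |^2 recovered
!       PRINT *, AIMAG(psis)**2 - phi2**2    ! prints 0: |phi_m+1|^2 recovered
!    END PROGRAM gamma_trick_demo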
@@ -660,8 +671,8 @@
! CALL MPI_REDUCE( rho(1+ioff*nr1*nr2,1), rhos(1,1), dffts%nnr, MPI_DOUBLE_PRECISION, MPI_SUM, ip-1, intra_bgrp_comm, ierr)
! ioff = ioff + dffts%npp( ip )
!END DO
IF ( dffts%nogrp > 1 ) THEN
CALL mp_sum( tmp_rhos, gid = dffts%ogrp_comm )
IF ( dtgs%nogrp > 1 ) THEN
CALL mp_sum( tmp_rhos, gid = dtgs%ogrp_comm )
ENDIF
!
!BRING CHARGE DENSITY BACK TO ITS ORIGINAL POSITION
@@ -670,9 +681,9 @@
!orbital group then does a local copy (reshuffling) of its data
!
from = 1
DO ii = 1, dffts%nogrp
IF ( dffts%nolist( ii ) == me_bgrp ) EXIT !Exit the loop
from = from + dffts%nr1x*dffts%nr2x*dffts%npp( dffts%nolist( ii ) + 1 )! From where to copy initially
DO ii = 1, dtgs%nogrp
IF ( dtgs%nolist( ii ) == me_bgrp ) EXIT !Exit the loop
from = from + dffts%nr1x*dffts%nr2x*dffts%npp( dtgs%nolist( ii ) + 1 )! From where to copy initially
ENDDO
!
DO ir = 1, nspin
@@ -682,7 +693,6 @@
DEALLOCATE( tmp_rhos )
DEALLOCATE( aux )
DEALLOCATE( psis )
!call errore('stop','qui',1) ! debug
RETURN
END SUBROUTINE loop_over_states
@@ -10,7 +10,7 @@ SUBROUTINE exx_gs(nfi, c)
!
USE kinds, ONLY : DP
USE constants, ONLY : fpi
USE fft_base, ONLY : dffts, dfftp
USE fft_base, ONLY : dffts, dfftp, dtgs
USE mp, ONLY : mp_barrier, mp_sum
USE mp_global, ONLY : nproc_image, me_image, root_image, intra_image_comm, intra_bgrp_comm, me_bgrp
USE parallel_include
@@ -886,7 +886,7 @@ SUBROUTINE exx_gs(nfi, c)
!
!-----------Zhaofeng's vpsil (local) to exx_potential (global) -----------
!
nogrp = dffts%nogrp
nogrp = dtgs%nogrp
!
ALLOCATE( sdispls(nproc_image), sendcount(nproc_image) ); sdispls=0; sendcount=0
ALLOCATE( rdispls(nproc_image), recvcount(nproc_image) ); rdispls=0; recvcount=0
@@ -961,10 +961,10 @@ SUBROUTINE exx_gs(nfi, c)
#ifdef __MPI
!
DO ir=1,nproc_image/nogrp
CALL mp_barrier( dffts%ogrp_comm )
CALL mp_barrier( dtgs%ogrp_comm )
CALL MPI_ALLTOALLV(exx_tmp3(1,ir), sendcount1, sdispls1, MPI_DOUBLE_PRECISION, &
& exx_potential(1,ir),recvcount1, rdispls1, MPI_DOUBLE_PRECISION, &
& dffts%ogrp_comm, ierr)
& dtgs%ogrp_comm, ierr)
END DO
#endif
!
@@ -36,6 +36,7 @@ MODULE exx_module
USE electrons_base, ONLY: nupdwn !number of states with up and down spin
USE fft_base, ONLY: dffts !FFT derived data type
USE fft_base, ONLY: dfftp !FFT derived data type
USE fft_base, ONLY: dtgs !FFT task groups
USE funct, ONLY: get_exx_fraction ! function to get exx_fraction value
USE funct, ONLY: stop_exx, start_exx
USE input_parameters, ONLY: ref_alat !alat of reference cell ..
@@ -166,7 +167,7 @@ CONTAINS
#if defined(__OPENMP)
WRITE(stdout,'(5X,"OpenMP threads/MPI task",3X,I4)') omp_get_max_threads()
#endif
WRITE(stdout,'(5X,"Taskgroups ",3X,I7)') dffts%nogrp
WRITE(stdout,'(5X,"Taskgroups ",3X,I7)') dtgs%nogrp
!
! the fraction of exact exchange is stored here
!
@@ -268,11 +269,11 @@ CONTAINS
!
END IF
!
IF((nproc_image.LE.nbsp).AND.(dffts%nogrp.GT.1)) CALL errore('exx_module','EXX calculation error : &
IF((nproc_image.LE.nbsp).AND.(dtgs%nogrp.GT.1)) CALL errore('exx_module','EXX calculation error : &
& use taskgroup (-ntg) = 1 when number of MPI tasks is less or equal to the number of electronic states',1)
!
! to fix this issue, see files exx_psi.f90 and exx_gs.f90
IF(nproc_image.GT.nbsp.AND.MOD(dffts%nnr,dffts%nogrp).NE.0) CALL errore('exx_module','EXX calculation error : &
IF(nproc_image.GT.nbsp.AND.MOD(dffts%nnr,dtgs%nogrp).NE.0) CALL errore('exx_module','EXX calculation error : &
& (nr1x * nr2x) is not integer multiple of the number of task groups. Change task groups such that &
& (nr1x * nr2x) becomes integer multiple of the number of task groups. Otherwise restrict number of MPI tasks &
& up to the number of electronic states.',1)
@@ -283,7 +284,7 @@ CONTAINS
& or equal to the electronic bands. Otherwise, change ecutwfc to make (nr1x * nr2x) an even number.',1)
!
! to fix this issue, see files exx_psi.f90 and exx_gs.f90
IF((nproc_image.GT.nbsp).AND.MOD(nbsp,2*dffts%nogrp).NE.0) CALL errore('exx_module','EXX calculation error : &
IF((nproc_image.GT.nbsp).AND.MOD(nbsp,2*dtgs%nogrp).NE.0) CALL errore('exx_module','EXX calculation error : &
& number of electronic states is not integer multiple of two times the number of task groups. &
& Either change the number of taskgroups or restrict number of MPI tasks up to the number of electronic states.',1)
!
@@ -301,7 +302,7 @@ CONTAINS
write(stdout,*) "You may want to use number of MPI tasks = ", CEILING(DBLE(2.0*dfftp%nr3)/DBLE(nbsp))*nbsp,&
& "(combined with -ntg 2)"
!
ELSE IF (NINT(2**(LOG(DBLE(INT(nproc_image / dfftp%nr3))) / LOG(2.0))).GT.dffts%nogrp) THEN
ELSE IF (NINT(2**(LOG(DBLE(INT(nproc_image / dfftp%nr3))) / LOG(2.0))).GT.dtgs%nogrp) THEN
!
write(stdout,*)
write(stdout,*) "**********************************************************************************************"
@@ -311,7 +312,7 @@ CONTAINS
NINT(2**(LOG(DBLE(INT(nproc_image / dfftp%nr3))) / LOG(2.0)))
END IF
!
IF(dffts%nogrp.EQ.1) THEN
IF(dtgs%nogrp.EQ.1) THEN
!
write(stdout,*)
write(stdout,*) "**********************************************************************************************"
@@ -328,9 +329,9 @@ CONTAINS
& One needs number of task groups = 2^n where n is a positive integer when number of MPI tasks is greater than &
& the number of electronic states. See above for Possible Solutions',1)
!
ELSE IF (NINT(2**(LOG(DBLE(dffts%nogrp)) / LOG(2.0))).NE.dffts%nogrp) THEN
ELSE IF (NINT(2**(LOG(DBLE(dtgs%nogrp)) / LOG(2.0))).NE.dtgs%nogrp) THEN
!
! NINT(2**(LOG(DBLE(dffts%nogrp)) / LOG(2.0))) is the largest power of 2 that is smaller or equal to dffts%nogrp
! NINT(2**(LOG(DBLE(dtgs%nogrp)) / LOG(2.0))) is the largest power of 2 that is smaller than or equal to dtgs%nogrp
!
CALL errore('exx_module','EXX calculation error : &
& One needs number of task groups = 2^n where n is a positive integer when number of MPI tasks is greater than &
@@ -339,7 +340,7 @@ CONTAINS
!
END IF
!
IF((dffts%nogrp.GT.1).AND.(dfftp%nr3*dffts%nogrp.GT.nproc_image)) CALL errore('exx_module','EXX calculation error : &
IF((dtgs%nogrp.GT.1).AND.(dfftp%nr3*dtgs%nogrp.GT.nproc_image)) CALL errore('exx_module','EXX calculation error : &
& (nr3x * number of taskgroups) is greater than the number of MPI tasks. Change the number of MPI tasks or the number &
& of taskgroups or both. To estimate ntg, find the value of nr3x in the output and compute (MPI task/nr3x) and take &
& the integer value.',1)
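!
! Taken together, the checks above require: ntg divides nproc_image,
! nbsp is a multiple of 2*ntg, ntg is a power of two whenever
! nproc_image > nbsp, and nr3x*ntg <= nproc_image. A hypothetical helper
! (name and placement are illustrative, not part of this commit) that
! picks the largest admissible value:
!
!    INTEGER FUNCTION exx_pick_ntg( nproc, nbsp, nr3 )
!       IMPLICIT NONE
!       INTEGER, INTENT(IN) :: nproc, nbsp, nr3
!       INTEGER :: ntg
!       ntg = 1                              ! largest power of two <= nproc/nr3
!       DO WHILE ( 2*ntg <= MAX( nproc/nr3, 1 ) )
!          ntg = 2*ntg
!       END DO
!       DO WHILE ( ntg > 1 )                 ! enforce divisibility constraints
!          IF ( MOD( nproc, ntg ) == 0 .AND. MOD( nbsp, 2*ntg ) == 0 ) EXIT
!          ntg = ntg / 2
!       END DO
!       exx_pick_ntg = ntg
!    END FUNCTION exx_pick_ntg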
@@ -382,9 +383,9 @@ CONTAINS
!
IF ( dffts%have_task_groups ) THEN
!
ALLOCATE( exx_potential(dffts%nnr,nproc_image/dffts%nogrp) )
ALLOCATE( exx_potential(dffts%nnr,nproc_image/dtgs%nogrp) )
!
IF(MOD(nproc_image,dffts%nogrp).NE.0) CALL errore &
IF(MOD(nproc_image,dtgs%nogrp).NE.0) CALL errore &
& ('exx_module','EXX calculation is not working when &
& number of MPI tasks (nproc_image) is not integer multiple of number of taskgroups',1)
!
@@ -8,7 +8,7 @@ SUBROUTINE exx_psi(c, psitot2,nnrtot,my_nbsp, my_nxyz, nbsp)
!
USE kinds, ONLY : DP
USE fft_interfaces, ONLY : invfft
USE fft_base, ONLY : dffts, dfftp
USE fft_base, ONLY : dffts, dfftp, dtgs
USE gvecw, ONLY : ngw
USE mp_global, ONLY : nproc_image, me_image,intra_image_comm
USE cell_base, ONLY : omega
@@ -105,7 +105,7 @@ SUBROUTINE exx_psi(c, psitot2,nnrtot,my_nbsp, my_nxyz, nbsp)
!write(stdout,'("dffts%nnr*nogrp:",I10)'), dffts%nnr*nogrp
!write(stdout,'("nogrp*nr3 should be smaller or equal to nproc_image:")')
!
nogrp = dffts%nogrp
nogrp = dtgs%nogrp
!
ALLOCATE( sdispls(nproc_image), sendcount(nproc_image) ); sdispls=0; sendcount=0
ALLOCATE( rdispls(nproc_image), recvcount(nproc_image) ); rdispls=0; recvcount=0
@@ -168,14 +168,14 @@ SUBROUTINE exx_psi(c, psitot2,nnrtot,my_nbsp, my_nxyz, nbsp)
!
END DO
!
CALL invfft( 'Wave', psis, dffts )
CALL invfft( 'Wave', psis, dffts, dtgs )
!
#ifdef __MPI
!
CALL mp_barrier( dffts%ogrp_comm )
CALL mp_barrier( dtgs%ogrp_comm )
CALL MPI_ALLTOALLV(psis, sendcount1, sdispls1, MPI_DOUBLE_COMPLEX, &
& psis1, recvcount1, rdispls1, MPI_DOUBLE_COMPLEX, &
& dffts%ogrp_comm, ierr)
& dtgs%ogrp_comm, ierr)
#endif
!
ngpww1 = 0
@@ -32,7 +32,7 @@
USE cell_base, ONLY: tpiba2
USE ensemble_dft, ONLY: tens
USE funct, ONLY: dft_is_meta, dft_is_hybrid, exx_is_active
USE fft_base, ONLY: dffts
USE fft_base, ONLY: dffts, dtgs
USE fft_interfaces, ONLY: fwfft, invfft
USE fft_parallel, ONLY: pack_group_sticks, unpack_group_sticks
USE fft_parallel, ONLY: fw_tg_cft3_z, bw_tg_cft3_z, fw_tg_cft3_xy, bw_tg_cft3_xy
@@ -82,9 +82,9 @@
END IF
!=======================================================================
nogrp_ = dffts%nogrp
ALLOCATE( psi( dffts%tg_nnr * dffts%nogrp ) )
ALLOCATE( aux( dffts%tg_nnr * dffts%nogrp ) )
nogrp_ = dtgs%nogrp
ALLOCATE( psi( dtgs%tg_nnr * dtgs%nogrp ) )
ALLOCATE( aux( dtgs%tg_nnr * dtgs%nogrp ) )
!
ci = ( 0.0d0, 1.0d0 )
!
@@ -97,9 +97,9 @@
DO idx = 1, 2*nogrp_ , 2
!$omp task default(none) &
!$omp firstprivate( idx ) &
!$omp firstprivate( idx, i, n, ngw, ci, nogrp_ ) &
!$omp private( igoff, ig ) &
!$omp shared( i, n, c, dffts, aux, ngw, ci, nogrp_, nlsm, nls )
!$omp shared( c, dffts, dtgs, aux, nlsm, nls )
!
! This loop is executed only ONCE when NOGRP=1.
! Equivalent to the case with no task-groups
@@ -113,9 +113,9 @@
!
IF ( ( idx + i - 1 ) == n ) c( : , idx + i ) = 0.0d0
igoff = ( idx - 1 )/2 * dffts%tg_nnr
igoff = ( idx - 1 )/2 * dtgs%tg_nnr
aux( igoff + 1 : igoff + dffts%tg_nnr ) = (0.d0, 0.d0)
aux( igoff + 1 : igoff + dtgs%tg_nnr ) = (0.d0, 0.d0)
IF( idx + i - 1 <= n ) THEN
DO ig=1,ngw
@@ -132,11 +132,11 @@
!$omp end single
!$omp end parallel
CALL pack_group_sticks( aux, psi, dffts )
CALL pack_group_sticks( aux, psi, dtgs )
CALL fw_tg_cft3_z( psi, dffts, aux )
CALL fw_tg_cft3_scatter( psi, dffts, aux )
CALL fw_tg_cft3_xy( psi, dffts )
CALL fw_tg_cft3_z( psi, dffts, aux, dtgs )
CALL fw_tg_cft3_scatter( psi, dffts, aux, dtgs )
CALL fw_tg_cft3_xy( psi, dffts, dtgs )
#else
@@ -165,7 +165,7 @@
!exx_wf related
IF(dft_is_hybrid().AND.exx_is_active()) THEN
!$omp parallel do private(tmp1,tmp2)
DO ir = 1, dffts%nr1x*dffts%nr2x*dffts%tg_npp( me_bgrp + 1 )
DO ir = 1, dffts%nr1x*dffts%nr2x*dtgs%tg_npp( me_bgrp + 1 )
tmp1 = v(ir,iss1) * DBLE( psi(ir) )+exx_potential(ir,i/nogrp_+1)
tmp2 = v(ir,iss2) * AIMAG(psi(ir) )+exx_potential(ir,i/nogrp_+2)
psi(ir) = CMPLX( tmp1, tmp2, kind=DP)
@@ -173,7 +173,7 @@
!$omp end parallel do
ELSE
!$omp parallel do
DO ir = 1, dffts%nr1x*dffts%nr2x*dffts%tg_npp( me_bgrp + 1 )
DO ir = 1, dffts%nr1x*dffts%nr2x*dtgs%tg_npp( me_bgrp + 1 )
psi(ir) = CMPLX ( v(ir,iss1) * DBLE( psi(ir) ), &
v(ir,iss2) *AIMAG( psi(ir) ) ,kind=DP)
END DO
@@ -265,11 +265,11 @@
END IF
!
#ifdef __MPI
CALL bw_tg_cft3_xy( psi, dffts )
CALL bw_tg_cft3_scatter( psi, dffts, aux )
CALL bw_tg_cft3_z( psi, dffts, aux )
CALL bw_tg_cft3_xy( psi, dffts, dtgs )
CALL bw_tg_cft3_scatter( psi, dffts, aux, dtgs )
CALL bw_tg_cft3_z( psi, dffts, aux, dtgs )
CALL unpack_group_sticks( psi, aux, dffts )
CALL unpack_group_sticks( psi, aux, dtgs )
#else
CALL fwfft( 'Wave', psi, dffts )
aux = psi
@@ -292,8 +292,8 @@
!$omp task default(none) &
!$omp private( fi, fip, fp, fm, ig ) &
!$omp firstprivate( eig_offset, igno, idx ) &
!$omp shared( nogrp_ , f, ngw, aux, df, da, c, tpiba2, tens, dffts, me_bgrp, i, n, g2kin, nls, nlsm )
!$omp firstprivate( eig_offset, igno, idx, nogrp_, ngw, tpiba2, me_bgrp, i, n, tens ) &
!$omp shared( f, aux, df, da, c, dffts, g2kin, nls, nlsm )
IF( idx + i - 1 <= n ) THEN
if (tens) then
@@ -355,9 +355,9 @@
DO idx = 1, 2*nogrp_ , 2
!$omp task default(none) &
!$omp firstprivate(igrp,idx) &
!$omp firstprivate(igrp,idx, nogrp_, ngw, i, n, nsp, na, nh, ish, iss1, iss2, tens ) &
!$omp private(iv,jv,ivoff,jvoff,dd,dv,inl,jnl,is,isa,ism,fi,fip) &
!$omp shared( nogrp_ , f, ngw, deeq, bec, af, aa, i, n, nsp, na, nh, dvan, tens, ish, iss1, iss2 )
!$omp shared( f, deeq, bec, af, aa, dvan )
IF( idx + i - 1 <= n ) THEN
@@ -34,7 +34,7 @@
use gvecs, only: gcutms, gvecs_init
use gvecw, only: gkcut, gvecw_init, g2kin_init
USE smallbox_subs, ONLY: ggenb
USE fft_base, ONLY: dfftp, dffts, dfftb, dfft3d
USE fft_base, ONLY: dfftp, dffts, dfftb, dfft3d, dtgs
USE fft_smallbox, ONLY: cft_b_omp_init
USE stick_set, ONLY: pstickset
USE control_flags, ONLY: gamma_only, smallmem
@@ -138,7 +138,7 @@
CALL pstickset( gamma_only, bg, gcutm, gkcut, gcutms, &
dfftp, dffts, ngw_ , ngm_ , ngs_ , me_bgrp, root_bgrp, &
nproc_bgrp, intra_bgrp_comm, ntask_groups, ionode, stdout, dfft3d )
nproc_bgrp, intra_bgrp_comm, ntask_groups, ionode, stdout, dtgs, dfft3d )
!
!
! ... Initialize reciprocal space local and global dimensions
@@ -29,7 +29,7 @@
USE mp_global, ONLY : me_bgrp, &
my_bgrp_id, nbgrp, inter_bgrp_comm
USE mp, ONLY : mp_sum
USE fft_base, ONLY : dffts
USE fft_base, ONLY : dffts, dtgs
USE fft_parallel, ONLY : tg_gather
use wave_base, only : wave_steepest, wave_verlet
use control_flags, only : lwf, tsde
@@ -82,8 +82,8 @@
END IF
IF( dffts%have_task_groups ) THEN
tg_rhos_siz = dffts%nogrp * dffts%tg_nnr
c2_siz = dffts%nogrp * ngwx
tg_rhos_siz = dtgs%nogrp * dtgs%tg_nnr
c2_siz = dtgs%nogrp * ngwx
ELSE
tg_rhos_siz = 1
c2_siz = ngw
@@ -132,10 +132,10 @@
! processors of an orbital TASK-GROUP
!
DO i = 1, nspin
CALL tg_gather( dffts, rhos(:,i), tg_rhos(:,i) )
CALL tg_gather( dffts, dtgs, rhos(:,i), tg_rhos(:,i) )
END DO
incr = 2 * dffts%nogrp
incr = 2 * dtgs%nogrp
ELSE
@@ -6,6 +6,7 @@ include ../make.inc
#MODFLAGS= $(MOD_FLAG)../iotk/src $(MOD_FLAG).
FFTX = \
task_groups.o \
scatter_mod.o \
fft_scalar.o \
fft_parallel.o \
@@ -16,8 +17,7 @@ fft_smallbox.o \
fft_support.o \
fft_error.o \
fft_stick.o \
fft_types.o \
task_groups.o
fft_types.o
all : libqefft.a
@@ -21,12 +21,14 @@ MODULE fft_interfaces
!! and to the "box-grid" version **invfft_b**, used only in CP
!! (the latter has an additional argument)
SUBROUTINE invfft_x( grid_type, f, dfft )
SUBROUTINE invfft_x( grid_type, f, dfft, dtgs )
USE fft_types, ONLY: fft_dlay_descriptor
USE task_groups, ONLY: task_groups_descriptor
IMPLICIT NONE
INTEGER, PARAMETER :: DP = selected_real_kind(14,200)
CHARACTER(LEN=*), INTENT(IN) :: grid_type
TYPE(fft_dlay_descriptor), INTENT(IN) :: dfft
TYPE(task_groups_descriptor), OPTIONAL, INTENT(IN) :: dtgs
COMPLEX(DP) :: f(:)
END SUBROUTINE invfft_x
!
@@ -42,12 +44,14 @@ MODULE fft_interfaces
END INTERFACE
INTERFACE fwfft
SUBROUTINE fwfft_x( grid_type, f, dfft )
SUBROUTINE fwfft_x( grid_type, f, dfft, dtgs )
USE fft_types, ONLY: fft_dlay_descriptor
USE task_groups, ONLY: task_groups_descriptor
IMPLICIT NONE
INTEGER, PARAMETER :: DP = selected_real_kind(14,200)
CHARACTER(LEN=*), INTENT(IN) :: grid_type
TYPE(fft_dlay_descriptor), INTENT(IN) :: dfft
TYPE(task_groups_descriptor), OPTIONAL, INTENT(IN) :: dtgs
COMPLEX(DP) :: f(:)
END SUBROUTINE fwfft_x
END INTERFACE
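!
! Usage sketch: task-group wave FFTs now pass the new descriptor explicitly,
! while dense-grid calls are unchanged; if dfft%have_task_groups is set and
! dtgs is omitted, the routines below abort via fftx_error__. Schematically
! (illustrative, assuming the usual fft_base variables):
!
!    USE fft_base,       ONLY : dffts, dfftp, dtgs
!    USE fft_interfaces, ONLY : fwfft, invfft
!    CALL invfft( 'Wave',  psi, dffts, dtgs )  ! task-group wave-function FFT
!    CALL fwfft ( 'Wave',  psi, dffts, dtgs )
!    CALL invfft( 'Dense', rho, dfftp )        ! dense grid: no dtgs needed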
@@ -56,7 +60,7 @@ END MODULE fft_interfaces
!=---------------------------------------------------------------------------=!
!
!=---------------------------------------------------------------------------=!
SUBROUTINE invfft_x( grid_type, f, dfft )
SUBROUTINE invfft_x( grid_type, f, dfft, dtgs )
!! Compute G-space to R-space for a specific grid type
!!
!! **grid_type = 'Dense'** :
@@ -88,6 +92,7 @@ SUBROUTINE invfft_x( grid_type, f, dfft )
USE fft_smallbox, ONLY: cft_b, cft_b_omp
USE fft_parallel, ONLY: tg_cft3s
USE fft_types, ONLY: fft_dlay_descriptor
USE task_groups, ONLY: task_groups_descriptor
IMPLICIT NONE
@@ -96,6 +101,7 @@ SUBROUTINE invfft_x( grid_type, f, dfft )
TYPE(fft_dlay_descriptor), INTENT(IN) :: dfft
CHARACTER(LEN=*), INTENT(IN) :: grid_type
COMPLEX(DP) :: f(:)
TYPE(task_groups_descriptor), OPTIONAL, INTENT(IN) :: dtgs
!
IF( grid_type == 'Dense' ) THEN
CALL start_clock( 'fft' )
@@ -119,7 +125,14 @@ SUBROUTINE invfft_x( grid_type, f, dfft )
grid_type == 'Custom' ) THEN
CALL tg_cft3s( f, dfft, 1 )
ELSE IF( grid_type == 'Wave' .OR. grid_type == 'CustomWave' ) THEN
CALL tg_cft3s( f, dfft, 2, dfft%have_task_groups )
IF( PRESENT( dtgs ) ) THEN
CALL tg_cft3s( f, dfft, 2, dtgs, dfft%have_task_groups )
ELSE
IF( dfft%have_task_groups ) THEN
CALL fftx_error__( ' invfft ', ' have_task_groups is true but dtgs is not present ', 1 )
END IF
CALL tg_cft3s( f, dfft, 2 )
END IF
END IF
#else
@@ -162,7 +175,7 @@ END SUBROUTINE invfft_x
!=---------------------------------------------------------------------------=!
!
!=---------------------------------------------------------------------------=!
SUBROUTINE fwfft_x( grid_type, f, dfft )
SUBROUTINE fwfft_x( grid_type, f, dfft, dtgs )
!! Compute R-space to G-space for a specific grid type
!!
!! **grid_type = 'Dense'**
@@ -193,6 +206,7 @@ SUBROUTINE fwfft_x( grid_type, f, dfft )
USE fft_scalar, ONLY: cfft3d, cfft3ds
USE fft_parallel, ONLY: tg_cft3s
USE fft_types, ONLY: fft_dlay_descriptor
USE task_groups, ONLY: task_groups_descriptor
IMPLICIT NONE
@@ -201,6 +215,7 @@ SUBROUTINE fwfft_x( grid_type, f, dfft )
TYPE(fft_dlay_descriptor), INTENT(IN) :: dfft
CHARACTER(LEN=*), INTENT(IN) :: grid_type
COMPLEX(DP) :: f(:)
TYPE(task_groups_descriptor), OPTIONAL, INTENT(IN) :: dtgs
IF( grid_type == 'Dense' ) THEN
CALL start_clock( 'fft' )
@@ -222,7 +237,7 @@ SUBROUTINE fwfft_x( grid_type, f, dfft )
grid_type == 'Custom' ) THEN
CALL tg_cft3s(f,dfft,-1)
ELSE IF( grid_type == 'Wave' .OR. grid_type == 'CustomWave' ) THEN
CALL tg_cft3s(f,dfft,-2, dfft%have_task_groups )
IF( PRESENT( dtgs ) ) THEN
CALL tg_cft3s(f,dfft,-2, dtgs, dfft%have_task_groups )
ELSE
IF( dfft%have_task_groups ) THEN
CALL fftx_error__( ' fwfft ', ' have_task_groups is true but dtgs is not present ', 1 )
END IF
CALL tg_cft3s(f,dfft,-2 )
END IF
END IF
#else
@@ -73,6 +73,7 @@ MODULE fft_types
!
LOGICAL :: have_task_groups
!
#ifdef PIPPONE
INTEGER :: me_pgrp = 0 ! task id for plane wave task group
INTEGER :: nogrp = 1 ! number of proc. in an orbital "task group"
INTEGER :: npgrp = 1 ! number of proc. in a plane-wave "task group"
@@ -91,6 +92,7 @@ MODULE fft_types
INTEGER, POINTER :: tg_rdsp(:)! receive displacement for all to all
INTEGER :: tg_nppx = 0 ! max of tg_npp
INTEGER :: tg_ncpx = 0 ! max of tg_ncp
#endif
!
END TYPE
@@ -182,28 +184,6 @@ CONTAINS
desc%nproc = nproc
desc%root = root
desc%have_task_groups = ( nogrp > 1 )
desc%me_pgrp = 0
!
IF( MOD( nproc, MAX( 1, nogrp ) ) /= 0 ) &
CALL fftx_error__( " fft_dlay_allocate ", "the number of task groups should be a divisor of the number of MPI task ", 1 )
IF( nogrp > nproc ) &
CALL fftx_error__( " fft_dlay_allocate ", "the number of task groups should be less than the number of MPI task ", 1 )
desc%nogrp = MAX( 1, nogrp )
desc%npgrp = nproc / MAX( 1, nogrp )
desc%ogrp_comm = 0
desc%pgrp_comm = 0
ALLOCATE( desc%nolist( desc%nogrp ) )
ALLOCATE( desc%nplist( desc%npgrp ) )
desc%nolist = 0
desc%nplist = 0
NULLIFY( desc%tg_nsw )
NULLIFY( desc%tg_npp )
NULLIFY( desc%tg_snd )
NULLIFY( desc%tg_rcv )
NULLIFY( desc%tg_psdsp )
NULLIFY( desc%tg_usdsp )
NULLIFY( desc%tg_rdsp )
desc%arrays_have_been_allocated = .TRUE.
@@ -222,22 +202,9 @@ CONTAINS
IF ( associated( desc%ismap ) ) DEALLOCATE( desc%ismap )
IF ( associated( desc%iplp ) ) DEALLOCATE( desc%iplp )
IF ( associated( desc%iplw ) ) DEALLOCATE( desc%iplw )
IF ( associated( desc%nolist ) ) DEALLOCATE( desc%nolist )
IF ( associated( desc%nplist ) ) DEALLOCATE( desc%nplist )
desc%id = 0
IF( desc%have_task_groups ) THEN
IF ( associated( desc%tg_nsw ) ) DEALLOCATE( desc%tg_nsw )
IF ( associated( desc%tg_npp ) ) DEALLOCATE( desc%tg_npp )
IF ( associated( desc%tg_snd ) ) DEALLOCATE( desc%tg_snd )
IF ( associated( desc%tg_rcv ) ) DEALLOCATE( desc%tg_rcv )
IF ( associated( desc%tg_psdsp ) ) DEALLOCATE( desc%tg_psdsp )
IF ( associated( desc%tg_usdsp ) ) DEALLOCATE( desc%tg_usdsp )
IF ( associated( desc%tg_rdsp ) ) DEALLOCATE( desc%tg_rdsp )
ENDIF
desc%have_task_groups = .FALSE.
desc%arrays_have_been_allocated = .FALSE.
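!
! For reference, the retired members map onto the new task-group descriptor;
! a sketch inferred from the fields above (the authoritative declaration
! lives in the task_groups module added to FFTXlib):
!
!    TYPE task_groups_descriptor
!       INTEGER :: me_pgrp = 0          ! task id within the plane-wave group
!       INTEGER :: nogrp = 1            ! procs in an orbital "task group"
!       INTEGER :: npgrp = 1            ! procs in a plane-wave "task group"
!       INTEGER :: ogrp_comm = 0        ! orbital group communicator
!       INTEGER :: pgrp_comm = 0        ! plane-wave group communicator
!       INTEGER, POINTER :: nolist(:)   ! ranks in my orbital group
!       INTEGER, POINTER :: nplist(:)   ! ranks in my plane-wave group
!       INTEGER :: tg_nnr = 0           ! max local FFT data size over the group
!       INTEGER, POINTER :: tg_nsw(:)   ! sticks per task group
!       INTEGER, POINTER :: tg_npp(:)   ! planes per task group
!       INTEGER, POINTER :: tg_snd(:)   ! send counts for the group all-to-all
!       INTEGER, POINTER :: tg_rcv(:)   ! receive counts
!       INTEGER, POINTER :: tg_psdsp(:) ! send displacements (packed)
!       INTEGER, POINTER :: tg_usdsp(:) ! send displacements (unpacked)
!       INTEGER, POINTER :: tg_rdsp(:)  ! receive displacements
!       INTEGER :: tg_nppx = 0          ! max of tg_npp
!       INTEGER :: tg_ncpx = 0          ! max of tg_ncp
!    END TYPE task_groups_descriptor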