Commit 7dbff750 authored by sponce's avatar sponce

New level of memory optimization: etf_mem == 2.

And addition of a test for this. 



git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@13612 c92efa57-630b-4861-b058-cf58834340f0
parent 8d60c255
......@@ -54,8 +54,11 @@ elphon_shuffle_wrap.o \
ephbloch2wane.o \
ephbloch2wanp.o \
ephwan2bloch.o \
ephwan2bloch_mem.o \
ephwan2blochp.o \
ephwan2blochp_mem.o \
ephwann_shuffle.o \
ephwann_shuffle_mem.o \
epwcom.o \
epw_init.o \
epw_readin.o \
......@@ -78,6 +81,7 @@ loadqmesh.o \
loadumat.o \
nesting_fn.o \
openfilepw.o \
rgd_blk_epw_fine_mem.o \
pade.o \
plot_band.o \
poolgather.o \
......
......@@ -49,7 +49,7 @@
USE qpoint, ONLY : xq
USE modes, ONLY : nmodes
USE lr_symm_base, ONLY : minus_q, rtau, gi, gimq, irotmq, nsymq, invsymq
USE epwcom, ONLY : epbread, epbwrite, epwread, lifc, &
USE epwcom, ONLY : epbread, epbwrite, epwread, lifc, etf_mem, &
nbndsub, iswitch, kmaps, eig_read, dvscf_dir, lpolar
USE elph2, ONLY : epmatq, dynq, sumr, et_all, xk_all, et_mb, et_ks, &
zstar, epsi, cu, cuq, lwin, lwinq, bmat, igk_k_all, &
......@@ -766,7 +766,8 @@
!
! the electron-phonon wannier interpolation
!
CALL ephwann_shuffle ( nqc, xqc )
IF(etf_mem == 0 .OR. etf_mem == 1 ) CALL ephwann_shuffle ( nqc, xqc )
IF(etf_mem == 2 ) CALL ephwann_shuffle_mem ( nqc, xqc )
!
5 format (8x,"q(",i5," ) = (",3f12.7," )")
!
......
!
! Copyright (C) 2010-2016 Samuel Ponce', Roxana Margine, Carla Verdi, Feliciano Giustino
! Copyright (C) 2007-2009 Jesse Noffsinger, Brad Malone, Feliciano Giustino
!
! This file is distributed under the terms of the GNU General Public
! License. See the file `LICENSE' in the root directory of the
! present distribution, or http://www.gnu.org/copyleft.gpl.txt .
!
!
!---------------------------------------------------------------------------
SUBROUTINE ephwan2bloch_mem ( imode, nbnd, nrr, irvec, ndegen, epmatw, &
                              xk, cufkk, cufkq, epmatf, nmodes)
  !---------------------------------------------------------------------------
  !!
  !! Interpolation from Wannier to the fine Bloch grid of the electron-phonon
  !! matrix elements.
  !!
  !! This variant works on a single (nbnd, nbnd) matrix per call: epmatw and
  !! epmatf carry no phonon-mode index. The arguments imode and nmodes are part
  !! of the interface but are not referenced in the body.
  !!
  !! The routine performs two steps:
  !!   1. Fourier sum over the Wigner-Seitz points,
  !!        g~ = sum_ir exp( ci * twopi * xk . irvec(:,ir) ) / ndegen(ir) * epmatw(:,:,ir)
  !!   2. Rotation with the two matrices supplied by the caller,
  !!        epmatf = cufkq * g~ * cufkk^dagger
  !!
  USE kinds,         ONLY : DP
  USE constants_epw, ONLY : twopi, ci, czero, cone
  !
  implicit none
  !
  INTEGER, INTENT (in) :: imode
  !! Index of the current phonon mode (not referenced in this routine)
  INTEGER, INTENT (in) :: nbnd
  !! Number of bands (possibly in the optimal subspace)
  INTEGER, INTENT (in) :: nrr
  !! Number of Wigner-Seitz points
  INTEGER, INTENT (in) :: irvec (3, nrr)
  !! Coordinates of the WS points
  INTEGER, INTENT (in) :: ndegen (nrr)
  !! Degeneracy of the WS points
  INTEGER, INTENT (in) :: nmodes
  !! Number of phonon modes (not referenced in this routine)
  !
  REAL(kind=DP), INTENT (in) :: xk (3)
  !! k-point for the interpolation (WARNING: this must be in crystal coord!)
  !
  COMPLEX(kind=DP), INTENT (in) :: epmatw (nbnd, nbnd, nrr)
  !! e-p matrix in Wannier representation
  COMPLEX(kind=DP), INTENT (in) :: cufkk (nbnd, nbnd)
  !! Rotation matrix U(k)
  COMPLEX(kind=DP), INTENT (in) :: cufkq (nbnd, nbnd)
  !! Rotation matrix U(k+q)
  COMPLEX(kind=DP), INTENT (out) :: epmatf (nbnd, nbnd)
  !! e-p matrix in Bloch representation, fine grid
  !
  ! Local variables
  !
  INTEGER :: ir
  !! Index over the WS points
  REAL(kind=DP) :: rdotk
  !! Phase twopi * xk . R
  COMPLEX(kind=DP) :: cfac
  !! Fourier weight exp(ci*rdotk) / ndegen
  COMPLEX(kind=DP) :: eptmp (nbnd, nbnd)
  !! Intermediate product cufkq * g~
  !
  !----------------------------------------------------------
  !  STEP 3: inverse Fourier transform of g to fine k mesh
  !----------------------------------------------------------
  !
  !  g~ (k') = sum_R 1/ndegen(R) e^{i 2pi k'.R} g (R)
  !
  !  The sign of the exponent is the one used in the code below.
  !  xk is assumed to be already in crystal coordinates.
  !
  epmatf = czero
  !
  DO ir = 1, nrr
    !
    rdotk = twopi * dot_product( xk, REAL( irvec(:, ir), kind=DP ) )
    cfac  = exp( ci * rdotk ) / REAL( ndegen(ir), kind=DP )
    !
    epmatf = epmatf + cfac * epmatw(:, :, ir)
    !
  ENDDO
  !
  !----------------------------------------------------------
  !  STEP 4: un-rotate to Bloch space, fine grid
  !----------------------------------------------------------
  !
  !  The two zgemm calls perform the following operations:
  !    eptmp  = cufkq * epmatf
  !    epmatf = eptmp * cufkk^\dagger
  !
  CALL zgemm ('n', 'n', nbnd, nbnd, nbnd, cone, cufkq, &
              nbnd, epmatf, nbnd, czero, eptmp, nbnd)
  CALL zgemm ('n', 'c', nbnd, nbnd, nbnd, cone, eptmp, &
              nbnd, cufkk, nbnd, czero, epmatf, nbnd)
  !
END SUBROUTINE ephwan2bloch_mem
!
! Copyright (C) 2010-2016 Samuel Ponce', Roxana Margine, Carla Verdi, Feliciano Giustino
! Copyright (C) 2007-2009 Jesse Noffsinger, Brad Malone, Feliciano Giustino
!
! This file is distributed under the terms of the GNU General Public
! License. See the file `LICENSE' in the root directory of the
! present distribution, or http://www.gnu.org/copyleft.gpl.txt .
!
!
!---------------------------------------------------------------------------
subroutine ephwan2blochp_mem (imode, nmodes, xxq, irvec, ndegen, nrr_q, cuf, epmatf, nbnd, nrr_k )
  !---------------------------------------------------------------------------
  !!
  !! Memory-optimized Fourier sum over the phonon Wigner-Seitz points of the
  !! electron-phonon matrix elements, for the single phonon mode imode.
  !!
  !! Even though this is for phonons, the same notation as in the electronic
  !! case is used.
  !!
  !! For every WS index ir assigned to this process by para_bounds, one block
  !! of nbnd*nbnd*nrr_k double-complex numbers is read with MPI-IO from the file
  !! trim(tmp_dir)//trim(prefix)//'.epmatwp1' and accumulated:
  !!
  !!   epmatf = sum_ir exp( ci * twopi * xxq . irvec(:,ir) ) / ndegen(ir) * block(imode, ir)
  !!
  !! The block for (imode, ir) is read at the byte offset
  !!   16 * nbnd * nbnd * nrr_k * ( nmodes * (ir-1) + (imode-1) ).
  !!
  !! The per-process partial sums are finally added over world_comm.
  !!
  USE kinds,         ONLY : DP
  USE constants_epw, ONLY : twopi, ci, czero
  USE io_files,      ONLY : prefix, tmp_dir
  USE mp_global,     ONLY : mp_sum
  USE mp_world,      ONLY : world_comm
  USE parallel_include
  !
  implicit none
  !
  ! Input variables
  !
  INTEGER, INTENT (in) :: imode
  !! Current mode
  INTEGER, INTENT (in) :: nmodes
  !! Total number of modes
  INTEGER, INTENT (in) :: nrr_q
  !! Number of WS points
  INTEGER, INTENT (in) :: irvec (3, nrr_q)
  !! Coordinates of WS points
  INTEGER, INTENT (in) :: ndegen (nrr_q)
  !! Degeneracy of the WS points
  INTEGER, INTENT (in) :: nbnd
  !! Number of bands
  INTEGER, INTENT (in) :: nrr_k
  !! Number of electronic WS points
  REAL(kind=DP), INTENT (in) :: xxq (3)
  !! Point at which the Fourier sum is evaluated (WARNING: this must be in crystal coord!)
  COMPLEX(kind=DP), INTENT (in) :: cuf (nmodes, nmodes)
  !! Matrix supplied by the caller; it is not referenced in this routine
  COMPLEX(kind=DP), INTENT (out) :: epmatf (nbnd, nbnd, nrr_k)
  !! Fourier-summed e-p matrix for mode imode
  !
  ! Local variables
  !
  CHARACTER (len=256) :: filint
  !! File name
  !
  INTEGER :: ir
  !! Real space WS index
  INTEGER :: ir_start
  !! Starting ir for this process
  INTEGER :: ir_stop
  !! Ending ir for this process
  INTEGER :: iunepmatwp2
  !! MPI file handle returned by MPI_FILE_OPEN
  INTEGER :: ierr
  !! MPI error code
  INTEGER :: lrepmatw2
  !! Number of double-precision reals per block; MPI_FILE_READ takes the count as a default INTEGER
  INTEGER (kind=MPI_OFFSET_KIND) :: nelem
  !! Same count computed in MPI_OFFSET_KIND arithmetic to avoid overflow
  INTEGER (kind=MPI_OFFSET_KIND) :: lrepmatw
  !! Byte offset at which the block to read starts
  !
  REAL(kind=DP) :: rdotk
  !! Phase twopi * xxq . R
  !
  COMPLEX(kind=DP) :: cfac
  !! Fourier weight exp(ci*rdotk) / ndegen
  COMPLEX(kind=DP), ALLOCATABLE :: epmatw (:, :, :)
  !! Block of el-ph matrix elements read from file
  !
  CALL start_clock('ephW2Bp')
  !
  !----------------------------------------------------------
  !  STEP 3: inverse Fourier transform of g to fine k mesh
  !----------------------------------------------------------
  !
  !  g~ = sum_R 1/ndegen(R) e^{i 2pi q.R} g (R)
  !
  !  The sign of the exponent is the one used in the code below.
  !  Each process handles the WS indices ir_start..ir_stop.
  !
  CALL para_bounds(ir_start, ir_stop, nrr_q)
  !
  filint = trim(tmp_dir)//trim(prefix)//'.epmatwp1'
  CALL MPI_FILE_OPEN(world_comm, filint, MPI_MODE_RDONLY, MPI_INFO_NULL, iunepmatwp2, ierr)
  IF( ierr /= MPI_SUCCESS ) CALL errore( 'ephwan2blochp_mem', 'error in MPI_FILE_OPEN', 1 )
  !
  ! SP: The integer literals are of default kind, so every factor is converted
  !     to MPI_OFFSET_KIND explicitly; otherwise a large product would overflow
  !     (kind help received from Ian Bush).
  !     Each complex number counts as two double-precision reals.
  nelem = 2_MPI_OFFSET_KIND * INT( nbnd , kind = MPI_OFFSET_KIND ) * &
                              INT( nbnd , kind = MPI_OFFSET_KIND ) * &
                              INT( nrr_k, kind = MPI_OFFSET_KIND )
  !
  ! The count argument of MPI_FILE_READ is a default INTEGER: make sure it fits.
  IF( nelem > INT( HUGE(lrepmatw2), kind = MPI_OFFSET_KIND ) ) &
    CALL errore( 'ephwan2blochp_mem', 'block too large for a single MPI_FILE_READ', 1 )
  lrepmatw2 = INT( nelem )
  !
  ALLOCATE( epmatw(nbnd, nbnd, nrr_k) )
  !
  ! epmatf is INTENT(out), hence undefined on entry: it has to be zeroed before
  ! ZAXPY accumulates into it.
  epmatf(:, :, :) = czero
  !
  DO ir = ir_start, ir_stop
    !
    ! note xxq is assumed to be already in cryst coord
    rdotk = twopi * dot_product( xxq, REAL( irvec(:, ir), kind=DP ) )
    cfac  = exp( ci * rdotk ) / REAL( ndegen(ir), kind=DP )
    !
    ! Byte offset (8 bytes per double-precision real) of the block (imode, ir)
    lrepmatw = 8_MPI_OFFSET_KIND * nelem * &
               ( INT( nmodes, kind = MPI_OFFSET_KIND ) * ( INT( ir   , kind = MPI_OFFSET_KIND ) - 1_MPI_OFFSET_KIND ) + &
                                                         ( INT( imode, kind = MPI_OFFSET_KIND ) - 1_MPI_OFFSET_KIND ) )
    !
    ! SP: MPI_FILE_SEEK sets the position at which we start reading the file.
    !     With the default file view the offset is given in bytes.
    !     Note: the access can be collective (=blocking) when using MPI_FILE_SET_VIEW & MPI_FILE_READ_ALL
    !     or non-collective (=non blocking) when using MPI_FILE_SEEK & MPI_FILE_READ.
    !     Here we want the non-collective form because not all processes have the same number of ir.
    !
    CALL MPI_FILE_SEEK(iunepmatwp2, lrepmatw, MPI_SEEK_SET, ierr)
    IF( ierr /= MPI_SUCCESS ) CALL errore( 'ephwan2blochp_mem', 'error in MPI_FILE_SEEK', 1 )
    CALL MPI_FILE_READ(iunepmatwp2, epmatw, lrepmatw2, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
    IF( ierr /= MPI_SUCCESS ) CALL errore( 'ephwan2blochp_mem', 'error in MPI_FILE_READ', 1 )
    !
    ! epmatf = epmatf + cfac * epmatw
    CALL ZAXPY(nbnd * nbnd * nrr_k, cfac, epmatw, 1, epmatf, 1)
    !
  ENDDO
  !
  DEALLOCATE(epmatw)
  !
  ! Add up the contributions of all processes
  CALL mp_sum(epmatf, world_comm)
  !
  CALL MPI_FILE_CLOSE(iunepmatwp2, ierr)
  IF( ierr /= MPI_SUCCESS ) CALL errore( 'ephwan2blochp_mem', 'error in MPI_FILE_CLOSE', 1 )
  !
  CALL stop_clock('ephW2Bp')
  !
end subroutine ephwan2blochp_mem
......@@ -1177,6 +1177,15 @@
!
ENDIF ! end parallel_q
!
! Check Memory usage
CALL system_mem_usage(valueRSS)
!
WRITE(stdout, '(a)' ) ' ==================================================================='
WRITE(stdout, '(a,i10,a)' ) ' Memory usage: VmHWM =',valueRSS(2)/1024,'Mb'
WRITE(stdout, '(a,i10,a)' ) ' VmPeak =',valueRSS(1)/1024,'Mb'
WRITE(stdout, '(a)' ) ' ==================================================================='
WRITE(stdout, '(a)' )
!
! ---------------------------------------------------------------------------------------
! ---------------------------------------------------------------------------------------
!
......
This diff is collapsed.
......@@ -532,6 +532,11 @@
&'Error: longrange and shortrange cannot be both true.',1)
IF ( epwread .AND. .not. kmaps .AND. .not. epbread) CALL errore('epw_init',&
&'Error: kmaps has to be true for a restart run. ',1)
IF ( etf_mem == 2 .AND. parallel_q) CALL errore('epw_init',&
&'Error: Memory optimized version and q-parallelization not implemented. ',1)
#ifndef __MPI
IF ( etf_mem == 2 ) CALL errore('epw_init','Error: etf_mem == 2 only works with MPI.',1)
#endif
!
! thickness and smearing width of the Fermi surface
! from eV to Ryd
......
!
! Copyright (C) 2010-2017 Samuel Ponce', Roxana Margine, Carla Verdi, Feliciano Giustino
! Copyright (C) 2001-2008 Quantum-Espresso group
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
!-------------------------------------------------------------------------------
SUBROUTINE rgd_blk_epw_fine_mem(imode,nq1,nq2,nq3,q,uq,epmat,nmodes,epsil,zeu,bmat,signe)
  !-------------------------------------------------------------------------------
  !!
  !! Compute the long range term for the e-ph vertex
  !! to be added or subtracted from the vertex, for the single phonon mode imode.
  !!
  !! The long-range part can be computed using Eq. (4) of PRL 115, 176401 (2015).
  !! The sum over G is converged using the Ewald summation technique (see for example
  !! F.2, p.500 in Martin Electronic structure book) where the Ewald factor is ((q+G)**2)/alph/4.0_DP.
  !!
  !! Technical note: From the solution of the Poisson equation, there is an additional factor
  !! e^{-i(q+G)\tau_\kappa} with respect to Eq. (4) of PRL 115, 176401 (2015).
  !! The full equation can be found in Eq. (S4) of the supplemental materials of PRL 115, 176401 (2015).
  !!
  !! The final implemented formula is:
  !!
  !! $$ g_{mn\nu}^{\mathcal L}({\bf k},{\bf q}) = i\frac{4\pi e^2}{\Omega} \sum_{\kappa}
  !!   \left(\frac{\hbar}{2 {M_\kappa \omega_{{\bf q}\nu}}}\right)^{\!\!\frac{1}{2}}
  !!   \sum_{{\bf G}\ne -{\bf q}} e^{-({\bf q}+{\bf G})^2/4\alpha}
  !!   \frac{ ({\bf q}+{\bf G})\cdot{\bf Z}^*_\kappa \cdot {\bf e}_{\kappa\nu}({\bf q}) }
  !!    {({\bf q}+{\bf G})\cdot\bm\epsilon^\infty\!\cdot({\bf q}+{\bf G})}\,
  !!   \left[ U_{{\bf k}+{\bf q}}\:U_{{\bf k}}^{\dagger} \right]_{mn} $$
  !!
  !! On exit epmat holds the input matrix plus the accumulated term (whose sign
  !! is set by signe). If shortrange is true, epmat is instead replaced
  !! element-wise by SQRT( |epmat|**2 - |g_l|**2 ), g_l being the accumulated term.
  !!
  !! 10/2016 - SP: Optimization
  !!
  USE kinds,         ONLY : dp
  USE cell_base,     ONLY : bg, omega, alat
  USE ions_base,     ONLY : tau, nat
  USE constants_epw, ONLY : twopi, fpi, e2, ci, czero
  USE epwcom,        ONLY : shortrange, nbndsub
  !
  implicit none
  !
  INTEGER, INTENT (in) :: imode
  !! Index of the phonon mode (column of uq) for which the term is computed
  INTEGER, INTENT (in) :: nq1
  !! Coarse q-point grid; the G-vector sum runs over -nq1..nq1 along the first direction
  INTEGER, INTENT (in) :: nq2
  !! Coarse q-point grid; the G-vector sum runs over -nq2..nq2 along the second direction
  INTEGER, INTENT (in) :: nq3
  !! Coarse q-point grid; the G-vector sum runs over -nq3..nq3 along the third direction
  INTEGER, INTENT (in) :: nmodes
  !! Max number of modes
  !
  REAL (kind=DP), INTENT (in) :: q(3)
  !! q-vector from the full coarse or fine grid.
  REAL (kind=DP), INTENT (in) :: epsil(3,3)
  !! dielectric constant tensor
  REAL (kind=DP), INTENT (in) :: zeu(3,3,nat)
  !! effective charges tensor
  REAL (kind=DP), INTENT (in) :: signe
  !! signe=+/-1.0 ==> add/subtract long range term
  !
  COMPLEX (kind=DP), INTENT (in) :: uq(nmodes, nmodes)
  !! phonon eigenvec associated with q
  COMPLEX (kind=DP), INTENT (inout) :: epmat(nbndsub,nbndsub)
  !! e-ph matrix elements
  COMPLEX (kind=DP), INTENT (in) :: bmat(nbndsub,nbndsub)
  !! Overlap matrix elements $$<U_{mk+q}|U_{nk}>$$
  !
  ! Local variables
  !
  REAL(kind=DP), PARAMETER :: gmax = 14.0_DP
  !! Cutoff applied to qeq/alph/4
  REAL(kind=DP), PARAMETER :: alph = 1.0_DP
  !! Ewald parameter
  !
  INTEGER :: na
  !! Atom index
  INTEGER :: ipol
  !! Cartesian direction
  INTEGER :: m1, m2, m3
  !! Integer coefficients of the G vector
  !
  REAL(kind=DP) :: qeq
  !! <q+G| epsil | q+G>
  REAL(kind=DP) :: arg
  !! Phase -twopi * (q+G).tau
  REAL(kind=DP) :: zaq
  !! (q+G) . zeu(:,ipol,na)
  REAL(kind=DP) :: g1, g2, g3
  !! Components of q+G
  !
  COMPLEX(kind=DP) :: fac
  !! Prefactor signe * e2 * fpi / omega * ci
  COMPLEX(kind=DP) :: facqd
  !! fac times the Ewald damping, divided by qeq
  COMPLEX(kind=DP) :: facq
  !! facqd times the atomic phase factor
  COMPLEX(kind=DP) :: coef
  !! Scalar multiplying bmat for a given (G, na, ipol)
  COMPLEX(kind=DP) :: epmatl(nbndsub,nbndsub)
  !! Accumulated term alone; only needed when shortrange is true
  !
  IF (abs(signe) /= 1.0_DP) &
    CALL errore ('rgd_blk_epw_fine_mem',' wrong value for signe ',1)
  !
  fac = signe*e2*fpi/omega * ci
  !
  epmatl(:,:) = czero
  !
  DO m1 = -nq1,nq1
    DO m2 = -nq2,nq2
      DO m3 = -nq3,nq3
        !
        g1 = m1*bg(1,1) + m2*bg(1,2) + m3*bg(1,3) + q(1)
        g2 = m1*bg(2,1) + m2*bg(2,2) + m3*bg(2,3) + q(2)
        g3 = m1*bg(3,1) + m2*bg(3,2) + m3*bg(3,3) + q(3)
        !
        qeq = (g1*(epsil(1,1)*g1+epsil(1,2)*g2+epsil(1,3)*g3 )+ &
               g2*(epsil(2,1)*g1+epsil(2,2)*g2+epsil(2,3)*g3 )+ &
               g3*(epsil(3,1)*g1+epsil(3,2)*g2+epsil(3,3)*g3 )) !*twopi/alat
        !
        ! qeq > 0 excludes q+G = 0. The cutoff test is applied to qeq
        ! before it is rescaled by twopi/alat.
        IF (qeq > 0.0_DP .and. qeq/alph/4.0_DP < gmax ) THEN
          !
          qeq   = qeq*twopi/alat
          facqd = fac*exp(-qeq/alph/4.0_DP)/qeq !/(two*wq)
          !
          DO na = 1,nat
            arg  = -twopi* ( g1*tau(1,na)+ g2*tau(2,na)+ g3*tau(3,na) )
            facq = facqd * CMPLX(cos(arg),sin(arg),kind=DP)
            DO ipol=1,3
              zaq  = g1*zeu(1,ipol,na)+g2*zeu(2,ipol,na)+g3*zeu(3,ipol,na)
              coef = facq * zaq * uq(3*(na-1)+ipol,imode)
              !
              ! epmat = epmat + coef * bmat
              CALL zaxpy(nbndsub**2, coef, bmat, 1, epmat, 1)
              ! The term alone is only read in the shortrange branch below
              IF (shortrange) CALL zaxpy(nbndsub**2, coef, bmat, 1, epmatl, 1)
              !
            ENDDO !ipol
          ENDDO !nat
        ENDIF
        !
      ENDDO
    ENDDO
  ENDDO
  !
  ! In case we want only the short-range we do
  ! g_s = sqrt(g*g - g_l*g_l)
  !
  ! Important notice: It is possible that (g*g - g_l*g_l) < 0, in which
  ! case the sqrt will give a pure imaginary number. If it is positive we
  ! will get a pure real number.
  ! In any case, when g_s is squared both will become real numbers.
  IF (shortrange) THEN
    epmat = SQRT(epmat*conjg(epmat) - epmatl*conjg(epmatl))
  ENDIF
  !
END SUBROUTINE rgd_blk_epw_fine_mem
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
--
&inputepw
prefix = 'sic'
amass(1) = 28.0855
amass(2) = 12.0107
outdir = './'
elph = .true.
kmaps = .false.
epbwrite = .true.
epbread = .false.
epwwrite = .true.
epwread = .false.
etf_mem = 2
lpolar = .true.
nbndsub = 4
nbndskip = 0
wannierize = .true.
num_iter = 300
iprint = 2
dis_win_max = 12
dis_froz_max= 7
proj(1) = 'Si:sp3'
wdata(1) = 'bands_plot = .true.'
wdata(2) = 'begin kpoint_path'
wdata(3) = 'L 0.50 0.00 0.00 G 0.00 0.00 0.00'
wdata(4) = 'G 0.00 0.00 0.00 X 0.50 0.50 0.00'
wdata(5) = 'end kpoint_path'
wdata(6) = 'bands_plot_format = gnuplot'
wdata(7) = 'use_ws_distance = T'
iverbosity = 0
elecselfen = .true.
phonselfen = .false.
a2f = .false.
parallel_k = .true.
parallel_q = .false.
fsthick = 2.0 ! eV
eptemp = 300 ! K
degaussw = 0.1 ! eV
dvscf_dir = './save'
nkf1 = 6
nkf2 = 6
nkf3 = 6
nqf1 = 6
nqf2 = 6
nqf3 = 6
nk1 = 3
nk2 = 3
nk3 = 3
nq1 = 3
nq2 = 3
nq3 = 3
/
4 cartesian
0.000000000000 0.000000000000 0.000000000000 0.0740741
-0.333333333333 0.333333333333 -0.333333333333 0.5925926
0.000000000000 0.666666666667 0.000000000000 0.4444444
0.666666666667 0.000000000000 0.666666666667 0.8888889
......@@ -56,7 +56,7 @@ inputs_args = ('scf.in', '1'), ('ph.in', '2'), ('scf_epw.in', '1'), ('nscf_epw.i
[epw_polar/]
program = EPW
inputs_args = ('scf.in', '1'), ('ph.in', '2'), ('scf_epw.in', '1'), ('nscf_epw.in', '1'), ('epw1.in', '3')
inputs_args = ('scf.in', '1'), ('ph.in', '2'), ('scf_epw.in', '1'), ('nscf_epw.in', '1'), ('epw1.in', '3'), ('epw2.in', '3')
[tddfpt_CH4/]
program = TDDFPT
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment