Commit 47a48484 authored by Nick R. Papior's avatar Nick R. Papior
Browse files

Made the transiesta analysis start sooner

- Moving the analysis step earlier in the setup uses
  less memory and allows extremely large systems to be
  analyzed on small laptops (although the execution
  time may be long).

- Introducing a pivoting table for looking up the
  min/max columns improved the rgn2tri algorithm by
  orders of magnitude.

- Changed a couple of the integers to long integers
  (i8b) for large-scale calculations.

- Removed METIS from the rgn2tri analysis
parent abad420f
......@@ -97,6 +97,8 @@ contains
integer :: MPIerror
#endif
call timer('TS-rgn2tri',1)
lpar = .true.
if ( present(par) ) lpar = par
if ( Nodes == 1 ) lpar = .false.
......@@ -118,13 +120,9 @@ contains
! create array containing max-min for each ts-orbital
call re_alloc(mm_col, 1, 2, 1, no, &
routine='tsR2TM', name='mm_col')
!$OMP parallel do default(shared), private(i)
do i = 1 , no
mm_col(:,i) = minmax_col(sp,r,r%r(i))
! print '(a,tr1,i5,tr3,2(tr1,i5),'' d'',tr1,i4,tr4,i0)', &
! 'Orb: ',i,mm_col(:,i),mm_col(2,i)-mm_col(1,i),no
end do
!$OMP end parallel do
! Set the min/max column indices in the pivoted matrix
call set_minmax_col(sp, r, mm_col)
parts = 2
n_part(1) = no / 2
......@@ -286,6 +284,8 @@ contains
call de_alloc(mm_col,routine='tsR2TM',name='mm_col')
call timer('TS-rgn2tri',2)
if ( .not. IONode ) return
fname = fdf_get('TS.BTD.Output',' ')
......@@ -530,14 +530,14 @@ contains
! We will check in between the above selected rows and find the
! difference in size...
n_part(part) = 0
mcol = 0
do i = sRow, eRow
! this is the # of elements from the RHS of the 'part-1'
! part of the tridiagonal matrix and out to the last element of
! this row...
mcol = mm_col(2,i) - eRow
n_part(part) = max(n_part(part),mcol)
if ( mm_col(2,i) > mcol ) mcol = mm_col(2,i)
end do
n_part(part) = max(0, mcol - eRow)
! In case there is actually no connection, we should
! force the next-part to be 1!
......@@ -749,30 +749,60 @@ contains
! Min and max column requires that the sparsity pattern
! supplied has already stripped off the buffer orbitals.
! Otherwise this will fail
function minmax_col(sp,r,row)
subroutine set_minmax_col(sp, r, mm_col)
use class_Sparsity
use geom_helper, only : UCORB
! The sparsity pattern
type(Sparsity), intent(inout) :: sp
type(tRgn), intent(in) :: r
! the row which we will check for (in TranSIESTA counting)
integer, intent(in) :: row
! The result
integer :: minmax_col(2), ptr, nr, j
integer, intent(out) :: mm_col(2,r%n)
! The results
type(tRgn) :: pvt
integer :: ir, row, ptr, nr, j
integer, pointer :: l_col(:), l_ptr(:), ncol(:)
call attach(sp,n_col=ncol,list_ptr=l_ptr,list_col=l_col,nrows_g=nr)
! Using a pivoting table reduces overhead
! of performing rgn_pivot on a non-sorted
! region! SUBSTANTIALLY!
call rgn_init(pvt, nr)
!$OMP parallel default(shared)
!$OMP do private(ir)
do ir = 1 , nr
pvt%r(ir) = rgn_pivot(r, ir)
end do
!$OMP end do
minmax_col(:) = rgn_pivot(r,row)
do ptr = l_ptr(row) + 1 , l_ptr(row) + ncol(row)
j = rgn_pivot(r,ucorb(l_col(ptr),nr))
if ( j > 0 ) then
if ( j < minmax_col(1) ) minmax_col(1) = j
if ( j > minmax_col(2) ) minmax_col(2) = j
end if
!$OMP do private(ir,row,ptr,j)
do ir = 1 , r%n
! Get original sparse matrix row
row = r%r(ir)
! initialize to region row
mm_col(1,ir) = ir
mm_col(2,ir) = ir
! Loop on the sparse entries
do ptr = l_ptr(row) + 1 , l_ptr(row) + ncol(row)
j = pvt%r( ucorb(l_col(ptr),nr) )
if ( j > 0 ) then
if ( j < mm_col(1,ir) ) mm_col(1,ir) = j
if ( j > mm_col(2,ir) ) mm_col(2,ir) = j
end if
end do
end do
!$OMP end do nowait
!$OMP end parallel
call rgn_delete(pvt)
end function minmax_col
end subroutine set_minmax_col
function valid_tri(no,r,mm_col,parts,n_part,last_eq) result(val)
integer, intent(in) :: no, mm_col(2,no)
......
......@@ -662,29 +662,6 @@ contains
call tri(r_El)
end if
#ifdef SIESTA__METIS
fmethod = trim(corb)//'+metis'
if ( IONode ) write(*,fmt) trim(corb),'metis'
call sp_pvt(n,tmpSp2,r_tmp, PVT_METIS, sub = full)
if ( orb_atom == 1 ) then
call tri(r_tmp)
else
call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
call tri(r_El)
end if
fmethod = trim(corb)//'+metis+priority'
if ( IONode ) write(*,fmt) trim(corb),'metis+priority'
call sp_pvt(n,tmpSp2,r_tmp, PVT_METIS, sub = full, &
priority = priority%r)
if ( orb_atom == 1 ) then
call tri(r_tmp)
else
call rgn_atom2orb(r_tmp,na_u,lasto,r_El)
call tri(r_El)
end if
#endif
end do orb_atom_switch
call rgn_delete(r_tmp,r_Els,r_El,full,priority)
......@@ -704,8 +681,9 @@ contains
use fdf, only : fdf_overwrite
type(tRgn), intent(inout) :: r_pvt
integer :: bw, els, pad, work, i
integer(i8b) :: prof
integer :: bw, i
! Possibly very large numbers
integer(i8b) :: prof, els, pad, work
type(tRgn) :: ctri
character(len=132) :: fname
......@@ -751,7 +729,9 @@ contains
if ( ts_A_method == TS_BTD_A_COLUMN ) then
! Get the padding for the array to hold the entire column
call GFGGF_needed_worksize(ctri%n, ctri%r, &
N_Elec, Elecs, pad, work)
N_Elec, Elecs, i, bw)
pad = i
work = bw
else
pad = 0
work = 0
......
......@@ -387,7 +387,8 @@
end if
call delete(g_Sp)
end if
! Copy over xijo array (we can first do it here... :( )
call newdData2D(tmp_2D,xijo,'xijo')
deallocate(xijo)
......@@ -397,6 +398,43 @@
call delete(tmp_2D) ! decrement container...
xijo => val(xij_2D)
! Calculate the super-cell offsets...
if ( Gamma ) then
! Here we create the super-cell offsets
call re_alloc(isc_off,1,3,1,1)
isc_off(:,:) = 0
else
call xij_offset(ucell,nsc, na_u,xa,lasto,
& xij_2D, isc_off,
& Bcast=.true.)
end if
#ifdef TRANSIESTA
! When the user requests only an analysis, we can call the
! appropriate routines and quit
if ( TS_Analyze ) then
! Force the creation of the full sparsity pattern
call ts_sparse_init(slabel,IsVolt, N_Elec, Elecs,
& ucell, nsc, na_u, xa, lasto, block_dist, sparse_pattern,
& Gamma, isc_off)
! create the tri-diagonal matrix
call ts_tri_analyze( block_dist, sparse_pattern , N_Elec,
& Elecs, ucell, na_u, lasto, nsc, isc_off,
& BTD_method )
! Print-out timers
call timer('TS-rgn2tri',3)
! Bye also waits for all processors
call bye('transiesta analyzation performed')
end if
#endif
write(oname,"(a,i0)") "EDM at geom step ", istep
call newdSpData2D(sparse_pattern,e_spin_dim,block_dist,EDM_2D,
& name=oname)
......@@ -461,15 +499,6 @@
call overlap( na_u, na_s, no_s, scell, xa, indxua, rmaxo, maxnh,
& lasto, iphorb, isa, numh, listhptr, listh, S )
if ( Gamma ) then
! Here we create the super-cell offsets
call re_alloc(isc_off,1,3,1,1)
isc_off(:,:) = 0
else
call xij_offset(ucell,nsc, na_u,xa,lasto,
& xij_2D, isc_off,
& Bcast=.true.)
end if
!
! Here we could also read a Hamiltonian, either to proceed to
......@@ -520,25 +549,6 @@
endif
! When the user requests to only do an analyzation, we can call
! appropriate routines and quit
if ( TS_Analyze ) then
! Force the creation of the full sparsity pattern
call ts_sparse_init(slabel,IsVolt, N_Elec, Elecs,
& ucell, nsc, na_u, xa, lasto, block_dist, sparse_pattern,
& Gamma, isc_off)
! create the tri-diagonal matrix
call ts_tri_analyze( block_dist, sparse_pattern , N_Elec,
& Elecs, ucell, na_u, lasto, nsc, isc_off,
& BTD_method )
! Bye also waits for all processors
call bye('transiesta analyzation performed')
end if
! In case the user is requesting a Fermi-correction
! we need to delete the TS_FERMI file after each iteration
if ( TSmode .and. TS_RHOCORR_METHOD == TS_RHOCORR_FERMI
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment