Micael Oliveira · Sebastian Ohlmann
--- a/src/system/eigen_evolution_inc.F90

+ 87

− 54
+++ b/src/system/eigen_evolution_inc.F90

+ 87

− 54
 @@ -20,7 +20,7 @@
 subroutine X(eigensolver_evolution)(mesh, st, hm, te, tol, niter, converged, ik, diff, tau)
  type(mesh_t),             target, intent(in)    :: mesh
  type(states_elec_t),              intent(inout) :: st
-  type(hamiltonian_elec_t), target, intent(in)    :: hm
+  type(hamiltonian_elec_t), target, intent(inout) :: hm
  type(exponential_t),              intent(inout) :: te
  FLOAT,                            intent(in)    :: tol
  integer,                          intent(inout) :: niter
 @@ -29,33 +29,68 @@ subroutine X(eigensolver_evolution)(mesh, st, hm, te, tol, niter, converged, ik,
  FLOAT,                            intent(out)   :: diff(:) !< (1:st%nst)
  FLOAT,                            intent(in)    :: tau

-  integer :: ist, iter, maxiter, conv, matvec, i, j
-  R_TYPE, allocatable :: hpsi(:, :), c(:, :), psi(:, :)
+  integer :: ib, minst, maxst, ist, iter, maxiter, conv, convb, matvec, i
+  R_TYPE, allocatable :: c(:, :), zeig(:), res(:)
  FLOAT, allocatable :: eig(:)
+  type(batch_t) :: hpsib
+#if defined(R_TREAL)
+  type(batch_t) :: zpsib
+  CMPLX, allocatable :: zpsi(:,:)
+  FLOAT, allocatable :: psi(:,:)
+#endif

  PUSH_SUB(X(eigensolver_evolution))

  maxiter = niter
  matvec = 0

-  SAFE_ALLOCATE(psi(1:mesh%np_part, 1:st%d%dim))
-  SAFE_ALLOCATE(hpsi(1:mesh%np_part, 1:st%d%dim))
  SAFE_ALLOCATE(c(1:st%nst, 1:st%nst))
  SAFE_ALLOCATE(eig(1:st%nst))
+  SAFE_ALLOCATE(zeig(1:st%nst))
+  SAFE_ALLOCATE(res(1:st%nst))
+#if defined(R_TREAL)
+  SAFE_ALLOCATE(zpsi(1:mesh%np_part, 1:st%d%dim))
+  SAFE_ALLOCATE(psi(1:mesh%np_part, 1:st%d%dim))
+#endif

  ! Warning: it seems that the algorithm is improved if some extra states are added -- states
  ! whose convergence should not be checked.
-  conv = converged
+  convb = st%group%block_start - 1
+  conv = 0
+  do ib = st%group%block_start, st%group%block_end
+    if (conv + st%group%psib(ib, ik)%nst <= converged) then
+      conv = conv + st%group%psib(ib, ik)%nst
+      convb = convb + 1
+    else
+      exit
+    end if
+  end do

  do iter = 1, maxiter
-
+#if defined(R_TREAL)
+    ! The application of the exponential for the real case is still done one state at a time, as we need
+    ! to do a couple of type conversions. To avoid this, we need either a function to convert the
+    ! type of a batch, or to modify the batch_copy_data routine to allow the copy between batches of
+    ! different types.
    do ist = conv + 1, st%nst
-      call states_elec_get_state(st, mesh, ist, ik, psi)
-      !TODO: convert this opperation to batched versions 
-      call exponentiate(psi, j)
+      call batch_init(zpsib, hm%d%dim, 1)
+      call states_elec_get_state(st, mesh, ist, ik, zpsi)
+      call batch_add_state(zpsib, ist, zpsi)
+
+      call exponential_apply_batch(te, mesh, hm, zpsib, ik, -tau, imag_time = .true.)
+
+      call batch_get_state(zpsib, 1, mesh%np, zpsi)
+      psi(1:mesh%np, 1:st%d%dim) = R_TOTYPE(zpsi(1:mesh%np, 1:st%d%dim))
      call states_elec_set_state(st, mesh, ist, ik, psi)
-      matvec = matvec + j
+      call batch_end(zpsib)
+      matvec = matvec + te%exp_order
+    end do
+#else
+    do ib = convb + 1, st%group%block_end
+      call exponential_apply_batch(te, mesh, hm, st%group%psib(ib, ik), ik, -tau, imag_time = .true.)
+      matvec = matvec + te%exp_order*(states_elec_block_max(st, ib) - states_elec_block_min(st, ib) + 1)
    end do
+#endif

    ! This is the orthonormalization suggested by Aichinger and Krotschek
    ! [Comp. Mat. Science 34, 188 (2005)]
 @@ -70,65 +105,63 @@ subroutine X(eigensolver_evolution)(mesh, st, hm, te, tol, niter, converged, ik,
    call states_elec_rotate(mesh, st, c, ik)
    
    ! Get the eigenvalues and the residues.
-    do ist = conv + 1, st%nst
-      call states_elec_get_state(st, mesh, ist, ik, psi)
-      !TODO: convert these opperations to batched versions 
-      call X(hamiltonian_elec_apply)(hm, mesh, psi, hpsi, ist, ik)
-      st%eigenval(ist, ik) = real(X(mf_dotp)(mesh, st%d%dim, psi, hpsi), REAL_PRECISION)
-      diff(ist) = X(states_elec_residue)(mesh, st%d%dim, hpsi, st%eigenval(ist, ik), psi)
-
-      if(debug%info) then
-        write(message(1), '(a,i4,a,i4,a,i4,a,es12.6)') 'Debug: Evolution Eigensolver - ik', ik, &
-          ' ist ', ist, ' iter ', iter, ' res ', diff(ist)
-        call messages_info(1)
+    do ib = convb + 1, st%group%block_end
+      minst = states_elec_block_min(st, ib)
+      maxst = states_elec_block_max(st, ib)
+
+      if (hamiltonian_elec_apply_packed(hm, mesh)) call batch_pack(st%group%psib(ib, ik))
+
+      call batch_copy(st%group%psib(ib, ik), hpsib)
+
+      call X(hamiltonian_elec_apply_batch)(hm, mesh, st%group%psib(ib, ik), hpsib, ik)
+      call X(mesh_batch_dotp_vector)(mesh, st%group%psib(ib, ik), hpsib, zeig(minst:maxst))
+      call batch_axpy(mesh%np, -zeig(minst:maxst), st%group%psib(ib, ik), hpsib)
+      call X(mesh_batch_dotp_vector)(mesh, st%group%psib(ib, ik), hpsib, res(minst:maxst))
+      call batch_end(hpsib)
+
+      do ist = minst, maxst
+        diff(ist) = sqrt(abs(res(ist)))
+        st%eigenval(ist, ik) = R_REAL(zeig(ist))
+      end do
+
+      if (hamiltonian_elec_apply_packed(hm, mesh)) call batch_unpack(st%group%psib(ib, ik))
+
+      if (debug%info) then
+        do ist = minst, maxst
+          write(message(1), '(a,i4,a,i4,a,i4,a,es12.6)') 'Debug: Evolution Eigensolver - ik', ik, &
+            ' ist ', ist, ' iter ', iter, ' res ', diff(ist)
+          call messages_info(1)
+        end do
      end if
+
    end do

    ! And check for convergence. Note that they must be converged *in order*, so that they can be frozen.
-    do ist = conv + 1, st%nst
-      if( (diff(ist) < tol) .and. (ist == conv + 1) ) conv = conv + 1
+    do ib = convb + 1, st%group%block_end
+      minst = states_elec_block_min(st, ib)
+      maxst = states_elec_block_max(st, ib)
+      if (all(diff(minst:maxst) < tol) .and. ib == convb + 1) then
+        convb = convb + 1
+        conv = conv + maxst - minst + 1
+      end if
    end do
-    if(conv == st%nst) exit
+    if (convb == st%group%block_end) exit
  end do

  converged = conv

  niter = matvec

+#if defined(R_TREAL)
  SAFE_DEALLOCATE_A(psi)
-  SAFE_DEALLOCATE_A(hpsi)
+  SAFE_DEALLOCATE_A(zpsi)
+#endif
  SAFE_DEALLOCATE_A(c)
  SAFE_DEALLOCATE_A(eig)
+  SAFE_DEALLOCATE_A(zeig)
+  SAFE_DEALLOCATE_A(res)

  POP_SUB(X(eigensolver_evolution))
-contains
-
-  ! ---------------------------------------------------------
-  subroutine exponentiate(psi, order)
-    R_TYPE, target, intent(inout) :: psi(:, :)
-    integer,        intent(out)   :: order
-    
-    CMPLX,          pointer       :: zpsi(:, :)
-
-    PUSH_SUB(X(eigensolver_evolution).exponentiate)
-
-#if defined(R_TREAL)
-    SAFE_ALLOCATE(zpsi(1:mesh%np_part, 1:st%d%dim))
-    zpsi(1:mesh%np, 1:st%d%dim) = psi(1:mesh%np, 1:st%d%dim)
-#else
-    zpsi => psi
-#endif
-    call exponential_apply(te, mesh, hm, zpsi, ist, ik, -tau, order = order, imag_time = .true.)
-#if defined(R_TREAL)
-    psi(1:mesh%np, 1:st%d%dim) = R_TOTYPE(zpsi(1:mesh%np, 1:st%d%dim))
-    SAFE_DEALLOCATE_P(zpsi)
-#else
-    nullify(zpsi)
-#endif
-
-    POP_SUB(X(eigensolver_evolution).exponentiate)
-  end subroutine exponentiate
-
 end subroutine X(eigensolver_evolution)