Commit 5c1219f8 authored by Nick R. Papior

Removed superfluous print-outs

- The memory print-outs were most likely related to PEXSI,
  for checking the splitting of the workers and the PEXSI nodes

- The entropy print-out is now only performed in PEXSI compilations

- The defaults for the timers are now reverted for non-PEXSI
  runs.

- Updated the manual for a couple of entries related to timings.
parent e85f28b1
@@ -8285,89 +8285,96 @@ Localized Wannier Function radius
\end{description}
\vspace{5pt}
\subsection{Efficiency options}
\begin{description}
\itemsep 10pt
\parsep 0pt
-\item[\textbf{DirectPhi}] (\textit{logical}):
-\index{DirectPhi@\textbf{DirectPhi}}
-The calculation of the matrix elements on the mesh requires the
-value of the orbitals on the mesh points. This array represents one of
-the largest uses of memory within the code. If set to true this option
-allows the code to generate the orbital values when needed rather than
-storing the values. This obviously costs more computer time but will
-make it possible to run larger jobs where memory is the limiting factor.
-\textit{Use:} Controls whether the values of the orbitals at the mesh points
-are stored or calculated on the fly.
-\textit{Default value:} false
+\begin{fdflogicalF}{DirectPhi}
+  The calculation of the matrix elements on the mesh requires the
+  value of the orbitals on the mesh points. This array represents one
+  of the largest uses of memory within the code. If set to true this
+  option allows the code to generate the orbital values when needed
+  rather than storing the values. This obviously costs more computer
+  time but will make it possible to run larger jobs where memory is
+  the limiting factor.
+
+  This controls whether the values of the orbitals at the mesh points
+  are stored or calculated on the fly.
+\end{fdflogicalF}
\end{description}
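
For illustration, a minimal fdf sketch of how this option would appear
in an input file (the label comes from the entry above; the value is an
example, not a recommendation):

    # Trade CPU time for memory: generate orbital values on the mesh
    # points on the fly instead of storing them all (default: false).
    DirectPhi  true
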
\subsection{Memory, CPU-time, and Wall time accounting options}
\begin{description}
-\item[\textbf{AllocReportLevel}] (\textit{integer}):
-\index{AllocReportLevel@\textbf{AllocReportLevel}}
-Sets the level of the allocation report, printed in file
-\texttt{SystemLabel}.alloc. However, not all the allocated arrays are
-included in the report (this will be corrected in future versions).
-The allowed values are:
-\begin{itemize}
-\item
-level 0 : no report at all (the default)
-\item
-level 1 : only total memory peak and where it occurred
-\item
-level 2 : detailed report printed only at
-normal program termination
-\item
-level 3 : detailed report printed at every new memory peak
-\item
-level 4 : print every individual (re)allocation or deallocation
-\end{itemize}
-NOTE: In MPI runs, only node-0 peak reports are produced.
+\begin{fdfentry}{AllocReportLevel}[integer]<$0$>
+  Sets the level of the allocation report, printed in file
+  \sysfile{alloc}. However, not all the allocated arrays are included
+  in the report (this will be corrected in future versions). The
+  allowed values are:
+  \begin{itemize}
+  \item%
+    level 0 : no report at all (the default)
+  \item%
+    level 1 : only total memory peak and where it occurred
+  \item%
+    level 2 : detailed report printed only at
+    normal program termination
+  \item%
+    level 3 : detailed report printed at every new memory peak
+  \item%
+    level 4 : print every individual (re)allocation or deallocation
+  \end{itemize}
+
+  \note In MPI runs, only node-0 peak reports are produced.
+\end{fdfentry}
-\item[\textbf{AllocReportThreshold}] (\textit{real}):
-\index{AllocReportThreshold@\textbf{AllocReportThreshold}}
-Sets the minimum size (in bytes) of the arrays whose memory use
-is individually printed in the detailed allocation reports
-(levels 2 and 3). It does not affect the reported memory sums
-and peaks, which always include all arrays.
-\textit{Default value:} \texttt{0.0}
+\begin{fdfentry}{AllocReportThreshold}[real]<$0.$>
+  Sets the minimum size (in bytes) of the arrays whose memory use is
+  individually printed in the detailed allocation reports (levels 2 and
+  3). It does not affect the reported memory sums and peaks, which
+  always include all arrays.
+\end{fdfentry}
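
As an illustration, the two allocation-report options above might be
combined as follows in an fdf input (the threshold value is made up;
it is in bytes, per the entry above):

    # Detailed memory report at normal termination (level 2),
    # listing only arrays larger than ~1 MB individually.
    AllocReportLevel      2
    AllocReportThreshold  1000000.
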
-\item[\textbf{TimerReportThreshold}] (\textit{real}):
-\index{TimerReportThreshold@\textbf{TimerReportThreshold}}
-Sets the minimum fraction, of total CPU time, of the subroutines or
-code sections whose CPU time is individually printed in the detailed
-timer reports. To obtain the accounting of MPI communication times
-in parallel executions, you must compile with option \texttt{-DMPI\_TIMING}.
-In serial execution, the CPU times are printed at the end of the
-output file. In parallel execution, they are reported in a separated
-file named \texttt{SystemLabel}.times.
-\textit{Default value:} \texttt{0.0}
+\begin{fdfentry}{TimerReportThreshold}[real]<$0.$>
+  Sets the minimum fraction, of total CPU time, of the subroutines or
+  code sections whose CPU time is individually printed in the detailed
+  timer reports. To obtain the accounting of MPI communication times
+  in parallel executions, you must compile with option
+  \shell{-DMPI\_TIMING}\index{compile!pre-processor!-DMPI\_TIMING}.
+  In serial execution, the CPU times are printed at the end of the
+  output file. In parallel execution, they are reported in a separate
+  file named \sysfile{times}.
+\end{fdfentry}
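
A sketch of restricting the timer report to expensive sections
(the 1% cut-off is an arbitrary example):

    # Only print timings for routines or code sections that use
    # at least 1% of the total CPU time.
    TimerReportThreshold  0.01
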
-\item[\textbf{UseTreeTimer}] (\textit{logical}):
-\index{UseTreeTimer@\textbf{UseTreeTimer}}
-Enable an experimental timer which is based on wall time on the master
-node and is aware of the tree-structure of the timed sections.
-\textit{Default value:} \texttt{.false.}
+\begin{fdflogicalF}{UseTreeTimer}
+  Enable an experimental timer which is based on wall time on the
+  master node and is aware of the tree-structure of the timed
+  sections.
+
+  \note If used with the PEXSI solver (see Sec.~\ref{SolverPEXSI})
+  this defaults to \fdftrue.
+\end{fdflogicalF}
+\begin{fdflogicalT}{UseParallelTimer}
+  Determine whether timings are performed in parallel. This may
+  introduce a slight overhead.
+
+  \note If used with the PEXSI solver (see Sec.~\ref{SolverPEXSI})
+  this defaults to \fdffalse.
+\end{fdflogicalT}
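
Since the defaults of these two timer options now depend on whether a
PEXSI run splits the workers (see the fdf_get logic further down in
this commit), a sketch of pinning them explicitly so the behaviour is
the same in all runs:

    # Always use the conventional parallel timer and disable the
    # experimental tree timer, regardless of the run-type defaults.
    UseParallelTimer  true
    UseTreeTimer      false
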
\end{description}
\vspace{5pt}
\subsection{The catch-all option UseSaveData}
\index{reading saved data}
@@ -95,7 +95,7 @@
if (iscf > 1) then
call compute_Ebs_shift(Dscf,H,Hprev,delta_Ebs)
delta_Ef = delta_Ebs / qtot
-if (ionode) then
+if (ionode.and.isolve.eq.SOLVE_PEXSI) then
write(6,"(a,f16.5)")
$ "Estimated change in band-structure energy:",
$ delta_Ebs/eV, "Estimated shift in E_fermi: ",
@@ -171,7 +171,9 @@ c$$$ & S, 'S.matrix')
& occtol, iscf, neigwanted)
Ecorrec = 0.0_dp
PreviousCallDiagon=.true.
+#ifdef SIESTA__PEXSI
if (ionode) write (6,"(/a,f14.6)") 'Entropy/k:', Entropy
+#endif
elseif (isolve .eq. SOLVE_ORDERN) then
if (.not. gamma) call die("Cannot do O(N) with k-points.")
if (NonCol .or. SpOrb)
@@ -275,8 +277,10 @@ c$$$ & S, 'S.matrix')
Hprev = H
call timer( 'compute_dm', 2 )
+#ifdef SIESTA__PEXSI
if (ionode) call memory_snapshot("after compute_DM")
+#endif
-!----------------------------------------------------------------------- END
+!-----------------------------------------------------------------------END
END subroutine compute_dm
END MODULE m_compute_dm
subroutine memory_all(str,comm)
   use m_rusage, only : rss_max
#ifdef MPI
   use mpi
#endif
   character(len=*), intent(in) :: str
   integer, intent(in)          :: comm
   integer :: mpierror
-  real    :: max_mem, min_mem, my_mem
+  real    :: mem(2), tmem(2)
   integer :: nprocs, myrank
#ifdef MPI
   call MPI_Comm_Size( Comm, nprocs, MPIerror )
   call MPI_Comm_Rank( Comm, myrank, MPIerror )
#else
   nprocs = 1
   myrank = 0
#endif
-  my_mem = rss_max()
+  mem(2) = rss_max()
+  mem(1) = - mem(2)
#ifdef MPI
-  call MPI_Reduce(my_mem,max_mem,1,MPI_Real,MPI_max,0,comm,MPIerror)
-  call MPI_Reduce(my_mem,min_mem,1,MPI_Real,MPI_min,0,comm,MPIerror)
-#else
-  max_mem = my_mem
-  min_mem = my_mem
+  call MPI_Reduce(mem,tmem,2,MPI_Real,MPI_max,0,comm,MPIerror)
+  mem = tmem
#endif
   if (myrank == 0) then
-     write(6,"(a,2f12.2)") " &m -- Peak memory (Mb) " // trim(str) // " (max,min): ", max_mem, min_mem
+     write(6,"(a,2f12.2)") " &m -- Peak memory (Mb) " // trim(str) // " (max,min): ", mem(2), -mem(1)
   endif
end subroutine memory_all
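
The rewritten memory_all folds the former two reductions into one: by
storing (-rss, rss) in mem and reducing with MPI_MAX, the maximum of
the negated values is the negated minimum. A self-contained sketch of
the same trick (a standalone toy program, not SIESTA code; it assumes
an MPI build, and the 12.3 MB figure is made up):

    program min_via_max
       use mpi
       implicit none
       real    :: mem(2), tmem(2)
       integer :: mpierror, myrank
       call MPI_Init(mpierror)
       call MPI_Comm_rank(MPI_COMM_WORLD, myrank, mpierror)
       ! Pretend this rank measured 12.3 MB (made-up value).
       mem(2) = 12.3
       ! Negate so that MPI_MAX over mem(1) yields minus the minimum.
       mem(1) = -mem(2)
       ! One reduction instead of two: max(-x) = -min(x).
       call MPI_Reduce(mem, tmem, 2, MPI_REAL, MPI_MAX, 0, &
                       MPI_COMM_WORLD, mpierror)
       if (myrank == 0) then
          mem = tmem
          write(*,"(a,2f12.2)") "min, max (MB): ", -mem(1), mem(2)
       end if
       call MPI_Finalize(mpierror)
    end program min_via_max
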
@@ -255,7 +255,9 @@
endif
call timer('setup_H',2)
+#ifdef SIESTA__PEXSI
if (node==0) call memory_snapshot("after setup_H")
+#endif
if ( h_setup_only ) then
call timer( 'all', 2 ) ! New call to close the tree
@@ -55,7 +55,9 @@
call extrae_shutdown
#endif
+#ifdef SIESTA__PEXSI
if (ionode) call memory_snapshot("after siesta_init")
+#endif
C Begin of coordinate relaxation iteration
relaxd = .false.
@@ -77,16 +79,22 @@ C Begin of coordinate relaxation iteration
if (.not. relaxd) then
istep = istep + 1
endif
+#ifdef SIESTA__PEXSI
if (ionode) call memory_snapshot("after geometry step")
+#endif
ENDDO
C End of coordinate-relaxation loop
call siesta_analysis( relaxd )
+#ifdef SIESTA__PEXSI
if (ionode) call memory_snapshot("after siesta_analysis")
+#endif
call siesta_end()
+#ifdef SIESTA__PEXSI
if (ionode) call memory_snapshot("at end of program")
+#endif
-!----------------------------------------------------------------------- END
+!-----------------------------------------------------------------------END
END program siesta
@@ -125,7 +125,9 @@
call state_init( istep )
endif
+#ifdef SIESTA__PEXSI
if (ionode) call memory_snapshot("after state_init")
+#endif
if (fdf_get("Sonly",.false.)) then
if (SIESTA_worker) then
@@ -166,7 +168,9 @@
! would suit "analysis" runs in which nscf = 0
if (SIESTA_worker) call setup_H0()
+#ifdef SIESTA__PEXSI
if (ionode) call memory_snapshot("after setup_H0")
+#endif
#ifdef NCDF_4
! Initialize the SIESTA.nc file
@@ -418,7 +422,9 @@
if (fdf_get("compute-forces",.true.)) then
call post_scf_work( istep, iscf , SCFconverged )
+#ifdef SIESTA__PEXSI
if (ionode) call memory_snapshot("after post_scf_work")
+#endif
endif
! ... so H at this point is the latest generator of the DM, except
@@ -426,7 +432,9 @@
! without convergence while mixing H
call state_analysis( istep )
+#ifdef SIESTA__PEXSI
if (ionode) call memory_snapshot("after state_analysis")
+#endif
! If siesta is running as a subroutine, send forces to master program
if (siesta_server)
@@ -309,8 +309,13 @@ C Initialise MPI and set processor number
. threshold=threshold )
! Note that the parallel timer might be inefficient for reports
! when large numbers of processors are used
-use_parallel_timer = fdf_get('UseParallelTimer', .false.)
-use_tree_timer = fdf_get('UseTreeTimer', .true.)
+if ( PEXSINodes /= Nodes ) then
+   use_parallel_timer = fdf_get('UseParallelTimer', .false.)
+   use_tree_timer = fdf_get('UseTreeTimer', .true.)
+else
+   use_parallel_timer = fdf_get('UseParallelTimer', .true.)
+   use_tree_timer = fdf_get('UseTreeTimer', .false.)
+end if
! Start time counter
! Note new placement of this first use, so that
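
For readers unfamiliar with FDF: the second argument of fdf_get is only
a default, returned when the label is absent from the input file, so
these PEXSI-dependent defaults never override an explicit user setting.
A minimal sketch (assuming the logical fdf_get interface used above):

    ! Explicit input always wins; .true. is returned only as a
    ! fallback when 'UseTreeTimer' is absent from the fdf file.
    use_tree_timer = fdf_get('UseTreeTimer', .true.)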