Skip to content
Snippets Groups Projects
Commit 6072f272 authored by Sebastian Ohlmann's avatar Sebastian Ohlmann
Browse files

Fix logic of checking if component in sum is non-zero

Moreover, add some documentation on the algorithm used here.
parent 9ee8145f
No related branches found
No related tags found
Loading
Pipeline #149759788 passed
......@@ -923,28 +923,37 @@ subroutine X(priv_mesh_batch_nrm2)(mesh, aa, nrm2)
ithread = 1
!$ ithread = omp_get_thread_num() + 1
! The algorithm for the squared sum is the same as used, e.g., in openblas.
! The idea is that one wants to avoid an overflow caused by squaring a big
! number by using separate values for the sum of squares and the scale.
! Only at the end, the norm is computed as scal*sqrt(ssq) - in this way
! the largest number which is stored in scal is never squared.
if(.not. mesh%use_curvilinear) then
do ip = sp, sp + np - 1
do ist = 1, aa%nst_linear
! first add real part
a0 = abs(R_REAL(aa%X(ff_pack)(ist, ip)))
if(a0 <= M_EPSILON) cycle
if(scal(ist, ithread) < a0) then
ssq(ist, ithread) = M_ONE + ssq(ist, ithread)*(scal(ist, ithread)/a0)**2
scal(ist, ithread) = a0
else
ssq(ist, ithread) = ssq(ist, ithread) + (a0/scal(ist, ithread))**2
! only add a0 if it is non-zero
if(a0 > M_EPSILON) then
if(scal(ist, ithread) < a0) then
ssq(ist, ithread) = M_ONE + ssq(ist, ithread)*(scal(ist, ithread)/a0)**2
scal(ist, ithread) = a0
else
ssq(ist, ithread) = ssq(ist, ithread) + (a0/scal(ist, ithread))**2
end if
end if
#ifdef R_TCOMPLEX
! then add imaginary part for complex numbers
a0 = abs(R_AIMAG(aa%X(ff_pack)(ist, ip)))
if(a0 <= M_EPSILON) cycle
if(scal(ist, ithread) < a0) then
ssq(ist, ithread) = M_ONE + ssq(ist, ithread)*(scal(ist, ithread)/a0)**2
scal(ist, ithread) = a0
else
ssq(ist, ithread) = ssq(ist, ithread) + (a0/scal(ist, ithread))**2
! only add a0 if it is non-zero
if(a0 > M_EPSILON) then
if(scal(ist, ithread) < a0) then
ssq(ist, ithread) = M_ONE + ssq(ist, ithread)*(scal(ist, ithread)/a0)**2
scal(ist, ithread) = a0
else
ssq(ist, ithread) = ssq(ist, ithread) + (a0/scal(ist, ithread))**2
end if
end if
#endif
end do
......@@ -954,14 +963,30 @@ subroutine X(priv_mesh_batch_nrm2)(mesh, aa, nrm2)
do ip = sp, sp + np - 1
do ist = 1, aa%nst_linear
a0 = abs(aa%X(ff_pack)(ist, ip))
if(a0 < M_EPSILON) cycle
if(scal(ist, ithread) < a0) then
ssq(ist, ithread) = mesh%vol_pp(ip) + ssq(ist, ithread)*(scal(ist, ithread)/a0)**2
scal(ist, ithread) = a0
else
ssq(ist, ithread) = ssq(ist, ithread) + mesh%vol_pp(ip)*(a0/scal(ist, ithread))**2
! first add real part
a0 = abs(R_REAL(aa%X(ff_pack)(ist, ip)))
! only add a0 if it is non-zero
if(a0 > M_EPSILON) then
if(scal(ist, ithread) < a0) then
ssq(ist, ithread) = mesh%vol_pp(ip) + ssq(ist, ithread)*(scal(ist, ithread)/a0)**2
scal(ist, ithread) = a0
else
ssq(ist, ithread) = ssq(ist, ithread) + mesh%vol_pp(ip)*(a0/scal(ist, ithread))**2
end if
end if
#ifdef R_TCOMPLEX
! then add imaginary part for complex numbers
a0 = abs(R_AIMAG(aa%X(ff_pack)(ist, ip)))
! only add a0 if it is non-zero
if(a0 > M_EPSILON) then
if(scal(ist, ithread) < a0) then
ssq(ist, ithread) = mesh%vol_pp(ip) + ssq(ist, ithread)*(scal(ist, ithread)/a0)**2
scal(ist, ithread) = a0
else
ssq(ist, ithread) = ssq(ist, ithread) + mesh%vol_pp(ip)*(a0/scal(ist, ithread))**2
end if
end if
#endif
end do
end do
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment