Fix logic of checking if component in sum is non-zero

Moreover, add some documentation on the algorithm used here.

Fix logic of checking if component in sum is non-zero
6072f272 · Sebastian Ohlmann · 9ee8145f · 6072f272
Commit 6072f272 authored 4 years ago by Sebastian Ohlmann
--- a/src/grid/mesh_batch_inc.F90
+++ b/src/grid/mesh_batch_inc.F90
@@ -923,28 +923,37 @@ subroutine X(priv_mesh_batch_nrm2)(mesh, aa, nrm2)
    ithread = 1
    !$ ithread = omp_get_thread_num() + 1
    
+    ! The algorithm for the squared sum is the same as used, e.g., in openblas.
+    ! The idea is that one wants to avoid an overflow caused by squaring a big
+    ! number by using separate values for the sum of squares and the scale.
+    ! Only at the end, the norm is computed as scal*sqrt(ssq) - in this way
+    ! the largest number which is stored in scal is never squared.
    if(.not. mesh%use_curvilinear) then

      do ip = sp, sp + np - 1
        do ist = 1, aa%nst_linear
          ! first add real part
          a0 = abs(R_REAL(aa%X(ff_pack)(ist, ip)))
-          if(a0 <= M_EPSILON) cycle
-          if(scal(ist, ithread) < a0) then
-            ssq(ist, ithread) = M_ONE + ssq(ist, ithread)*(scal(ist, ithread)/a0)**2
-            scal(ist, ithread) = a0
-          else
-            ssq(ist, ithread) = ssq(ist, ithread) + (a0/scal(ist, ithread))**2
+          ! only add a0 if it is non-zero
+          if(a0 > M_EPSILON) then
+            if(scal(ist, ithread) < a0) then
+              ssq(ist, ithread) = M_ONE + ssq(ist, ithread)*(scal(ist, ithread)/a0)**2
+              scal(ist, ithread) = a0
+            else
+              ssq(ist, ithread) = ssq(ist, ithread) + (a0/scal(ist, ithread))**2
+            end if
          end if
 #ifdef R_TCOMPLEX
          ! then add imaginary part for complex numbers
          a0 = abs(R_AIMAG(aa%X(ff_pack)(ist, ip)))
-          if(a0 <= M_EPSILON) cycle
-          if(scal(ist, ithread) < a0) then
-            ssq(ist, ithread) = M_ONE + ssq(ist, ithread)*(scal(ist, ithread)/a0)**2
-            scal(ist, ithread) = a0
-          else
-            ssq(ist, ithread) = ssq(ist, ithread) + (a0/scal(ist, ithread))**2
+          ! only add a0 if it is non-zero
+          if(a0 > M_EPSILON) then
+            if(scal(ist, ithread) < a0) then
+              ssq(ist, ithread) = M_ONE + ssq(ist, ithread)*(scal(ist, ithread)/a0)**2
+              scal(ist, ithread) = a0
+            else
+              ssq(ist, ithread) = ssq(ist, ithread) + (a0/scal(ist, ithread))**2
+            end if
          end if
 #endif
        end do
@@ -954,14 +963,30 @@ subroutine X(priv_mesh_batch_nrm2)(mesh, aa, nrm2)

      do ip = sp, sp + np - 1
        do ist = 1, aa%nst_linear
-          a0 = abs(aa%X(ff_pack)(ist, ip))
-          if(a0 < M_EPSILON) cycle
-          if(scal(ist, ithread) < a0) then
-            ssq(ist, ithread) =  mesh%vol_pp(ip) + ssq(ist, ithread)*(scal(ist, ithread)/a0)**2
-            scal(ist, ithread) = a0
-          else
-            ssq(ist, ithread) = ssq(ist, ithread) + mesh%vol_pp(ip)*(a0/scal(ist, ithread))**2
+          ! first add real part
+          a0 = abs(R_REAL(aa%X(ff_pack)(ist, ip)))
+          ! only add a0 if it is non-zero
+          if(a0 > M_EPSILON) then
+            if(scal(ist, ithread) < a0) then
+              ssq(ist, ithread) =  mesh%vol_pp(ip) + ssq(ist, ithread)*(scal(ist, ithread)/a0)**2
+              scal(ist, ithread) = a0
+            else
+              ssq(ist, ithread) = ssq(ist, ithread) + mesh%vol_pp(ip)*(a0/scal(ist, ithread))**2
+            end if
          end if
+#ifdef R_TCOMPLEX
+          ! then add imaginary part for complex numbers
+          a0 = abs(R_AIMAG(aa%X(ff_pack)(ist, ip)))
+          ! only add a0 if it is non-zero
+          if(a0 > M_EPSILON) then
+            if(scal(ist, ithread) < a0) then
+              ssq(ist, ithread) =  mesh%vol_pp(ip) + ssq(ist, ithread)*(scal(ist, ithread)/a0)**2
+              scal(ist, ithread) = a0
+            else
+              ssq(ist, ithread) = ssq(ist, ithread) + mesh%vol_pp(ip)*(a0/scal(ist, ithread))**2
+            end if
+          end if
+#endif
        end do
      end do