Commit fc0f148a authored by Florian Maurin, committed by Charles Schlosser
Browse files

Fix: GCC 9.2 fast-math complex products

!2579

Closes #1839
parent 289e9eed
Loading
Loading
Loading
Loading
Loading
+24 −8
Original line number Diff line number Diff line
@@ -607,18 +607,34 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
  static constexpr bool SameType =
      std::is_same<typename LhsNestedCleaned::Scalar, typename RhsNestedCleaned::Scalar>::value;

  static constexpr bool CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1);
  static constexpr bool CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1);
#if defined(__FAST_MATH__) && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_STRICT_AT_LEAST(7, 0, 0) && \
    (EIGEN_GNUC_STRICT_LESS_THAN(8, 4, 0) ||                                                   \
     (EIGEN_GNUC_STRICT_AT_LEAST(9, 0, 0) && EIGEN_GNUC_STRICT_LESS_THAN(9, 3, 0)))
  // Work around GCC PR tree-optimization/92420, a reversed-access vectorizer miscompile under -ffast-math.
  // The bug was introduced by GCC r238039, fixed on the GCC 8 branch by
  // https://gcc.gnu.org/g:785eda9390473e42f0e0b7199c42032a0432de68 and on the GCC 9 branch by
  // https://gcc.gnu.org/g:2d8ea3a0a6095a56b7c59c50b1068d602cde934a.
  // See also GitLab issue #1839.
  //
  // The guard above selects this branch only when __FAST_MATH__ is defined, EIGEN_COMP_GNUC_STRICT is set
  // (presumably genuine GCC rather than a GCC-compatible compiler -- confirm against Macros.h), and the
  // compiler version lies in [7.0.0, 8.4.0) or [9.0.0, 9.3.0).
  // In that configuration the flag is false for complex scalars and true for all other scalar types.
  static constexpr bool AllowComplexPacketProduct = !NumTraits<Scalar>::IsComplex;
#else
  // Any other compiler, version, or non-fast-math build: no restriction on complex scalars.
  static constexpr bool AllowComplexPacketProduct = true;
#endif

  // True when AllowComplexPacketProduct holds, the RHS is row-major, RhsFlags carries PacketAccessBit,
  // and ColsAtCompileTime is not 1.
  static constexpr bool CanVectorizeRhs =
      AllowComplexPacketProduct && bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1);
  // True when AllowComplexPacketProduct holds, the LHS is not row-major, LhsFlags carries PacketAccessBit,
  // and RowsAtCompileTime is not 1.
  static constexpr bool CanVectorizeLhs =
      AllowComplexPacketProduct && (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1);

  // Selects whether RowMajorBit is set in Flags:
  //   - 1 when MaxRowsAtCompileTime is 1 and MaxColsAtCompileTime is not;
  //   - 0 when MaxColsAtCompileTime is 1 and MaxRowsAtCompileTime is not;
  //   - otherwise 1 exactly when the RHS is row-major and the LHS cannot be vectorized.
  static constexpr int EvalToRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1
                                        : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1)
                                            ? 0
                                            : (bool(RhsRowMajor) && !CanVectorizeLhs);

  static constexpr int Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit) |
  static constexpr int Flags =
      ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit) |
      (EvalToRowMajor ? RowMajorBit : 0)
      // TODO: enable vectorization for mixed types
                               | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) |
      | (SameType && AllowComplexPacketProduct && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) |
      (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0);

  static constexpr int LhsOuterStrideBytes =
@@ -642,7 +658,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
   * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
   * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
   */
  static constexpr bool CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) &&
  static constexpr bool CanVectorizeInner = SameType && AllowComplexPacketProduct && LhsRowMajor && (!RhsRowMajor) &&
                                            (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit) &&
                                            (int(InnerSize) % packet_traits<Scalar>::size == 0);

+23 −0
Original line number Diff line number Diff line
@@ -179,6 +179,26 @@ void check_inf_nan(bool dryrun) {
  }
}

// Regression check for adjoint-times-self products of a row-major complex matrix.
//
// Builds a fixed 2x2 matrix of std::complex<RealScalar>, then verifies that
// mat.adjoint() * mat matches a hand-computed reference in three forms:
// as an unevaluated product expression, evaluated into row-major storage,
// and evaluated into column-major storage.
template <typename RealScalar>
void check_complex_rowmajor_adjoint_product() {
  using Scalar = std::complex<RealScalar>;
  using RowMatrix = Matrix<Scalar, Dynamic, Dynamic, RowMajor>;
  using ColMatrix = Matrix<Scalar, Dynamic, Dynamic, ColMajor>;

  // Input operand, filled row by row.
  RowMatrix mat(2, 2);
  mat << Scalar(1, 2), Scalar(3, -4),  //
      Scalar(-5, 6), Scalar(7, 8);

  // Reference value of mat^H * mat: Hermitian, with a real diagonal.
  RowMatrix expected(2, 2);
  expected << Scalar(66, 0), Scalar(8, -92),  //
      Scalar(8, 92), Scalar(138, 0);

  // Evaluate the same product into both storage orders before any checks run.
  const RowMatrix row_major_result = mat.adjoint() * mat;
  const ColMatrix col_major_result = mat.adjoint() * mat;

  // The first check passes the product expression directly to the macro.
  VERIFY_IS_APPROX(mat.adjoint() * mat, expected);
  VERIFY_IS_APPROX(row_major_result, expected);
  VERIFY_IS_APPROX(col_major_result, expected);
}

EIGEN_DECLARE_TEST(fastmath) {
  std::cout << "*** float *** \n\n";
  check_inf_nan<float>(true);
@@ -190,4 +210,7 @@ EIGEN_DECLARE_TEST(fastmath) {
  check_inf_nan<float>(false);
  check_inf_nan<double>(false);
  check_inf_nan<long double>(false);

  CALL_SUBTEST_1(check_complex_rowmajor_adjoint_product<float>());
  CALL_SUBTEST_2(check_complex_rowmajor_adjoint_product<double>());
}