Fix: GCC 9.2 fast-math complex products (fc0f148a) · Commits · libeigen / eigen

Eigen/src/Core/ProductEvaluators.h

+24 −8

Original line number	Diff line number	Diff line
		@@ -607,18 +607,34 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
		static constexpr bool SameType =
		std::is_same<typename LhsNestedCleaned::Scalar, typename RhsNestedCleaned::Scalar>::value;

		static constexpr bool CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1);
		static constexpr bool CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1);
		// AllowComplexPacketProduct: compile-time switch consulted by CanVectorizeRhs, CanVectorizeLhs,
		// CanVectorizeInner and the PacketAccessBit term of Flags in this evaluator.
		// It evaluates to !NumTraits<Scalar>::IsComplex (i.e. false for complex scalars) only when the
		// guard below holds: __FAST_MATH__ is defined and the compiler is strict GCC with a version in
		// [7.0.0, 8.4.0) or [9.0.0, 9.3.0). In every other configuration it is unconditionally true.
		#if defined(__FAST_MATH__) && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_STRICT_AT_LEAST(7, 0, 0) && \
		(EIGEN_GNUC_STRICT_LESS_THAN(8, 4, 0) \|\| \
		(EIGEN_GNUC_STRICT_AT_LEAST(9, 0, 0) && EIGEN_GNUC_STRICT_LESS_THAN(9, 3, 0)))
		// Work around GCC PR tree-optimization/92420, a reversed-access vectorizer miscompile under -ffast-math.
		// The bug was introduced by GCC r238039, fixed on the GCC 8 branch by
		// https://gcc.gnu.org/g:785eda9390473e42f0e0b7199c42032a0432de68 and on the GCC 9 branch by
		// https://gcc.gnu.org/g:2d8ea3a0a6095a56b7c59c50b1068d602cde934a.
		// See also GitLab issue #1839.
		// Affected compiler: keep the packet paths for real scalars, turn them off for complex ones.
		static constexpr bool AllowComplexPacketProduct = !NumTraits<Scalar>::IsComplex;
		#else
		// Unaffected compiler or no fast-math: no restriction on complex scalars.
		static constexpr bool AllowComplexPacketProduct = true;
		#endif

		static constexpr bool CanVectorizeRhs =
		AllowComplexPacketProduct && bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1);
		static constexpr bool CanVectorizeLhs =
		AllowComplexPacketProduct && (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1);

		static constexpr int EvalToRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1
		: (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1)
		? 0
		: (bool(RhsRowMajor) && !CanVectorizeLhs);

		static constexpr int Flags = ((int(LhsFlags) \| int(RhsFlags)) & HereditaryBits & ~RowMajorBit) \|
		static constexpr int Flags =
		((int(LhsFlags) \| int(RhsFlags)) & HereditaryBits & ~RowMajorBit) \|
		(EvalToRowMajor ? RowMajorBit : 0)
		// TODO: enable vectorization for mixed types
		\| (SameType && (CanVectorizeLhs \|\| CanVectorizeRhs) ? PacketAccessBit : 0) \|
		\| (SameType && AllowComplexPacketProduct && (CanVectorizeLhs \|\| CanVectorizeRhs) ? PacketAccessBit : 0) \|
		(XprType::IsVectorAtCompileTime ? LinearAccessBit : 0);

		static constexpr int LhsOuterStrideBytes =
		@@ -642,7 +658,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
		* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
		* the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
		*/
		static constexpr bool CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) &&
		static constexpr bool CanVectorizeInner = SameType && AllowComplexPacketProduct && LhsRowMajor && (!RhsRowMajor) &&
		(int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit) &&
		(int(InnerSize) % packet_traits<Scalar>::size == 0);

test/fastmath.cpp

+23 −0

Original line number	Diff line number	Diff line
		@@ -179,6 +179,26 @@ void check_inf_nan(bool dryrun) {
		}
		}

		template <typename RealScalar>
		void check_complex_rowmajor_adjoint_product() {
		typedef std::complex<RealScalar> Scalar;
		typedef Matrix<Scalar, Dynamic, Dynamic, RowMajor> RowMatrix;
		typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> ColMatrix;

		RowMatrix mat(2, 2);
		mat << Scalar(1, 2), Scalar(3, -4), Scalar(-5, 6), Scalar(7, 8);

		RowMatrix expected(2, 2);
		expected << Scalar(66, 0), Scalar(8, -92), Scalar(8, 92), Scalar(138, 0);

		const RowMatrix row_major_result = mat.adjoint() * mat;
		const ColMatrix col_major_result = mat.adjoint() * mat;

		VERIFY_IS_APPROX(mat.adjoint() * mat, expected);
		VERIFY_IS_APPROX(row_major_result, expected);
		VERIFY_IS_APPROX(col_major_result, expected);
		}

		EIGEN_DECLARE_TEST(fastmath) {
		std::cout << "* float * \n\n";
		check_inf_nan<float>(true);
		@@ -190,4 +210,7 @@ EIGEN_DECLARE_TEST(fastmath) {
		check_inf_nan<float>(false);
		check_inf_nan<double>(false);
		check_inf_nan<long double>(false);

		CALL_SUBTEST_1(check_complex_rowmajor_adjoint_product<float>());
		CALL_SUBTEST_2(check_complex_rowmajor_adjoint_product<double>());
		}