Loading Eigen/src/Core/ProductEvaluators.h +24 −8 Original line number Diff line number Diff line Loading @@ -607,18 +607,34 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, static constexpr bool SameType = std::is_same<typename LhsNestedCleaned::Scalar, typename RhsNestedCleaned::Scalar>::value; static constexpr bool CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1); static constexpr bool CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1); #if defined(__FAST_MATH__) && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_STRICT_AT_LEAST(7, 0, 0) && \ (EIGEN_GNUC_STRICT_LESS_THAN(8, 4, 0) || \ (EIGEN_GNUC_STRICT_AT_LEAST(9, 0, 0) && EIGEN_GNUC_STRICT_LESS_THAN(9, 3, 0))) // Work around GCC PR tree-optimization/92420, a reversed-access vectorizer miscompile under -ffast-math. // The bug was introduced by GCC r238039, fixed on the GCC 8 branch by // https://gcc.gnu.org/g:785eda9390473e42f0e0b7199c42032a0432de68 and on the GCC 9 branch by // https://gcc.gnu.org/g:2d8ea3a0a6095a56b7c59c50b1068d602cde934a. // See also GitLab issue #1839. static constexpr bool AllowComplexPacketProduct = !NumTraits<Scalar>::IsComplex; #else static constexpr bool AllowComplexPacketProduct = true; #endif static constexpr bool CanVectorizeRhs = AllowComplexPacketProduct && bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1); static constexpr bool CanVectorizeLhs = AllowComplexPacketProduct && (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1); static constexpr int EvalToRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1 : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1) ? 
0 : (bool(RhsRowMajor) && !CanVectorizeLhs); static constexpr int Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit) | static constexpr int Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit) | (EvalToRowMajor ? RowMajorBit : 0) // TODO: enable vectorization for mixed types | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) | | (SameType && AllowComplexPacketProduct && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0); static constexpr int LhsOuterStrideBytes = Loading @@ -642,7 +658,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. */ static constexpr bool CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) && static constexpr bool CanVectorizeInner = SameType && AllowComplexPacketProduct && LhsRowMajor && (!RhsRowMajor) && (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit) && (int(InnerSize) % packet_traits<Scalar>::size == 0); Loading test/fastmath.cpp +23 −0 Original line number Diff line number Diff line Loading @@ -179,6 +179,26 @@ void check_inf_nan(bool dryrun) { } } template <typename RealScalar> void check_complex_rowmajor_adjoint_product() { typedef std::complex<RealScalar> Scalar; typedef Matrix<Scalar, Dynamic, Dynamic, RowMajor> RowMatrix; typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> ColMatrix; RowMatrix mat(2, 2); mat << Scalar(1, 2), Scalar(3, -4), Scalar(-5, 6), Scalar(7, 8); RowMatrix expected(2, 2); expected << Scalar(66, 0), Scalar(8, -92), Scalar(8, 92), Scalar(138, 0); const RowMatrix row_major_result = mat.adjoint() * mat; const ColMatrix col_major_result = mat.adjoint() * mat; VERIFY_IS_APPROX(mat.adjoint() * mat, expected); 
VERIFY_IS_APPROX(row_major_result, expected); VERIFY_IS_APPROX(col_major_result, expected); } EIGEN_DECLARE_TEST(fastmath) { std::cout << "*** float *** \n\n"; check_inf_nan<float>(true); Loading @@ -190,4 +210,7 @@ EIGEN_DECLARE_TEST(fastmath) { check_inf_nan<float>(false); check_inf_nan<double>(false); check_inf_nan<long double>(false); CALL_SUBTEST_1(check_complex_rowmajor_adjoint_product<float>()); CALL_SUBTEST_2(check_complex_rowmajor_adjoint_product<double>()); } Loading
Eigen/src/Core/ProductEvaluators.h +24 −8 Original line number Diff line number Diff line Loading @@ -607,18 +607,34 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, static constexpr bool SameType = std::is_same<typename LhsNestedCleaned::Scalar, typename RhsNestedCleaned::Scalar>::value; static constexpr bool CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1); static constexpr bool CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1); #if defined(__FAST_MATH__) && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_STRICT_AT_LEAST(7, 0, 0) && \ (EIGEN_GNUC_STRICT_LESS_THAN(8, 4, 0) || \ (EIGEN_GNUC_STRICT_AT_LEAST(9, 0, 0) && EIGEN_GNUC_STRICT_LESS_THAN(9, 3, 0))) // Work around GCC PR tree-optimization/92420, a reversed-access vectorizer miscompile under -ffast-math. // The bug was introduced by GCC r238039, fixed on the GCC 8 branch by // https://gcc.gnu.org/g:785eda9390473e42f0e0b7199c42032a0432de68 and on the GCC 9 branch by // https://gcc.gnu.org/g:2d8ea3a0a6095a56b7c59c50b1068d602cde934a. // See also GitLab issue #1839. static constexpr bool AllowComplexPacketProduct = !NumTraits<Scalar>::IsComplex; #else static constexpr bool AllowComplexPacketProduct = true; #endif static constexpr bool CanVectorizeRhs = AllowComplexPacketProduct && bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1); static constexpr bool CanVectorizeLhs = AllowComplexPacketProduct && (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1); static constexpr int EvalToRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1 : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1) ? 
0 : (bool(RhsRowMajor) && !CanVectorizeLhs); static constexpr int Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit) | static constexpr int Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit) | (EvalToRowMajor ? RowMajorBit : 0) // TODO: enable vectorization for mixed types | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) | | (SameType && AllowComplexPacketProduct && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0); static constexpr int LhsOuterStrideBytes = Loading @@ -642,7 +658,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. */ static constexpr bool CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) && static constexpr bool CanVectorizeInner = SameType && AllowComplexPacketProduct && LhsRowMajor && (!RhsRowMajor) && (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit) && (int(InnerSize) % packet_traits<Scalar>::size == 0); Loading
test/fastmath.cpp +23 −0 Original line number Diff line number Diff line Loading
@@ -179,6 +179,26 @@ void check_inf_nan(bool dryrun) {
  }
}

// Checks the product mat.adjoint() * mat of a fixed 2x2 row-major complex
// matrix against hand-computed reference values.
// RealScalar is the real type wrapped by std::complex<RealScalar>.
// NOTE(review): presumably the regression test for the GCC PR 92420
// workaround applied to ProductEvaluators.h in the same change - confirm.
template <typename RealScalar>
void check_complex_rowmajor_adjoint_product() {
  typedef std::complex<RealScalar> Scalar;
  typedef Matrix<Scalar, Dynamic, Dynamic, RowMajor> RowMatrix;
  typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> ColMatrix;

  // Input matrix: rows are (1+2i, 3-4i) and (-5+6i, 7+8i).
  RowMatrix mat(2, 2);
  mat << Scalar(1, 2), Scalar(3, -4), Scalar(-5, 6), Scalar(7, 8);

  // Reference value of mat^H * mat: real diagonal entries 66 and 138,
  // off-diagonal entries 8-92i and 8+92i (complex conjugates of each other).
  RowMatrix expected(2, 2);
  expected << Scalar(66, 0), Scalar(8, -92), Scalar(8, 92), Scalar(138, 0);

  // Evaluate the same product into a row-major and a column-major destination.
  const RowMatrix row_major_result = mat.adjoint() * mat;
  const ColMatrix col_major_result = mat.adjoint() * mat;

  // Compare the product expression passed directly, then both stored results.
  VERIFY_IS_APPROX(mat.adjoint() * mat, expected);
  VERIFY_IS_APPROX(row_major_result, expected);
  VERIFY_IS_APPROX(col_major_result, expected);
}

EIGEN_DECLARE_TEST(fastmath) {
  std::cout << "*** float *** \n\n";
  check_inf_nan<float>(true);
Loading
@@ -190,4 +210,7 @@ EIGEN_DECLARE_TEST(fastmath) {
  check_inf_nan<float>(false);
  check_inf_nan<double>(false);
  check_inf_nan<long double>(false);

  // Run the complex adjoint-product check for float and double.
  CALL_SUBTEST_1(check_complex_rowmajor_adjoint_product<float>());
  CALL_SUBTEST_2(check_complex_rowmajor_adjoint_product<double>());
}