Vector4f result = a1*v1 + a2*v2 compiled like ass by eigen3; much better with eigen2
Submitted by Benoit Jacob
Assigned to Gael Guennebaud @ggael
Link to original bugzilla bug (#203)
Operating system: Linux
Description
This test program:
#include <Eigen/Core>
using namespace Eigen;
void foo(float a1, const Vector4f& v1,   // test case: result = a1*v1 + a2*v2
         float a2, const Vector4f& v2,
         Vector4f& result)
{
  asm volatile("#begin");   // marker comment emitted into the -S output before the expression
  result = a1*v1 + a2*v2;   // scalar * vector products, summed and stored into result
  asm volatile("#end");     // marker comment emitted into the -S output after the expression
}
compiled like this with eigen3 and gcc 4.4.5 x86-64 linux:
$ g++ -c -S -O2 -I eigen derf.cpp -DNDEBUG -o derf.s
gives this crappy assembly (note the extra xorps + movss before each pshufd broadcast):
#APP
# 9 "derf.cpp" 1
#begin
# 0 "" 2
#NO_APP
xorps %xmm2, %xmm2
movss %xmm1, %xmm2
pshufd $0, %xmm2, %xmm1
xorps %xmm2, %xmm2
mulps (%rsi), %xmm1
movss %xmm0, %xmm2
pshufd $0, %xmm2, %xmm0
mulps (%rdi), %xmm0
addps %xmm1, %xmm0
movaps %xmm0, (%rdx)
#APP
# 11 "derf.cpp" 1
#end
# 0 "" 2
#NO_APP
while with eigen2, it gives this good assembly:
#APP
# 9 "derf.cpp" 1
#begin
# 0 "" 2
#NO_APP
shufps $0, %xmm1, %xmm1
shufps $0, %xmm0, %xmm0
mulps (%rsi), %xmm1
mulps (%rdi), %xmm0
addps %xmm1, %xmm0
movaps %xmm0, (%rdx)
#APP
# 11 "derf.cpp" 1
#end
# 0 "" 2
#NO_APP