Vector4f result = a1*v1 + a2*v2 compiled like ass by eigen3; much better with eigen2

Submitted by Benoit Jacob

Assigned to Gael Guennebaud

Link to original bugzilla bug (#203)
Operating system: Linux

Description

This test program:

#include <Eigen/Core>

using namespace Eigen;

void foo(float a1, const Vector4f& v1,

     float a2, const Vector4f& v2,  

     Vector4f& result)  

{

asm volatile("#begin");

result = a1*v1 + a2*v2;

asm volatile("#end");

}

compiled like this with eigen3 and gcc 4.4.5 x86-64 linux:

$ g++ -c -S -O2 -I eigen derf.cpp -DNDEBUG -o derf.s

gives this crappy assembly:

#APP

# 9 "derf.cpp" 1

    #begin  

# 0 "" 2

#NO_APP

    xorps   %xmm2, %xmm2  

    movss   %xmm1, %xmm2  

    pshufd  $0, %xmm2, %xmm1  

    xorps   %xmm2, %xmm2  

    mulps   (%rsi), %xmm1  

    movss   %xmm0, %xmm2  

    pshufd  $0, %xmm2, %xmm0  

    mulps   (%rdi), %xmm0  

    addps   %xmm1, %xmm0  

    movaps  %xmm0, (%rdx)  

#APP

# 11 "derf.cpp" 1

    #end  

# 0 "" 2

#NO_APP

while with eigen2, it gives this good assembly:

#APP

# 9 "derf.cpp" 1

    #begin  

# 0 "" 2

#NO_APP

    shufps  $0, %xmm1, %xmm1  

    shufps  $0, %xmm0, %xmm0  

    mulps   (%rsi), %xmm1  

    mulps   (%rdi), %xmm0  

    addps   %xmm1, %xmm0  

    movaps  %xmm0, (%rdx)  

#APP

# 11 "derf.cpp" 1

    #end  

# 0 "" 2

#NO_APP

Blocking

#25 (closed)