bind(c) single precision complex argument by value ABI issue
After !1223 (merged), passing double precision complex arguments by value works.
Passing single precision complex arguments by value does not. The issue is that the x86-64 ABI expects the `{float, float}` struct (which represents `float complex`) to be packed into 64 bits and passed in the `xmm0` register. That is what the C code expects. Our LLVM backend, however, passes the two floats separately in the `xmm0` and `xmm1` registers. Expressed as C code, our LLVM backend effectively generates this signature:
f(float a, float b)
and passes `a` in `xmm0` and `b` in `xmm1`.
But the `float _Complex` type in C is actually compiled to:
f(double coerced_ab)
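and so both `a` and `b` are passed together in the single `xmm0` register. (In the LLVM IR that Clang emits, shown below, this coerced 64-bit argument appears as `<2 x float>`.)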
We have to update our LLVM backend to produce the same code as the Clang compiler.
To test the Clang compiler, one can do:
struct float_complex {
float re, im;
};
typedef struct float_complex float_complex_t;
int f_int_double_complex_value(int a, float_complex_t b) {
return a + b.re + b.im;
}
Generate LLVM IR by:
clang -S -emit-llvm test_float_complex.c -o a.ll
and view `a.ll`:
; ModuleID = 'test_float_complex.c'
source_filename = "test_float_complex.c"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx11.0.0"
%struct.float_complex = type { float, float }
; Function Attrs: noinline nounwind optnone ssp uwtable
define i32 @f_int_double_complex_value(i32 %0, <2 x float> %1) #0 {
%3 = alloca %struct.float_complex, align 4
%4 = alloca i32, align 4
%5 = bitcast %struct.float_complex* %3 to <2 x float>*
store <2 x float> %1, <2 x float>* %5, align 4
store i32 %0, i32* %4, align 4
%6 = load i32, i32* %4, align 4
%7 = sitofp i32 %6 to float
%8 = getelementptr inbounds %struct.float_complex, %struct.float_complex* %3, i32 0, i32 0
%9 = load float, float* %8, align 4
%10 = fadd float %7, %9
%11 = getelementptr inbounds %struct.float_complex, %struct.float_complex* %3, i32 0, i32 1
%12 = load float, float* %11, align 4
%13 = fadd float %10, %12
%14 = fptosi float %13 to i32
ret i32 %14
}
attributes #0 = { noinline nounwind optnone ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "darwin-stkchk-strong-link" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="64" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "probe-stack"="___chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0, !1, !2}
!llvm.ident = !{!3}
!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 1]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{i32 7, !"PIC Level", i32 2}
!3 = !{!"Apple clang version 12.0.0 (clang-1200.0.32.29)"}
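See the `%5 = bitcast %struct.float_complex* %3 to <2 x float>*` line. We have to do the same: receive the single precision complex argument as one `<2 x float>` value and bitcast the `{float, float}` struct storage to store it.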
After this is fixed, uncomment the line: https://gitlab.com/lfortran/lfortran/-/blob/675b54aa5d4e7fd1c729cd189df4e132f26055a7/integration_tests/modules_15.f90#L37