Improving mul and div for powers of 2
Summary
As of now, FPC optimizes unsigned values div and mul when the right side is a power of 2, eg I div 2 will be changed to I shr 1. But it does not do the optimization for signed values as they may be negative. But as in almost all cases of loops in Pascal, we use signed integers, it can be very helpful, as FPC can analyze the data flow and consider it when the value is always positive, so it can be changed to shl or shr and gain almost 2x speedup.
Example Project
Result in: Lazarus 2.3.0 (rev main-2_3-1428-g9f142eef14) FPC 3.3.1 x86_64-win64-win32/win64 without optimization
program project1;
var
I, V: Cardinal;
begin
I := 0;
V := I shr 1;
V := I div 2;
end.
# [7] V := I shr 1;
shrl $1,%eax
movl %eax,U_$P$PROJECT1_$$_V(%rip)
.Ll4:
# [8] V := I div 2;
movl U_$P$PROJECT1_$$_I(%rip),%eax
shrl $1,%eax
movl %eax,U_$P$PROJECT1_$$_V(%rip)
Same instructions.
Change the type to Integer:
program project1;
var
I, V: Integer;
begin
I := 0;
V := I shr 1;
V := I div 2;
end.
# [7] V := I shr 1;
shrl $1,%eax
movl %eax,U_$P$PROJECT1_$$_V(%rip)
.Ll4:
# [8] V := I div 2;
movslq U_$P$PROJECT1_$$_I(%rip),%rax
movq %rax,%rdx
shrq $63,%rdx
addq %rdx,%rax
sarq $1,%rax
movl %eax,U_$P$PROJECT1_$$_V(%rip)
I understand this, compiler thinks that as the value is integer, it may be negative so it does not use the same simple shr.
Now for this code and even with -O4 the results will be:
program Project1;
uses
SysUtils;
procedure TestDiv;
var
I, V: Integer;
begin
for I := 1 to 1000 * 1000 * 1000 do
V := I div 2;
WriteLn(V);
end;
procedure TestSHR;
var
I, V: Integer;
begin
for I := 1 to 1000 * 1000 * 1000 do
V := I shr 1;
WriteLn(V);
end;
var
T: QWord;
begin
T := GetTickCount64;
TestDiv;
WriteLn('div: ', GetTickCount64 - T);
T := GetTickCount64;
TestSHR;
WriteLn('shr: ', GetTickCount64 - T);
ReadLn;
end.
# [11] V := I div 2;
movslq %eax,%rdx
movq %rdx,%rcx
shrq $63,%rcx
addq %rcx,%rdx
sarq $1,%rdx
movl %edx,%ebx
addl $1,%eax
cmpl $1000000000,%eax
jng .Lj5
# [20] V := I shr 1;
movl %eax,%ebx
shrl $1,%ebx
addl $1,%eax
cmpl $1000000000,%eax
jng .Lj10
and the times are: div: 469 shr: 218
and if I change integer to cardinal:
# [11] V := I div 2;
movl %eax,%ebx
shrl $1,%ebx
addl $1,%eax
cmpl $1000000000,%eax
jna .Lj5
# [20] V := I shr 1;
movl %eax,%ebx
shrl $1,%ebx
addl $1,%eax
cmpl $1000000000,%eax
jna .Lj10
And the times: div: 219 shr: 219
Relevant 3rd party information
https://forum.lazarus.freepascal.org/index.php?topic=58705.new;topicseen#new