Wrong Optimization on -O2 -O3 of register shifts
Summary
Some code was wrongly optimized, resulting in incorrect generated assembly.
System Information
Tested on Linux x86_64 with FPC trunk. Run with -O3 or -O2 would fail. Run with -O1 or -O0 would pass.
Steps to reproduce
Compile and run the following code snippet:
function strspn(s, accept: pointer): integer;
var
p: PCardinal;
c: AnsiChar;
d: cardinal;
begin
// returns size of initial segment of s which are in accept
result := 0;
repeat
c := PAnsiChar(s)[result];
if c = #0 then
break;
p := accept;
repeat // stop as soon as we find any character not from accept
d := p^;
inc(p);
if AnsiChar(d) = c then
break
else if AnsiChar(d) = #0 then
exit;
d := d shr 8;
if AnsiChar(d) = c then
break
else if AnsiChar(d) = #0 then
exit;
d := d shr 8;
if AnsiChar(d) = c then
break
else if AnsiChar(d) = #0 then
exit;
d := d shr 8;
if AnsiChar(d) = c then
break
else if AnsiChar(d) = #0 then
exit;
until false;
inc(result);
until false;
end;
procedure TestIt;
begin
writeln(strspn(PAnsiChar('abcdef'), PAnsiChar('debca')));
// it should return 5 - but 1 is returned
end;
What is the current bug behavior?
It displays 1 with -O3 or -O2. Note that run with -O1 or -O0 would pass and return 5 as expected.
What is the expected (correct) behavior?
It should display the number 5. As it was the case with FPC 3.2.2 or with -O1 or -O0.
Relevant logs and/or screenshots
Here is the generated asm, which is clearly wrong:
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6221 result := 0;
00000000004D2E60 31C0 xor eax,eax
00000000004D2E62 660F1F440000 nop word ptr [rax+rax+$00]
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6223 c := PAnsiChar(s)[result];
00000000004D2E68 4863D0 movsxd rdx,eax
00000000004D2E6B 448A0C17 mov r9b,[rdx+rdi]
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6224 if c = #0 then
00000000004D2E6F 4584C9 test r9b,r9b
00000000004D2E72 7445 jz +$45 # $00000000004D2EB9 strspn+89 mormot.core.unicode.pas:6252
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6226 p := accept;
00000000004D2E74 4889F1 mov rcx,rsi
00000000004D2E77 660F1F840000000000 nop word ptr [rax+rax+$00000000]
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6228 d := p^;
00000000004D2E80 448B01 mov r8d,[rcx]
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6229 inc(p);
00000000004D2E83 4883C104 add rcx,$04
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6230 if AnsiChar(d) = c then
00000000004D2E87 4538C8 cmp r8b,r9b
00000000004D2E8A 7428 jz +$28 # $00000000004D2EB4 strspn+84 mormot.core.unicode.pas:6250
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6232 else if AnsiChar(d) = #0 then
00000000004D2E8C 4584C0 test r8b,r8b
00000000004D2E8F 7428 jz +$28 # $00000000004D2EB9 strspn+89 mormot.core.unicode.pas:6252
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6234 d := d shr 8;
00000000004D2E91 41C1E818 shr r8d,$18
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6235 if AnsiChar(d) = c then
00000000004D2E95 4538C8 cmp r8b,r9b
00000000004D2E98 741A jz +$1A # $00000000004D2EB4 strspn+84 mormot.core.unicode.pas:6250
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6237 else if AnsiChar(d) = #0 then
00000000004D2E9A 4584C0 test r8b,r8b
00000000004D2E9D 741A jz +$1A # $00000000004D2EB9 strspn+89 mormot.core.unicode.pas:6252
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6240 if AnsiChar(d) = c then
00000000004D2E9F 4538C8 cmp r8b,r9b
00000000004D2EA2 7410 jz +$10 # $00000000004D2EB4 strspn+84 mormot.core.unicode.pas:6250
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6242 else if AnsiChar(d) = #0 then
00000000004D2EA4 4584C0 test r8b,r8b
00000000004D2EA7 7410 jz +$10 # $00000000004D2EB9 strspn+89 mormot.core.unicode.pas:6252
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6245 if AnsiChar(d) = c then
00000000004D2EA9 4538C8 cmp r8b,r9b
00000000004D2EAC 7406 jz +$06 # $00000000004D2EB4 strspn+84 mormot.core.unicode.pas:6250
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6247 else if AnsiChar(d) = #0 then
00000000004D2EAE 4584C0 test r8b,r8b
00000000004D2EB1 75CD jnz -$33 # $00000000004D2E80 strspn+32 mormot.core.unicode.pas:6228
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6249 until false;
00000000004D2EB3 C3 ret
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6250 inc(result);
00000000004D2EB4 83C001 add eax,$01
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6251 until false;
00000000004D2EB7 EBAF jmp -$51 # $00000000004D2E68 strspn+8 mormot.core.unicode.pas:6223
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6252 end;
00000000004D2EB9 C3 ret
The code is reading the input per 32-bit values at once, then try the last 8-bit, making a shr 8
between each try.
The several d := d shr 8
were wrongly optimized to:
/home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6234 d := d shr 8;
00000000004D2E91 41C1E818 shr r8d,$18