Wrong optimization of register shifts at -O2 and -O3
## Summary <!-- Summarize the bug encountered concisely --> A sequence of `d := d shr 8` statements is wrongly optimized, resulting in incorrect generated assembly and a wrong result at run time. ## System Information Tested on Linux x86_64 with FPC trunk. Compiling with -O3 or -O2 produces the wrong result. Compiling with -O1 or -O0 produces the correct result. ## Steps to reproduce <!-- How one can reproduce the issue - this is very important! --> Compile and run the following code snippet: ``` function strspn(s, accept: pointer): integer; var p: PCardinal; c: AnsiChar; d: cardinal; begin // returns size of initial segment of s which are in accept result := 0; repeat c := PAnsiChar(s)[result]; if c = #0 then break; p := accept; repeat // stop as soon as we find any character not from accept d := p^; inc(p); if AnsiChar(d) = c then break else if AnsiChar(d) = #0 then exit; d := d shr 8; if AnsiChar(d) = c then break else if AnsiChar(d) = #0 then exit; d := d shr 8; if AnsiChar(d) = c then break else if AnsiChar(d) = #0 then exit; d := d shr 8; if AnsiChar(d) = c then break else if AnsiChar(d) = #0 then exit; until false; inc(result); until false; end; procedure TestIt; begin writeln(strspn(PAnsiChar('abcdef'), PAnsiChar('debca'))); // it should return 5 - but 1 is returned end; ``` ## What is the current bug behavior? <!-- What actually happens --> It displays 1 when compiled with -O3 or -O2. When compiled with -O1 or -O0, it displays 5 as expected. ## What is the expected (correct) behavior? <!-- What you should see instead --> It should display the number 5, as was the case with FPC 3.2.2, and as is still the case with -O1 or -O0. 
## Relevant logs and/or screenshots Here is the generated asm, which is clearly wrong: ``` /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6221 result := 0; 00000000004D2E60 31C0 xor eax,eax 00000000004D2E62 660F1F440000 nop word ptr [rax+rax+$00] /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6223 c := PAnsiChar(s)[result]; 00000000004D2E68 4863D0 movsxd rdx,eax 00000000004D2E6B 448A0C17 mov r9b,[rdx+rdi] /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6224 if c = #0 then 00000000004D2E6F 4584C9 test r9b,r9b 00000000004D2E72 7445 jz +$45 # $00000000004D2EB9 strspn+89 mormot.core.unicode.pas:6252 /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6226 p := accept; 00000000004D2E74 4889F1 mov rcx,rsi 00000000004D2E77 660F1F840000000000 nop word ptr [rax+rax+$00000000] /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6228 d := p^; 00000000004D2E80 448B01 mov r8d,[rcx] /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6229 inc(p); 00000000004D2E83 4883C104 add rcx,$04 /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6230 if AnsiChar(d) = c then 00000000004D2E87 4538C8 cmp r8b,r9b 00000000004D2E8A 7428 jz +$28 # $00000000004D2EB4 strspn+84 mormot.core.unicode.pas:6250 /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6232 else if AnsiChar(d) = #0 then 00000000004D2E8C 4584C0 test r8b,r8b 00000000004D2E8F 7428 jz +$28 # $00000000004D2EB9 strspn+89 mormot.core.unicode.pas:6252 /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6234 d := d shr 8; 00000000004D2E91 41C1E818 shr r8d,$18 /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6235 if AnsiChar(d) = c then 00000000004D2E95 4538C8 cmp r8b,r9b 00000000004D2E98 741A jz +$1A # $00000000004D2EB4 strspn+84 mormot.core.unicode.pas:6250 /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6237 else if AnsiChar(d) = #0 then 00000000004D2E9A 4584C0 test r8b,r8b 00000000004D2E9D 741A jz +$1A # $00000000004D2EB9 strspn+89 
mormot.core.unicode.pas:6252 /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6240 if AnsiChar(d) = c then 00000000004D2E9F 4538C8 cmp r8b,r9b 00000000004D2EA2 7410 jz +$10 # $00000000004D2EB4 strspn+84 mormot.core.unicode.pas:6250 /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6242 else if AnsiChar(d) = #0 then 00000000004D2EA4 4584C0 test r8b,r8b 00000000004D2EA7 7410 jz +$10 # $00000000004D2EB9 strspn+89 mormot.core.unicode.pas:6252 /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6245 if AnsiChar(d) = c then 00000000004D2EA9 4538C8 cmp r8b,r9b 00000000004D2EAC 7406 jz +$06 # $00000000004D2EB4 strspn+84 mormot.core.unicode.pas:6250 /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6247 else if AnsiChar(d) = #0 then 00000000004D2EAE 4584C0 test r8b,r8b 00000000004D2EB1 75CD jnz -$33 # $00000000004D2E80 strspn+32 mormot.core.unicode.pas:6228 /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6249 until false; 00000000004D2EB3 C3 ret /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6250 inc(result); 00000000004D2EB4 83C001 add eax,$01 /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6251 until false; 00000000004D2EB7 EBAF jmp -$51 # $00000000004D2E68 strspn+8 mormot.core.unicode.pas:6223 /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6252 end; 00000000004D2EB9 C3 ret ``` The code reads the `accept` string 32 bits at a time, then tests the lowest 8 bits of that value four times, applying a `shr 8` between consecutive tests. The three `d := d shr 8` statements were wrongly merged into a single 24-bit shift (`shr r8d,$18`) emitted at the first shift, and no shift instruction is generated for the two later ones (source lines 6239 and 6244 are absent from the listing above), so the second, third and fourth tests all compare the top byte of the loaded value: ``` /home/ab/github/mORMot2/src/core/mormot.core.unicode.pas:6234 d := d shr 8; 00000000004D2E91 41C1E818 shr r8d,$18 ```
issue