Skip to content

UnicodeToUtf8 and Utf8ToUnicode give unexpected results.

{$codepage utf8}

var a1 : array [0..99] of byte;

procedure pritnA1;
{ Prints the global buffer a1 as two-digit hex values, each followed by a
  space, and then terminates the line. Output stops at the first byte equal
  to $ff or once all 100 entries (indices 0..99) have been printed. }
var idx : longint;
begin
   for idx:=0 to 99 do
   begin
     { $ff is treated as the end-of-data marker: nothing from here on is shown }
     if a1[idx]=$ff then break;
     write( hexstr(a1[idx],2),' ');
   end;
   writeln;
end;

{ Reproduction program: calls UnicodeToUtf8 and Utf8ToUnicode three times each
  (nil destination, destination limited to the previously returned length,
  destination limited to one less than that) and prints the returned value
  and the bytes left in a1 after each buffered call. }
var uS : UnicodeString;
    len : longint;
    u8 : Utf8String;

begin

   { Same two-character literal stored once as UnicodeString and once as Utf8String }
   uS := 'ĀĀ';  {  Unicode 256 }
   u8 := 'ĀĀ';

   writeln('Test  UnicodeToUtf8 ');
   { Call 1: nil destination with a limit of 100; only the returned value is printed.
     The reported output shows 5 here - presumably the required size including a
     terminating zero; confirm against the RTL documentation. }
   len:=  UnicodeToUtf8 (nil,100,@uS[1],Length(uS));
   writeln(len);

   { Pre-fill a1 with $ff so pritnA1 stops at the first byte the conversion did not write }
   FillChar(a1,sizeof(a1),$ff);
   { Call 2: real destination, limit equal to the value returned by call 1.
     Reported output: 5 and the bytes C4 80 C4 80 00. }
   len:=  UnicodeToUtf8 (@a1,len,@uS[1],Length(uS));
   writeln(len);
   pritnA1;

   writeln;

   FillChar(a1,sizeof(a1),$ff);

   { Call 3: limit is one less than the value returned by call 2.
     Reported output: 4 and the bytes C4 80 C4 00, i.e. the second character is
     cut in half and a zero byte is still stored - this is the behaviour the
     reporter objects to in the message printed below. }
   len:=  UnicodeToUtf8 (@a1,  len-1  ,@uS[1],Length(uS));
   writeln(len);                 { ^  shorter by one  }


   pritnA1;
   writeln('         ^-- nil is added at the end but should not be');
   writeln;

   writeln('Test  Utf8ToUnicode');
   { Call 1: nil destination with a limit of 50; only the returned value is printed.
     Reported output: 3. }
   len:=  Utf8ToUnicode (nil,50,@u8[1],Length(u8));
   writeln(len);

   FillChar(a1,sizeof(a1),$ff);
   { Call 2: real destination, limit equal to the value returned by call 1.
     Reported output: 3 and the bytes 00 01 00 01 00 00. The dump is byte-wise,
     so this is six bytes for a returned count of 3 - presumably three
     two-byte characters including a terminating zero; TODO confirm. }
   len:=  Utf8ToUnicode (@a1,len,@u8[1],Length(u8));
   writeln(len);
   pritnA1;
   writeln;

   FillChar(a1,sizeof(a1),$ff);
   { Call 3: limit is one less than the value returned by call 2.
     Reported output: the call still returns 3 although only the four bytes
     00 01 00 01 were stored; the reporter expects 2. }
   len:=  Utf8ToUnicode (@a1, len-1 ,@u8[1],Length(u8));
                                { ^  shorter by one  }

   writeln(len,' <-- should be  2');  { <-- what? as if null is there }
   pritnA1;
   writeln;
end.

fpc 3.3.1 output

Test  UnicodeToUtf8 
5
5
C4 80 C4 80 00 

4
C4 80 C4 00 
         ^-- nil is added at the end but should not be

Test  Utf8ToUnicode
3
3
00 01 00 01 00 00 

3 <-- should be  2
00 01 00 01 
To upload designs, you'll need to enable LFS and have an admin enable hashed storage. More information