## Threadsafe protection against zero state of xoshiro128ss

When using random from multiple threads, it may happen that all elements of `state`

are equal to 0,
which is not valid for Xoshiro128ss.

The problem is that storing into `state[0]`

, `state[1]`

, `state[2]`

, `state[3]`

is not an atomic operation.

Accordingly, the completely correct solution to the problem is to use an atomic store of 16 bytes.

But, firstly, this is not a fast operation on cpu64, and secondly, such an operation is absent in cpu32.

Also, the solution is to use some other RNG instead of Xoshiro128ss, which has a 64-bit state, but this is not the subject of this MR...

Below is an example of how you can get an invalid `state`

, after which `Xoshiro128ss_32.Next`

will always return only `0`

.

## XoshiroThreadsafeBugTest

```
program XoshiroThreadsafeBugTest;
{$mode objfpc}
uses
Classes;
var
RandSeed : Cardinal;
var
OldRandSeed : Cardinal = Cardinal(not Cardinal(0));
type
SplitMix64 = object
procedure Setup(const seed: uint64); inline;
function Next: uint64;
private
state: uint64;
end;
procedure SplitMix64.Setup(const seed: uint64);
begin
state := seed;
end;
function SplitMix64.Next: uint64;
var
z: uint64;
begin
z := state + $9e3779b97f4a7c15;
state := z;
z := (z xor (z shr 30)) * $bf58476d1ce4e5b9;
z := (z xor (z shr 27)) * $94d049bb133111eb;
result := z xor (z shr 31);
end;
type
Xoshiro128ss_32 = object
procedure Setup(const seed: uint64);
function Next: uint32;
private
state: array[0 .. 3] of uint32;
end;
procedure Xoshiro128ss_32.Setup(const seed: uint64);
var
sm: SplitMix64;
x: uint64;
begin
sm.Setup(seed);
x := sm.Next;
state[0] := Lo(x); state[1] := Hi(x);
x := sm.Next;
state[2] := Lo(x); state[3] := Hi(x);
end;
var
BUGFIX: Boolean = false;
threadvar
T1: Boolean;
function Xoshiro128ss_32.Next: uint32;
var
s0, s1, s2, s3: uint32;
begin
s0 := state[0];
s1 := state[1];
s2 := state[2];
s3 := state[3];
{$ifdef FPC_HAS_FEATURE_THREADING}
if BUGFIX then
if s0 or s1 or s2 or s3 = 0 then
s0 := 1;
{$endif}
s2 := s2 xor s0;
s3 := s3 xor s1;
result := RolDWord(s1 * 5, 7) * 9;
state[0] := s0 xor s3;
if T1 then
TThread.Sleep(2000);
state[1] := s1 xor s2;
state[2] := s2 xor (s1 shl 9);
state[3] := RolDWord(s3, 11);
end;
var
GlobalXsr128_32: Xoshiro128ss_32 = (state: ($AFF181C0, $73B13BA2, $1340D3B4, $61204305));
procedure ReseedGlobalRNG;
begin
GlobalXsr128_32.Setup(RandSeed);
RandSeed := not RandSeed;
OldRandSeed := RandSeed;
end;
function xsr128_32_u32rand: uint32;
begin
if RandSeed <> OldRandSeed then ReseedGlobalRNG;
result := GlobalXsr128_32.Next;
end;
procedure TreadProc1;
begin
T1 := True;
xsr128_32_u32rand;
end;
var
Thread1: TThread;
R1, R2, R3, R4: uint32;
begin
//BUGFIX := true; // <-- !!!
//RandSeed := X;
GlobalXsr128_32.state[0] := high(uint32);
GlobalXsr128_32.state[1] := 0;
GlobalXsr128_32.state[2] := high(uint32);
GlobalXsr128_32.state[3] := 0;
OldRandSeed := RandSeed;
Thread1 := TThread.CreateAnonymousThread(@TreadProc1);
Thread1.FreeOnTerminate := False;
Thread1.Start;
TThread.Sleep(1000);
//RandSeed := X;
GlobalXsr128_32.state[0] := high(uint32);
GlobalXsr128_32.state[1] := 0;
GlobalXsr128_32.state[2] := high(uint32);
GlobalXsr128_32.state[3] := 0;
OldRandSeed := RandSeed;
xsr128_32_u32rand;
xsr128_32_u32rand;
xsr128_32_u32rand;
Thread1.WaitFor;
Thread1.Free;
with GlobalXsr128_32 do
if (state[0] = 0) and (state[1] = 0) and (state[2] = 0) and (state[3] = 0) then
begin
R1 := xsr128_32_u32rand;
R2 := xsr128_32_u32rand;
R3 := xsr128_32_u32rand;
R4 := xsr128_32_u32rand;
// ...
WriteLn('R1: ', R1);
WriteLn('R2: ', R2);
WriteLn('R3: ', R3);
WriteLn('R4: ', R4);
if (R1 = R2) and (R2 = R3) and (R3 = R4) then
Halt(1); // BUG
end;
end.
```

A workaround and simple solution to the problem is after reading `state`

into local variables, check `s0`

, `s1`

, `s2`

, `s3`

and if all are equal to `0`

, then fix this, for example, by setting `s0`

to `1`

.

Related to #38237 (closed)