Overloads Are Slow.
Maybe it does not have a solution at all (I saw somewhere on the bugtracker, “Overloads are O(N²) and I don't know how to do them faster”), but this program, which models a less severe case than the one I actually had:
// Benchmark switch. As written, the next line is an ordinary comment; remove the '-'
// to make it an active directive. With `faster` defined, every call site goes through
// the record's static method; otherwise it goes through the overloaded global function.
{-$define faster}
// ---
// advancedrecords: needed for the method declared inside the records generated by vecimpl.
// anonymousfunctions: needed for the inline procedure literals in the main block.
// macro on: needed for every `:=` text macro in this file.
{$mode objfpc} {$modeswitch advancedrecords} {$modeswitch anonymousfunctions} {$macro on}
// maximpl expands to one complete call statement on the locals a and b (result discarded).
{$ifdef faster} {$define maximpl := a.max(a, b);}
{$else} {$define maximpl := max(a, b);}
{$endif}
// vecimpl is a text macro. Each expansion uses the current value of the `vec` macro as a
// type name and emits:
//   - a record with a single int32 field x and a static class function max,
//   - the implementation of that static max,
//   - a global function max overloaded for that record type.
// Both max variants compare only field x and set only result.x. The static method takes
// its arguments by value, while the global overload takes them as const.
{$define vecimpl :=
type
vec = record
x: int32;
class function max(a, b: vec): vec; static;
end;
class function vec.max(a, b: vec): vec;
begin
if a.x > b.x then result.x := a.x else result.x := b.x;
end;
function max(const a, b: vec): vec;
begin
if a.x > b.x then result.x := a.x else result.x := b.x;
end;}
// Expand vecimpl once per type name: 11 element suffixes x 3 component counts = 33 records,
// and therefore 33 overloads of the global max. Only the names differ; every record has
// the same layout.
{$define vec := Vec2i8} vecimpl {$define vec := Vec3i8} vecimpl {$define vec := Vec4i8} vecimpl
{$define vec := Vec2i16} vecimpl {$define vec := Vec3i16} vecimpl {$define vec := Vec4i16} vecimpl
{$define vec := Vec2i32} vecimpl {$define vec := Vec3i32} vecimpl {$define vec := Vec4i32} vecimpl
{$define vec := Vec2i64} vecimpl {$define vec := Vec3i64} vecimpl {$define vec := Vec4i64} vecimpl
{$define vec := Vec2u8} vecimpl {$define vec := Vec3u8} vecimpl {$define vec := Vec4u8} vecimpl
{$define vec := Vec2u16} vecimpl {$define vec := Vec3u16} vecimpl {$define vec := Vec4u16} vecimpl
{$define vec := Vec2u32} vecimpl {$define vec := Vec3u32} vecimpl {$define vec := Vec4u32} vecimpl
{$define vec := Vec2u64} vecimpl {$define vec := Vec3u64} vecimpl {$define vec := Vec4u64} vecimpl
{$define vec := Vec2f16} vecimpl {$define vec := Vec3f16} vecimpl {$define vec := Vec4f16} vecimpl
{$define vec := Vec2f32} vecimpl {$define vec := Vec3f32} vecimpl {$define vec := Vec4f32} vecimpl
{$define vec := Vec2f64} vecimpl {$define vec := Vec3f64} vecimpl {$define vec := Vec4f64} vecimpl
{$undef vec} {$undef vecimpl}
// Main block: a generator for a very large number of max call sites.
// rep1..rep4 are nested text macros; each wraps an anonymous procedure that is cast to
// TProcedure and invoked immediately, and each is pasted ten times before being undefined.
// The innermost procedure (rep4) declares two Vec4f64 locals and contains 20 maximpl
// expansions, so the whole program contains 20 x 10 x 10 x 10 x 10 = 200,000 calls.
begin
{$define rep1 :=
TProcedure(procedure
begin
{$define rep2 :=
TProcedure(procedure
begin
{$define rep3 :=
TProcedure(procedure
begin
{$define rep4 :=
TProcedure(procedure
var
a, b: Vec4f64;
begin
a.x := 0; b.x := 0;
maximpl maximpl maximpl maximpl maximpl maximpl maximpl maximpl maximpl maximpl
maximpl maximpl maximpl maximpl maximpl maximpl maximpl maximpl maximpl maximpl // 20 ‘max’es.
end)();} rep4 rep4 rep4 rep4 rep4 rep4 rep4 rep4 rep4 rep4 {$undef rep4} // × 10 = 200 ‘max’es.
end)();} rep3 rep3 rep3 rep3 rep3 rep3 rep3 rep3 rep3 rep3 {$undef rep3} // × 10 = 2,000 ‘max’es.
end)();} rep2 rep2 rep2 rep2 rep2 rep2 rep2 rep2 rep2 rep2 {$undef rep2} // × 10 = 20,000 ‘max’es.
end)();} rep1 rep1 rep1 rep1 rep1 rep1 rep1 rep1 rep1 rep1 {$undef rep1} // × 10 = 200,000 ‘max’es.
end.
takes 45 seconds to compile as is (so about 0.2 ms per max call) and 4.5 seconds with {$define faster}, which replaces each call to the overloaded global max with a call to the vector’s own static method.