Simplification to div/mod nodes (remove comparison against -1)

Summary

This merge request removes the comparison against -1 when processing div/mod nodes. That comparison ultimately causes a slowdown, since it adds an extra compare and conditional branch to every division even though a divisor of -1 is comparatively uncommon. The issue of downsizing, where min_int div -1 must return min_int, no longer seems to trigger.

An additional commit, a pure refactor, also makes an internal comparison in tmoddivnode.simplify more efficient.
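
For context, the removed guard handled the one divisor value where hardware division misbehaves. A minimal Pascal sketch of the affected construct (illustrative only, not code from the patch):

	function DivByVariable(n, d: LongInt): LongInt;
	begin
	  { With a run-time divisor, the compiler previously emitted
	    "cmp d, -1; jne ..." so that Low(LongInt) div -1 could be
	    computed as a negation (wrapping back to Low(LongInt))
	    instead of letting idiv raise an overflow exception.
	    After this patch the division compiles to a plain idiv. }
	  DivByVariable := n div d;
	end;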

System

  • Processor architecture: All (but fully tested on i386 and x86_64)

What is the current bug behavior?

N/A

What is the behavior after applying this patch?

Expressions that perform an integer division (including mod) by a variable should now run faster and compile into smaller code. x86_64-win64 and i386-win32 have been tested and show no regressions.
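
The kind of expression that benefits is any division whose divisor is only known at run time. A hypothetical example (names are illustrative, not the actual ZipUtils code):

	function RecordCount(totalBytes, recSize: LongInt): LongInt;
	begin
	  { recSize is a variable, so the division cannot be reduced to a
	    shift or multiplication; it now compiles to a single idiv with
	    no preceding comparison against -1. }
	  RecordCount := totalBytes div recSize;
	end;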

Relevant logs and/or screenshots

A simple example in the ZipUtils unit (x86_64-win64 -O4) - before:

.section .text.n_ziputils_$$_fread$pointer$longint$longint$fileptr$$longint,"ax"
	...
	cmpl	$-1,%ebx
	jne	.Lj24
	movl	32(%rsp),%ecx
	negl	%ecx
	movl	%ecx,%eax
	jmp	.Lj27
	.p2align 4,,10
	.p2align 3
.Lj24:
	movl	32(%rsp),%eax
	cltd
	idivl	%ebx
	jmp	.Lj27
	...
.Lj27:
	...

After:

.section .text.n_ziputils_$$_fread$pointer$longint$longint$fileptr$$longint,"ax"
	...
	movl	32(%rsp),%eax
	cltd
	idivl	%ebx
	jmp	.Lj24
	...
.Lj24:
	...

A long-winded example in the Graph unit - before:

.Lj14:
	...
	movq	-32(%rbp),%rax
	movswl	(%rax),%ecx
	movq	-16(%rbp),%rdx
	movswl	(%rdx),%eax
	movq	-24(%rbp),%rdx
	movswl	(%rdx),%r8d
	subl	%r8d,%eax
	cmpl	$-1,%eax
	jne	.Lj22
	movq	-8(%rbp),%rax
	movswl	(%rax),%r9d
	movq	-32(%rbp),%rdx
	movswl	(%rdx),%eax
	subl	%eax,%r9d
	movswl	48(%rbp),%eax
	movq	-24(%rbp),%r8
	movswl	(%r8),%edx
	subl	%edx,%eax
	imull	%eax,%r9d
	movl	%r9d,%r8d
	negl	%r8d
	jmp	.Lj23
	.p2align 4,,10
	.p2align 3
.Lj22:
	movq	-8(%rbp),%rdx
	movswl	(%rdx),%eax
	movq	-32(%rbp),%r9
	movswl	(%r9),%edx
	subl	%edx,%eax
	movswl	48(%rbp),%r9d
	movq	-24(%rbp),%r10
	movswl	(%r10),%edx
	subl	%edx,%r9d
	imull	%r9d,%eax
	movq	-16(%rbp),%r10
	movswl	(%r10),%r9d
	subl	%edx,%r9d
	cltd
	idivl	%r9d
	movl	%eax,%r8d
.Lj23:
	leal	(%ecx,%r8d),%eax
	movw	%ax,%r12w
	movw	48(%rbp),%r13w
	jmp	.Lj24
	.p2align 4,,10
	.p2align 3
.Lj20:
	...

After:

.Lj14:
	...
	movq	-16(%rbp),%rdx
	movswl	(%rdx),%ecx
	movq	-32(%rbp),%rdx
	movswl	(%rdx),%eax
	subl	%ecx,%eax
	movswl	48(%rbp),%r9d
	movq	-8(%rbp),%r8
	movswl	(%r8),%edx
	subl	%edx,%r9d
	imull	%r9d,%eax
	movq	-24(%rbp),%r9
	movswl	(%r9),%r8d
	subl	%edx,%r8d
	cltd
	idivl	%r8d
	addl	%ecx,%eax
	movw	%ax,%r12w
	movw	48(%rbp),%r13w
	jmp	.Lj21 ; <-- Same destination as .Lj24 above.
	.p2align 4,,10
	.p2align 3
.Lj20:
	...
