Skip to content

[AArch64] B (unconditional jump) -> RET peephole optimisation

Summary

This merge request adds a new peephole optimisation to the AArch64 compiler: an unconditional branch (B) is converted into a RET instruction when it targets a label whose next instruction is RET, so the routine returns immediately instead of first branching to the return.

System

  • Operating system: Linux (Raspberry Pi OS) and others
  • Processor architecture: AArch64
  • Device: Raspberry Pi and others

What is the current bug behavior?

N/A

What is the behavior after applying this patch?

Unconditional branches to the end of simple subroutines (those with no epilogue) are converted into RET instructions.

Relevant logs and/or screenshots

For a very simple case in aoptutils (-O4) - before:

.section .text.n_aoptutils_$$_matchoptype$taicpu$toptype$$boolean,"ax"
	.balign 8
.globl	AOPTUTILS_$$_MATCHOPTYPE$TAICPU$TOPTYPE$$BOOLEAN
	.type	AOPTUTILS_$$_MATCHOPTYPE$TAICPU$TOPTYPE$$BOOLEAN,@function
AOPTUTILS_$$_MATCHOPTYPE$TAICPU$TOPTYPE$$BOOLEAN:
.Lc2:
	uxtb	w1,w1
	ldrb	w2,[x0, #115]
	cmp	w2,#1
	b.ne	.Lj6
	ldr	x0,[x0, #64]
	ldrb	w0,[x0, #4]
	cmp	w0,w1
	cset	w0,eq
	b	.Lj8
.Lj6:
	mov	w0,wzr
.Lj8:
	ret
.Lc1:
.Le0:
	.size	AOPTUTILS_$$_MATCHOPTYPE$TAICPU$TOPTYPE$$BOOLEAN, .Le0 - AOPTUTILS_$$_MATCHOPTYPE$TAICPU$TOPTYPE$$BOOLEAN

After (label .Lj8 loses its only reference and is therefore removed):

.section .text.n_aoptutils_$$_matchoptype$taicpu$toptype$$boolean,"ax"
	.balign 8
.globl	AOPTUTILS_$$_MATCHOPTYPE$TAICPU$TOPTYPE$$BOOLEAN
	.type	AOPTUTILS_$$_MATCHOPTYPE$TAICPU$TOPTYPE$$BOOLEAN,@function
AOPTUTILS_$$_MATCHOPTYPE$TAICPU$TOPTYPE$$BOOLEAN:
.Lc2:
	uxtb	w1,w1
	ldrb	w2,[x0, #115]
	cmp	w2,#1
	b.ne	.Lj6
	ldr	x0,[x0, #64]
	ldrb	w0,[x0, #4]
	cmp	w0,w1
	cset	w0,eq
	ret
.Lj6:
	mov	w0,wzr
	ret
.Lc1:
.Le0:
	.size	AOPTUTILS_$$_MATCHOPTYPE$TAICPU$TOPTYPE$$BOOLEAN, .Le0 - AOPTUTILS_$$_MATCHOPTYPE$TAICPU$TOPTYPE$$BOOLEAN

In bufstream - before:

.section .text.n_bufstream$_$tbufferedfilestream_$__$$_setposition$int64,"ax"
	.balign 8
.globl	BUFSTREAM$_$TBUFFEREDFILESTREAM_$__$$_SETPOSITION$INT64
	.type	BUFSTREAM$_$TBUFFEREDFILESTREAM_$__$$_SETPOSITION$INT64,@function
BUFSTREAM$_$TBUFFEREDFILESTREAM_$__$$_SETPOSITION$INT64:
.Lc106:
	cmp	x1,#0
	b.ge	.Lj324
	str	xzr,[x0, #40]
	b	.Lj325
.Lj324:
	str	x1,[x0, #40]
.Lj325:
	ret
.Lc105:
.Le28:
	.size	BUFSTREAM$_$TBUFFEREDFILESTREAM_$__$$_SETPOSITION$INT64, .Le28 - BUFSTREAM$_$TBUFFEREDFILESTREAM_$__$$_SETPOSITION$INT64

After (this case is also a candidate for a future extension of the CSEL optimisations):

.section .text.n_bufstream$_$tbufferedfilestream_$__$$_setposition$int64,"ax"
	.balign 8
.globl	BUFSTREAM$_$TBUFFEREDFILESTREAM_$__$$_SETPOSITION$INT64
	.type	BUFSTREAM$_$TBUFFEREDFILESTREAM_$__$$_SETPOSITION$INT64,@function
BUFSTREAM$_$TBUFFEREDFILESTREAM_$__$$_SETPOSITION$INT64:
.Lc106:
	cmp	x1,#0
	b.ge	.Lj324
	str	xzr,[x0, #40]
	ret
.Lj324:
	str	x1,[x0, #40]
	ret
.Lc105:
.Le28:
	.size	BUFSTREAM$_$TBUFFEREDFILESTREAM_$__$$_SETPOSITION$INT64, .Le28 - BUFSTREAM$_$TBUFFEREDFILESTREAM_$__$$_SETPOSITION$INT64

Merge request reports