Skip to content

matching on ordered enum variants emits too many branches #139702

@Rudxain

Description

@Rudxain

It seems LLVM cannot recognize that the order of the enum variants matches the order of the tuple fields, so for code that could be branchless it emits a jump-table dispatch with a separate branch target per variant:

Playground (link to the Rust source). Assembly generated with rustc 1.88.0-nightly (2025-04-07 e643f59):

# f0 -- counts input bytes by value, dispatching every byte through jump table
# .LJTI0_0 (compiler output quoted in the report; AT&T syntax, unmodified).
# Register roles as used in this listing:
#   %rdi   on entry: address the 16-byte result is written to (copied to %rax);
#          afterwards reused as the base address of .LJTI0_0
#   %rsi   base address of the input bytes
#   %rdx   number of input bytes
#   %rcx   index of the current byte
#   %r11d, %r10d, %r9d, %r8d   32-bit counters stored at result offsets 0, 4, 8, 12
#   %rbx   scratch for the computed jump target
# Every loop iteration executes one indirect jump plus a compare-and-branch.
# NOTE(review): the table has only 5 entries, so byte values above 4 are presumably
# ruled out by the source type -- confirm against the Rust source.
f0:                                     # @f0
# %bb.0:
	pushq	%rbx	# %rbx is overwritten below; it is popped again before each retq
	movq	%rdi, %rax	# keep the result address in %rax for the final stores
	testq	%rdx, %rdx
	je	.LBB0_2	# zero input bytes: go store an all-zero result
# %bb.1:
	xorl	%ecx, %ecx	# byte index = 0
	movzbl	(%rsi,%rcx), %r8d	# first input byte, zero-extended
	leaq	.LJTI0_0(%rip), %rdi	# %rdi = jump table base (the original %rdi now lives in %rax)
	movslq	(%rdi,%r8,4), %rbx	# sign-extended 32-bit entry: target offset relative to the table
	addq	%rdi, %rbx	# absolute target = table base + offset
	xorl	%r8d, %r8d	# zero the four counters before entering the loop
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	xorl	%r11d, %r11d
	jmpq	*%rbx	# dispatch on the first byte

# Empty-input path: write 16 zero bytes to the result and return.
.LBB0_2:
	xorps	%xmm0, %xmm0
	movups	%xmm0, (%rax)
	popq	%rbx
	retq

# Loop dispatch: load the byte at index %rcx and jump to its counter block.
.LBB0_3:
	movzbl	(%rsi,%rcx), %ebx
	movslq	(%rdi,%rbx,4), %rbx
	addq	%rdi, %rbx
	jmpq	*%rbx

# Table entries 0 and 4: bump the counter stored at result offset 0.
.LBB0_4:
	incl	%r11d
	incq	%rcx	# advance to the next byte
	cmpq	%rcx, %rdx
	jne	.LBB0_3	# bytes remain: dispatch again
	jmp	.LBB0_9	# all bytes consumed: store the counters

# Table entry 3: bump the counter stored at result offset 12.
.LBB0_7:
	incl	%r8d
	incq	%rcx
	cmpq	%rcx, %rdx
	jne	.LBB0_3
	jmp	.LBB0_9

# Table entry 1: bump the counter stored at result offset 4.
.LBB0_5:
	incl	%r10d
	incq	%rcx
	cmpq	%rcx, %rdx
	jne	.LBB0_3
	jmp	.LBB0_9

# Table entry 2: bump the counter stored at result offset 8.
# When the input is exhausted this block falls through into .LBB0_9.
.LBB0_6:
	incl	%r9d
	incq	%rcx
	cmpq	%rcx, %rdx
	jne	.LBB0_3

# Exit: write the four counters to the result, restore %rbx, return with %rax = result address.
.LBB0_9:
	movl	%r11d, (%rax)
	movl	%r10d, 4(%rax)
	movl	%r9d, 8(%rax)
	movl	%r8d, 12(%rax)
	popq	%rbx
	retq

# Jump table: one 32-bit table-relative offset per byte value 0..4.
.LJTI0_0:
	.long	.LBB0_4-.LJTI0_0	# value 0 -> counter at offset 0
	.long	.LBB0_5-.LJTI0_0	# value 1 -> counter at offset 4
	.long	.LBB0_6-.LJTI0_0	# value 2 -> counter at offset 8
	.long	.LBB0_7-.LJTI0_0	# value 3 -> counter at offset 12
	.long	.LBB0_4-.LJTI0_0	# value 4 -> counter at offset 0 (same block as value 0)
                                        # -- End function

# f1 -- counts input bytes by value using an indexed memory increment instead of a
# jump table (compiler output quoted in the report; AT&T syntax, unmodified).
# Register roles as used in this listing:
#   %rdi   on entry: address the 16-byte result is written to (copied to %rax);
#          reused inside the loop as the current byte / counter index
#   %rsi   address of the current input byte (incremented each iteration)
#   %rdx   number of bytes still to process
#   %ecx   constant 0, substituted as the index when the byte equals 4
# The four 32-bit counters are kept in 16 bytes at -40(%rsp), with a working copy
# at -24(%rsp); %rsp itself is never adjusted in this function.
# The only conditional jumps are the empty-input test and the loop back-edge.
f1:                                     # @f1
# %bb.0:
	movq	%rdi, %rax	# keep the result address in %rax
	xorps	%xmm0, %xmm0	# %xmm0 = 16 zero bytes (also the result for empty input)
	testq	%rdx, %rdx
	je	.LBB1_4	# zero input bytes: store the zeroed %xmm0 directly
# %bb.1:
	movaps	%xmm0, -40(%rsp)	# initialise the four counters to zero
	xorl	%ecx, %ecx	# %ecx = 0, the replacement index used by cmovel below

# Per byte: copy the counters to the working slot, increment one, copy them back.
.LBB1_2:                                # =>This Inner Loop Header: Depth=1
	movaps	-40(%rsp), %xmm0
	movaps	%xmm0, -24(%rsp)	# working copy of the counters
	movq	%rsi, -8(%rsp)	# stores the current byte address; not read back within this listing
	movzbl	(%rsi), %edi	# current input byte, zero-extended
	cmpb	$4, %dil
	cmovel	%ecx, %edi	# byte value 4 is remapped to index 0
	movzbl	%dil, %edi
	incl	-24(%rsp,%rdi,4)	# increment the 32-bit counter selected by the index
	movaps	-24(%rsp), %xmm0
	movaps	%xmm0, -40(%rsp)	# write the updated counters back
	incq	%rsi	# next input byte
	decq	%rdx
	jne	.LBB1_2	# loop while bytes remain
# %bb.3:
	movaps	-40(%rsp), %xmm0	# final counters

# Exit: store the 16-byte result and return with %rax = result address.
.LBB1_4:
	movups	%xmm0, (%rax)
	retq
                                        # -- End function

f1 is equivalent to f0, but rewritten so that the numeric value of each variant is used as an index into an array. Even though f1 uses indexing, the optimizer still eliminated the bounds check.

@rustbot label: +I-slow

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-LLVM — Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
    C-optimization — Category: An issue highlighting optimization opportunities or PRs implementing such
    T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions