-
-
Notifications
You must be signed in to change notification settings - Fork 14.1k
Open
Labels
A-LLVM — Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
C-optimization — Category: An issue highlighting optimization opportunities or PRs implementing such.
T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.
Description
It seems LLVM cannot recognize that the order of the variants matches the order of the tuple fields, so it emits a lot of jumps for code that could be branch-less:
Playground. Assembly generated with 1.88.0-nightly (2025-04-07 e643f59):
# f0 -- compiler-emitted listing (AT&T syntax), quoted verbatim from the report.
# Walks %rdx bytes starting at (%rsi), dispatches on each byte through the jump
# table .LJTI0_0, bumps one of four 32-bit counters, and finally stores the four
# counters to the 16-byte buffer whose address arrived in %rdi.
#
# Register roles as used below:
#   %rdi  in: result buffer address; after the copy to %rax it is reused as the
#             jump-table base
#   %rsi  in: base address of the input bytes
#   %rdx  in: number of input bytes
#   %rax  result buffer address (still holds it at both retq sites)
#   %rcx  index of the current byte
#   %r11d/%r10d/%r9d/%r8d  counters stored at offsets 0/4/8/12 of the result
#   %rbx  computed jump target; saved and restored with pushq/popq
#
# NOTE(review): the loaded byte is used as a table index with no range check in
# this listing; presumably the source type restricts it to 0..=4 -- confirm
# against the Rust source (not shown here).
# NOTE(review): section/alignment directives are absent from this listing.
f0: # @f0
# %bb.0:
pushq %rbx
movq %rdi, %rax # keep the result pointer; %rdi is repurposed below
testq %rdx, %rdx
je .LBB0_2 # zero bytes: take the all-zero result path
# %bb.1:
xorl %ecx, %ecx # index = 0
movzbl (%rsi,%rcx), %r8d # first byte, zero-extended; used only as table index
leaq .LJTI0_0(%rip), %rdi # %rdi = jump-table base
movslq (%rdi,%r8,4), %rbx # table entry = sign-extended (target - table base)
addq %rdi, %rbx # absolute address of the case block
# Zero the four counters (%r8d is free again: its index value is already consumed).
xorl %r8d, %r8d
xorl %r9d, %r9d
xorl %r10d, %r10d
xorl %r11d, %r11d
jmpq *%rbx
# Zero-length input: write 16 zero bytes to the result and return.
.LBB0_2:
xorps %xmm0, %xmm0
movups %xmm0, (%rax)
popq %rbx
retq
# Dispatch for every byte after the first: reload byte at index %rcx and jump
# through the table again (%rdi still holds the table base).
.LBB0_3:
movzbl (%rsi,%rcx), %ebx
movslq (%rdi,%rbx,4), %rbx
addq %rdi, %rbx
jmpq *%rbx
# Case blocks. Each one increments its counter, advances the index, and either
# loops back to the dispatch at .LBB0_3 or leaves to the store sequence at
# .LBB0_9; the loop tail is repeated in every case.
# Table entries 0 and 4: counter stored at offset 0.
.LBB0_4:
incl %r11d
incq %rcx
cmpq %rcx, %rdx
jne .LBB0_3
jmp .LBB0_9
# Table entry 3: counter stored at offset 12.
.LBB0_7:
incl %r8d
incq %rcx
cmpq %rcx, %rdx
jne .LBB0_3
jmp .LBB0_9
# Table entry 1: counter stored at offset 4.
.LBB0_5:
incl %r10d
incq %rcx
cmpq %rcx, %rdx
jne .LBB0_3
jmp .LBB0_9
# Table entry 2: counter stored at offset 8; falls through to the exit when done.
.LBB0_6:
incl %r9d
incq %rcx
cmpq %rcx, %rdx
jne .LBB0_3
# All bytes consumed: write the four counters to the result buffer.
.LBB0_9:
movl %r11d, (%rax)
movl %r10d, 4(%rax)
movl %r9d, 8(%rax)
movl %r8d, 12(%rax)
popq %rbx
retq
# Jump table: five 32-bit offsets relative to the table itself. Entries 0 and 4
# both point at .LBB0_4, so byte values 0 and 4 update the same counter.
.LJTI0_0:
.long .LBB0_4-.LJTI0_0
.long .LBB0_5-.LJTI0_0
.long .LBB0_6-.LJTI0_0
.long .LBB0_7-.LJTI0_0
.long .LBB0_4-.LJTI0_0
# -- End function
# f1 -- compiler-emitted listing (AT&T syntax), quoted verbatim from the report.
# Walks %rdx bytes starting at (%rsi) and, for each byte, increments one of four
# 32-bit counters held in a 16-byte stack slot, selected by indexing with the
# byte value (value 4 is remapped to index 0). The 16 bytes of counters are then
# stored to the buffer whose address arrived in %rdi. There is no jump table;
# the only branches are the empty-input check and the loop back-edge.
#
# Register roles as used below:
#   %rdi  in: result buffer address; reused inside the loop as the counter index
#   %rsi  in: address of the current input byte (advanced each iteration)
#   %rdx  in: number of bytes remaining
#   %rax  result buffer address
#   %ecx  constant 0 (the replacement index for byte value 4)
#   %xmm0 16-byte vector carrying all four counters
#
# NOTE(review): the slots at -40(%rsp)/-24(%rsp)/-8(%rsp) are used without
# adjusting %rsp -- presumably the System V red zone; the movaps accesses also
# assume those addresses are 16-byte aligned. Confirm against the target ABI.
f1: # @f1
# %bb.0:
movq %rdi, %rax # keep the result pointer; %rdi is repurposed in the loop
xorps %xmm0, %xmm0 # all-zero counters (also the result for empty input)
testq %rdx, %rdx
je .LBB1_4 # zero bytes: store the zero vector and return
# %bb.1:
movaps %xmm0, -40(%rsp) # counters slot = {0, 0, 0, 0}
xorl %ecx, %ecx # %ecx = 0, used by the cmov below
.LBB1_2: # =>This Inner Loop Header: Depth=1
# Copy the counters from the -40 slot to the -24 working slot.
movaps -40(%rsp), %xmm0
movaps %xmm0, -24(%rsp)
movq %rsi, -8(%rsp) # spilled pointer; never read back in this listing
movzbl (%rsi), %edi # current byte, zero-extended
cmpb $4, %dil
cmovel %ecx, %edi # byte == 4 -> index 0
movzbl %dil, %edi
incl -24(%rsp,%rdi,4) # bump the selected 32-bit counter in the working slot
# Copy the updated counters back from the working slot to the -40 slot.
movaps -24(%rsp), %xmm0
movaps %xmm0, -40(%rsp)
incq %rsi # next byte
decq %rdx # one fewer remaining; flags feed the jne
jne .LBB1_2
# %bb.3:
movaps -40(%rsp), %xmm0 # final counters
.LBB1_4:
movups %xmm0, (%rax) # write all four counters (or zeros) to the result
retq
# -- End function

f1 is just f0, but rewritten so that the numeric value of each variant is used as an index into an array. Even though f1 uses indexing, the optimizer still managed to eliminate the bounds check.
@rustbot label: +I-slow
Metadata
Metadata
Assignees
Labels
A-LLVM — Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
C-optimization — Category: An issue highlighting optimization opportunities or PRs implementing such.
T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.