Skip to content

[MCP] Move dependencies if they block copy propagation #105562

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
288 changes: 264 additions & 24 deletions llvm/lib/CodeGen/MachineCopyPropagation.cpp

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,10 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) #0 {
 ; CHECK-OUTLINE-O1-LABEL: val_compare_and_swap_from_load:
 ; CHECK-OUTLINE-O1: ; %bb.0:
 ; CHECK-OUTLINE-O1-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; CHECK-OUTLINE-O1-NEXT: ldr w8, [x2]
 ; CHECK-OUTLINE-O1-NEXT: mov x3, x0
 ; CHECK-OUTLINE-O1-NEXT: mov w0, w1
+; CHECK-OUTLINE-O1-NEXT: ldr w1, [x2]
 ; CHECK-OUTLINE-O1-NEXT: mov x2, x3
-; CHECK-OUTLINE-O1-NEXT: mov w1, w8
 ; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_cas4_acq
 ; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-OUTLINE-O1-NEXT: ret
Expand Down
196 changes: 98 additions & 98 deletions llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -256,9 +256,8 @@ define dso_local i32 @load_between_stores(i32 %x, ptr %p, ptr %ptr) {
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: strh w0, [x1]
 ; CHECK-NEXT: lsr w9, w0, #16
-; CHECK-NEXT: ldr w8, [x2]
+; CHECK-NEXT: ldr w0, [x2]
 ; CHECK-NEXT: strh w9, [x1, #2]
-; CHECK-NEXT: mov w0, w8
 ; CHECK-NEXT: ret
 %t1 = trunc i32 %x to i16
 %sh = lshr i32 %x, 16
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/aarch64-mulv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -510,10 +510,9 @@ define i128 @mulv_v2i128(<2 x i128> %a) {
 ; CHECK-GI-LABEL: mulv_v2i128:
 ; CHECK-GI: // %bb.0: // %entry
 ; CHECK-GI-NEXT: mul x9, x0, x3
-; CHECK-GI-NEXT: mul x8, x0, x2
 ; CHECK-GI-NEXT: umulh x10, x0, x2
 ; CHECK-GI-NEXT: madd x9, x1, x2, x9
-; CHECK-GI-NEXT: mov x0, x8
+; CHECK-GI-NEXT: mul x0, x0, x2
 ; CHECK-GI-NEXT: add x1, x9, x10
 ; CHECK-GI-NEXT: ret
 entry:
Expand Down
10 changes: 4 additions & 6 deletions llvm/test/CodeGen/AArch64/aarch64-wide-mul.ll
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,12 @@ entry:
 define <16 x i32> @mla_i32(<16 x i8> %a, <16 x i8> %b, <16 x i32> %c) {
 ; CHECK-SD-LABEL: mla_i32:
 ; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umull2 v7.8h, v0.16b, v1.16b
 ; CHECK-SD-NEXT: umull v6.8h, v0.8b, v1.8b
-; CHECK-SD-NEXT: uaddw2 v5.4s, v5.4s, v7.8h
+; CHECK-SD-NEXT: umull2 v7.8h, v0.16b, v1.16b
 ; CHECK-SD-NEXT: uaddw v0.4s, v2.4s, v6.4h
 ; CHECK-SD-NEXT: uaddw2 v1.4s, v3.4s, v6.8h
+; CHECK-SD-NEXT: uaddw2 v3.4s, v5.4s, v7.8h
 ; CHECK-SD-NEXT: uaddw v2.4s, v4.4s, v7.4h
-; CHECK-SD-NEXT: mov v3.16b, v5.16b
 ; CHECK-SD-NEXT: ret
 ;
 ; CHECK-GI-LABEL: mla_i32:
Expand Down Expand Up @@ -170,18 +169,17 @@ define <16 x i64> @mla_i64(<16 x i8> %a, <16 x i8> %b, <16 x i64> %c) {
 ; CHECK-SD-NEXT: umull2 v0.8h, v0.16b, v1.16b
 ; CHECK-SD-NEXT: ldp q20, q21, [sp]
 ; CHECK-SD-NEXT: ushll v17.4s, v16.4h, #0
-; CHECK-SD-NEXT: ushll v18.4s, v0.4h, #0
 ; CHECK-SD-NEXT: ushll2 v16.4s, v16.8h, #0
 ; CHECK-SD-NEXT: ushll2 v19.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v18.4s, v0.4h, #0
 ; CHECK-SD-NEXT: uaddw2 v1.2d, v3.2d, v17.4s
 ; CHECK-SD-NEXT: uaddw v0.2d, v2.2d, v17.2s
 ; CHECK-SD-NEXT: uaddw2 v3.2d, v5.2d, v16.4s
 ; CHECK-SD-NEXT: uaddw v2.2d, v4.2d, v16.2s
-; CHECK-SD-NEXT: uaddw2 v16.2d, v21.2d, v19.4s
 ; CHECK-SD-NEXT: uaddw v4.2d, v6.2d, v18.2s
 ; CHECK-SD-NEXT: uaddw2 v5.2d, v7.2d, v18.4s
+; CHECK-SD-NEXT: uaddw2 v7.2d, v21.2d, v19.4s
 ; CHECK-SD-NEXT: uaddw v6.2d, v20.2d, v19.2s
-; CHECK-SD-NEXT: mov v7.16b, v16.16b
 ; CHECK-SD-NEXT: ret
 ;
 ; CHECK-GI-LABEL: mla_i64:
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/AArch64/addp-shuffle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,8 @@ define <16 x i8> @deinterleave_shuffle_v32i8(<32 x i8> %a) {
 define <4 x i64> @deinterleave_shuffle_v8i64(<8 x i64> %a) {
 ; CHECK-LABEL: deinterleave_shuffle_v8i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: addp v2.2d, v2.2d, v3.2d
 ; CHECK-NEXT: addp v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: addp v1.2d, v2.2d, v3.2d
 ; CHECK-NEXT: ret
 %r0 = shufflevector <8 x i64> %a, <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 %r1 = shufflevector <8 x i64> %a, <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
Expand Down Expand Up @@ -123,9 +122,8 @@ define <8 x half> @deinterleave_shuffle_v16f16(<16 x half> %a) {
 define <4 x double> @deinterleave_shuffle_v8f64(<8 x double> %a) {
 ; CHECK-LABEL: deinterleave_shuffle_v8f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: faddp v2.2d, v2.2d, v3.2d
 ; CHECK-NEXT: faddp v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: faddp v1.2d, v2.2d, v3.2d
 ; CHECK-NEXT: ret
 %r0 = shufflevector <8 x double> %a, <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 %r1 = shufflevector <8 x double> %a, <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
Expand Down
201 changes: 201 additions & 0 deletions llvm/test/CodeGen/AArch64/anti-dependencies-mcp.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=aarch64 -run-pass machine-cp -verify-machineinstrs -o - %s | FileCheck %s
--- |
source_filename = "llvmirrepoW.ll"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"

declare dso_local i32 @chain(i32 noundef, i32 noundef) local_unnamed_addr

declare dso_local void @init_var(ptr noundef) local_unnamed_addr

define dso_local void @fun2(i64 %a, i64 %b) local_unnamed_addr {
entry:
%c = alloca i32, align 4
ret void
}
define dso_local void @blocker(i64 %a, i64 %b) local_unnamed_addr {
entry:
%c = alloca i32, align 4
ret void
}

...
---
name: fun2
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
callsEHReturn: false
callsUnwindInit: false
hasEHCatchret: false
hasEHScopes: false
hasEHFunclets: false
isOutlined: false
debugInstrRef: false
failsVerification: false
tracksDebugUserValues: true
registers: []
liveins:
- { reg: '$x0', virtual-reg: '' }
- { reg: '$x1', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: true
hasCalls: true
stackProtector: ''
functionContext: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
hasTailCall: false
isCalleeSavedInfoValid: false
localFrameSize: 4
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: c, type: default, offset: 0, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
local-offset: -4, debug-info-variable: '', debug-info-expression: '',
debug-info-location: '' }
entry_values: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
liveins: $x0, $x1

; CHECK-LABEL: name: fun2
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: $w0 = KILL killed renamable $w0, implicit $x0
; CHECK-NEXT: $w1 = KILL killed renamable $w1, implicit $x1
; CHECK-NEXT: BL @chain, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit-def $sp, implicit-def $w0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: renamable $w1 = COPY killed $w0
; CHECK-NEXT: $w0 = LDRWui %stack.0.c, 0 :: (dereferenceable load (s32) from %ir.c)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: BL @chain, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit-def $sp, implicit-def dead $w0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: RET_ReallyLR
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
$w0 = KILL renamable $w0, implicit killed $x0
$w1 = KILL renamable $w1, implicit killed $x1
BL @chain, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit $w1, implicit-def $sp, implicit-def $w0
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
renamable $w8 = LDRWui %stack.0.c, 0 :: (dereferenceable load (s32) from %ir.c)
renamable $w1 = COPY $w0
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
$w0 = COPY killed renamable $w8
BL @chain, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit $w1, implicit-def $sp, implicit-def dead $w0
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
RET_ReallyLR

...

---
name: blocker
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
callsEHReturn: false
callsUnwindInit: false
hasEHCatchret: false
hasEHScopes: false
hasEHFunclets: false
isOutlined: false
debugInstrRef: false
failsVerification: false
tracksDebugUserValues: true
registers: []
liveins:
- { reg: '$x0', virtual-reg: '' }
- { reg: '$x1', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: true
hasCalls: true
stackProtector: ''
functionContext: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
hasTailCall: false
isCalleeSavedInfoValid: false
localFrameSize: 4
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: c, type: default, offset: 0, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
local-offset: -4, debug-info-variable: '', debug-info-expression: '',
debug-info-location: '' }
entry_values: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
liveins: $x0, $x1

; CHECK-LABEL: name: blocker
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: $w0 = KILL killed renamable $w0, implicit $x0
; CHECK-NEXT: $w1 = KILL killed renamable $w1, implicit $x1
; CHECK-NEXT: BL @chain, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit-def $sp, implicit-def $w0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: renamable $w8 = LDRWui %stack.0.c, 0 :: (dereferenceable load (s32) from %ir.c)
; CHECK-NEXT: renamable $w1 = COPY $w0
; CHECK-NEXT: $w0 = ADDWrr killed $w0, $w0
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: BL @chain, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit-def $sp, implicit-def dead $w0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: RET_ReallyLR
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
$w0 = KILL renamable $w0, implicit killed $x0
$w1 = KILL renamable $w1, implicit killed $x1
BL @chain, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit $w1, implicit-def $sp, implicit-def $w0
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
renamable $w8 = LDRWui %stack.0.c, 0 :: (dereferenceable load (s32) from %ir.c)
renamable $w8 = ADDWrr $w0, $w0
renamable $w1 = COPY $w0
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
$w0 = COPY killed renamable $w8
BL @chain, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit $w1, implicit-def $sp, implicit-def dead $w0
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
RET_ReallyLR

...
9 changes: 4 additions & 5 deletions llvm/test/CodeGen/AArch64/arm64-non-pow2-ldst.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,8 @@ define i56 @ldi56(ptr %p) nounwind {
 define i80 @ldi80(ptr %p) nounwind {
 ; CHECK-LABEL: ldi80:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
 ; CHECK-NEXT: ldrh w1, [x0, #8]
-; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ldr x0, [x0]
 ; CHECK-NEXT: ret
 %r = load i80, ptr %p
 ret i80 %r
Expand All @@ -55,10 +54,10 @@ define i280 @ldi280(ptr %p) nounwind {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldrb w9, [x0, #34]
 ; CHECK-NEXT: ldrh w10, [x0, #32]
-; CHECK-NEXT: ldp x8, x1, [x0]
-; CHECK-NEXT: ldp x2, x3, [x0, #16]
+; CHECK-NEXT: ldr x3, [x0, #24]
+; CHECK-NEXT: ldp x1, x2, [x0, #8]
+; CHECK-NEXT: ldr x0, [x0]
 ; CHECK-NEXT: orr x4, x10, x9, lsl #16
-; CHECK-NEXT: mov x0, x8
 ; CHECK-NEXT: ret
 %r = load i280, ptr %p
 ret i280 %r
Expand Down
Loading
Loading