[PHIElimination] Reuse existing COPY in predecessor basic block #131837

Open · wants to merge 4 commits into main
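This change teaches PHI elimination to reuse an existing COPY of the incoming
value in the predecessor block: when the incoming value has a unique
definition that is a COPY in that block and no remaining uses, the COPY's
destination is renamed to the PHI's incoming register instead of emitting a
second, back-to-back COPY. The -O0 check-line updates below reflect the
different register assignments that result.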
9 changes: 9 additions & 0 deletions llvm/lib/CodeGen/PHIElimination.cpp
@@ -582,6 +582,15 @@ void PHIEliminationImpl::LowerPHINode(MachineBasicBlock &MBB,
      continue;
    }

    // Reuse an existing copy in the block if possible.
    if (MachineInstr *DefMI = MRI->getUniqueVRegDef(SrcReg)) {
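      // Rename the copy only when it is SrcReg's sole definition, it lives in
      // this predecessor block, and SrcReg has no remaining uses (the use by
      // the PHI being lowered is already gone from the use list), so
      // retargeting the destination cannot be observed elsewhere.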
      if (DefMI->isCopy() && DefMI->getParent() == &opBlock &&
          MRI->use_empty(SrcReg)) {
        DefMI->getOperand(0).setReg(IncomingReg);
        continue;
      }
    }

    // Find a safe location to insert the copy; this may be the first
    // terminator in the block (or end()).
    MachineBasicBlock::iterator InsertPos =
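
In effect, the pass retargets the copy that already produces the incoming
value instead of appending a second COPY behind it. A minimal MIR sketch of
the idea (the incoming-register name %c_in is illustrative; the pass actually
creates an unnamed virtual register):

; Input to phi-node-elimination (%b has a unique def and no use other
; than the PHI):
bb.1:
  %b:gpr32 = COPY %a
bb.2:
  %c:gpr32 = PHI %b, %bb.1, undef %undef:gpr32, %bb.0

; Lowering previously appended a second copy in bb.1:
bb.1:
  %b:gpr32 = COPY %a
  %c_in:gpr32 = COPY %b        ; redundant back-to-back copy
bb.2:
  %c:gpr32 = COPY %c_in

; With this patch, the existing COPY is retargeted in place:
bb.1:
  %c_in:gpr32 = COPY %a
bb.2:
  %c:gpr32 = COPY %c_in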
@@ -118,8 +118,8 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_unordered:
; -O0: bl __aarch64_cas16_relax
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
; -O0: subs x9, x0, x9
; -O0: ccmp x1, x8, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_unordered:
; -O1: ldxp xzr, x8, [x2]
@@ -131,8 +131,8 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
; -O0: bl __aarch64_cas16_relax
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
; -O0: subs x9, x0, x9
; -O0: ccmp x1, x8, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
; -O1: ldxp xzr, x8, [x2]
@@ -144,8 +144,8 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_release:
; -O0: bl __aarch64_cas16_rel
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
; -O0: subs x9, x0, x9
; -O0: ccmp x1, x8, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_release:
; -O1: ldxp xzr, x8, [x2]
@@ -157,8 +157,8 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
; -O0: bl __aarch64_cas16_acq_rel
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
; -O0: subs x9, x0, x9
; -O0: ccmp x1, x8, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
; -O1: ldaxp xzr, x8, [x2]
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc.ll
@@ -117,13 +117,13 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {

define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_unordered:
; -O0: ldxp x10, x12, [x9]
; -O0: ldxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stxp w8, x14, x15, [x9]
; -O0: stxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stxp w12, x14, x15, [x13]
; -O0: stxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_unordered:
; -O1: ldxp xzr, x8, [x2]
@@ -134,13 +134,13 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr

define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
; -O0: ldxp x10, x12, [x9]
; -O0: ldxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stxp w8, x14, x15, [x9]
; -O0: stxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stxp w12, x14, x15, [x13]
; -O0: stxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
; -O1: ldxp xzr, x8, [x2]
@@ -151,13 +151,13 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr

define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_release:
; -O0: ldxp x10, x12, [x9]
; -O0: ldxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stlxp w8, x14, x15, [x9]
; -O0: stlxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stlxp w12, x14, x15, [x13]
; -O0: stlxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_release:
; -O1: ldxp xzr, x8, [x2]
@@ -168,13 +168,13 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)

define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
; -O0: ldaxp x10, x12, [x9]
; -O0: ldaxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stlxp w8, x14, x15, [x9]
; -O0: stlxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stlxp w12, x14, x15, [x13]
; -O0: stlxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
; -O1: ldaxp xzr, x8, [x2]
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8a.ll
@@ -117,13 +117,13 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {

define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_unordered:
; -O0: ldxp x10, x12, [x9]
; -O0: ldxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stxp w8, x14, x15, [x9]
; -O0: stxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stxp w12, x14, x15, [x13]
; -O0: stxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_unordered:
; -O1: ldxp xzr, x8, [x2]
@@ -134,13 +134,13 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr

define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
; -O0: ldxp x10, x12, [x9]
; -O0: ldxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stxp w8, x14, x15, [x9]
; -O0: stxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stxp w12, x14, x15, [x13]
; -O0: stxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
; -O1: ldxp xzr, x8, [x2]
@@ -151,13 +151,13 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr

define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_release:
; -O0: ldxp x10, x12, [x9]
; -O0: ldxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stlxp w8, x14, x15, [x9]
; -O0: stlxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stlxp w12, x14, x15, [x13]
; -O0: stlxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_release:
; -O1: ldxp xzr, x8, [x2]
@@ -168,13 +168,13 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)

define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
; -O0: ldaxp x10, x12, [x9]
; -O0: ldaxp x8, x10, [x13]
; -O0: cmp x8, x9
; -O0: cmp x10, x11
; -O0: cmp x12, x13
; -O0: stlxp w8, x14, x15, [x9]
; -O0: stlxp w8, x10, x12, [x9]
; -O0: subs x12, x12, x13
; -O0: ccmp x10, x11, #0, eq
; -O0: stlxp w12, x14, x15, [x13]
; -O0: stlxp w12, x8, x10, [x13]
; -O0: subs x10, x10, x11
; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
; -O1: ldaxp xzr, x8, [x2]
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/PHIElimination-debugloc.mir
@@ -37,7 +37,7 @@ body: |
  bb.1:
    %x:gpr32 = COPY $wzr
    ; Test that the debug location is not copied into bb1!
    ; CHECK: %3:gpr32 = COPY killed %x{{$}}
    ; CHECK: %3:gpr32 = COPY $wzr
    ; CHECK-LABEL: bb.2:
  bb.2:
    %y:gpr32 = PHI %x:gpr32, %bb.1, undef %undef:gpr32, %bb.0, debug-location !14
68 changes: 68 additions & 0 deletions llvm/test/CodeGen/AArch64/PHIElimination-reuse-copy.mir
@@ -0,0 +1,68 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -run-pass=phi-node-elimination -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s

# Verify that the original COPY in bb.1 is reappropriated as the PHI source in bb.2,
# instead of creating a new COPY with the same source register.

---
name: copy_virtual_reg
tracksRegLiveness: true
body: |
  ; CHECK-LABEL: name: copy_virtual_reg
  ; CHECK: bb.0:
  ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT: liveins: $nzcv, $w0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: %a:gpr32 = COPY $w0
  ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
  ; CHECK-NEXT: Bcc 8, %bb.2, implicit $nzcv
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT: successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = COPY %a
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT: %c:gpr32 = COPY [[DEF]]
  ; CHECK-NEXT: %d:gpr32 = COPY %c
  bb.0:
    liveins: $nzcv, $w0
    %a:gpr32 = COPY $w0
    Bcc 8, %bb.2, implicit $nzcv
  bb.1:
    %b:gpr32 = COPY %a:gpr32
  bb.2:
    %c:gpr32 = PHI %b:gpr32, %bb.1, undef %undef:gpr32, %bb.0
    %d:gpr32 = COPY %c:gpr32
...

---
name: copy_physical_reg
tracksRegLiveness: true
body: |
  ; CHECK-LABEL: name: copy_physical_reg
  ; CHECK: bb.0:
  ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT: liveins: $nzcv, $w0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
  ; CHECK-NEXT: Bcc 8, %bb.2, implicit $nzcv
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT: successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: $x0 = IMPLICIT_DEF
  ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = COPY $w0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT: %b:gpr32 = COPY [[DEF]]
  bb.0:
    liveins: $nzcv, $w0
    Bcc 8, %bb.2, implicit $nzcv
  bb.1:
    $x0 = IMPLICIT_DEF
    %a:gpr32 = COPY $w0
  bb.2:
    %b:gpr32 = PHI %a:gpr32, %bb.1, undef %undef:gpr32, %bb.0
...

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -587,8 +587,8 @@ define i16 @red_mla_dup_ext_u8_s8_s16(ptr noalias nocapture noundef readonly %A,
; CHECK-SD-NEXT: mov w10, w2
; CHECK-SD-NEXT: b.hi .LBB5_4
; CHECK-SD-NEXT: // %bb.2:
; CHECK-SD-NEXT: mov x11, xzr
; CHECK-SD-NEXT: mov w8, wzr
; CHECK-SD-NEXT: mov x11, xzr
; CHECK-SD-NEXT: b .LBB5_7
; CHECK-SD-NEXT: .LBB5_3:
; CHECK-SD-NEXT: mov w8, wzr