Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Target/AIE/AIEUnallocatedSuperRegRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ static void expandCopyBundles(
SmallSet<Register, 8> RegistersToRepair;
for (auto [VReg, SubRegs] : ExpandableRegs) {

for (MachineInstr &MI : MRI.def_instructions(VReg)) {
for (MachineInstr &MI : MRI.reg_instructions(VReg)) {

// Finding the last instruction in a COPY/KILL bundle (which has a
// predecessor but no successor).
Expand Down
67 changes: 34 additions & 33 deletions llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-cycle-in-bundle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -31,32 +31,31 @@ define void @heavy_3d_user(i32 %dimsAI.sroa.5.0.copyload.i, i32 %dimsAI.sroa.7.0
; FINE-GRAINED-NEXT: mova p2, #0; st dj0, [sp, #-64]; or r13, r2, r2; mov r14, r3 // 4-byte Folded Spill Delay Slot 3
; FINE-GRAINED-NEXT: mova p0, #0; st dj0, [sp, #-60]; or r15, r4, r4; mov r9, r5 // 4-byte Folded Spill Delay Slot 2
; FINE-GRAINED-NEXT: mova p1, #0; or r10, r6, r6; mov r11, r7 // Delay Slot 1
; FINE-GRAINED-NEXT: movs dn3, r10; mov dj3, r15
; FINE-GRAINED-NEXT: mova dn1, #0; movs m3, r14; mov dj7, r9
; FINE-GRAINED-NEXT: vlda x2, [sp, #-192]; movs dn7, r11; mov dj1, #1 // 64-byte Folded Reload
; FINE-GRAINED-NEXT: vlda x3, [sp, #-128]; movs m4, dj1; mov r3, dn1 // 64-byte Folded Reload
; FINE-GRAINED-NEXT: mova dc0, #0; movs dc2, dn1; mov r4, dn1
; FINE-GRAINED-NEXT: lda r22, [sp, #-64]; movs dc7, dn1; mov r20, dn1 // 4-byte Folded Reload
; FINE-GRAINED-NEXT: lda r21, [sp, #-60]; movs dc3, dn1; mov r19, dn1 // 4-byte Folded Reload
; FINE-GRAINED-NEXT: lda r0, [sp, #-56]; movs dc4, dj1; mov r5, dn1 // 4-byte Folded Reload
; FINE-GRAINED-NEXT: mova m5, #0; movs dj4, dj1; mov r6, dj1
; FINE-GRAINED-NEXT: mova r7, #1; movs dj0, m5; movx r18, #0; vmov lfl0, x2
; FINE-GRAINED-NEXT: lda r1, [sp, #-52]; movs dn4, m5; and r16, r12, r7; vmov lfh0, x3 // 4-byte Folded Reload
; FINE-GRAINED-NEXT: mova m5, #0; nopb ; nops ; nopx ; mov dc0, #0; nopv
; FINE-GRAINED-NEXT: vlda x2, [sp, #-192]; movs dn3, r10; nopx ; mov dj3, r15 // 64-byte Folded Reload
; FINE-GRAINED-NEXT: vlda x3, [sp, #-128]; movs dn7, r11; mov dj7, r9 // 64-byte Folded Reload
; FINE-GRAINED-NEXT: mova dj1, #1; movs m3, r14; mov dn1, #0
; FINE-GRAINED-NEXT: mova dj4, #1; movs m4, dj1; mov r3, dn1
; FINE-GRAINED-NEXT: lda r19, [sp, #-64]; movs dc2, dn1; mov r4, dn1 // 4-byte Folded Reload
; FINE-GRAINED-NEXT: lda r20, [sp, #-60]; movs dc7, dn1; mov r22, dj1 // 4-byte Folded Reload
; FINE-GRAINED-NEXT: lda r0, [sp, #-56]; movs dc3, dn1; mov r5, dn1 // 4-byte Folded Reload
; FINE-GRAINED-NEXT: mova r7, #1; movs dc6, dn1; movx r18, #0; vmov lfl0, x2
; FINE-GRAINED-NEXT: lda r1, [sp, #-52]; movs dc5, dn1; and r16, r12, r7; vmov lfh0, x3 // 4-byte Folded Reload
; FINE-GRAINED-NEXT: .LBB0_1: // %for.body.i
; FINE-GRAINED-NEXT: // =>This Loop Header: Depth=1
; FINE-GRAINED-NEXT: // Child Loop BB0_2 Depth 2
; FINE-GRAINED-NEXT: nopa ; nopb ; nops ; nopx ; mov dn2, r3; nopv
; FINE-GRAINED-NEXT: movs dj2, p6; nopx ; mov dn6, r3
; FINE-GRAINED-NEXT: nopa ; nopb ; nopx ; mov dn2, r3; movs dj2, p6
; FINE-GRAINED-NEXT: movs dn6, r3; mov r17, dc6
; FINE-GRAINED-NEXT: movs dj6, p6; mov m2, m4
; FINE-GRAINED-NEXT: mova p1, #0; movs dc6, r4; mov r25, r18
; FINE-GRAINED-NEXT: vldb.pop.576.3d ex0, [p1, lf1, r25, d2]
; FINE-GRAINED-NEXT: mov m1, m5
; FINE-GRAINED-NEXT: movs dj1, m5; mov dn1, r3
; FINE-GRAINED-NEXT: nop
; FINE-GRAINED-NEXT: movs m1, m5; mov dn1, r3
; FINE-GRAINED-NEXT: movs dc1, dc0; vmov lfl1, lfl0
; FINE-GRAINED-NEXT: movs dn5, r3; vmov lfh1, lfh0
; FINE-GRAINED-NEXT: mova p0, #0; movs dj5, m5; mov dc5, r19
; FINE-GRAINED-NEXT: paddb.3d [p0], d1
; FINE-GRAINED-NEXT: mova p0, #0; mov r19, dc5
; FINE-GRAINED-NEXT: movs dj1, m5; vmov lfh1, lfh0
; FINE-GRAINED-NEXT: mova p0, #0; movs dn5, r3; mov dj5, m5
; FINE-GRAINED-NEXT: paddb.3d [p0], d1; or r6, r5, r5; mov r5, dj4
; FINE-GRAINED-NEXT: mova p0, #0; mov r21, dc5
; FINE-GRAINED-NEXT: .LBB0_2: // %for.body125.i
; FINE-GRAINED-NEXT: // Parent Loop BB0_1 Depth=1
; FINE-GRAINED-NEXT: // => This Inner Loop Header: Depth=2
Expand All @@ -70,21 +69,23 @@ define void @heavy_3d_user(i32 %dimsAI.sroa.5.0.copyload.i, i32 %dimsAI.sroa.7.0
; FINE-GRAINED-NEXT: nopa ; vldb.pop.576.3d ex4, [p1, lf1, r25, d2]; nops ; nopx ; vmov lfh1, x3; nopv
; FINE-GRAINED-NEXT: // %bb.3: // %for.cond.cleanup124.i
; FINE-GRAINED-NEXT: // in Loop: Header=BB0_1 Depth=1
; FINE-GRAINED-NEXT: nopa ; nopb ; nops ; nopx ; mov m0, m5; nopv
; FINE-GRAINED-NEXT: movs dn0, m5; nopx ; mov m1, m3
; FINE-GRAINED-NEXT: movs dn1, dn3; mov dj1, dj3
; FINE-GRAINED-NEXT: mova p0, #0; movs dn5, dn7; mov dj5, dj7
; FINE-GRAINED-NEXT: movs dc0, r5; paddb.3d [p0], d3; mov dj7, r21
; FINE-GRAINED-NEXT: movs dj3, r22; mov dn3, m5
; FINE-GRAINED-NEXT: movs m3, m5; mov dn7, m5
; FINE-GRAINED-NEXT: movs dc1, dc3; xor r17, r12, r7; mov dc5, dc7
; FINE-GRAINED-NEXT: movs dc3, r20; and r17, r17, r7; mov dc7, dc4
; FINE-GRAINED-NEXT: mova p1, #0; movs dc4, m5; jnz r17, #.LBB0_1
; FINE-GRAINED-NEXT: nopa ; nopb ; nops ; nopx ; mov dn1, dn3; nopv
; FINE-GRAINED-NEXT: movs m1, m3; nopx ; mov dn5, dn7
; FINE-GRAINED-NEXT: movs dc0, r6; mov dj1, dj3
; FINE-GRAINED-NEXT: movs m0, m5; mov dj5, dj7
; FINE-GRAINED-NEXT: movs dn0, m5; mov dj4, r5
; FINE-GRAINED-NEXT: mova p0, #0; movs dn4, m5; mov dj0, m5
; FINE-GRAINED-NEXT: movs dc4, m5; paddb.3d [p0], d3; mov dj3, r19
; FINE-GRAINED-NEXT: movs dj7, r20; mov m3, m5
; FINE-GRAINED-NEXT: movs dn3, m5; mov dc5, dc7
; FINE-GRAINED-NEXT: movs dc1, dc3; xor r17, r12, r7; mov dc3, r17
; FINE-GRAINED-NEXT: movs dc7, r22; and r6, r17, r7; mov r22, m5
; FINE-GRAINED-NEXT: mova p1, #0; movs dn7, m5; jnz r6, #.LBB0_1
; FINE-GRAINED-NEXT: movs m3, m1; paddb.3d [p1], d3; mov dn3, dn1 // Delay Slot 5
; FINE-GRAINED-NEXT: mova p0, #0; movs dj3, dj1; mov dn7, dn5 // Delay Slot 4
; FINE-GRAINED-NEXT: movs dj7, dj5; paddb.3d [p0], d0; mov r20, dc3 // Delay Slot 3
; FINE-GRAINED-NEXT: movs dc4, m5; mov dc3, dc1 // Delay Slot 2
; FINE-GRAINED-NEXT: mova dc0, #0; movs dc7, dc5; mov r5, dc0 // Delay Slot 1
; FINE-GRAINED-NEXT: movs dj3, dj1; mov dn7, dn5 // Delay Slot 4
; FINE-GRAINED-NEXT: mova p0, #0; movs dj7, dj5; mov dc6, dc3 // Delay Slot 3
; FINE-GRAINED-NEXT: movs dc3, dc1; paddb.3d [p0], d0; mov dc7, dc5 // Delay Slot 2
; FINE-GRAINED-NEXT: mova dc0, #0; movs dc5, r21; mov r5, dc0 // Delay Slot 1
; FINE-GRAINED-NEXT: // %bb.4: // %ret.exit
; FINE-GRAINED-NEXT: lda p6, [sp, #-12] // 4-byte Folded Reload
; FINE-GRAINED-NEXT: lda r15, [sp, #-16] // 4-byte Folded Reload
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates

# RUN: llc -O2 -mtriple=aie2p -verify-machineinstrs -start-before=greedy \
# RUN: -stop-after=aie-unallocated-superreg-rewrite %s -o - | FileCheck %s
Expand Down Expand Up @@ -60,17 +60,15 @@ body: |
; CHECK-NEXT: [[COPY16:%[0-9]+]].sub_avail:epsrfldf = COPY [[MOV_RLC_imm11_pseudo]]
; CHECK-NEXT: [[COPY17:%[0-9]+]]:spill_edn_to_er = COPY [[COPY4]]
; CHECK-NEXT: [[COPY18:%[0-9]+]]:spill_edn_to_er = COPY [[COPY5]]
; CHECK-NEXT: undef [[COPY19:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY15]].sub_lo_dim {
; CHECK-NEXT: internal [[COPY19]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY15]].sub_hi_dim_then_sub_dim_count
; CHECK-NEXT: internal [[COPY19]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY15]].sub_hi_dim_then_sub_dim_size
; CHECK-NEXT: internal [[COPY19]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY15]].sub_hi_dim_then_sub_dim_stride
; CHECK-NEXT: }
; CHECK-NEXT: undef [[COPY19:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY15]].sub_lo_dim
; CHECK-NEXT: [[COPY19:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY15]].sub_hi_dim_then_sub_dim_count
; CHECK-NEXT: [[COPY19:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY15]].sub_hi_dim_then_sub_dim_size
; CHECK-NEXT: [[COPY19:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY15]].sub_hi_dim_then_sub_dim_stride
; CHECK-NEXT: dead [[VLD_POP_576_3D_pseudo_split:%[0-9]+]]:vec576, dead [[COPY16:%[0-9]+]].sub_ptr:epsrfldf, dead [[COPY16:%[0-9]+]].sub_fifo:epsrfldf, dead [[COPY16:%[0-9]+]].sub_avail:epsrfldf, [[COPY19:%[0-9]+]].sub_dim_count:eds, [[COPY19:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLD_POP_576_3D_pseudo_split [[COPY16]].sub_ptr, [[COPY16]].sub_fifo, [[COPY16]].sub_avail, [[COPY19]].sub_mod, [[COPY19]].sub_dim_size, [[COPY19]].sub_dim_stride, [[COPY19]].sub_dim_count, undef [[COPY19]].sub_hi_dim_then_sub_mod, [[COPY19]].sub_hi_dim_then_sub_dim_size, [[COPY19]].sub_hi_dim_then_sub_dim_stride, [[COPY19]].sub_hi_dim_then_sub_dim_count, implicit-def $srfifo_uf :: (load unknown-size from `ptr addrspace(5) null`, align 1, addrspace 5)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:spill_edc_to_er = COPY [[COPY19]].sub_dim_count
; CHECK-NEXT: [[COPY21:%[0-9]+]]:spill_edc_to_er = COPY [[COPY19]].sub_hi_dim_then_sub_dim_count
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ednl = COPY [[COPY17]] {
; CHECK-NEXT: internal [[COPY5]]:ednh = COPY [[COPY18]]
; CHECK-NEXT: }
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ednl = COPY [[COPY17]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:ednh = COPY [[COPY18]]
; CHECK-NEXT: [[COPY22:%[0-9]+]]:em_as_32bit = COPY [[MOV_PD_imm11_pseudo]]
; CHECK-NEXT: [[COPY23:%[0-9]+]]:edjl = COPY [[COPY]]
; CHECK-NEXT: [[COPY24:%[0-9]+]]:edcl = COPY [[COPY20]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,21 +88,19 @@ body: |
; CHECK-NEXT: [[COPY34:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY33]].sub_hi_dim_then_sub_dim_count
; CHECK-NEXT: [[COPY34:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY33]].sub_hi_dim_then_sub_dim_size
; CHECK-NEXT: [[COPY34:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY33]].sub_hi_dim_then_sub_dim_stride
; CHECK-NEXT: undef [[COPY35:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY34]].sub_lo_dim {
; CHECK-NEXT: internal [[COPY35]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY34]].sub_hi_dim_then_sub_dim_count
; CHECK-NEXT: internal [[COPY35]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY34]].sub_hi_dim_then_sub_dim_size
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know why we now drop the `internal` flag?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are unbundling those copies, so they are no longer marked `internal`.

; CHECK-NEXT: internal [[COPY35]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY34]].sub_hi_dim_then_sub_dim_stride
; CHECK-NEXT: }
; CHECK-NEXT: undef [[COPY35:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY34]].sub_lo_dim
; CHECK-NEXT: [[COPY35:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY34]].sub_hi_dim_then_sub_dim_count
; CHECK-NEXT: [[COPY35:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY34]].sub_hi_dim_then_sub_dim_size
; CHECK-NEXT: [[COPY35:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY34]].sub_hi_dim_then_sub_dim_stride
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo2:%[0-9]+]]:ep, [[COPY35:%[0-9]+]].sub_dim_count:eds, [[COPY35:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo2]], [[COPY35]].sub_mod, [[COPY35]].sub_dim_size, [[COPY35]].sub_dim_stride, [[COPY35]].sub_dim_count, undef [[COPY35]].sub_hi_dim_then_sub_mod, [[COPY35]].sub_hi_dim_then_sub_dim_size, [[COPY35]].sub_hi_dim_then_sub_dim_stride, [[COPY35]].sub_hi_dim_then_sub_dim_count
; CHECK-NEXT: [[COPY36:%[0-9]+]]:spill_edc_to_er = COPY [[COPY35]].sub_dim_count
; CHECK-NEXT: [[COPY37:%[0-9]+]]:spill_edn_to_er = COPY [[COPY35]].sub_dim_size
; CHECK-NEXT: [[COPY38:%[0-9]+]]:spill_edn_to_er = COPY [[COPY35]].sub_hi_dim_then_sub_dim_size
; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
; CHECK-NEXT: undef [[COPY39:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY17]].sub_lo_dim {
; CHECK-NEXT: internal [[COPY39]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY17]].sub_hi_dim_then_sub_dim_count
; CHECK-NEXT: internal [[COPY39]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY17]].sub_hi_dim_then_sub_dim_size
; CHECK-NEXT: internal [[COPY39]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY17]].sub_hi_dim_then_sub_dim_stride
; CHECK-NEXT: }
; CHECK-NEXT: undef [[COPY39:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY17]].sub_lo_dim
; CHECK-NEXT: [[COPY39:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY17]].sub_hi_dim_then_sub_dim_count
; CHECK-NEXT: [[COPY39:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY17]].sub_hi_dim_then_sub_dim_size
; CHECK-NEXT: [[COPY39:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY17]].sub_hi_dim_then_sub_dim_stride
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo3:%[0-9]+]]:ep, [[COPY39:%[0-9]+]].sub_dim_count:eds, [[COPY39:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo3]], [[COPY39]].sub_mod, [[COPY39]].sub_dim_size, [[COPY39]].sub_dim_stride, [[COPY39]].sub_dim_count, undef [[COPY39]].sub_hi_dim_then_sub_mod, [[COPY39]].sub_hi_dim_then_sub_dim_size, [[COPY39]].sub_hi_dim_then_sub_dim_stride, [[COPY39]].sub_hi_dim_then_sub_dim_count
; CHECK-NEXT: [[COPY40:%[0-9]+]]:spill_edc_to_er = COPY [[COPY39]].sub_dim_count
; CHECK-NEXT: [[COPY14:%[0-9]+]]:edcl = COPY [[COPY16]]
Expand Down
Loading