Skip to content

Commit 7fccccf

Browse files
committed
[AIE2PS] Add RA-level test for 512-bit acc/vec spilling
Mirror llvm/test/CodeGen/AIE/aie2p/ra/spill-vec-acc.mir for aie2ps. The test runs register allocation from greedy through virtregrewriter on a kernel that builds an acc1024 in BB0, broadcasts many vec512 values, and consumes them across a tight self-loop in BB1. Without a combined acc/vec spill class, the allocator must spill at least one 512-bit value to the stack; the captured CHECK lines show the resulting VST_X_SPILL / VLDA_X_SPILL traffic against %stack.0. This baseline establishes the pre-feature behavior so the upcoming combined-spill-class change can be measured by a CHECK-line diff (memory spill -> cross-bank allocation).
1 parent d504624 commit 7fccccf

1 file changed

Lines changed: 125 additions & 0 deletions

File tree

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
#
3+
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
8+
9+
# RUN: llc -mtriple=aie2ps -verify-machineinstrs --start-before=greedy --stop-after=virtregrewriter %s -o - | FileCheck %s
10+
11+
# Register-pressure scenario where an ACC1024 value is built in BB0 and
12+
# consumed across a tight self-loop in BB1, while many VEC512 broadcasts
13+
# are also live across the loop edge. Without a combined acc/vec spill
14+
# class, the allocator must spill at least one of these 512-bit values
15+
# to the stack. The aie2p target solves this with the
16+
# spill_vec512_to_composite class; this test mirrors that scenario for
17+
# aie2ps so that enabling the same combined class can be measured by a
18+
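# CHECK-line diff (memory spill -> cross-bank allocation).
# In the baseline captured here, one vec512 value is stored with
# VST_X_SPILL in bb.0 and reloaded with VLDA_X_SPILL in bb.1 (%stack.0).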
19+
20+
---
21+
name: spill-vec-acc
22+
alignment: 16
23+
legalized: true
24+
tracksRegLiveness: true
25+
body: |
26+
; CHECK-LABEL: name: spill-vec-acc
27+
; CHECK: bb.0:
28+
; CHECK-NEXT: successors: %bb.1(0x80000000)
29+
; CHECK-NEXT: {{ $}}
30+
; CHECK-NEXT: renamable $r16 = MOV_RLC_imm11_pseudo 0
31+
; CHECK-NEXT: renamable $x5 = VBCST_32 renamable $r16
32+
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg -19312
33+
; CHECK-NEXT: renamable $x0 = VBCST_16 killed renamable $r0
34+
; CHECK-NEXT: VST_X_SPILL killed renamable $x0, %stack.0, implicit $sp :: (store (s512) into %stack.0)
35+
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg -19360
36+
; CHECK-NEXT: renamable $x2 = VBCST_16 killed renamable $r0
37+
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg -19424
38+
; CHECK-NEXT: renamable $x4 = VBCST_16 killed renamable $r0
39+
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 32704
40+
; CHECK-NEXT: renamable $x6 = VBCST_16 killed renamable $r0
41+
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 16256
42+
; CHECK-NEXT: renamable $x8 = VBCST_16 killed renamable $r0
43+
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 46208
44+
; CHECK-NEXT: renamable $x10 = VBCST_16 killed renamable $r0
45+
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 46144
46+
; CHECK-NEXT: renamable $x1 = VBCST_16 killed renamable $r0
47+
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 46080
48+
; CHECK-NEXT: renamable $x3 = VBCST_16 killed renamable $r0
49+
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 45952
50+
; CHECK-NEXT: renamable $bmll0 = COPY killed renamable $x5
51+
; CHECK-NEXT: renamable $x5 = VBCST_16 killed renamable $r0
52+
; CHECK-NEXT: renamable $r0 = MOV_RLC_imm11_pseudo 1
53+
; CHECK-NEXT: renamable $x7 = VBCST_16 renamable $r16
54+
; CHECK-NEXT: renamable $bmlh0 = COPY renamable $bmll0
55+
; CHECK-NEXT: renamable $x9 = VBCST_16 killed renamable $r0
56+
; CHECK-NEXT: {{ $}}
57+
; CHECK-NEXT: bb.1:
58+
; CHECK-NEXT: successors: %bb.1(0x80000000)
59+
; CHECK-NEXT: liveins: $cml0:0x000000000000000C, $r16, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10
60+
; CHECK-NEXT: {{ $}}
61+
; CHECK-NEXT: renamable $x11 = VCONV_bf16_fp32_mv_conv_mv_x_srs_bf16 renamable $cml0, implicit-def dead $srf2fflags, implicit $crf2fmask, implicit $crfpconvsat, implicit $crrnd
62+
; CHECK-NEXT: renamable $x0 = VLDA_X_SPILL %stack.0, implicit $sp :: (load (s512) from %stack.0)
63+
; CHECK-NEXT: renamable $r18 = VGE_bf16 renamable $x11, killed renamable $x0, implicit $crbf8conf, implicit $crfp8conf
64+
; CHECK-NEXT: renamable $x0 = VSEL_16 renamable $x7, renamable $x10, killed renamable $r18, implicit $crbf8conf, implicit $crfp8conf
65+
; CHECK-NEXT: renamable $r18 = VGE_bf16 renamable $x11, renamable $x2, implicit $crbf8conf, implicit $crfp8conf
66+
; CHECK-NEXT: renamable $r20 = VGE_bf16 renamable $x11, renamable $x4, implicit $crbf8conf, implicit $crfp8conf
67+
; CHECK-NEXT: renamable $r22 = VGE_bf16 renamable $x11, renamable $x6, implicit $crbf8conf, implicit $crfp8conf
68+
; CHECK-NEXT: renamable $r24 = VGE_bf16 killed renamable $x11, renamable $x8, implicit $crbf8conf, implicit $crfp8conf
69+
; CHECK-NEXT: renamable $x0 = VSEL_16 killed renamable $x0, renamable $x1, killed renamable $r18, implicit $crbf8conf, implicit $crfp8conf
70+
; CHECK-NEXT: renamable $x0 = VSEL_16 killed renamable $x0, renamable $x3, killed renamable $r20, implicit $crbf8conf, implicit $crfp8conf
71+
; CHECK-NEXT: renamable $x0 = VSEL_16 killed renamable $x0, renamable $x5, killed renamable $r22, implicit $crbf8conf, implicit $crfp8conf
72+
; CHECK-NEXT: renamable $x0 = VSEL_16 killed renamable $x0, renamable $x7, killed renamable $r24, implicit $crbf8conf, implicit $crfp8conf
73+
; CHECK-NEXT: renamable $x0 = VSEL_16 killed renamable $x0, renamable $x9, renamable $r16, implicit $crbf8conf, implicit $crfp8conf
74+
; CHECK-NEXT: renamable $r8 = MOV_RLC_imm11_pseudo 0
75+
; CHECK-NEXT: dead $dm1 = VMUL_f_vmul_bf_vmul_bf_core_X_X $x7, killed $x0, killed $r8, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask
76+
; CHECK-NEXT: PseudoJ_jump_imm %bb.1
77+
; Input MIR. bb.0 defines everything that is later read inside the bb.1 loop:
; eleven vec512 broadcasts (%4, %7, %10, %13, %16, %19, %21, %24, %27, %30,
; %34), the mrs16m register %36, and the acc1024 register %1.
; In the baseline captured by the autogenerated assertions above, ten of the
; broadcasts enter bb.1 in $x1..$x10, while %4 (the broadcast of -19312) is
; stored to %stack.0 in bb.0 and reloaded in bb.1.
bb.0:
78+
successors: %bb.1(0x80000000)
79+
80+
%36:mrs16m = MOV_RLC_imm11_pseudo 0
81+
%40:vec512 = VBCST_32 %36
82+
%58:mrm = MOVXM_lng_cg -19312
83+
%4:vec512 = VBCST_16 %58
84+
%57:mrm = MOVXM_lng_cg -19360
85+
%7:vec512 = VBCST_16 %57
86+
%56:mrm = MOVXM_lng_cg -19424
87+
%10:vec512 = VBCST_16 %56
88+
%55:mrm = MOVXM_lng_cg 32704
89+
%13:vec512 = VBCST_16 %55
90+
%54:mrm = MOVXM_lng_cg 16256
91+
%16:vec512 = VBCST_16 %54
92+
%47:mrm = MOVXM_lng_cg 46208
93+
%21:vec512 = VBCST_16 %47
94+
%48:mrm = MOVXM_lng_cg 46144
95+
%24:vec512 = VBCST_16 %48
96+
%49:mrm = MOVXM_lng_cg 46080
97+
%27:vec512 = VBCST_16 %49
98+
%50:mrm = MOVXM_lng_cg 45952
99+
; %1 (acc1024) is written one 512-bit sub-register at a time: the low half is
; copied from the vec512 %40 here, and the high half is copied from the low
; half a few instructions further down.
undef %1.sub_512_acc_lo:acc1024 = COPY %40
100+
%30:vec512 = VBCST_16 %50
101+
%51:mrm = MOV_RLC_imm11_pseudo 1
102+
%19:vec512 = VBCST_16 %36
103+
%1.sub_512_acc_hi:acc1024 = COPY %1.sub_512_acc_lo
104+
%34:vec512 = VBCST_16 %51
105+
106+
; bb.1 lists itself as its only successor and ends with a jump to itself, so
; every bb.0 value read below is live around the loop edge.
bb.1:
107+
successors: %bb.1(0x80000000)
108+
109+
; The only read of the acc1024 value %1 in this function.
%0:vec512 = VCONV_bf16_fp32_mv_conv_mv_x_srs_bf16 %1, implicit-def dead $srf2fflags, implicit $crf2fmask, implicit $crfpconvsat, implicit $crrnd
110+
%3:mrs16m = VGE_bf16 %0, %4, implicit $crbf8conf, implicit $crfp8conf
111+
%18:vec512 = VSEL_16 %19, %21, %3, implicit $crbf8conf, implicit $crfp8conf
112+
%6:mrs16m = VGE_bf16 %0, %7, implicit $crbf8conf, implicit $crfp8conf
113+
%9:mrs16m = VGE_bf16 %0, %10, implicit $crbf8conf, implicit $crfp8conf
114+
%12:mrs16m = VGE_bf16 %0, %13, implicit $crbf8conf, implicit $crfp8conf
115+
%15:mrs16m = VGE_bf16 %0, %16, implicit $crbf8conf, implicit $crfp8conf
116+
%23:vec512 = VSEL_16 %18, %24, %6, implicit $crbf8conf, implicit $crfp8conf
117+
%26:vec512 = VSEL_16 %23, %27, %9, implicit $crbf8conf, implicit $crfp8conf
118+
%29:vec512 = VSEL_16 %26, %30, %12, implicit $crbf8conf, implicit $crfp8conf
119+
%32:vec512 = VSEL_16 %29, %19, %15, implicit $crbf8conf, implicit $crfp8conf
120+
%33:vec512 = VSEL_16 %32, %34, %36, implicit $crbf8conf, implicit $crfp8conf
121+
%62:mrv8 = MOV_RLC_imm11_pseudo 0
122+
; The VMUL result %61 is marked dead; this instruction is the last read of
; the VSEL_16 chain result %33.
dead %61:edm = VMUL_f_vmul_bf_vmul_bf_core_X_X %19, %33, %62, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask
123+
PseudoJ_jump_imm %bb.1
124+
125+
...

0 commit comments

Comments
 (0)