|
| 1 | +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 |
| 2 | +# |
| 3 | +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +# See https://llvm.org/LICENSE.txt for license information. |
| 5 | +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +# |
| 7 | +# (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates |
| 8 | + |
| 9 | +# RUN: llc -mtriple=aie2ps -verify-machineinstrs --start-before=greedy --stop-after=virtregrewriter %s -o - | FileCheck %s |
| 10 | + |
| 11 | +# Register-pressure scenario where an ACC1024 value is built in BB0 and |
| 12 | +# consumed across a tight self-loop in BB1, while many VEC512 broadcasts |
| 13 | +# are also live across the loop edge. Without a combined acc/vec spill |
| 14 | +# class, the allocator must spill at least one of these 512-bit values |
| 15 | +# to the stack. The aie2p target solves this with the |
| 16 | +# spill_vec512_to_composite class; this test mirrors that scenario for |
| 17 | +# aie2ps so that enabling the same combined class can be measured by a |
| 18 | +# CHECK-line diff (memory spill -> cross-bank allocation). |
| 19 | + |
| 20 | +--- |
| 21 | +name: spill-vec-acc |
| 22 | +alignment: 16 |
| 23 | +legalized: true |
| 24 | +tracksRegLiveness: true |
| 25 | +body: | |
| 26 | + ; CHECK-LABEL: name: spill-vec-acc |
| 27 | + ; CHECK: bb.0: |
| 28 | + ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| 29 | + ; CHECK-NEXT: {{ $}} |
| 30 | + ; CHECK-NEXT: renamable $r16 = MOV_RLC_imm11_pseudo 0 |
| 31 | + ; CHECK-NEXT: renamable $x5 = VBCST_32 renamable $r16 |
| 32 | + ; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg -19312 |
| 33 | + ; CHECK-NEXT: renamable $x0 = VBCST_16 killed renamable $r0 |
| 34 | + ; CHECK-NEXT: VST_X_SPILL killed renamable $x0, %stack.0, implicit $sp :: (store (s512) into %stack.0) |
| 35 | + ; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg -19360 |
| 36 | + ; CHECK-NEXT: renamable $x2 = VBCST_16 killed renamable $r0 |
| 37 | + ; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg -19424 |
| 38 | + ; CHECK-NEXT: renamable $x4 = VBCST_16 killed renamable $r0 |
| 39 | + ; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 32704 |
| 40 | + ; CHECK-NEXT: renamable $x6 = VBCST_16 killed renamable $r0 |
| 41 | + ; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 16256 |
| 42 | + ; CHECK-NEXT: renamable $x8 = VBCST_16 killed renamable $r0 |
| 43 | + ; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 46208 |
| 44 | + ; CHECK-NEXT: renamable $x10 = VBCST_16 killed renamable $r0 |
| 45 | + ; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 46144 |
| 46 | + ; CHECK-NEXT: renamable $x1 = VBCST_16 killed renamable $r0 |
| 47 | + ; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 46080 |
| 48 | + ; CHECK-NEXT: renamable $x3 = VBCST_16 killed renamable $r0 |
| 49 | + ; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 45952 |
| 50 | + ; CHECK-NEXT: renamable $bmll0 = COPY killed renamable $x5 |
| 51 | + ; CHECK-NEXT: renamable $x5 = VBCST_16 killed renamable $r0 |
| 52 | + ; CHECK-NEXT: renamable $r0 = MOV_RLC_imm11_pseudo 1 |
| 53 | + ; CHECK-NEXT: renamable $x7 = VBCST_16 renamable $r16 |
| 54 | + ; CHECK-NEXT: renamable $bmlh0 = COPY renamable $bmll0 |
| 55 | + ; CHECK-NEXT: renamable $x9 = VBCST_16 killed renamable $r0 |
| 56 | + ; CHECK-NEXT: {{ $}} |
| 57 | + ; CHECK-NEXT: bb.1: |
| 58 | + ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| 59 | + ; CHECK-NEXT: liveins: $cml0:0x000000000000000C, $r16, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 |
| 60 | + ; CHECK-NEXT: {{ $}} |
| 61 | + ; CHECK-NEXT: renamable $x11 = VCONV_bf16_fp32_mv_conv_mv_x_srs_bf16 renamable $cml0, implicit-def dead $srf2fflags, implicit $crf2fmask, implicit $crfpconvsat, implicit $crrnd |
| 62 | + ; CHECK-NEXT: renamable $x0 = VLDA_X_SPILL %stack.0, implicit $sp :: (load (s512) from %stack.0) |
| 63 | + ; CHECK-NEXT: renamable $r18 = VGE_bf16 renamable $x11, killed renamable $x0, implicit $crbf8conf, implicit $crfp8conf |
| 64 | + ; CHECK-NEXT: renamable $x0 = VSEL_16 renamable $x7, renamable $x10, killed renamable $r18, implicit $crbf8conf, implicit $crfp8conf |
| 65 | + ; CHECK-NEXT: renamable $r18 = VGE_bf16 renamable $x11, renamable $x2, implicit $crbf8conf, implicit $crfp8conf |
| 66 | + ; CHECK-NEXT: renamable $r20 = VGE_bf16 renamable $x11, renamable $x4, implicit $crbf8conf, implicit $crfp8conf |
| 67 | + ; CHECK-NEXT: renamable $r22 = VGE_bf16 renamable $x11, renamable $x6, implicit $crbf8conf, implicit $crfp8conf |
| 68 | + ; CHECK-NEXT: renamable $r24 = VGE_bf16 killed renamable $x11, renamable $x8, implicit $crbf8conf, implicit $crfp8conf |
| 69 | + ; CHECK-NEXT: renamable $x0 = VSEL_16 killed renamable $x0, renamable $x1, killed renamable $r18, implicit $crbf8conf, implicit $crfp8conf |
| 70 | + ; CHECK-NEXT: renamable $x0 = VSEL_16 killed renamable $x0, renamable $x3, killed renamable $r20, implicit $crbf8conf, implicit $crfp8conf |
| 71 | + ; CHECK-NEXT: renamable $x0 = VSEL_16 killed renamable $x0, renamable $x5, killed renamable $r22, implicit $crbf8conf, implicit $crfp8conf |
| 72 | + ; CHECK-NEXT: renamable $x0 = VSEL_16 killed renamable $x0, renamable $x7, killed renamable $r24, implicit $crbf8conf, implicit $crfp8conf |
| 73 | + ; CHECK-NEXT: renamable $x0 = VSEL_16 killed renamable $x0, renamable $x9, renamable $r16, implicit $crbf8conf, implicit $crfp8conf |
| 74 | + ; CHECK-NEXT: renamable $r8 = MOV_RLC_imm11_pseudo 0 |
| 75 | + ; CHECK-NEXT: dead $dm1 = VMUL_f_vmul_bf_vmul_bf_core_X_X $x7, killed $x0, killed $r8, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask |
| 76 | + ; CHECK-NEXT: PseudoJ_jump_imm %bb.1 |
| | + ; bb.0 (setup): defines the acc1024 value %1 and eleven vec512 values that bb.1 reads; bb.1 redefines none of them. |
| 77 | + bb.0: |
| 78 | + successors: %bb.1(0x80000000) |
| 79 | +
|
| | + ; %36 (constant 0) feeds the VBCST_32 below, the VBCST_16 defining %19, and the last VSEL_16 in bb.1. |
| 80 | + %36:mrs16m = MOV_RLC_imm11_pseudo 0 |
| 81 | + %40:vec512 = VBCST_32 %36 |
| | + ; %4, %7, %10, %13 and %16 are the second operands of the five VGE_bf16 in bb.1. |
| 82 | + %58:mrm = MOVXM_lng_cg -19312 |
| 83 | + %4:vec512 = VBCST_16 %58 |
| 84 | + %57:mrm = MOVXM_lng_cg -19360 |
| 85 | + %7:vec512 = VBCST_16 %57 |
| 86 | + %56:mrm = MOVXM_lng_cg -19424 |
| 87 | + %10:vec512 = VBCST_16 %56 |
| 88 | + %55:mrm = MOVXM_lng_cg 32704 |
| 89 | + %13:vec512 = VBCST_16 %55 |
| 90 | + %54:mrm = MOVXM_lng_cg 16256 |
| 91 | + %16:vec512 = VBCST_16 %54 |
| | + ; %21, %24, %27, %30, %19 and %34 are read by the VSEL_16 chain in bb.1. |
| 92 | + %47:mrm = MOVXM_lng_cg 46208 |
| 93 | + %21:vec512 = VBCST_16 %47 |
| 94 | + %48:mrm = MOVXM_lng_cg 46144 |
| 95 | + %24:vec512 = VBCST_16 %48 |
| 96 | + %49:mrm = MOVXM_lng_cg 46080 |
| 97 | + %27:vec512 = VBCST_16 %49 |
| 98 | + %50:mrm = MOVXM_lng_cg 45952 |
| | + ; Low half of the acc1024 value %1 is copied from the VBCST_32 result %40. |
| 99 | + undef %1.sub_512_acc_lo:acc1024 = COPY %40 |
| 100 | + %30:vec512 = VBCST_16 %50 |
| 101 | + %51:mrm = MOV_RLC_imm11_pseudo 1 |
| 102 | + %19:vec512 = VBCST_16 %36 |
| | + ; High half of %1 is copied from its own low half, completing the acc1024 value. |
| 103 | + %1.sub_512_acc_hi:acc1024 = COPY %1.sub_512_acc_lo |
| 104 | + %34:vec512 = VBCST_16 %51 |
| 105 | +
|
| | + ; bb.1: single-block loop (its only successor is itself), so every bb.0 value used below stays live across the back edge. |
| 106 | + bb.1: |
| 107 | + successors: %bb.1(0x80000000) |
| 108 | +
|
| | + ; %0 is produced from the acc1024 value %1 and is the first operand of all five VGE_bf16 below. |
| 109 | + %0:vec512 = VCONV_bf16_fp32_mv_conv_mv_x_srs_bf16 %1, implicit-def dead $srf2fflags, implicit $crf2fmask, implicit $crfpconvsat, implicit $crrnd |
| 110 | + %3:mrs16m = VGE_bf16 %0, %4, implicit $crbf8conf, implicit $crfp8conf |
| | + ; VSEL_16 chain: each result (%18, %23, %26, %29, %32) is the first operand of the next VSEL_16, ending in %33. |
| 111 | + %18:vec512 = VSEL_16 %19, %21, %3, implicit $crbf8conf, implicit $crfp8conf |
| 112 | + %6:mrs16m = VGE_bf16 %0, %7, implicit $crbf8conf, implicit $crfp8conf |
| 113 | + %9:mrs16m = VGE_bf16 %0, %10, implicit $crbf8conf, implicit $crfp8conf |
| 114 | + %12:mrs16m = VGE_bf16 %0, %13, implicit $crbf8conf, implicit $crfp8conf |
| 115 | + %15:mrs16m = VGE_bf16 %0, %16, implicit $crbf8conf, implicit $crfp8conf |
| 116 | + %23:vec512 = VSEL_16 %18, %24, %6, implicit $crbf8conf, implicit $crfp8conf |
| 117 | + %26:vec512 = VSEL_16 %23, %27, %9, implicit $crbf8conf, implicit $crfp8conf |
| 118 | + %29:vec512 = VSEL_16 %26, %30, %12, implicit $crbf8conf, implicit $crfp8conf |
| 119 | + %32:vec512 = VSEL_16 %29, %19, %15, implicit $crbf8conf, implicit $crfp8conf |
| 120 | + %33:vec512 = VSEL_16 %32, %34, %36, implicit $crbf8conf, implicit $crfp8conf |
| 121 | + %62:mrv8 = MOV_RLC_imm11_pseudo 0 |
| | + ; %61 is marked dead; this VMUL is the only use of %33 and %62. |
| 122 | + dead %61:edm = VMUL_f_vmul_bf_vmul_bf_core_X_X %19, %33, %62, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask |
| | + ; Unconditional jump back to the top of this block. |
| 123 | + PseudoJ_jump_imm %bb.1 |
| 124 | +
|
| 125 | +... |
0 commit comments