From acf57b502b6b93fd4f7c6fd8af0205c7498ba4e1 Mon Sep 17 00:00:00 2001 From: Kateryna Muts Date: Tue, 26 May 2026 04:30:11 -0600 Subject: [PATCH 1/2] [AIE][NFC] Add precommit MIR tests for combine_lag_ptr_add (AIE2PS, AIE2P) Adds baseline MIR tests for the lag-ptr-add pointer combine covering 1D, 2D, and 3D POSTINC_LOAD/STORE variants plus a coalesce test for both AIE2PS and AIE2P targets. The coalesce test has three run lines: - Stop after the custom combiner: shows the G_PTR_ADD rewrite in virtual regs. - Stop after the register coalescer: shows %pre/%post merged into the same vreg. - Stop after virtregrewriter: shows $p0 used directly, back-edge copy eliminated. --- .../global-combiners/lag-ptr-add.mir | 429 ++++++++++++++++++ .../global-combiners/lag-reg-add-coalesce.mir | 136 ++++++ .../global-combiners/lag-ptr-add.mir | 429 ++++++++++++++++++ .../global-combiners/lag-reg-add-coalesce.mir | 136 ++++++ 4 files changed, 1130 insertions(+) create mode 100644 llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-ptr-add.mir create mode 100644 llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir create mode 100644 llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-ptr-add.mir create mode 100644 llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-ptr-add.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-ptr-add.mir new file mode 100644 index 000000000000..a2a80996d384 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-ptr-add.mir @@ -0,0 +1,429 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates + +# RUN: llc -mtriple aie2p --start-before=aie-postlegalizer-generic-combiner --stop-after=aie-postlegalizer-custom-combiner -verify-machineinstrs -o - %s | FileCheck %s + +# Coverage for combine_lag_ptr_add. The rule rewrites G_PTR_ADD of a +# pre-increment pointer-register value in terms of the corresponding +# POSTINC's post-increment def. matchLagPtrAdd uses +# CombinerHelper::dominates so it works both same-BB and cross-BB; it +# also bails out when the root's def feeds a real (non-side-effecting) +# load/store so the PointerModifierCombiner cache stays consistent. + +# --------------------------------------------------------------------- +# Cross-BB, pointer path. +# +# At loop exit, %pre and %post satisfy %post = %pre + step, so +# G_PTR_ADD %pre, C == COPY %post when C == step +# G_PTR_ADD %pre, C == G_PTR_ADD %post, C-step when C != step +# --------------------------------------------------------------------- + +# 1D POSTINC_LOAD, C == step (32 == 32) -> rewrite to COPY %post. 
+--- +name: ptr-postinc-load-1d-c-eq-step +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ptr-postinc-load-1d-c-eq-step + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-NEXT: %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre, [[C2]](s20) :: (load (s32)) + ; CHECK-NEXT: %inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-NEXT: %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), [[C]] + ; CHECK-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) + bb.0: + liveins: $p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT i32 -1 + %4:_(s20) = G_CONSTANT i20 32 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, %inext(s32), %bb.1 + %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre(p0), %4(s20) :: (load (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %step:_(s20) = G_CONSTANT i20 32 + %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + PseudoRET implicit $lr, implicit %adjusted(p0) +... 
+ +# 1D POSTINC_LOAD, C > step (96 > 32) -> rewrite to G_PTR_ADD %post, 64. +--- +name: ptr-postinc-load-1d-c-gt-step +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ptr-postinc-load-1d-c-gt-step + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-NEXT: %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre, [[C2]](s20) :: (load (s32)) + ; CHECK-NEXT: %inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-NEXT: %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), [[C]] + ; CHECK-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %offset:_(s20) = G_CONSTANT i20 96 + ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %offset(s20) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) + bb.0: + liveins: $p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT i32 -1 + %4:_(s20) = G_CONSTANT i20 32 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, %inext(s32), %bb.1 + %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre(p0), %4(s20) :: (load (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %offset:_(s20) = G_CONSTANT i20 96 + %adjusted:_(p0) = G_PTR_ADD %pre, 
%offset(s20) + PseudoRET implicit $lr, implicit %adjusted(p0) +... + +# 1D POSTINC_STORE (post-ptr at op 0): C == step -> COPY %post. +--- +name: ptr-postinc-store-1d-c-eq-step +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ptr-postinc-store-1d-c-eq-step + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-NEXT: %post:_(p0) = G_AIE_POSTINC_STORE [[COPY1]](s32), %pre, [[C2]](s20) :: (store (s32)) + ; CHECK-NEXT: %inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-NEXT: %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), [[C]] + ; CHECK-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) + bb.0: + liveins: $p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT i32 -1 + %4:_(s20) = G_CONSTANT i20 32 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, %inext(s32), %bb.1 + %post:_(p0) = G_AIE_POSTINC_STORE %1(s32), %pre(p0), %4(s20) :: (store (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %step:_(s20) 
= G_CONSTANT i20 32 + %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + PseudoRET implicit $lr, implicit %adjusted(p0) +... + +# 2D POSTINC_LOAD (PostPtrDef=1, PrePtrUse=3, ModifierUse=4), +# C == step (16 == 16) -> COPY %post. +--- +name: ptr-postinc-load-2d-c-eq-step +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ptr-postinc-load-2d-c-eq-step + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-NEXT: %ctr:_(s20) = G_PHI [[C2]](s20), %bb.0, %ctrn(s20), %bb.1 + ; CHECK-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-NEXT: %val:_(s32), %post:_(p0), %ctrn:_(s20) = G_AIE_POSTINC_2D_LOAD %pre, [[C2]], [[C2]], [[C2]], %ctr :: (load (s32)) + ; CHECK-NEXT: %inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-NEXT: %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), [[C]] + ; CHECK-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 16 + ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) + bb.0: + liveins: $p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT i32 -1 + %4:_(s20) = G_CONSTANT i20 16 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %ctr:_(s20) = G_PHI %4(s20), %bb.0, %ctrn(s20), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, 
%inext(s32), %bb.1 + %val:_(s32), %post:_(p0), %ctrn:_(s20) = G_AIE_POSTINC_2D_LOAD %pre(p0), %4(s20), %4(s20), %4(s20), %ctr(s20) :: (load (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %step:_(s20) = G_CONSTANT i20 16 + %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + PseudoRET implicit $lr, implicit %adjusted(p0) +... + +# 2D POSTINC_STORE (PostPtrDef=0, PrePtrUse=3, ModifierUse=4), +# C == step (16 == 16) -> COPY %post. +--- +name: ptr-postinc-store-2d-c-eq-step +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ptr-postinc-store-2d-c-eq-step + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-NEXT: %ctr:_(s20) = G_PHI [[C2]](s20), %bb.0, %ctrn(s20), %bb.1 + ; CHECK-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-NEXT: %post:_(p0), %ctrn:_(s20) = G_AIE_POSTINC_2D_STORE [[COPY1]](s32), %pre, [[C2]], [[C2]], [[C2]], %ctr :: (store (s32)) + ; CHECK-NEXT: %inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-NEXT: %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), [[C]] + ; CHECK-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 16 + ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) + bb.0: + liveins: 
$p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT i32 -1 + %4:_(s20) = G_CONSTANT i20 16 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %ctr:_(s20) = G_PHI %4(s20), %bb.0, %ctrn(s20), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, %inext(s32), %bb.1 + %post:_(p0), %ctrn:_(s20) = G_AIE_POSTINC_2D_STORE %1(s32), %pre(p0), %4(s20), %4(s20), %4(s20), %ctr(s20) :: (store (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %step:_(s20) = G_CONSTANT i20 16 + %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + PseudoRET implicit $lr, implicit %adjusted(p0) +... + +# 3D POSTINC_LOAD (PostPtrDef=1, PrePtrUse=4, ModifierUse=5), +# C == step (16 == 16) -> COPY %post. +--- +name: ptr-postinc-load-3d-c-eq-step +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ptr-postinc-load-3d-c-eq-step + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-NEXT: %ctr1:_(s20) = G_PHI [[C2]](s20), %bb.0, %ctr1n(s20), %bb.1 + ; CHECK-NEXT: %ctr2:_(s20) = G_PHI [[C2]](s20), %bb.0, %ctr2n(s20), %bb.1 + ; CHECK-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-NEXT: %val:_(s32), %post:_(p0), %ctr1n:_(s20), %ctr2n:_ = G_AIE_POSTINC_3D_LOAD %pre, [[C2]], [[C2]], [[C2]], %ctr1, [[C2]], [[C2]], %ctr2 :: (load (s32)) + ; CHECK-NEXT: 
%inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-NEXT: %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), [[C]] + ; CHECK-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 16 + ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) + bb.0: + liveins: $p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT i32 -1 + %4:_(s20) = G_CONSTANT i20 16 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %ctr1:_(s20) = G_PHI %4(s20), %bb.0, %ctr1n(s20), %bb.1 + %ctr2:_(s20) = G_PHI %4(s20), %bb.0, %ctr2n(s20), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, %inext(s32), %bb.1 + %val:_(s32), %post:_(p0), %ctr1n:_(s20), %ctr2n:_(s20) = G_AIE_POSTINC_3D_LOAD %pre(p0), %4(s20), %4(s20), %4(s20), %ctr1(s20), %4(s20), %4(s20), %ctr2(s20) :: (load (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %step:_(s20) = G_CONSTANT i20 16 + %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + PseudoRET implicit $lr, implicit %adjusted(p0) +... + +# No-fire: the G_PTR_ADD's result feeds a G_LOAD — bail out so the +# PointerModifier combiner can handle it instead. 
+--- +name: ptr-postinc-load-no-fire-feeds-load +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ptr-postinc-load-no-fire-feeds-load + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-NEXT: %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre, [[C2]](s20) :: (load (s32)) + ; CHECK-NEXT: %inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-NEXT: %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), [[C]] + ; CHECK-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: %loaded:_(s32) = G_AIE_OFFSET_LOAD %pre(p0), %step(s20) :: (load (s32)) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit %loaded(s32) + bb.0: + liveins: $p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT i32 -1 + %4:_(s20) = G_CONSTANT i20 32 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, %inext(s32), %bb.1 + %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre(p0), %4(s20) :: (load (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %step:_(s20) = G_CONSTANT i20 32 + %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + %loaded:_(s32) = G_LOAD 
%adjusted(p0) :: (load (s32)) + PseudoRET implicit $lr, implicit %loaded(s32) +... diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir new file mode 100644 index 000000000000..c0ca9e52cbed --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir @@ -0,0 +1,136 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates + +# RUN: llc --mtriple=aie2p \ +# RUN: --start-before=aie-postlegalizer-generic-combiner \ +# RUN: --stop-after=aie-postlegalizer-custom-combiner \ +# RUN: --aie-global-ptr-mod-opt=true \ +# RUN: -o - %s | FileCheck %s --check-prefix=CHECK-COMBINER +# RUN: llc --mtriple=aie2p -verify-machineinstrs \ +# RUN: --start-before=aie-postlegalizer-generic-combiner \ +# RUN: --stop-after=register-coalescer \ +# RUN: --aie-global-ptr-mod-opt=true \ +# RUN: -o - %s | FileCheck %s --check-prefix=CHECK-COALESCER +# RUN: llc --mtriple=aie2p -verify-machineinstrs \ +# RUN: --start-before=aie-postlegalizer-generic-combiner \ +# RUN: --stop-after=virtregrewriter \ +# RUN: --aie-global-ptr-mod-opt=true \ +# RUN: -o - %s | FileCheck %s + +# Downstream-effect coverage for combine_lag_ptr_add. +# +# lag-ptr-add.mir pins the IR-level rewrite (G_PTR_ADD %pre, C => COPY %post +# when C == step). This file pins the resulting consequence: with the rewrite +# in place, the lag-PHI %pre and the in-loop POSTINC's %post become the same +# value, so register coalescing assigns them the same physical register and +# the back-edge COPY between two p-class regs disappears. 
+--- +name: ptr-postinc-coalesce +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-COMBINER-LABEL: name: ptr-postinc-coalesce + ; CHECK-COMBINER: bb.0: + ; CHECK-COMBINER-NEXT: successors: %bb.1(0x80000000) + ; CHECK-COMBINER-NEXT: liveins: $p0, $r0 + ; CHECK-COMBINER-NEXT: {{ $}} + ; CHECK-COMBINER-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-COMBINER-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-COMBINER-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-COMBINER-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-COMBINER-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-COMBINER-NEXT: {{ $}} + ; CHECK-COMBINER-NEXT: bb.1: + ; CHECK-COMBINER-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-COMBINER-NEXT: {{ $}} + ; CHECK-COMBINER-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-COMBINER-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-COMBINER-NEXT: %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre, [[C2]](s20) :: (load (s32)) + ; CHECK-COMBINER-NEXT: %inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-COMBINER-NEXT: %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), [[C]] + ; CHECK-COMBINER-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-COMBINER-NEXT: G_BR %bb.2 + ; CHECK-COMBINER-NEXT: {{ $}} + ; CHECK-COMBINER-NEXT: bb.2: + ; CHECK-COMBINER-NEXT: %step:_(s20) = G_CONSTANT i20 32 + ; CHECK-COMBINER-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-COMBINER-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) + ; + ; CHECK-COALESCER-LABEL: name: ptr-postinc-coalesce + ; CHECK-COALESCER: bb.0: + ; CHECK-COALESCER-NEXT: successors: %bb.1(0x80000000) + ; CHECK-COALESCER-NEXT: liveins: $p0, $r0 + ; CHECK-COALESCER-NEXT: {{ $}} + ; CHECK-COALESCER-NEXT: [[COPY:%[0-9]+]]:ep_as_32bit = COPY $p0 + ; CHECK-COALESCER-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0 + ; CHECK-COALESCER-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 0 + ; 
CHECK-COALESCER-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 32 + ; CHECK-COALESCER-NEXT: {{ $}} + ; CHECK-COALESCER-NEXT: bb.1: + ; CHECK-COALESCER-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-COALESCER-NEXT: {{ $}} + ; CHECK-COALESCER-NEXT: %adjusted:ep_as_32bit = COPY [[COPY]] + ; CHECK-COALESCER-NEXT: dead %val:er, [[COPY]]:ep_as_32bit = LDA_dms_lda_pstm_nrm [[COPY]], [[MOV_PD_imm11_pseudo]] :: (load (s32)) + ; CHECK-COALESCER-NEXT: [[COPY1:%[0-9]+]]:er = ADD_add_r_ri [[COPY1]], -1, implicit-def dead $srcarry + ; CHECK-COALESCER-NEXT: %cond:er = LT [[MOV_RLC_imm11_pseudo]], [[COPY1]] + ; CHECK-COALESCER-NEXT: PseudoJNZ %cond, %bb.1 + ; CHECK-COALESCER-NEXT: PseudoJ_jump_imm %bb.2 + ; CHECK-COALESCER-NEXT: {{ $}} + ; CHECK-COALESCER-NEXT: bb.2: + ; CHECK-COALESCER-NEXT: %step:em = MOV_PD_imm11_pseudo 32 + ; CHECK-COALESCER-NEXT: %adjusted:ep_as_32bit = PADD_mod_pseudo %adjusted, %step + ; CHECK-COALESCER-NEXT: PseudoRET implicit $lr, implicit %adjusted + ; + ; CHECK-LABEL: name: ptr-postinc-coalesce + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $r1 = MOV_RLC_imm11_pseudo 0 + ; CHECK-NEXT: renamable $m0 = MOV_PD_imm11_pseudo 32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $m0, $p0, $r0, $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $p1 = COPY renamable $p0 + ; CHECK-NEXT: dead renamable $r2, renamable $p0 = LDA_dms_lda_pstm_nrm killed renamable $p0, renamable $m0 :: (load (s32)) + ; CHECK-NEXT: renamable $r0 = ADD_add_r_ri killed renamable $r0, -1, implicit-def dead $srcarry + ; CHECK-NEXT: renamable $r2 = LT renamable $r1, renamable $r0 + ; CHECK-NEXT: PseudoJNZ killed renamable $r2, %bb.1 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: liveins: $p1 + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: renamable $m0 = MOV_PD_imm11_pseudo 32 + ; CHECK-NEXT: renamable $p1 = PADD_mod_pseudo killed renamable $p1, killed renamable $m0 + ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p1 + bb.0: + liveins: $p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT i32 -1 + %4:_(s20) = G_CONSTANT i20 32 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, %inext(s32), %bb.1 + %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre(p0), %4(s20) :: (load (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %step:_(s20) = G_CONSTANT i20 32 + %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + PseudoRET implicit $lr, implicit %adjusted(p0) +... diff --git a/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-ptr-add.mir b/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-ptr-add.mir new file mode 100644 index 000000000000..8e00a80873b1 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-ptr-add.mir @@ -0,0 +1,429 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates + +# RUN: llc -mtriple aie2ps --start-before=aie-postlegalizer-generic-combiner --stop-after=aie-postlegalizer-custom-combiner -verify-machineinstrs -o - %s | FileCheck %s + +# Coverage for combine_lag_ptr_add. The rule rewrites G_PTR_ADD of a +# pre-increment pointer-register value in terms of the corresponding +# POSTINC's post-increment def. 
matchLagPtrAdd uses +# CombinerHelper::dominates so it works both same-BB and cross-BB; it +# also bails out when the root's def feeds a real (non-side-effecting) +# load/store so the PointerModifierCombiner cache stays consistent. + +# --------------------------------------------------------------------- +# Cross-BB, pointer path. +# +# At loop exit, %pre and %post satisfy %post = %pre + step, so +# G_PTR_ADD %pre, C == COPY %post when C == step +# G_PTR_ADD %pre, C == G_PTR_ADD %post, C-step when C != step +# --------------------------------------------------------------------- + +# 1D POSTINC_LOAD, C == step (32 == 32) -> rewrite to COPY %post. +--- +name: ptr-postinc-load-1d-c-eq-step +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ptr-postinc-load-1d-c-eq-step + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-NEXT: %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre, [[C2]](s20) :: (load (s32)) + ; CHECK-NEXT: %inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-NEXT: %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), [[C]] + ; CHECK-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) + 
bb.0: + liveins: $p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT i32 -1 + %4:_(s20) = G_CONSTANT i20 32 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, %inext(s32), %bb.1 + %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre(p0), %4(s20) :: (load (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %step:_(s20) = G_CONSTANT i20 32 + %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + PseudoRET implicit $lr, implicit %adjusted(p0) +... + +# 1D POSTINC_LOAD, C > step (96 > 32) -> rewrite to G_PTR_ADD %post, 64. +--- +name: ptr-postinc-load-1d-c-gt-step +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ptr-postinc-load-1d-c-gt-step + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-NEXT: %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre, [[C2]](s20) :: (load (s32)) + ; CHECK-NEXT: %inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-NEXT: %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), [[C]] + ; CHECK-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %offset:_(s20) = G_CONSTANT i20 96 + ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %offset(s20) + 
; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) + bb.0: + liveins: $p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT i32 -1 + %4:_(s20) = G_CONSTANT i20 32 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, %inext(s32), %bb.1 + %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre(p0), %4(s20) :: (load (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %offset:_(s20) = G_CONSTANT i20 96 + %adjusted:_(p0) = G_PTR_ADD %pre, %offset(s20) + PseudoRET implicit $lr, implicit %adjusted(p0) +... + +# 1D POSTINC_STORE (post-ptr at op 0): C == step -> COPY %post. +--- +name: ptr-postinc-store-1d-c-eq-step +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ptr-postinc-store-1d-c-eq-step + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-NEXT: %post:_(p0) = G_AIE_POSTINC_STORE [[COPY1]](s32), %pre, [[C2]](s20) :: (store (s32)) + ; CHECK-NEXT: %inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-NEXT: %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), [[C]] + ; CHECK-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 32 + 
; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) + bb.0: + liveins: $p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT i32 -1 + %4:_(s20) = G_CONSTANT i20 32 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, %inext(s32), %bb.1 + %post:_(p0) = G_AIE_POSTINC_STORE %1(s32), %pre(p0), %4(s20) :: (store (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %step:_(s20) = G_CONSTANT i20 32 + %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + PseudoRET implicit $lr, implicit %adjusted(p0) +... + +# 2D POSTINC_LOAD (PostPtrDef=1, PrePtrUse=3, ModifierUse=4), +# C == step (16 == 16) -> COPY %post. +--- +name: ptr-postinc-load-2d-c-eq-step +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ptr-postinc-load-2d-c-eq-step + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-NEXT: %ctr:_(s20) = G_PHI [[C2]](s20), %bb.0, %ctrn(s20), %bb.1 + ; CHECK-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-NEXT: %val:_(s32), %post:_(p0), %ctrn:_(s20) = G_AIE_POSTINC_2D_LOAD %pre, [[C2]], [[C2]], [[C2]], %ctr :: (load (s32)) + ; CHECK-NEXT: %inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-NEXT: %cond:_(s32) = G_ICMP 
intpred(sgt), %inext(s32), [[C]] + ; CHECK-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 16 + ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) + bb.0: + liveins: $p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT i32 -1 + %4:_(s20) = G_CONSTANT i20 16 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %ctr:_(s20) = G_PHI %4(s20), %bb.0, %ctrn(s20), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, %inext(s32), %bb.1 + %val:_(s32), %post:_(p0), %ctrn:_(s20) = G_AIE_POSTINC_2D_LOAD %pre(p0), %4(s20), %4(s20), %4(s20), %ctr(s20) :: (load (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %step:_(s20) = G_CONSTANT i20 16 + %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + PseudoRET implicit $lr, implicit %adjusted(p0) +... + +# 2D POSTINC_STORE (PostPtrDef=0, PrePtrUse=3, ModifierUse=4), +# C == step (16 == 16) -> COPY %post. 
+--- +name: ptr-postinc-store-2d-c-eq-step +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ptr-postinc-store-2d-c-eq-step + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-NEXT: %ctr:_(s20) = G_PHI [[C2]](s20), %bb.0, %ctrn(s20), %bb.1 + ; CHECK-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-NEXT: %post:_(p0), %ctrn:_(s20) = G_AIE_POSTINC_2D_STORE [[COPY1]](s32), %pre, [[C2]], [[C2]], [[C2]], %ctr :: (store (s32)) + ; CHECK-NEXT: %inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-NEXT: %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), [[C]] + ; CHECK-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 16 + ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) + bb.0: + liveins: $p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT i32 -1 + %4:_(s20) = G_CONSTANT i20 16 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %ctr:_(s20) = G_PHI %4(s20), %bb.0, %ctrn(s20), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, %inext(s32), %bb.1 + %post:_(p0), %ctrn:_(s20) = G_AIE_POSTINC_2D_STORE %1(s32), %pre(p0), %4(s20), %4(s20), %4(s20), %ctr(s20) :: (store (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), 
%inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %step:_(s20) = G_CONSTANT i20 16 + %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + PseudoRET implicit $lr, implicit %adjusted(p0) +... + +# 3D POSTINC_LOAD (PostPtrDef=1, PrePtrUse=4, ModifierUse=5), +# C == step (16 == 16) -> COPY %post. +--- +name: ptr-postinc-load-3d-c-eq-step +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ptr-postinc-load-3d-c-eq-step + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-NEXT: %ctr1:_(s20) = G_PHI [[C2]](s20), %bb.0, %ctr1n(s20), %bb.1 + ; CHECK-NEXT: %ctr2:_(s20) = G_PHI [[C2]](s20), %bb.0, %ctr2n(s20), %bb.1 + ; CHECK-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-NEXT: %val:_(s32), %post:_(p0), %ctr1n:_(s20), %ctr2n:_ = G_AIE_POSTINC_3D_LOAD %pre, [[C2]], [[C2]], [[C2]], %ctr1, [[C2]], [[C2]], %ctr2 :: (load (s32)) + ; CHECK-NEXT: %inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-NEXT: %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), [[C]] + ; CHECK-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 16 + ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) + bb.0: + liveins: $p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT 
i32 -1 + %4:_(s20) = G_CONSTANT i20 16 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %ctr1:_(s20) = G_PHI %4(s20), %bb.0, %ctr1n(s20), %bb.1 + %ctr2:_(s20) = G_PHI %4(s20), %bb.0, %ctr2n(s20), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, %inext(s32), %bb.1 + %val:_(s32), %post:_(p0), %ctr1n:_(s20), %ctr2n:_(s20) = G_AIE_POSTINC_3D_LOAD %pre(p0), %4(s20), %4(s20), %4(s20), %ctr1(s20), %4(s20), %4(s20), %ctr2(s20) :: (load (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %step:_(s20) = G_CONSTANT i20 16 + %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + PseudoRET implicit $lr, implicit %adjusted(p0) +... + +# No-fire: the G_PTR_ADD's result feeds a G_LOAD — bail out so the +# PointerModifier combiner can handle it instead. +--- +name: ptr-postinc-load-no-fire-feeds-load +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ptr-postinc-load-no-fire-feeds-load + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-NEXT: %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre, [[C2]](s20) :: (load (s32)) + ; CHECK-NEXT: %inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-NEXT: %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), [[C]] + ; CHECK-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} 
+ ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: %loaded:_(s32) = G_AIE_OFFSET_LOAD %pre(p0), %step(s20) :: (load (s32)) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit %loaded(s32) + bb.0: + liveins: $p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT i32 -1 + %4:_(s20) = G_CONSTANT i20 32 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, %inext(s32), %bb.1 + %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre(p0), %4(s20) :: (load (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %step:_(s20) = G_CONSTANT i20 32 + %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + %loaded:_(s32) = G_LOAD %adjusted(p0) :: (load (s32)) + PseudoRET implicit $lr, implicit %loaded(s32) +... diff --git a/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir b/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir new file mode 100644 index 000000000000..ac99c855f1dc --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir @@ -0,0 +1,136 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2026 Advanced Micro Devices, Inc. 
or its affiliates + +# RUN: llc --mtriple=aie2ps \ +# RUN: --start-before=aie-postlegalizer-generic-combiner \ +# RUN: --stop-after=aie-postlegalizer-custom-combiner \ +# RUN: --aie-global-ptr-mod-opt=true \ +# RUN: -o - %s | FileCheck %s --check-prefix=CHECK-COMBINER +# RUN: llc --mtriple=aie2ps -verify-machineinstrs \ +# RUN: --start-before=aie-postlegalizer-generic-combiner \ +# RUN: --stop-after=register-coalescer \ +# RUN: --aie-global-ptr-mod-opt=true \ +# RUN: -o - %s | FileCheck %s --check-prefix=CHECK-COALESCER +# RUN: llc --mtriple=aie2ps -verify-machineinstrs \ +# RUN: --start-before=aie-postlegalizer-generic-combiner \ +# RUN: --stop-after=virtregrewriter \ +# RUN: --aie-global-ptr-mod-opt=true \ +# RUN: -o - %s | FileCheck %s + +# Downstream-effect coverage for combine_lag_ptr_add. +# +# lag-ptr-add.mir pins the IR-level rewrite (G_PTR_ADD %pre, C => COPY %post +# when C == step). This file pins the resulting consequence: with the rewrite +# in place, the lag-PHI %pre and the in-loop POSTINC's %post become the same +# value, so register coalescing assigns them the same physical register and +# the back-edge COPY between two p-class regs disappears. 
+--- +name: ptr-postinc-coalesce +legalized: true +alignment: 16 +tracksRegLiveness: true +body: | + ; CHECK-COMBINER-LABEL: name: ptr-postinc-coalesce + ; CHECK-COMBINER: bb.0: + ; CHECK-COMBINER-NEXT: successors: %bb.1(0x80000000) + ; CHECK-COMBINER-NEXT: liveins: $p0, $r0 + ; CHECK-COMBINER-NEXT: {{ $}} + ; CHECK-COMBINER-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-COMBINER-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-COMBINER-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-COMBINER-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-COMBINER-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-COMBINER-NEXT: {{ $}} + ; CHECK-COMBINER-NEXT: bb.1: + ; CHECK-COMBINER-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-COMBINER-NEXT: {{ $}} + ; CHECK-COMBINER-NEXT: %pre:_(p0) = G_PHI [[COPY]](p0), %bb.0, %post(p0), %bb.1 + ; CHECK-COMBINER-NEXT: %i:_(s32) = G_PHI [[COPY1]](s32), %bb.0, %inext(s32), %bb.1 + ; CHECK-COMBINER-NEXT: %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre, [[C2]](s20) :: (load (s32)) + ; CHECK-COMBINER-NEXT: %inext:_(s32) = G_ADD %i, [[C1]] + ; CHECK-COMBINER-NEXT: %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), [[C]] + ; CHECK-COMBINER-NEXT: G_BRCOND %cond(s32), %bb.1 + ; CHECK-COMBINER-NEXT: G_BR %bb.2 + ; CHECK-COMBINER-NEXT: {{ $}} + ; CHECK-COMBINER-NEXT: bb.2: + ; CHECK-COMBINER-NEXT: %step:_(s20) = G_CONSTANT i20 32 + ; CHECK-COMBINER-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-COMBINER-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) + ; + ; CHECK-COALESCER-LABEL: name: ptr-postinc-coalesce + ; CHECK-COALESCER: bb.0: + ; CHECK-COALESCER-NEXT: successors: %bb.1(0x80000000) + ; CHECK-COALESCER-NEXT: liveins: $p0, $r0 + ; CHECK-COALESCER-NEXT: {{ $}} + ; CHECK-COALESCER-NEXT: [[COPY:%[0-9]+]]:ep_as_32bit = COPY $p0 + ; CHECK-COALESCER-NEXT: [[COPY1:%[0-9]+]]:mlockid_reg = COPY $r0 + ; CHECK-COALESCER-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:mlockid_reg = 
MOV_RLC_imm11_pseudo 0 + ; CHECK-COALESCER-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 32 + ; CHECK-COALESCER-NEXT: {{ $}} + ; CHECK-COALESCER-NEXT: bb.1: + ; CHECK-COALESCER-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-COALESCER-NEXT: {{ $}} + ; CHECK-COALESCER-NEXT: %adjusted:ep_as_32bit = COPY [[COPY]] + ; CHECK-COALESCER-NEXT: dead %val:er, [[COPY]]:ep_as_32bit = LDA_dms_lda_scalar_ld_pstm_nrm [[COPY]], [[MOV_PD_imm11_pseudo]] :: (load (s32)) + ; CHECK-COALESCER-NEXT: [[COPY1:%[0-9]+]]:mlockid_reg = ADD_add_r_ri [[COPY1]], -1, implicit-def dead $srcarry + ; CHECK-COALESCER-NEXT: %cond:mlockid_reg = LT [[MOV_RLC_imm11_pseudo]], [[COPY1]] + ; CHECK-COALESCER-NEXT: PseudoJNZ %cond, %bb.1 + ; CHECK-COALESCER-NEXT: PseudoJ_jump_imm %bb.2 + ; CHECK-COALESCER-NEXT: {{ $}} + ; CHECK-COALESCER-NEXT: bb.2: + ; CHECK-COALESCER-NEXT: %step:em = MOV_PD_imm11_pseudo 32 + ; CHECK-COALESCER-NEXT: %adjusted:ep_as_32bit = PADD_mod_pseudo %adjusted, %step + ; CHECK-COALESCER-NEXT: PseudoRET implicit $lr, implicit %adjusted + ; + ; CHECK-LABEL: name: ptr-postinc-coalesce + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $r2 = MOV_RLC_imm11_pseudo 0 + ; CHECK-NEXT: renamable $m0 = MOV_PD_imm11_pseudo 32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $m0, $p0, $r0, $r2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $p1 = COPY renamable $p0 + ; CHECK-NEXT: dead renamable $r1, renamable $p0 = LDA_dms_lda_scalar_ld_pstm_nrm killed renamable $p0, renamable $m0 :: (load (s32)) + ; CHECK-NEXT: renamable $r0 = ADD_add_r_ri killed renamable $r0, -1, implicit-def dead $srcarry + ; CHECK-NEXT: renamable $r4 = LT renamable $r2, renamable $r0 + ; CHECK-NEXT: PseudoJNZ killed renamable $r4, %bb.1 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
bb.2: + ; CHECK-NEXT: liveins: $p1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $m0 = MOV_PD_imm11_pseudo 32 + ; CHECK-NEXT: renamable $p1 = PADD_mod_pseudo killed renamable $p1, killed renamable $m0 + ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p1 + bb.0: + liveins: $p0, $r0 + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s32) = G_CONSTANT i32 -1 + %4:_(s20) = G_CONSTANT i20 32 + + bb.1: + %pre:_(p0) = G_PHI %0(p0), %bb.0, %post(p0), %bb.1 + %i:_(s32) = G_PHI %1(s32), %bb.0, %inext(s32), %bb.1 + %val:_(s32), %post:_(p0) = G_AIE_POSTINC_LOAD %pre(p0), %4(s20) :: (load (s32)) + %inext:_(s32) = G_ADD %i, %3 + %cond:_(s32) = G_ICMP intpred(sgt), %inext(s32), %2 + G_BRCOND %cond(s32), %bb.1 + G_BR %bb.2 + + bb.2: + %step:_(s20) = G_CONSTANT i20 32 + %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + PseudoRET implicit $lr, implicit %adjusted(p0) +... From 0102d4a41fc9a109a37a86eba56768f05e9dc5f6 Mon Sep 17 00:00:00 2001 From: Kateryna Muts Date: Tue, 26 May 2026 04:31:16 -0600 Subject: [PATCH 2/2] [AIE] Add combine_lag_ptr_add to AIE2PS and AIE2P MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the lag-ptr-add pointer combine for AIE2PS and AIE2P targets. The combine rewrites a G_PTR_ADD of a loop pre-increment pointer in terms of the corresponding POSTINC instruction's post-increment def, avoiding a redundant add: - G_PTR_ADD %pre, C → COPY %post (when C == step) - G_PTR_ADD %pre, C → G_PTR_ADD %post, (C - step) (otherwise) To abstract over target-specific POSTINC operand layouts (1D, 2D, 3D) a virtual getPtrPostIncOpInfo() is added to AIEBaseInstrInfo, overridden in AIE2PInstrInfo and AIE2PSInstrInfo to return the PostPtrDef, PrePtrUse, and ModifierUse operand indices for each supported opcode. 
--- llvm/lib/Target/AIE/AIEBaseInstrInfo.h | 14 ++ llvm/lib/Target/AIE/AIECombine.td | 8 ++ llvm/lib/Target/AIE/AIECombinerHelper.cpp | 121 ++++++++++++++++++ llvm/lib/Target/AIE/AIECombinerHelper.h | 20 +++ llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp | 37 ++++++ llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h | 2 + .../lib/Target/AIE/aie2ps/AIE2PSInstrInfo.cpp | 37 ++++++ llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.h | 2 + .../global-combiners/lag-ptr-add.mir | 19 +-- .../global-combiners/lag-reg-add-coalesce.mir | 15 +-- .../global-combiners/lag-ptr-add.mir | 19 +-- .../global-combiners/lag-reg-add-coalesce.mir | 15 +-- 12 files changed, 263 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Target/AIE/AIEBaseInstrInfo.h b/llvm/lib/Target/AIE/AIEBaseInstrInfo.h index bd88e7759525..7f9fbbfd53e1 100644 --- a/llvm/lib/Target/AIE/AIEBaseInstrInfo.h +++ b/llvm/lib/Target/AIE/AIEBaseInstrInfo.h @@ -63,6 +63,14 @@ struct OperandRegInfo { : Reg(R), RC(RegClass) {} }; +/// Operand indices that describe a pointer-post-increment instruction's key +/// operands. All indices are absolute (defs precede uses in MachineInstr). +struct PtrPostIncOpInfo { + int PostPtrDefIdx; ///< Operand index of the post-increment pointer def. + int PrePtrUseIdx; ///< Operand index of the pre-increment pointer use. + int ModifierUseIdx; ///< Operand index of the s20 step/modifier use. +}; + // Structure representing a schedule class variant. struct SchedVariantInfo { unsigned SchedClass; @@ -344,6 +352,12 @@ struct AIEBaseInstrInfo : public TargetInstrInfo { virtual unsigned getGenericPostIncStoreOpcode() const { llvm_unreachable("Target didn't implement getGenericPostIncStoreOpcode!"); } + /// Map a POSTINC opcode to its key operand indices, or std::nullopt if the + /// opcode is not a pointer-post-increment instruction for this target. + virtual std::optional<PtrPostIncOpInfo> + getPtrPostIncOpInfo(unsigned Opcode) const { + return std::nullopt; + } /// Return the opcode to be used for subvector extraction. 
virtual unsigned getGenericExtractSubvectorOpcode() const { llvm_unreachable( diff --git a/llvm/lib/Target/AIE/AIECombine.td b/llvm/lib/Target/AIE/AIECombine.td index 734c5da12071..854c133388ba 100644 --- a/llvm/lib/Target/AIE/AIECombine.td +++ b/llvm/lib/Target/AIE/AIECombine.td @@ -371,6 +371,13 @@ def combine_postincloadstore_ptradd_with_trunc : GICombineRule< [{ return matchPostIncLoadStorePtrAddWithTrunc(*${root}, MRI, Helper, (const AIEBaseInstrInfo &)B.getTII(), Observer, ${matchinfo}); }]), (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>; +def combine_lag_ptr_add_matchdata : GIDefMatchData<"LagPtrAddMatchInfo">; +def combine_lag_ptr_add : GICombineRule< + (defs root:$root, combine_lag_ptr_add_matchdata:$matchinfo), + (match (wip_match_opcode G_PTR_ADD):$root, + [{ return matchLagPtrAdd(*${root}, MRI, (const AIEBaseInstrInfo &)B.getTII(), Helper, ${matchinfo}); }]), + (apply [{ applyLagPtrAdd(*${root}, MRI, B, Observer, ${matchinfo}); }])>; + def combine_trunc_load : GICombineRule< (defs root:$root, build_fn_matchinfo:$matchinfo), (match (wip_match_opcode G_LOAD): $root, @@ -589,6 +596,7 @@ def aie2_postlegalizer_custom_combines : GICombineGroup<[ // Post-legalizer custom combines for AIE2P and more recent targets (AIE2PS, etc.). 
def aie2p_plus_postlegalizer_custom_shared_combines : GICombineGroup<[ + combine_lag_ptr_add, combine_extract_vector_assert_combine, combine_extract_broadcast_to_scalar, combine_vshift_chain_to_copy, diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.cpp b/llvm/lib/Target/AIE/AIECombinerHelper.cpp index 99e970746443..2a0a2232cd9b 100644 --- a/llvm/lib/Target/AIE/AIECombinerHelper.cpp +++ b/llvm/lib/Target/AIE/AIECombinerHelper.cpp @@ -1544,6 +1544,127 @@ bool llvm::matchChainedPtrAddWithNonConstOffsets(MachineInstr &MI, return true; } +// Prologue for combine_lag_ptr_add: +// * bail out when the root's def feeds a real (non-side-effecting) load/store +// * require a compile-time constant offset on operand 2 +// On success, returns {PreReg (pre-increment register, operand 1), +// C (constant offset)}; std::nullopt otherwise. +static std::optional<std::pair<Register, int64_t>> +getLagRootPreReg(const MachineInstr &MI, MachineRegisterInfo &MRI) { + Register DefReg = MI.getOperand(0).getReg(); + for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefReg)) { + if (UseMI.mayLoadOrStore() && !UseMI.hasUnmodeledSideEffects()) + return std::nullopt; + } + auto MaybeCst = + getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); + if (!MaybeCst) + return std::nullopt; + return std::make_pair(MI.getOperand(1).getReg(), + MaybeCst->Value.getSExtValue()); +} + +// POSTINC pointer-modifying AIE intrinsics take a "pre" (pre-increment) +// base pointer and a constant %step (S), and produce a "post" +// (post-increment) pointer holding pre + S. So with the dataflow: +// +// %post_ptr = G_AIE_..._POSTINC_... %pre_ptr, ..., %step ; %post = %pre + S +// ... ; (dominates the +// G_PTR_ADD) +// %add = G_PTR_ADD %pre_ptr, C +// +// the G_PTR_ADD is "lagging": it recomputes %pre + C even though %post +// = %pre + S is already available. Substituting: +// +// %pre + C = (%post - S) + C = %post + (C - S) +// +// lets the G_PTR_ADD reuse %post and free %pre. 
When C == S the +// remaining offset is zero, so %post IS the result and we emit a COPY +// (later folded by register coalescing, which fuses the live ranges +// of %pre and %post): +// +// %post_ptr = G_AIE_..._POSTINC_... %pre_ptr, ..., %step +// ... +// %add = COPY %post_ptr +// +// Otherwise emit the adjusted G_PTR_ADD (requires (C - S) to fit in +// the original offset type, else the rebuilt G_CONSTANT would wrap): +// +// %post_ptr = G_AIE_..._POSTINC_... %pre_ptr, ..., %step +// ... +// %add = G_PTR_ADD %post_ptr, (C - S) +// +// Bails out when the G_PTR_ADD's def is consumed by a real (non-side- +// effecting) load/store, since those are rewritten by the memory-op- +// rooted siblings. +bool llvm::matchLagPtrAdd(MachineInstr &MI, MachineRegisterInfo &MRI, + const AIEBaseInstrInfo &TII, CombinerHelper &Helper, + LagPtrAddMatchInfo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); + + auto Root = getLagRootPreReg(MI, MRI); + if (!Root) + return false; + auto [PreReg, C] = *Root; + + for (MachineInstr &UseMI : MRI.use_nodbg_instructions(PreReg)) { + // UseMI (the POSTINC) must come before MI (the G_PTR_ADD root) in program + // order. + if (!Helper.dominates(UseMI, MI)) + continue; + // Find a POSTINC instruction with PreReg as base pointer. + auto MaybeInfo = TII.getPtrPostIncOpInfo(UseMI.getOpcode()); + if (!MaybeInfo) + continue; + if (!UseMI.getOperand(MaybeInfo->PrePtrUseIdx).isReg() || + UseMI.getOperand(MaybeInfo->PrePtrUseIdx).getReg() != PreReg) + continue; + // Step must be a compile-time constant. + auto MaybeStep = getIConstantVRegValWithLookThrough( + UseMI.getOperand(MaybeInfo->ModifierUseIdx).getReg(), MRI); + if (!MaybeStep) + continue; + // Replacement offset NewC = C - step must fit in the original offset + // type; otherwise the G_CONSTANT built in apply silently wraps. 
+ const int64_t S = MaybeStep->Value.getSExtValue(); + LLT OffTy = MRI.getType(MI.getOperand(2).getReg()); + if (!isIntN(OffTy.getSizeInBits(), C - S)) + continue; + MatchInfo = {UseMI.getOperand(MaybeInfo->PostPtrDefIdx).getReg(), S}; + return true; + } + return false; +} + +void llvm::applyLagPtrAdd(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, GISelChangeObserver &Observer, + LagPtrAddMatchInfo &MatchInfo) { + Register DstReg = MI.getOperand(0).getReg(); + auto MaybeCst = + getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); + assert(MaybeCst && "matchLagPtrAdd should have verified constant"); + int64_t NewC = MaybeCst->Value.getSExtValue() - MatchInfo.Step; + + B.setInsertPt(*MI.getParent(), MI.getIterator()); + B.setDebugLoc(MI.getDebugLoc()); + + if (NewC == 0) { + // C == step: %post_ptr IS the result; insert a COPY that copy-propagation + // will later eliminate. + B.buildCopy(DstReg, MatchInfo.PostReg); + } else { + // C != step: adjust from %post_ptr by the remaining offset. Preserve the + // type of the original offset operand. + LLT OffTy = MRI.getType(MI.getOperand(2).getReg()); + auto NewCst = B.buildConstant(OffTy, NewC); + B.buildInstr(TargetOpcode::G_PTR_ADD, {DstReg}, + {MatchInfo.PostReg, NewCst}); + } + + Observer.erasingInstr(MI); + MI.eraseFromParent(); +} + /// Match a pattern of G_AIE_POSTINC_LOAD/STORE followed by G_PTR_ADD where both /// offsets come from G_TRUNC of s32 values. Combines them by updating the /// POSTINC to use the combined offset. 
diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.h b/llvm/lib/Target/AIE/AIECombinerHelper.h index a466a679af26..8790b945d4c6 100644 --- a/llvm/lib/Target/AIE/AIECombinerHelper.h +++ b/llvm/lib/Target/AIE/AIECombinerHelper.h @@ -495,6 +495,26 @@ bool matchPostIncLoadStorePtrAddWithTrunc(MachineInstr &MI, GISelChangeObserver &Observer, BuildFnTy &MatchInfo); +/// Match data for combine_lag_ptr_add: the post-increment pointer register +/// produced by a dominating POSTINC instruction and the step constant S it +/// adds to the pre-increment pointer. +struct LagPtrAddMatchInfo { + Register PostReg; ///< %post_ptr produced by the dominating POSTINC. + int64_t Step; ///< Step constant S added by the POSTINC (post = pre + S). +}; + +/// Eliminate lag-register copies by rewriting G_PTR_ADD uses of the +/// pre-increment pointer register in terms of the POSTINC's post-pointer def. +/// Rewrites %dst = G_PTR_ADD %pre, C +/// to %dst = COPY %post (when C == S) +/// or %dst = G_PTR_ADD %post, (C-S) (otherwise) +bool matchLagPtrAdd(MachineInstr &MI, MachineRegisterInfo &MRI, + const AIEBaseInstrInfo &TII, CombinerHelper &Helper, + LagPtrAddMatchInfo &MatchInfo); +void applyLagPtrAdd(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, GISelChangeObserver &Observer, + LagPtrAddMatchInfo &MatchInfo); + } // namespace llvm #endif diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp index bcab057e2ade..bd8563686f0d 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp @@ -1895,6 +1895,43 @@ unsigned AIE2PInstrInfo::getGenericPostIncStoreOpcode() const { return AIE2P::G_AIE_POSTINC_STORE; } +std::optional<PtrPostIncOpInfo> +AIE2PInstrInfo::getPtrPostIncOpInfo(unsigned Opcode) const { + switch (Opcode) { + // %val(0), %ptrDst(1) = G_AIE_POSTINC_LOAD/ZEXTLOAD/SEXTLOAD %ptrSrc(2), + // %modifier(3) + case AIE2P::G_AIE_POSTINC_LOAD: + case AIE2P::G_AIE_POSTINC_ZEXTLOAD: + case 
AIE2P::G_AIE_POSTINC_SEXTLOAD: + return PtrPostIncOpInfo{1, 2, 3}; + // %ptrDst(0) = G_AIE_POSTINC_STORE %val(1), %ptrSrc(2), %modifier(3) + case AIE2P::G_AIE_POSTINC_STORE: + return PtrPostIncOpInfo{0, 2, 3}; + // %val(0), %ptrDst(1), %cntDst(2) = G_AIE_POSTINC_2D_LOAD* %ptrSrc(3), + // %modifier(4), ... + case AIE2P::G_AIE_POSTINC_2D_LOAD: + case AIE2P::G_AIE_POSTINC_2D_ZEXTLOAD: + case AIE2P::G_AIE_POSTINC_2D_SEXTLOAD: + return PtrPostIncOpInfo{1, 3, 4}; + // %ptrDst(0), %cntDst(1) = G_AIE_POSTINC_2D_STORE %val(2), %ptrSrc(3), + // %modifier(4), ... + case AIE2P::G_AIE_POSTINC_2D_STORE: + return PtrPostIncOpInfo{0, 3, 4}; + // %val(0), %ptrDst(1), %cnt1Dst(2), %cnt2Dst(3) = G_AIE_POSTINC_3D_LOAD* + // %ptrSrc(4), %modifier(5), ... + case AIE2P::G_AIE_POSTINC_3D_LOAD: + case AIE2P::G_AIE_POSTINC_3D_ZEXTLOAD: + case AIE2P::G_AIE_POSTINC_3D_SEXTLOAD: + return PtrPostIncOpInfo{1, 4, 5}; + // %ptrDst(0), %cnt1Dst(1), %cnt2Dst(2) = G_AIE_POSTINC_3D_STORE %val(3), + // %ptrSrc(4), %modifier(5), ... 
+ case AIE2P::G_AIE_POSTINC_3D_STORE: + return PtrPostIncOpInfo{0, 4, 5}; + default: + return std::nullopt; + } +} + unsigned AIE2PInstrInfo::getGenericVSelOpcode() const { return AIE2P::G_AIE_VSEL; } diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h index 136a1707912c..5cb685d5cc86 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h @@ -61,6 +61,8 @@ class AIE2PInstrInfo : public AIE2PGenInstrInfo { unsigned getGenericBroadcastVectorOpcode() const override; unsigned getGenericPostIncLoadOpcode() const override; unsigned getGenericPostIncStoreOpcode() const override; + std::optional<PtrPostIncOpInfo> + getPtrPostIncOpInfo(unsigned Opcode) const override; unsigned getGenericVSelOpcode() const override; unsigned getGenericVShiftOpcode() const override; unsigned getGenericShuffleVectorOpcode() const override; diff --git a/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.cpp b/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.cpp index 39888ee0c1a2..6768324e6bd3 100644 --- a/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.cpp +++ b/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.cpp @@ -1518,6 +1518,43 @@ unsigned AIE2PSInstrInfo::getGenericPostIncStoreOpcode() const { return AIE2PS::G_AIE_POSTINC_STORE; } +std::optional<PtrPostIncOpInfo> +AIE2PSInstrInfo::getPtrPostIncOpInfo(unsigned Opcode) const { + switch (Opcode) { + // %val(0), %ptrDst(1) = G_AIE_POSTINC_LOAD/ZEXTLOAD/SEXTLOAD %ptrSrc(2), + // %modifier(3) + case AIE2PS::G_AIE_POSTINC_LOAD: + case AIE2PS::G_AIE_POSTINC_ZEXTLOAD: + case AIE2PS::G_AIE_POSTINC_SEXTLOAD: + return PtrPostIncOpInfo{1, 2, 3}; + // %ptrDst(0) = G_AIE_POSTINC_STORE %val(1), %ptrSrc(2), %modifier(3) + case AIE2PS::G_AIE_POSTINC_STORE: + return PtrPostIncOpInfo{0, 2, 3}; + // %val(0), %ptrDst(1), %cntDst(2) = G_AIE_POSTINC_2D_LOAD* %ptrSrc(3), + // %modifier(4), ... 
+ case AIE2PS::G_AIE_POSTINC_2D_LOAD: + case AIE2PS::G_AIE_POSTINC_2D_ZEXTLOAD: + case AIE2PS::G_AIE_POSTINC_2D_SEXTLOAD: + return PtrPostIncOpInfo{1, 3, 4}; + // %ptrDst(0), %cntDst(1) = G_AIE_POSTINC_2D_STORE %val(2), %ptrSrc(3), + // %modifier(4), ... + case AIE2PS::G_AIE_POSTINC_2D_STORE: + return PtrPostIncOpInfo{0, 3, 4}; + // %val(0), %ptrDst(1), %cnt1Dst(2), %cnt2Dst(3) = G_AIE_POSTINC_3D_LOAD* + // %ptrSrc(4), %modifier(5), ... + case AIE2PS::G_AIE_POSTINC_3D_LOAD: + case AIE2PS::G_AIE_POSTINC_3D_ZEXTLOAD: + case AIE2PS::G_AIE_POSTINC_3D_SEXTLOAD: + return PtrPostIncOpInfo{1, 4, 5}; + // %ptrDst(0), %cnt1Dst(1), %cnt2Dst(2) = G_AIE_POSTINC_3D_STORE %val(3), + // %ptrSrc(4), %modifier(5), ... + case AIE2PS::G_AIE_POSTINC_3D_STORE: + return PtrPostIncOpInfo{0, 4, 5}; + default: + return std::nullopt; + } +} + unsigned AIE2PSInstrInfo::getGenericShuffleVectorOpcode() const { return AIE2PS::G_AIE_SHUFFLE_VECTOR; } diff --git a/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.h b/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.h index 1412848e9409..a14ed9dfd9d3 100644 --- a/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.h +++ b/llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.h @@ -145,6 +145,8 @@ class AIE2PSInstrInfo : public AIE2PSGenInstrInfo { unsigned getGenericShuffleVectorOpcode() const override; unsigned getGenericPostIncLoadOpcode() const override; unsigned getGenericPostIncStoreOpcode() const override; + std::optional<PtrPostIncOpInfo> + getPtrPostIncOpInfo(unsigned Opcode) const override; unsigned getGenericExtractSubvectorOpcode() const override; unsigned getGenericIntegerComparisonOpcode() const override; diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-ptr-add.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-ptr-add.mir index a2a80996d384..4b73aeea4fce 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-ptr-add.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-ptr-add.mir @@ -53,8 +53,7 @@ body: | ; 
CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 32 - ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: %adjusted:_(p0) = COPY %post(p0) ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) bb.0: liveins: $p0, $r0 @@ -109,8 +108,8 @@ body: | ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: %offset:_(s20) = G_CONSTANT i20 96 - ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %offset(s20) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %post, [[C3]](s20) ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) bb.0: liveins: $p0, $r0 @@ -165,8 +164,7 @@ body: | ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 32 - ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: %adjusted:_(p0) = COPY %post(p0) ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) bb.0: liveins: $p0, $r0 @@ -223,8 +221,7 @@ body: | ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 16 - ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: %adjusted:_(p0) = COPY %post(p0) ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) bb.0: liveins: $p0, $r0 @@ -282,8 +279,7 @@ body: | ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 16 - ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: %adjusted:_(p0) = COPY %post(p0) ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) bb.0: liveins: $p0, $r0 @@ -342,8 +338,7 @@ body: | ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 16 - ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: %adjusted:_(p0) = COPY %post(p0) ; 
CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) bb.0: liveins: $p0, $r0 diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir index c0ca9e52cbed..85e2fe96b981 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir @@ -58,8 +58,7 @@ body: | ; CHECK-COMBINER-NEXT: G_BR %bb.2 ; CHECK-COMBINER-NEXT: {{ $}} ; CHECK-COMBINER-NEXT: bb.2: - ; CHECK-COMBINER-NEXT: %step:_(s20) = G_CONSTANT i20 32 - ; CHECK-COMBINER-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-COMBINER-NEXT: %adjusted:_(p0) = COPY %post(p0) ; CHECK-COMBINER-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) ; ; CHECK-COALESCER-LABEL: name: ptr-postinc-coalesce @@ -75,7 +74,6 @@ body: | ; CHECK-COALESCER-NEXT: bb.1: ; CHECK-COALESCER-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-COALESCER-NEXT: {{ $}} - ; CHECK-COALESCER-NEXT: %adjusted:ep_as_32bit = COPY [[COPY]] ; CHECK-COALESCER-NEXT: dead %val:er, [[COPY]]:ep_as_32bit = LDA_dms_lda_pstm_nrm [[COPY]], [[MOV_PD_imm11_pseudo]] :: (load (s32)) ; CHECK-COALESCER-NEXT: [[COPY1:%[0-9]+]]:er = ADD_add_r_ri [[COPY1]], -1, implicit-def dead $srcarry ; CHECK-COALESCER-NEXT: %cond:er = LT [[MOV_RLC_imm11_pseudo]], [[COPY1]] @@ -83,9 +81,7 @@ body: | ; CHECK-COALESCER-NEXT: PseudoJ_jump_imm %bb.2 ; CHECK-COALESCER-NEXT: {{ $}} ; CHECK-COALESCER-NEXT: bb.2: - ; CHECK-COALESCER-NEXT: %step:em = MOV_PD_imm11_pseudo 32 - ; CHECK-COALESCER-NEXT: %adjusted:ep_as_32bit = PADD_mod_pseudo %adjusted, %step - ; CHECK-COALESCER-NEXT: PseudoRET implicit $lr, implicit %adjusted + ; CHECK-COALESCER-NEXT: PseudoRET implicit $lr, implicit [[COPY]] ; ; CHECK-LABEL: name: ptr-postinc-coalesce ; CHECK: bb.0: @@ -99,7 +95,6 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; 
CHECK-NEXT: liveins: $m0, $p0, $r0, $r1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $p1 = COPY renamable $p0 ; CHECK-NEXT: dead renamable $r2, renamable $p0 = LDA_dms_lda_pstm_nrm killed renamable $p0, renamable $m0 :: (load (s32)) ; CHECK-NEXT: renamable $r0 = ADD_add_r_ri killed renamable $r0, -1, implicit-def dead $srcarry ; CHECK-NEXT: renamable $r2 = LT renamable $r1, renamable $r0 @@ -107,11 +102,9 @@ body: | ; CHECK-NEXT: PseudoJ_jump_imm %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: liveins: $p1 + ; CHECK-NEXT: liveins: $p0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $m0 = MOV_PD_imm11_pseudo 32 - ; CHECK-NEXT: renamable $p1 = PADD_mod_pseudo killed renamable $p1, killed renamable $m0 - ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p1 + ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0 bb.0: liveins: $p0, $r0 %0:_(p0) = COPY $p0 diff --git a/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-ptr-add.mir b/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-ptr-add.mir index 8e00a80873b1..34e13a73bd1c 100644 --- a/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-ptr-add.mir +++ b/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-ptr-add.mir @@ -53,8 +53,7 @@ body: | ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 32 - ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: %adjusted:_(p0) = COPY %post(p0) ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) bb.0: liveins: $p0, $r0 @@ -109,8 +108,8 @@ body: | ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: %offset:_(s20) = G_CONSTANT i20 96 - ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %offset(s20) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %post, [[C3]](s20) ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) bb.0: 
liveins: $p0, $r0 @@ -165,8 +164,7 @@ body: | ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 32 - ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: %adjusted:_(p0) = COPY %post(p0) ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) bb.0: liveins: $p0, $r0 @@ -223,8 +221,7 @@ body: | ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 16 - ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: %adjusted:_(p0) = COPY %post(p0) ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) bb.0: liveins: $p0, $r0 @@ -282,8 +279,7 @@ body: | ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 16 - ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: %adjusted:_(p0) = COPY %post(p0) ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) bb.0: liveins: $p0, $r0 @@ -342,8 +338,7 @@ body: | ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: %step:_(s20) = G_CONSTANT i20 16 - ; CHECK-NEXT: %adjusted:_(p0) = G_PTR_ADD %pre, %step(s20) + ; CHECK-NEXT: %adjusted:_(p0) = COPY %post(p0) ; CHECK-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) bb.0: liveins: $p0, $r0 diff --git a/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir b/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir index ac99c855f1dc..a327972a1405 100644 --- a/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir +++ b/llvm/test/CodeGen/AIE/aie2ps/GlobalIsel/global-combiners/lag-reg-add-coalesce.mir @@ -58,8 +58,7 @@ body: | ; CHECK-COMBINER-NEXT: G_BR %bb.2 ; CHECK-COMBINER-NEXT: {{ $}} ; CHECK-COMBINER-NEXT: bb.2: - ; CHECK-COMBINER-NEXT: %step:_(s20) = G_CONSTANT i20 32 - ; CHECK-COMBINER-NEXT: %adjusted:_(p0) = G_PTR_ADD 
%pre, %step(s20) + ; CHECK-COMBINER-NEXT: %adjusted:_(p0) = COPY %post(p0) ; CHECK-COMBINER-NEXT: PseudoRET implicit $lr, implicit %adjusted(p0) ; ; CHECK-COALESCER-LABEL: name: ptr-postinc-coalesce @@ -75,7 +74,6 @@ body: | ; CHECK-COALESCER-NEXT: bb.1: ; CHECK-COALESCER-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-COALESCER-NEXT: {{ $}} - ; CHECK-COALESCER-NEXT: %adjusted:ep_as_32bit = COPY [[COPY]] ; CHECK-COALESCER-NEXT: dead %val:er, [[COPY]]:ep_as_32bit = LDA_dms_lda_scalar_ld_pstm_nrm [[COPY]], [[MOV_PD_imm11_pseudo]] :: (load (s32)) ; CHECK-COALESCER-NEXT: [[COPY1:%[0-9]+]]:mlockid_reg = ADD_add_r_ri [[COPY1]], -1, implicit-def dead $srcarry ; CHECK-COALESCER-NEXT: %cond:mlockid_reg = LT [[MOV_RLC_imm11_pseudo]], [[COPY1]] @@ -83,9 +81,7 @@ body: | ; CHECK-COALESCER-NEXT: PseudoJ_jump_imm %bb.2 ; CHECK-COALESCER-NEXT: {{ $}} ; CHECK-COALESCER-NEXT: bb.2: - ; CHECK-COALESCER-NEXT: %step:em = MOV_PD_imm11_pseudo 32 - ; CHECK-COALESCER-NEXT: %adjusted:ep_as_32bit = PADD_mod_pseudo %adjusted, %step - ; CHECK-COALESCER-NEXT: PseudoRET implicit $lr, implicit %adjusted + ; CHECK-COALESCER-NEXT: PseudoRET implicit $lr, implicit [[COPY]] ; ; CHECK-LABEL: name: ptr-postinc-coalesce ; CHECK: bb.0: @@ -99,7 +95,6 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $m0, $p0, $r0, $r2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $p1 = COPY renamable $p0 ; CHECK-NEXT: dead renamable $r1, renamable $p0 = LDA_dms_lda_scalar_ld_pstm_nrm killed renamable $p0, renamable $m0 :: (load (s32)) ; CHECK-NEXT: renamable $r0 = ADD_add_r_ri killed renamable $r0, -1, implicit-def dead $srcarry ; CHECK-NEXT: renamable $r4 = LT renamable $r2, renamable $r0 @@ -107,11 +102,9 @@ body: | ; CHECK-NEXT: PseudoJ_jump_imm %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: liveins: $p1 + ; CHECK-NEXT: liveins: $p0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $m0 = MOV_PD_imm11_pseudo 32 - ; CHECK-NEXT: renamable $p1 = 
PADD_mod_pseudo killed renamable $p1, killed renamable $m0 - ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p1 + ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0 bb.0: liveins: $p0, $r0 %0:_(p0) = COPY $p0