Skip to content

Commit 8c88fae

Browse files
authored
[VPlan] Lower CanIVIncrementForPart in convertToConcreteRecipes. (llvm#190844)
Move the lowering of CanonicalIVIncrementForPart from generate() to convertToConcreteRecipes, converting it to an Add VPInstruction at the VPlan level. This enables VPlan-level simplifications (e.g., folding add 0, x) and prepares for adding a 3-operand form. PR: llvm#190844
1 parent 3af4275 commit 8c88fae

File tree

4 files changed

+28
-38
lines changed

4 files changed

+28
-38
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -658,14 +658,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
658658
{AVL, VFArg, Builder.getTrue()});
659659
return EVL;
660660
}
661-
case VPInstruction::CanonicalIVIncrementForPart: {
662-
auto *IV = State.get(getOperand(0), VPLane(0));
663-
auto *VFxPart = State.get(getOperand(1), VPLane(0));
664-
// The canonical IV is incremented by the vectorization factor (num of
665-
// SIMD elements) times the unroll part.
666-
return Builder.CreateAdd(IV, VFxPart, Name, hasNoUnsignedWrap(),
667-
hasNoSignedWrap());
668-
}
669661
case VPInstruction::BranchOnCond: {
670662
Value *Cond = State.get(getOperand(0), VPLane(0));
671663
// Replace the temporary unreachable terminator with a new conditional

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4102,6 +4102,19 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
41024102
continue;
41034103
}
41044104

4105+
// Lower CanonicalIVIncrementForPart to plain Add.
4106+
if (match(
4107+
&R,
4108+
m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>())) {
4109+
auto *VPI = cast<VPInstruction>(&R);
4110+
VPValue *Add = Builder.createOverflowingOp(
4111+
Instruction::Add, VPI->operands(), VPI->getNoWrapFlags(),
4112+
VPI->getDebugLoc());
4113+
VPI->replaceAllUsesWith(Add);
4114+
ToRemove.push_back(VPI);
4115+
continue;
4116+
}
4117+
41054118
// Lower BranchOnCount to ICmp + BranchOnCond.
41064119
VPValue *IV, *TC;
41074120
if (match(&R, m_BranchOnCount(m_VPValue(IV), m_VPValue(TC)))) {

llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -304,15 +304,12 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
304304
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
305305
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
306306
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
307-
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP1]]
308307
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1
309-
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP3]]
310308
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP1]], 3
311-
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP4]]
312309
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]])
313-
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
314-
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]])
315-
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]])
310+
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP1]], i64 [[N]])
311+
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP3]], i64 [[N]])
312+
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP4]], i64 [[N]])
316313
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]]
317314
; CHECK-ORDERED-TF: vector.body:
318315
; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1320,15 +1317,12 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
13201317
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
13211318
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
13221319
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
1323-
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP1]]
13241320
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1
1325-
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP3]]
13261321
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP1]], 3
1327-
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP4]]
13281322
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]])
1329-
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
1330-
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]])
1331-
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]])
1323+
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP1]], i64 [[N]])
1324+
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP3]], i64 [[N]])
1325+
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP4]], i64 [[N]])
13321326
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]]
13331327
; CHECK-ORDERED-TF: vector.body:
13341328
; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1572,15 +1566,12 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
15721566
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
15731567
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
15741568
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 2
1575-
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP1]]
15761569
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1
1577-
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP3]]
15781570
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP1]], 3
1579-
; CHECK-ORDERED-TF-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP4]]
15801571
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]])
1581-
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
1582-
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]])
1583-
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]])
1572+
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP1]], i64 [[N]])
1573+
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP3]], i64 [[N]])
1574+
; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP4]], i64 [[N]])
15841575
; CHECK-ORDERED-TF-NEXT: br label [[VECTOR_BODY:%.*]]
15851576
; CHECK-ORDERED-TF: vector.body:
15861577
; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,12 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
1313
; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64()
1414
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP61]], 2
1515
; CHECK-NEXT: [[TMP62:%.*]] = shl nuw i64 [[TMP1]], 2
16-
; CHECK-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP1]]
1716
; CHECK-NEXT: [[TMP28:%.*]] = shl i64 [[TMP1]], 1
18-
; CHECK-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP28]]
1917
; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[TMP1]], 3
20-
; CHECK-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP30]]
2118
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]])
22-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[UMAX]])
23-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[UMAX]])
24-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[UMAX]])
19+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP1]], i64 [[UMAX]])
20+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP28]], i64 [[UMAX]])
21+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP30]], i64 [[UMAX]])
2522
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
2623
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2724
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -79,15 +76,12 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias %
7976
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
8077
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP5]], 2
8178
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP1]], 2
82-
; CHECK-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP1]]
8379
; CHECK-NEXT: [[TMP28:%.*]] = shl i64 [[TMP1]], 1
84-
; CHECK-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP28]]
8580
; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[TMP1]], 3
86-
; CHECK-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP30]]
8781
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]])
88-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[UMAX]])
89-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[UMAX]])
90-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[UMAX]])
82+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP1]], i64 [[UMAX]])
83+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP28]], i64 [[UMAX]])
84+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP30]], i64 [[UMAX]])
9185
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
9286
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
9387
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]

0 commit comments

Comments
 (0)