Skip to content

Commit 11db272

Browse files
fhahnjyli0116
authored and committed
[VPlan] Don't add separate vector latch block (NFC).
Simplify initial VPlan construction by not creating a separate vector.latch block, which isn't needed and will get folded away later. This has been suggested as an independent clean-up multiple times.
1 parent bdaa636 commit 11db272

File tree

4 files changed

+46
-65
lines changed

4 files changed

+46
-65
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+12-18
Original file line numberDiff line numberDiff line change
@@ -9531,14 +9531,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
95319531
VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
95329532
VPBlockBase *PrevVPBB = nullptr;
95339533
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
9534-
// Handle VPBBs down to the latch.
9535-
if (VPBB == LoopRegion->getExiting()) {
9536-
assert(!VPB2IRBB.contains(VPBB) &&
9537-
"the latch block shouldn't have a corresponding IRBB");
9538-
VPBlockUtils::connectBlocks(PrevVPBB, VPBB);
9539-
break;
9540-
}
9541-
95429534
// Create mask based on the IR BB corresponding to VPBB.
95439535
// TODO: Predicate directly based on VPlan.
95449536
Builder.setInsertPoint(VPBB, VPBB->begin());
@@ -9761,6 +9753,12 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
97619753
for (ElementCount VF : Range)
97629754
Plan->addVF(VF);
97639755

9756+
// Tail folding is not supported for outer loops, so the induction increment
9757+
// is guaranteed to not wrap.
9758+
bool HasNUW = true;
9759+
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
9760+
DebugLoc());
9761+
97649762
if (!VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
97659763
Plan,
97669764
[this](PHINode *P) {
@@ -9769,12 +9767,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
97699767
*PSE.getSE(), *TLI))
97709768
return nullptr;
97719769

9772-
// Tail folding is not supported for outer loops, so the induction increment
9773-
// is guaranteed to not wrap.
9774-
bool HasNUW = true;
9775-
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
9776-
DebugLoc());
9777-
97789770
// Collect mapping of IR header phis to header phi recipes, to be used in
97799771
// addScalarResumePhis.
97809772
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
@@ -9939,14 +9931,18 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
99399931
// ensure that it comes after all of its inputs, including CondOp.
99409932
// Delete CurrentLink as it will be invalid if its operand is replaced
99419933
// with a reduction defined at the bottom of the block in the next link.
9942-
LinkVPBB->appendRecipe(RedRecipe);
9934+
if (LinkVPBB->getNumSuccessors() == 0)
9935+
RedRecipe->insertBefore(&*std::prev(std::prev(LinkVPBB->end())));
9936+
else
9937+
LinkVPBB->appendRecipe(RedRecipe);
9938+
99439939
CurrentLink->replaceAllUsesWith(RedRecipe);
99449940
ToDelete.push_back(CurrentLink);
99459941
PreviousLink = RedRecipe;
99469942
}
99479943
}
99489944
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock();
9949-
Builder.setInsertPoint(&*LatchVPBB->begin());
9945+
Builder.setInsertPoint(&*std::prev(std::prev(LatchVPBB->end())));
99509946
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi();
99519947
for (VPRecipeBase &R :
99529948
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
@@ -9966,8 +9962,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
99669962
if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
99679963
!isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) {
99689964
VPValue *Cond = RecipeBuilder.getBlockInMask(OrigLoop->getHeader());
9969-
assert(OrigExitingVPV->getDefiningRecipe()->getParent() != LatchVPBB &&
9970-
"reduction recipe must be defined before latch");
99719965
Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType();
99729966
std::optional<FastMathFlags> FMFs =
99739967
PhiTy->isFloatingPointTy()

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

-5
Original file line numberDiff line numberDiff line change
@@ -451,10 +451,6 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
451451
createLoopRegion(Plan, HeaderVPB);
452452

453453
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
454-
auto *OrigExiting = TopRegion->getExiting();
455-
VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
456-
VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting);
457-
TopRegion->setExiting(LatchVPBB);
458454
TopRegion->setName("vector loop");
459455
TopRegion->getEntryBasicBlock()->setName("vector.body");
460456

@@ -472,7 +468,6 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
472468

473469
VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
474470
VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
475-
476471
VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
477472
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
478473

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

+8-1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
5555
make_early_inc_range(make_range(VPBB->begin(), EndIter))) {
5656

5757
VPValue *VPV = Ingredient.getVPSingleValue();
58+
if (!VPV->getUnderlyingValue())
59+
continue;
60+
5861
Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
5962

6063
VPRecipeBase *NewRecipe = nullptr;
@@ -387,9 +390,13 @@ static void addReplicateRegions(VPlan &Plan) {
387390
SplitBlock->setName(
388391
OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : "");
389392
// Record predicated instructions for above packing optimizations.
390-
VPBlockBase *Region = createReplicateRegion(RepR, Plan);
393+
VPRegionBlock *Region = createReplicateRegion(RepR, Plan);
391394
Region->setParent(CurrentBlock->getParent());
392395
VPBlockUtils::insertOnEdge(CurrentBlock, SplitBlock, Region);
396+
397+
VPRegionBlock *ParentRegion = Region->getParent();
398+
if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
399+
ParentRegion->setExiting(SplitBlock);
393400
}
394401
}
395402

llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp

+26-41
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,10 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) {
5353
VPBasicBlock *VecBB = Plan->getVectorLoopRegion()->getEntryBasicBlock();
5454
EXPECT_EQ(7u, VecBB->size());
5555
EXPECT_EQ(0u, VecBB->getNumPredecessors());
56-
EXPECT_EQ(1u, VecBB->getNumSuccessors());
56+
EXPECT_EQ(0u, VecBB->getNumSuccessors());
5757
EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB);
5858
EXPECT_EQ(&*Plan, VecBB->getPlan());
5959

60-
VPBlockBase *VecLatch = VecBB->getSingleSuccessor();
61-
EXPECT_EQ(VecLatch->getParent()->getExitingBasicBlock(), VecLatch);
62-
EXPECT_EQ(0u, VecLatch->getNumSuccessors());
63-
6460
auto Iter = VecBB->begin();
6561
VPWidenPHIRecipe *Phi = dyn_cast<VPWidenPHIRecipe>(&*Iter++);
6662
EXPECT_NE(nullptr, Phi);
@@ -130,33 +126,28 @@ compound=true
130126
" EMIT store ir\<%res\>, ir\<%arr.idx\>\l" +
131127
" EMIT ir\<%indvars.iv.next\> = add ir\<%indvars.iv\>, ir\<1\>\l" +
132128
" EMIT ir\<%exitcond\> = icmp ir\<%indvars.iv.next\>, ir\<%N\>\l" +
133-
"Successor(s): vector.latch\l"
134-
]
135-
N2 -> N4 [ label=""]
136-
N4 [label =
137-
"vector.latch:\l" +
138129
"No successors\l"
139130
]
140131
}
141-
N4 -> N5 [ label="" ltail=cluster_N3]
142-
N5 [label =
132+
N2 -> N4 [ label="" ltail=cluster_N3]
133+
N4 [label =
143134
"middle.block:\l" +
144135
" EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%0\>\l" +
145136
" EMIT branch-on-cond vp\<%cmp.n\>\l" +
146137
"Successor(s): ir-bb\<for.end\>, scalar.ph\l"
147138
]
148-
N5 -> N6 [ label="T"]
149-
N5 -> N7 [ label="F"]
150-
N6 [label =
139+
N4 -> N5 [ label="T"]
140+
N4 -> N6 [ label="F"]
141+
N5 [label =
151142
"ir-bb\<for.end\>:\l" +
152143
"No successors\l"
153144
]
154-
N7 [label =
145+
N6 [label =
155146
"scalar.ph:\l" +
156147
"Successor(s): ir-bb\<for.body\>\l"
157148
]
158-
N7 -> N8 [ label=""]
159-
N8 [label =
149+
N6 -> N7 [ label=""]
150+
N7 [label =
160151
"ir-bb\<for.body\>:\l" +
161152
" IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\l" +
162153
" IR %arr.idx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv\l" +
@@ -171,10 +162,6 @@ compound=true
171162
)";
172163
EXPECT_EQ(ExpectedStr, FullDump);
173164
#endif
174-
TargetLibraryInfoImpl TLII(M.getTargetTriple());
175-
TargetLibraryInfo TLI(TLII);
176-
VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
177-
Plan, [](PHINode *P) { return nullptr; }, *SE, TLI);
178165
}
179166

180167
TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
@@ -203,6 +190,12 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
203190

204191
TargetLibraryInfoImpl TLII(M.getTargetTriple());
205192
TargetLibraryInfo TLI(TLII);
193+
// Current VPlan construction doesn't add a terminator for top-level loop
194+
// latches. Add it before running transform.
195+
cast<VPBasicBlock>(Plan->getVectorLoopRegion()->getExiting())
196+
->appendRecipe(new VPInstruction(
197+
VPInstruction::BranchOnCond,
198+
{Plan->getOrAddLiveIn(ConstantInt::getTrue(F->getContext()))}));
206199
VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
207200
Plan, [](PHINode *P) { return nullptr; }, *SE, TLI);
208201

@@ -214,15 +207,11 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
214207
// Check that the region following the preheader consists of a block for the
215208
// original header and a separate latch.
216209
VPBasicBlock *VecBB = Plan->getVectorLoopRegion()->getEntryBasicBlock();
217-
EXPECT_EQ(7u, VecBB->size());
210+
EXPECT_EQ(8u, VecBB->size());
218211
EXPECT_EQ(0u, VecBB->getNumPredecessors());
219-
EXPECT_EQ(1u, VecBB->getNumSuccessors());
212+
EXPECT_EQ(0u, VecBB->getNumSuccessors());
220213
EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB);
221214

222-
VPBlockBase *VecLatch = VecBB->getSingleSuccessor();
223-
EXPECT_EQ(VecLatch->getParent()->getExitingBasicBlock(), VecLatch);
224-
EXPECT_EQ(0u, VecLatch->getNumSuccessors());
225-
226215
auto Iter = VecBB->begin();
227216
EXPECT_NE(nullptr, dyn_cast<VPWidenPHIRecipe>(&*Iter++));
228217
EXPECT_NE(nullptr, dyn_cast<VPWidenGEPRecipe>(&*Iter++));
@@ -231,6 +220,7 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
231220
EXPECT_NE(nullptr, dyn_cast<VPWidenMemoryRecipe>(&*Iter++));
232221
EXPECT_NE(nullptr, dyn_cast<VPWidenRecipe>(&*Iter++));
233222
EXPECT_NE(nullptr, dyn_cast<VPWidenRecipe>(&*Iter++));
223+
EXPECT_NE(nullptr, dyn_cast<VPInstruction>(&*Iter++));
234224
EXPECT_EQ(VecBB->end(), Iter);
235225
}
236226

@@ -303,33 +293,28 @@ compound=true
303293
" EMIT store ir\<%res\>, ir\<%arr.idx\>\l" +
304294
" EMIT ir\<%iv.next\> = add ir\<%iv\>, ir\<1\>\l" +
305295
" EMIT ir\<%exitcond\> = icmp ir\<%iv.next\>, ir\<%N\>\l" +
306-
"Successor(s): vector.latch\l"
307-
]
308-
N4 -> N5 [ label=""]
309-
N5 [label =
310-
"vector.latch:\l" +
311296
"No successors\l"
312297
]
313298
}
314-
N5 -> N6 [ label="" ltail=cluster_N3]
315-
N6 [label =
299+
N4 -> N5 [ label="" ltail=cluster_N3]
300+
N5 [label =
316301
"middle.block:\l" +
317302
" EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%0\>\l" +
318303
" EMIT branch-on-cond vp\<%cmp.n\>\l" +
319304
"Successor(s): ir-bb\<exit.2\>, scalar.ph\l"
320305
]
321-
N6 -> N7 [ label="T"]
322-
N6 -> N8 [ label="F"]
323-
N7 [label =
306+
N5 -> N6 [ label="T"]
307+
N5 -> N7 [ label="F"]
308+
N6 [label =
324309
"ir-bb\<exit.2\>:\l" +
325310
"No successors\l"
326311
]
327-
N8 [label =
312+
N7 [label =
328313
"scalar.ph:\l" +
329314
"Successor(s): ir-bb\<loop.header\>\l"
330315
]
331-
N8 -> N9 [ label=""]
332-
N9 [label =
316+
N7 -> N8 [ label=""]
317+
N8 [label =
333318
"ir-bb\<loop.header\>:\l" +
334319
" IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]\l" +
335320
" IR %arr.idx = getelementptr inbounds i32, ptr %A, i64 %iv\l" +

0 commit comments

Comments
 (0)