@@ -67,9 +67,10 @@ class VectorCombine {
67
67
public:
68
68
VectorCombine (Function &F, const TargetTransformInfo &TTI,
69
69
const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
70
- const DataLayout *DL, bool TryEarlyFoldsOnly)
70
+ const DataLayout *DL, TTI::TargetCostKind CostKind,
71
+ bool TryEarlyFoldsOnly)
71
72
: F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC), DL(DL),
72
- TryEarlyFoldsOnly (TryEarlyFoldsOnly) {}
73
+ CostKind (CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
73
74
74
75
bool run ();
75
76
@@ -81,6 +82,7 @@ class VectorCombine {
81
82
AAResults &AA;
82
83
AssumptionCache &AC;
83
84
const DataLayout *DL;
85
+ TTI::TargetCostKind CostKind;
84
86
85
87
/// If true, only perform beneficial early IR transforms. Do not introduce new
86
88
/// vector operations.
@@ -249,7 +251,6 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
249
251
InstructionCost OldCost =
250
252
TTI.getMemoryOpCost (Instruction::Load, LoadTy, Alignment, AS);
251
253
APInt DemandedElts = APInt::getOneBitSet (MinVecNumElts, 0 );
252
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
253
254
OldCost +=
254
255
TTI.getScalarizationOverhead (MinVecTy, DemandedElts,
255
256
/* Insert */ true , HasExtract, CostKind);
@@ -329,11 +330,11 @@ bool VectorCombine::widenSubvectorLoad(Instruction &I) {
329
330
// undef value is 0. We could add that cost if the cost model accurately
330
331
// reflects the real cost of that operation.
331
332
InstructionCost OldCost =
332
- TTI.getMemoryOpCost (Instruction::Load, LoadTy, Alignment, AS);
333
+ TTI.getMemoryOpCost (Instruction::Load, LoadTy, Alignment, AS, CostKind );
333
334
334
335
// New pattern: load PtrOp
335
336
InstructionCost NewCost =
336
- TTI.getMemoryOpCost (Instruction::Load, Ty, Alignment, AS);
337
+ TTI.getMemoryOpCost (Instruction::Load, Ty, Alignment, AS, CostKind );
337
338
338
339
// We can aggressively convert to the vector form because the backend can
339
340
// invert this transform if it does not result in a performance win.
@@ -366,7 +367,6 @@ ExtractElementInst *VectorCombine::getShuffleExtract(
366
367
return nullptr ;
367
368
368
369
Type *VecTy = Ext0->getVectorOperand ()->getType ();
369
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
370
370
assert (VecTy == Ext1->getVectorOperand ()->getType () && " Need matching types" );
371
371
InstructionCost Cost0 =
372
372
TTI.getVectorInstrCost (*Ext0, VecTy, CostKind, Index0);
@@ -436,7 +436,6 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
436
436
// both sequences.
437
437
unsigned Ext0Index = Ext0IndexC->getZExtValue ();
438
438
unsigned Ext1Index = Ext1IndexC->getZExtValue ();
439
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
440
439
441
440
InstructionCost Extract0Cost =
442
441
TTI.getVectorInstrCost (*Ext0, VecTy, CostKind, Ext0Index);
@@ -683,7 +682,6 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
683
682
Mask[Index] = Index + NumElts;
684
683
685
684
Type *ScalarTy = VecTy->getScalarType ();
686
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
687
685
InstructionCost OldCost =
688
686
TTI.getArithmeticInstrCost (Instruction::FNeg, ScalarTy) +
689
687
TTI.getVectorInstrCost (I, VecTy, CostKind, Index);
@@ -772,21 +770,20 @@ bool VectorCombine::foldBitcastShuffle(Instruction &I) {
772
770
unsigned NumOps = IsUnary ? 1 : 2 ;
773
771
774
772
// The new shuffle must not cost more than the old shuffle.
775
- TargetTransformInfo::TargetCostKind CK =
776
- TargetTransformInfo::TCK_RecipThroughput;
777
773
TargetTransformInfo::ShuffleKind SK =
778
774
IsUnary ? TargetTransformInfo::SK_PermuteSingleSrc
779
775
: TargetTransformInfo::SK_PermuteTwoSrc;
780
776
781
777
InstructionCost DestCost =
782
- TTI.getShuffleCost (SK, NewShuffleTy, NewMask, CK ) +
778
+ TTI.getShuffleCost (SK, NewShuffleTy, NewMask, CostKind ) +
783
779
(NumOps * TTI.getCastInstrCost (Instruction::BitCast, NewShuffleTy, SrcTy,
784
780
TargetTransformInfo::CastContextHint::None,
785
- CK ));
781
+ CostKind ));
786
782
InstructionCost SrcCost =
787
- TTI.getShuffleCost (SK, SrcTy, Mask, CK ) +
783
+ TTI.getShuffleCost (SK, SrcTy, Mask, CostKind ) +
788
784
TTI.getCastInstrCost (Instruction::BitCast, DestTy, OldShuffleTy,
789
- TargetTransformInfo::CastContextHint::None, CK);
785
+ TargetTransformInfo::CastContextHint::None,
786
+ CostKind);
790
787
if (DestCost > SrcCost || !DestCost.isValid ())
791
788
return false ;
792
789
@@ -841,7 +838,6 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
841
838
// Calculate cost of splatting both operands into vectors and the vector
842
839
// intrinsic
843
840
VectorType *VecTy = cast<VectorType>(VPI.getType ());
844
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
845
841
SmallVector<int > Mask;
846
842
if (auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
847
843
Mask.resize (FVTy->getNumElements (), 0 );
@@ -1003,7 +999,6 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
1003
999
1004
1000
// Get cost estimate for the insert element. This cost will factor into
1005
1001
// both sequences.
1006
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1007
1002
InstructionCost InsertCost = TTI.getVectorInstrCost (
1008
1003
Instruction::InsertElement, VecTy, CostKind, Index);
1009
1004
InstructionCost OldCost =
@@ -1080,7 +1075,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
1080
1075
1081
1076
auto *Ext0 = cast<ExtractElementInst>(I0);
1082
1077
auto *Ext1 = cast<ExtractElementInst>(I1);
1083
- ExtractElementInst *ConvertToShuf = getShuffleExtract (Ext0, Ext1);
1078
+ ExtractElementInst *ConvertToShuf = getShuffleExtract (Ext0, Ext1, CostKind );
1084
1079
if (!ConvertToShuf)
1085
1080
return false ;
1086
1081
assert ((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
@@ -1089,13 +1084,12 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
1089
1084
// The original scalar pattern is:
1090
1085
// binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
1091
1086
CmpInst::Predicate Pred = P0;
1092
- unsigned CmpOpcode = CmpInst::isFPPredicate (Pred) ? Instruction::FCmp
1093
- : Instruction::ICmp;
1087
+ unsigned CmpOpcode =
1088
+ CmpInst::isFPPredicate (Pred) ? Instruction::FCmp : Instruction::ICmp;
1094
1089
auto *VecTy = dyn_cast<FixedVectorType>(X->getType ());
1095
1090
if (!VecTy)
1096
1091
return false ;
1097
1092
1098
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1099
1093
InstructionCost Ext0Cost =
1100
1094
TTI.getVectorInstrCost (*Ext0, VecTy, CostKind, Index0),
1101
1095
Ext1Cost =
@@ -1386,7 +1380,6 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
1386
1380
}
1387
1381
1388
1382
auto *Index = dyn_cast<ConstantInt>(UI->getOperand (1 ));
1389
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1390
1383
OriginalCost +=
1391
1384
TTI.getVectorInstrCost (Instruction::ExtractElement, VecTy, CostKind,
1392
1385
Index ? Index->getZExtValue () : -1 );
@@ -1480,8 +1473,6 @@ bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
1480
1473
}
1481
1474
1482
1475
// Try to merge shuffles across the binop if the new shuffles are not costly.
1483
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1484
-
1485
1476
InstructionCost OldCost =
1486
1477
TTI.getArithmeticInstrCost (Opcode, BinOpTy, CostKind) +
1487
1478
TTI.getShuffleCost (TargetTransformInfo::SK_PermuteSingleSrc, BinOpTy,
@@ -1575,8 +1566,6 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
1575
1566
}
1576
1567
1577
1568
// Try to replace a binop with a shuffle if the shuffle is not costly.
1578
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1579
-
1580
1569
InstructionCost OldCost =
1581
1570
TTI.getArithmeticInstrCost (B0->getOpcode (), BinOpTy, CostKind) +
1582
1571
TTI.getArithmeticInstrCost (B1->getOpcode (), BinOpTy, CostKind) +
@@ -1672,8 +1661,6 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
1672
1661
FixedVectorType::get (CastSrcTy->getScalarType (), NewMask.size ());
1673
1662
1674
1663
// Try to replace a castop with a shuffle if the shuffle is not costly.
1675
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1676
-
1677
1664
InstructionCost CostC0 =
1678
1665
TTI.getCastInstrCost (C0->getOpcode (), CastDstTy, CastSrcTy,
1679
1666
TTI::CastContextHint::None, CostKind);
@@ -1767,8 +1754,6 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
1767
1754
}
1768
1755
1769
1756
// Try to merge the shuffles if the new shuffle is not costly.
1770
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1771
-
1772
1757
InstructionCost InnerCost0 =
1773
1758
TTI.getShuffleCost (TargetTransformInfo::SK_PermuteSingleSrc, ShuffleSrcTy,
1774
1759
InnerMask0, CostKind, 0 , nullptr , {V0, U0}, ShufI0);
@@ -1837,12 +1822,10 @@ bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
1837
1822
return false ;
1838
1823
1839
1824
InstructionCost OldCost =
1840
- TTI.getIntrinsicInstrCost (IntrinsicCostAttributes (IID, *II0),
1841
- TTI::TCK_RecipThroughput) +
1842
- TTI.getIntrinsicInstrCost (IntrinsicCostAttributes (IID, *II1),
1843
- TTI::TCK_RecipThroughput) +
1825
+ TTI.getIntrinsicInstrCost (IntrinsicCostAttributes (IID, *II0), CostKind) +
1826
+ TTI.getIntrinsicInstrCost (IntrinsicCostAttributes (IID, *II1), CostKind) +
1844
1827
TTI.getShuffleCost (TargetTransformInfo::SK_PermuteTwoSrc, II0Ty, OldMask,
1845
- TTI::TCK_RecipThroughput , 0 , nullptr , {II0, II1}, &I);
1828
+ CostKind , 0 , nullptr , {II0, II1}, &I);
1846
1829
1847
1830
SmallVector<Type *> NewArgsTy;
1848
1831
InstructionCost NewCost = 0 ;
@@ -1854,10 +1837,10 @@ bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
1854
1837
NewArgsTy.push_back (FixedVectorType::get (VecTy->getElementType (),
1855
1838
VecTy->getNumElements () * 2 ));
1856
1839
NewCost += TTI.getShuffleCost (TargetTransformInfo::SK_PermuteTwoSrc,
1857
- VecTy, OldMask, TTI::TCK_RecipThroughput );
1840
+ VecTy, OldMask, CostKind );
1858
1841
}
1859
1842
IntrinsicCostAttributes NewAttr (IID, ShuffleDstTy, NewArgsTy);
1860
- NewCost += TTI.getIntrinsicInstrCost (NewAttr, TTI::TCK_RecipThroughput );
1843
+ NewCost += TTI.getIntrinsicInstrCost (NewAttr, CostKind );
1861
1844
1862
1845
LLVM_DEBUG (dbgs () << " Found a shuffle feeding two intrinsics: " << I
1863
1846
<< " \n OldCost: " << OldCost << " vs NewCost: " << NewCost
@@ -1923,7 +1906,7 @@ generateInstLaneVectorFromOperand(ArrayRef<InstLane> Item, int Op) {
1923
1906
}
1924
1907
1925
1908
/// Detect concat of multiple values into a vector
1926
- static bool isFreeConcat (ArrayRef<InstLane> Item,
1909
+ static bool isFreeConcat (ArrayRef<InstLane> Item, TTI::TargetCostKind CostKind,
1927
1910
const TargetTransformInfo &TTI) {
1928
1911
auto *Ty = cast<FixedVectorType>(Item.front ().first ->get ()->getType ());
1929
1912
unsigned NumElts = Ty->getNumElements ();
@@ -1934,8 +1917,7 @@ static bool isFreeConcat(ArrayRef<InstLane> Item,
1934
1917
// during legalization.
1935
1918
SmallVector<int , 16 > ConcatMask (NumElts * 2 );
1936
1919
std::iota (ConcatMask.begin (), ConcatMask.end (), 0 );
1937
- if (TTI.getShuffleCost (TTI::SK_PermuteTwoSrc, Ty, ConcatMask,
1938
- TTI::TCK_RecipThroughput) != 0 )
1920
+ if (TTI.getShuffleCost (TTI::SK_PermuteTwoSrc, Ty, ConcatMask, CostKind) != 0 )
1939
1921
return false ;
1940
1922
1941
1923
unsigned NumSlices = Item.size () / NumElts;
@@ -2189,7 +2171,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
2189
2171
}
2190
2172
}
2191
2173
2192
- if (isFreeConcat (Item, TTI)) {
2174
+ if (isFreeConcat (Item, CostKind, TTI)) {
2193
2175
ConcatLeafs.insert (FrontU);
2194
2176
continue ;
2195
2177
}
@@ -2367,7 +2349,6 @@ bool VectorCombine::foldCastFromReductions(Instruction &I) {
2367
2349
auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->getType ());
2368
2350
Type *ResultTy = I.getType ();
2369
2351
2370
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2371
2352
InstructionCost OldCost = TTI.getArithmeticReductionCost (
2372
2353
ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
2373
2354
OldCost += TTI.getCastInstrCost (CastOpc, ReductionSrcTy, SrcTy,
@@ -2717,7 +2698,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
2717
2698
/// lshr(zext(x),y) -> zext(lshr(x,trunc(y)))
2718
2699
/// Cost model calculations take into account whether zext(x) has other users and
2719
2700
/// whether it can be propagated through them too.
2720
- bool VectorCombine::shrinkType (llvm:: Instruction &I) {
2701
+ bool VectorCombine::shrinkType (Instruction &I) {
2721
2702
Value *ZExted, *OtherOperand;
2722
2703
if (!match (&I, m_c_BitwiseLogic (m_ZExt (m_Value (ZExted)),
2723
2704
m_Value (OtherOperand))) &&
@@ -2746,7 +2727,6 @@ bool VectorCombine::shrinkType(llvm::Instruction &I) {
2746
2727
2747
2728
// Calculate costs of leaving current IR as it is and moving ZExt operation
2748
2729
// later, along with adding truncates if needed
2749
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2750
2730
InstructionCost ZExtCost = TTI.getCastInstrCost (
2751
2731
Instruction::ZExt, BigTy, SmallTy,
2752
2732
TargetTransformInfo::CastContextHint::None, CostKind);
@@ -2833,7 +2813,6 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
2833
2813
auto *Ins = cast<InsertElementInst>(&I);
2834
2814
auto *Ext = cast<ExtractElementInst>(I.getOperand (1 ));
2835
2815
2836
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2837
2816
InstructionCost OldCost =
2838
2817
TTI.getVectorInstrCost (*Ext, VecTy, CostKind, ExtIdx) +
2839
2818
TTI.getVectorInstrCost (*Ins, VecTy, CostKind, InsIdx);
@@ -2981,7 +2960,8 @@ PreservedAnalyses VectorCombinePass::run(Function &F,
2981
2960
DominatorTree &DT = FAM.getResult <DominatorTreeAnalysis>(F);
2982
2961
AAResults &AA = FAM.getResult <AAManager>(F);
2983
2962
const DataLayout *DL = &F.getDataLayout ();
2984
- VectorCombine Combiner (F, TTI, DT, AA, AC, DL, TryEarlyFoldsOnly);
2963
+ VectorCombine Combiner (F, TTI, DT, AA, AC, DL, TTI::TCK_RecipThroughput,
2964
+ TryEarlyFoldsOnly);
2985
2965
if (!Combiner.run ())
2986
2966
return PreservedAnalyses::all ();
2987
2967
PreservedAnalyses PA;
0 commit comments