-
Notifications
You must be signed in to change notification settings - Fork 15.7k
[MachinePipeliner] Remove cheap check in dependence analysis #174390
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-backend-hexagon Author: Ryotaro Kasuga (kasuga-fj) ChangesIn loop-carried dependency analysis, there is special handling for a specific case, referred to as a "cheap check". This check is not sound and sometimes misses dependencies. If there is no significant performance regression, this special logic should be deleted. Split off from #135148 Full diff: https://github.com/llvm/llvm-project/pull/174390.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 59ead74da6fb6..6b022783f4bb8 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -344,15 +344,9 @@ class LoopCarriedOrderDepsTracker {
const LoadStoreChunk &To);
/// Add a loop-carried order dependency between \p Src and \p Dst if we
- /// cannot prove they are independent. When \p PerformCheapCheck is true, a
- /// lightweight dependency test (referred to as "cheap check" below) is
- /// performed at first. Note that the cheap check is retained to maintain the
- /// existing behavior and not expected to be used anymore.
- ///
- /// TODO: Remove \p PerformCheapCheck and the corresponding cheap check.
+ /// cannot prove they are independent.
void addDependenciesBetweenSUs(const SUnitWithMemInfo &Src,
- const SUnitWithMemInfo &Dst,
- bool PerformCheapCheck = false);
+ const SUnitWithMemInfo &Dst);
void computeDependenciesAux();
};
@@ -1051,11 +1045,12 @@ bool SUnitWithMemInfo::getUnderlyingObjects() {
/// Returns true if there is a loop-carried order dependency from \p Src to \p
/// Dst.
-static bool
-hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst,
- BatchAAResults &BAA, const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI,
- const SwingSchedulerDAG *SSD, bool PerformCheapCheck) {
+static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src,
+ const SUnitWithMemInfo &Dst,
+ BatchAAResults &BAA,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ const SwingSchedulerDAG *SSD) {
if (Src.isTriviallyDisjoint(Dst))
return false;
if (isSuccOrder(Src.SU, Dst.SU))
@@ -1063,28 +1058,6 @@ hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst,
MachineInstr &SrcMI = *Src.SU->getInstr();
MachineInstr &DstMI = *Dst.SU->getInstr();
- if (PerformCheapCheck) {
- // First, perform the cheaper check that compares the base register.
- // If they are the same and the load offset is less than the store
- // offset, then mark the dependence as loop carried potentially.
- //
- // TODO: This check will be removed.
- const MachineOperand *BaseOp1, *BaseOp2;
- int64_t Offset1, Offset2;
- bool Offset1IsScalable, Offset2IsScalable;
- if (TII->getMemOperandWithOffset(SrcMI, BaseOp1, Offset1, Offset1IsScalable,
- TRI) &&
- TII->getMemOperandWithOffset(DstMI, BaseOp2, Offset2, Offset2IsScalable,
- TRI)) {
- if (BaseOp1->isIdenticalTo(*BaseOp2) &&
- Offset1IsScalable == Offset2IsScalable &&
- (int)Offset1 < (int)Offset2) {
- assert(TII->areMemAccessesTriviallyDisjoint(SrcMI, DstMI) &&
- "What happened to the chain edge?");
- return true;
- }
- }
- }
if (!SSD->mayOverlapInLaterIter(&SrcMI, &DstMI))
return false;
@@ -1158,13 +1131,12 @@ LoopCarriedOrderDepsTracker::getInstrTag(SUnit *SU) const {
}
void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(
- const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst,
- bool PerformCheapCheck) {
+ const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst) {
// Avoid self-dependencies.
if (Src.SU == Dst.SU)
return;
- if (hasLoopCarriedMemDep(Src, Dst, *BAA, TII, TRI, DAG, PerformCheapCheck))
+ if (hasLoopCarriedMemDep(Src, Dst, *BAA, TII, TRI, DAG))
LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);
}
@@ -1173,8 +1145,7 @@ void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
// Add load-to-store dependencies (WAR).
for (const SUnitWithMemInfo &Src : From.Loads)
for (const SUnitWithMemInfo &Dst : To.Stores)
- // Perform a cheap check first if this is a forward dependency.
- addDependenciesBetweenSUs(Src, Dst, Src.SU->NodeNum < Dst.SU->NodeNum);
+ addDependenciesBetweenSUs(Src, Dst);
// Add store-to-load dependencies (RAW).
for (const SUnitWithMemInfo &Src : From.Stores)
diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir
index afc989cbc6921..8ff61fc4b11ab 100644
--- a/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir
@@ -3,7 +3,13 @@
# Test that the loop carried dependence check correctly identifies a recurrence.
-# CHECK: Overlap check:
+# CHECK: Overlap check:
+# CHECK-NEXT: BaseMI: %17:intregs = L2_loadrh_io %12:intregs, -8 :: (load (s16) from %ir.cgep10)
+# CHECK-NEXT: Base + -8 + I * 4, Len: 2
+# CHECK-NEXT: OtherMI: S2_storerh_io %12:intregs, 0, %18:intregs :: (store (s16) into %ir.lsr.iv24)
+# CHECK-NEXT: Base + 0 + I * 4, Len: 2
+# CHECK-NEXT: Result: No overlap
+# CHECK: Overlap check:
# CHECK-NEXT: BaseMI: S2_storerh_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s16) into %ir.lsr.iv24)
# CHECK-NEXT: Base + 0 + I * 4, Len: 2
# CHECK-NEXT: OtherMI: %{{[0-9]+}}:intregs = L2_loadrh_io %{{[0-9]+}}:intregs, -8 :: (load (s16) from %ir.cgep10)
diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir
index e16334ba7978f..fcab9613b6ec9 100644
--- a/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir
@@ -4,24 +4,21 @@
# Test that the loop carried dependence check correctly identifies dependences
# when the loop variable decreases and the array index offset is negative.
-# No dependence from the store to the load.
-# CHECK: Overlap check:
+# No dependence from the store to the load, but there is a dependence from the
+# load to the store.
+# CHECK: Overlap check:
+# CHECK-NEXT: BaseMI: %{{[0-9]+}}:intregs = L2_loadri_io %{{[0-9]+}}:intregs, -8 :: (load (s32) from %ir.cgep)
+# CHECK-NEXT: Base + -8 + I * -4, Len: 4
+# CHECK-NEXT: OtherMI: S2_storeri_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s32) into %ir.lsr.iv1)
+# CHECK-NEXT: Base + 0 + I * -4, Len: 4
+# CHECK-NEXT: Result: Overlap
+# CHECK: Overlap check:
# CHECK-NEXT: BaseMI: S2_storeri_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s32) into %ir.lsr.iv1)
# CHECK-NEXT: Base + 0 + I * -4, Len: 4
# CHECK-NEXT: OtherMI: %{{[0-9]+}}:intregs = L2_loadri_io %{{[0-9]+}}:intregs, -8 :: (load (s32) from %ir.cgep)
# CHECK-NEXT: Base + -8 + I * -4, Len: 4
# CHECK-NEXT: Result: No overlap
-# TODO: There is a loop carried dependence from the load to the store but it
-# is not recognised. addLoopCarriedDependences() should be modified to
-# recognise the dependence and enable the following checks.
-# CHECK-AFTER-FIX: Overlap check:
-# CHECK-AFTER-FIX-NEXT: BaseMI: %{{[0-9]+}}:intregs = L2_loadri_io %{{[0-9]+}}:intregs, -8 :: (load (s32) from %ir.cgep)
-# CHECK-AFTER-FIX-NEXT: Base + -8 + I * -4, Len: 4
-# CHECK-AFTER-FIX-NEXT: OtherMI: S2_storeri_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s32) into %ir.lsr.iv1)
-# CHECK-AFTER-FIX-NEXT: Base + 0 + I * -4, Len: 4
-# CHECK-AFTER-FIX-NEXT: Result: Overlap!
-
--- |
define void @test() {
|

In loop-carried dependency analysis of MachinePipeliner, there is special handling for a specific case, referred to as a "cheap check". This check is not sound and sometimes misses dependencies. If there is no significant performance regression, this special logic should be deleted.
Split off from #135148