Skip to content

Conversation

@kasuga-fj
Copy link
Contributor

@kasuga-fj kasuga-fj commented Jan 5, 2026

In loop-carried dependency analysis of MachinePipeliner, there is special handling for a specific case, referred to as a "cheap check". This check is not sound and sometimes misses dependencies. If there is no significant performance regression, this special logic should be deleted.

Split off from #135148

@llvmbot
Copy link
Member

llvmbot commented Jan 5, 2026

@llvm/pr-subscribers-backend-hexagon

Author: Ryotaro Kasuga (kasuga-fj)

Changes

In loop-carried dependency analysis, there is special handling for a specific case, referred to as a "cheap check". This check is not sound and sometimes misses dependencies. If there is no significant performance regression, this special logic should be deleted.

Split off from #135148


Full diff: https://github.com/llvm/llvm-project/pull/174390.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/MachinePipeliner.cpp (+11-40)
  • (modified) llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir (+7-1)
  • (modified) llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir (+9-12)
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 59ead74da6fb6..6b022783f4bb8 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -344,15 +344,9 @@ class LoopCarriedOrderDepsTracker {
                                           const LoadStoreChunk &To);
 
   /// Add a loop-carried order dependency between \p Src and \p Dst if we
-  /// cannot prove they are independent. When \p PerformCheapCheck is true, a
-  /// lightweight dependency test (referred to as "cheap check" below) is
-  /// performed at first. Note that the cheap check is retained to maintain the
-  /// existing behavior and not expected to be used anymore.
-  ///
-  /// TODO: Remove \p PerformCheapCheck and the corresponding cheap check.
+  /// cannot prove they are independent.
   void addDependenciesBetweenSUs(const SUnitWithMemInfo &Src,
-                                 const SUnitWithMemInfo &Dst,
-                                 bool PerformCheapCheck = false);
+                                 const SUnitWithMemInfo &Dst);
 
   void computeDependenciesAux();
 };
@@ -1051,11 +1045,12 @@ bool SUnitWithMemInfo::getUnderlyingObjects() {
 
 /// Returns true if there is a loop-carried order dependency from \p Src to \p
 /// Dst.
-static bool
-hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst,
-                     BatchAAResults &BAA, const TargetInstrInfo *TII,
-                     const TargetRegisterInfo *TRI,
-                     const SwingSchedulerDAG *SSD, bool PerformCheapCheck) {
+static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src,
+                                 const SUnitWithMemInfo &Dst,
+                                 BatchAAResults &BAA,
+                                 const TargetInstrInfo *TII,
+                                 const TargetRegisterInfo *TRI,
+                                 const SwingSchedulerDAG *SSD) {
   if (Src.isTriviallyDisjoint(Dst))
     return false;
   if (isSuccOrder(Src.SU, Dst.SU))
@@ -1063,28 +1058,6 @@ hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst,
 
   MachineInstr &SrcMI = *Src.SU->getInstr();
   MachineInstr &DstMI = *Dst.SU->getInstr();
-  if (PerformCheapCheck) {
-    // First, perform the cheaper check that compares the base register.
-    // If they are the same and the load offset is less than the store
-    // offset, then mark the dependence as loop carried potentially.
-    //
-    // TODO: This check will be removed.
-    const MachineOperand *BaseOp1, *BaseOp2;
-    int64_t Offset1, Offset2;
-    bool Offset1IsScalable, Offset2IsScalable;
-    if (TII->getMemOperandWithOffset(SrcMI, BaseOp1, Offset1, Offset1IsScalable,
-                                     TRI) &&
-        TII->getMemOperandWithOffset(DstMI, BaseOp2, Offset2, Offset2IsScalable,
-                                     TRI)) {
-      if (BaseOp1->isIdenticalTo(*BaseOp2) &&
-          Offset1IsScalable == Offset2IsScalable &&
-          (int)Offset1 < (int)Offset2) {
-        assert(TII->areMemAccessesTriviallyDisjoint(SrcMI, DstMI) &&
-               "What happened to the chain edge?");
-        return true;
-      }
-    }
-  }
 
   if (!SSD->mayOverlapInLaterIter(&SrcMI, &DstMI))
     return false;
@@ -1158,13 +1131,12 @@ LoopCarriedOrderDepsTracker::getInstrTag(SUnit *SU) const {
 }
 
 void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(
-    const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst,
-    bool PerformCheapCheck) {
+    const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst) {
   // Avoid self-dependencies.
   if (Src.SU == Dst.SU)
     return;
 
-  if (hasLoopCarriedMemDep(Src, Dst, *BAA, TII, TRI, DAG, PerformCheapCheck))
+  if (hasLoopCarriedMemDep(Src, Dst, *BAA, TII, TRI, DAG))
     LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);
 }
 
@@ -1173,8 +1145,7 @@ void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
   // Add load-to-store dependencies (WAR).
   for (const SUnitWithMemInfo &Src : From.Loads)
     for (const SUnitWithMemInfo &Dst : To.Stores)
-      // Perform a cheap check first if this is a forward dependency.
-      addDependenciesBetweenSUs(Src, Dst, Src.SU->NodeNum < Dst.SU->NodeNum);
+      addDependenciesBetweenSUs(Src, Dst);
 
   // Add store-to-load dependencies (RAW).
   for (const SUnitWithMemInfo &Src : From.Stores)
diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir
index afc989cbc6921..8ff61fc4b11ab 100644
--- a/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir
@@ -3,7 +3,13 @@
 
 # Test that the loop carried dependence check correctly identifies a recurrence.
 
-# CHECK: Overlap check:
+# CHECK:      Overlap check:
+# CHECK-NEXT:   BaseMI:   %17:intregs = L2_loadrh_io %12:intregs, -8 :: (load (s16) from %ir.cgep10)
+# CHECK-NEXT:     Base + -8 + I * 4, Len: 2
+# CHECK-NEXT:   OtherMI:   S2_storerh_io %12:intregs, 0, %18:intregs :: (store (s16) into %ir.lsr.iv24)
+# CHECK-NEXT:     Base + 0 + I * 4, Len: 2
+# CHECK-NEXT:   Result: No overlap
+# CHECK:      Overlap check:
 # CHECK-NEXT:   BaseMI:   S2_storerh_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s16) into %ir.lsr.iv24)
 # CHECK-NEXT:     Base + 0 + I * 4, Len: 2
 # CHECK-NEXT:   OtherMI:   %{{[0-9]+}}:intregs = L2_loadrh_io %{{[0-9]+}}:intregs, -8 :: (load (s16) from %ir.cgep10)
diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir
index e16334ba7978f..fcab9613b6ec9 100644
--- a/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir
@@ -4,24 +4,21 @@
 # Test that the loop carried dependence check correctly identifies dependences
 # when the loop variable decreases and the array index offset is negative.
 
-# No dependence from the store to the load.
-# CHECK: Overlap check:
+# No dependence from the store to the load, but there is a dependence from the
+# load to the store.
+# CHECK:      Overlap check:
+# CHECK-NEXT:   BaseMI:   %{{[0-9]+}}:intregs = L2_loadri_io %{{[0-9]+}}:intregs, -8 :: (load (s32) from %ir.cgep)
+# CHECK-NEXT:     Base + -8 + I * -4, Len: 4
+# CHECK-NEXT:   OtherMI:   S2_storeri_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s32) into %ir.lsr.iv1)
+# CHECK-NEXT:     Base + 0 + I * -4, Len: 4
+# CHECK-NEXT:   Result: Overlap
+# CHECK:      Overlap check:
 # CHECK-NEXT:   BaseMI:   S2_storeri_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s32) into %ir.lsr.iv1)
 # CHECK-NEXT:     Base + 0 + I * -4, Len: 4
 # CHECK-NEXT:   OtherMI:   %{{[0-9]+}}:intregs = L2_loadri_io %{{[0-9]+}}:intregs, -8 :: (load (s32) from %ir.cgep)
 # CHECK-NEXT:     Base + -8 + I * -4, Len: 4
 # CHECK-NEXT:   Result: No overlap
 
-# TODO: There is a loop carried dependence from the load to the store but it
-#   is not recognised. addLoopCarriedDependences() should be modified to
-#   recognise the dependence and enable the following checks.
-# CHECK-AFTER-FIX: Overlap check:
-# CHECK-AFTER-FIX-NEXT:   BaseMI:   %{{[0-9]+}}:intregs = L2_loadri_io %{{[0-9]+}}:intregs, -8 :: (load (s32) from %ir.cgep)
-# CHECK-AFTER-FIX-NEXT:     Base + -8 + I * -4, Len: 4
-# CHECK-AFTER-FIX-NEXT:   OtherMI:   S2_storeri_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s32) into %ir.lsr.iv1)
-# CHECK-AFTER-FIX-NEXT:     Base + 0 + I * -4, Len: 4
-# CHECK-AFTER-FIX-NEXT:   Result: Overlap!
-
 --- |
 
   define void @test() {

@kasuga-fj kasuga-fj changed the title [MachinePipeliner] Remove cheap check in dependency analysis [MachinePipeliner] Remove cheap check in dependence analysis Jan 5, 2026
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants