-
Notifications
You must be signed in to change notification settings - Fork 15.7k
[MachinePipelner] Add loop-carried dependencies for global barriers #174391
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/kasuga-fj/pipeliner-remove-performcheap
Are you sure you want to change the base?
[MachinePipelner] Add loop-carried dependencies for global barriers #174391
Conversation
|
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
02c7724 to
804b65b
Compare
|
@llvm/pr-subscribers-backend-hexagon Author: Ryotaro Kasuga (kasuga-fj) ChangesThe loads/stores must not be reordered across barrier instructions. However, in MachinePipeliner, it potentially could happen since loop-carried dependencies from loads/stores to a barrier instruction were not considered. The same problem exists for barrier-to-barrier dependencies. This patch adds the handling for those cases. The implementation is based on that of Split off from #135148 Full diff: https://github.com/llvm/llvm-project/pull/174391.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 6b022783f4bb8..4c8ef0aaa3d66 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -349,6 +349,10 @@ class LoopCarriedOrderDepsTracker {
const SUnitWithMemInfo &Dst);
void computeDependenciesAux();
+
+ void setLoopCarriedDep(const SUnit *Src, const SUnit *Dst) {
+ LoopCarried[Src->NodeNum].set(Dst->NodeNum);
+ }
};
} // end anonymous namespace
@@ -1137,7 +1141,7 @@ void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(
return;
if (hasLoopCarriedMemDep(Src, Dst, *BAA, TII, TRI, DAG))
- LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);
+ setLoopCarriedDep(Src.SU, Dst.SU);
}
void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
@@ -1160,11 +1164,16 @@ void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
SmallVector<LoadStoreChunk, 2> Chunks(1);
+ SUnit *FirstBarrier = nullptr;
+ SUnit *LastBarrier = nullptr;
for (const auto &TSU : TaggedSUnits) {
InstrTag Tag = TSU.getTag();
SUnit *SU = TSU.getPointer();
switch (Tag) {
case InstrTag::Barrier:
+ if (!FirstBarrier)
+ FirstBarrier = SU;
+ LastBarrier = SU;
Chunks.emplace_back();
break;
case InstrTag::LoadOrStore:
@@ -1182,9 +1191,47 @@ void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
for (const LoadStoreChunk &Chunk : Chunks)
addLoopCarriedDepenenciesForChunks(Chunk, Chunk);
- // TODO: If there are multiple barrier instructions, dependencies from the
- // last barrier instruction (or load/store below it) to the first barrier
- // instruction (or load/store above it).
+ // There is no barrier instruction between load/store instructions in the same
+ // LoadStoreChunk. If there are one or more barrier instructions, the
+ // instructions sequence is as follows:
+ //
+ // Loads/Stores (Chunks.front())
+ // Barrier (FirstBarrier)
+ // Loads/Stores
+ // Barrier
+ // ...
+ // Loads/Stores
+ // Barrier (LastBarrier)
+ // Loads/Stores (Chunks.back())
+ //
+ // Since loads/stores must not be reordered across barrier instructions, and
+ // the order of barrier instructions must be preserved, add the following
+ // loop-carried dependences:
+ //
+ // Loads/Stores (Chunks.front()) <-----+
+ // +--> Barrier (FirstBarrier) <---------+ |
+ // | Loads/Stores | |
+ // | Barrier | |
+ // | ... | |
+ // | Loads/Stores | |
+ // | Barrier (LastBarrier) -----------+--+
+ // +--- Loads/Stores (Chunks.back())
+ //
+ if (FirstBarrier) {
+ assert(LastBarrier && "Both barriers should be set.");
+ for (const SUnitWithMemInfo &Dst : Chunks.front().Loads)
+ setLoopCarriedDep(LastBarrier, Dst.SU);
+ for (const SUnitWithMemInfo &Dst : Chunks.front().Stores)
+ setLoopCarriedDep(LastBarrier, Dst.SU);
+
+ for (const SUnitWithMemInfo &Src : Chunks.back().Loads)
+ setLoopCarriedDep(Src.SU, FirstBarrier);
+ for (const SUnitWithMemInfo &Src : Chunks.back().Stores)
+ setLoopCarriedDep(Src.SU, FirstBarrier);
+
+ if (FirstBarrier != LastBarrier)
+ setLoopCarriedDep(LastBarrier, FirstBarrier);
+ }
}
/// Add a chain edge between a load and store if the store can be an
diff --git a/llvm/test/CodeGen/AArch64/sms-loop-carried-fp-exceptions1.mir b/llvm/test/CodeGen/AArch64/sms-loop-carried-fp-exceptions1.mir
index bcc6a3ea9b285..214f7e245030a 100644
--- a/llvm/test/CodeGen/AArch64/sms-loop-carried-fp-exceptions1.mir
+++ b/llvm/test/CodeGen/AArch64/sms-loop-carried-fp-exceptions1.mir
@@ -5,16 +5,24 @@
# floating-point exception, and there is an instruction for barrier event. In
# this case the order of them must not change.
#
+# SU(2): May raise FP exception
+# SU(3): May raise FP exception
+# SU(4): Store
+# SU(5): Barrier
+# SU(7): Barrier
+#
# FIXME: Currently the following dependencies are missed.
#
# Loop carried edges from SU(7)
# Order
# SU(2)
# SU(3)
-# SU(4)
-# SU(5)
# CHECK: ===== Loop Carried Edges Begin =====
+# CHECK-NEXT: Loop carried edges from SU(7)
+# CHECK-NEXT: Order
+# CHECK-NEXT: SU(4)
+# CHECK-NEXT: SU(5)
# CHECK-NEXT: ===== Loop Carried Edges End =====
--- |
diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep6.mir b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep6.mir
index 4281d15377141..8fe3b3d83aa94 100644
--- a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep6.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep6.mir
@@ -19,23 +19,33 @@
# }
# ```
#
-# FIXME: Currently the following dependencies are missed.
-# Loop carried edges from SU(16)
-# Order
-# SU(6)
-# SU(8)
-# SU(10)
-# SU(11)
-# Loop carried edges from SU(17)
-# Order
-# SU(10)
-# SU(11)
-# Loop carried edges from SU(19)
-# Order
-# SU(10)
-# SU(11)
+# SU(6): Load
+# SU(8): Store
+# SU(10): Store
+# SU(11): Barrier
+# SU(16): Barrier
+# SU(17): Load
+# SU(19): Load
+#
+# As the order between load/store and barrier must be preserved, the following
+# loop-carried dependnecies need to be added:
+# - SU(16) -> SU(6), SU(8), SU(10)
+# - SU(17), SU(19) -> SU(11)
+# - SU(16) -> SU(11) (barrier to barrier)
# CHECK: ===== Loop Carried Edges Begin =====
+# CHECK-NEXT: Loop carried edges from SU(16)
+# CHECK-NEXT: Order
+# CHECK-NEXT: SU(6)
+# CHECK-NEXT: SU(8)
+# CHECK-NEXT: SU(10)
+# CHECK-NEXT: SU(11)
+# CHECK-NEXT: Loop carried edges from SU(17)
+# CHECK-NEXT: Order
+# CHECK-NEXT: SU(11)
+# CHECK-NEXT: Loop carried edges from SU(19)
+# CHECK-NEXT: Order
+# CHECK-NEXT: SU(11)
# CHECK-NEXT: ===== Loop Carried Edges End =====
--- |
|
|
@llvm/pr-subscribers-backend-aarch64 Author: Ryotaro Kasuga (kasuga-fj) ChangesThe loads/stores must not be reordered across barrier instructions. However, in MachinePipeliner, it potentially could happen since loop-carried dependencies from loads/stores to a barrier instruction were not considered. The same problem exists for barrier-to-barrier dependencies. This patch adds the handling for those cases. The implementation is based on that of Split off from #135148 Full diff: https://github.com/llvm/llvm-project/pull/174391.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 6b022783f4bb8..4c8ef0aaa3d66 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -349,6 +349,10 @@ class LoopCarriedOrderDepsTracker {
const SUnitWithMemInfo &Dst);
void computeDependenciesAux();
+
+ void setLoopCarriedDep(const SUnit *Src, const SUnit *Dst) {
+ LoopCarried[Src->NodeNum].set(Dst->NodeNum);
+ }
};
} // end anonymous namespace
@@ -1137,7 +1141,7 @@ void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(
return;
if (hasLoopCarriedMemDep(Src, Dst, *BAA, TII, TRI, DAG))
- LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);
+ setLoopCarriedDep(Src.SU, Dst.SU);
}
void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
@@ -1160,11 +1164,16 @@ void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
SmallVector<LoadStoreChunk, 2> Chunks(1);
+ SUnit *FirstBarrier = nullptr;
+ SUnit *LastBarrier = nullptr;
for (const auto &TSU : TaggedSUnits) {
InstrTag Tag = TSU.getTag();
SUnit *SU = TSU.getPointer();
switch (Tag) {
case InstrTag::Barrier:
+ if (!FirstBarrier)
+ FirstBarrier = SU;
+ LastBarrier = SU;
Chunks.emplace_back();
break;
case InstrTag::LoadOrStore:
@@ -1182,9 +1191,47 @@ void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
for (const LoadStoreChunk &Chunk : Chunks)
addLoopCarriedDepenenciesForChunks(Chunk, Chunk);
- // TODO: If there are multiple barrier instructions, dependencies from the
- // last barrier instruction (or load/store below it) to the first barrier
- // instruction (or load/store above it).
+ // There is no barrier instruction between load/store instructions in the same
+ // LoadStoreChunk. If there are one or more barrier instructions, the
+ // instructions sequence is as follows:
+ //
+ // Loads/Stores (Chunks.front())
+ // Barrier (FirstBarrier)
+ // Loads/Stores
+ // Barrier
+ // ...
+ // Loads/Stores
+ // Barrier (LastBarrier)
+ // Loads/Stores (Chunks.back())
+ //
+ // Since loads/stores must not be reordered across barrier instructions, and
+ // the order of barrier instructions must be preserved, add the following
+ // loop-carried dependences:
+ //
+ // Loads/Stores (Chunks.front()) <-----+
+ // +--> Barrier (FirstBarrier) <---------+ |
+ // | Loads/Stores | |
+ // | Barrier | |
+ // | ... | |
+ // | Loads/Stores | |
+ // | Barrier (LastBarrier) -----------+--+
+ // +--- Loads/Stores (Chunks.back())
+ //
+ if (FirstBarrier) {
+ assert(LastBarrier && "Both barriers should be set.");
+ for (const SUnitWithMemInfo &Dst : Chunks.front().Loads)
+ setLoopCarriedDep(LastBarrier, Dst.SU);
+ for (const SUnitWithMemInfo &Dst : Chunks.front().Stores)
+ setLoopCarriedDep(LastBarrier, Dst.SU);
+
+ for (const SUnitWithMemInfo &Src : Chunks.back().Loads)
+ setLoopCarriedDep(Src.SU, FirstBarrier);
+ for (const SUnitWithMemInfo &Src : Chunks.back().Stores)
+ setLoopCarriedDep(Src.SU, FirstBarrier);
+
+ if (FirstBarrier != LastBarrier)
+ setLoopCarriedDep(LastBarrier, FirstBarrier);
+ }
}
/// Add a chain edge between a load and store if the store can be an
diff --git a/llvm/test/CodeGen/AArch64/sms-loop-carried-fp-exceptions1.mir b/llvm/test/CodeGen/AArch64/sms-loop-carried-fp-exceptions1.mir
index bcc6a3ea9b285..214f7e245030a 100644
--- a/llvm/test/CodeGen/AArch64/sms-loop-carried-fp-exceptions1.mir
+++ b/llvm/test/CodeGen/AArch64/sms-loop-carried-fp-exceptions1.mir
@@ -5,16 +5,24 @@
# floating-point exception, and there is an instruction for barrier event. In
# this case the order of them must not change.
#
+# SU(2): May raise FP exception
+# SU(3): May raise FP exception
+# SU(4): Store
+# SU(5): Barrier
+# SU(7): Barrier
+#
# FIXME: Currently the following dependencies are missed.
#
# Loop carried edges from SU(7)
# Order
# SU(2)
# SU(3)
-# SU(4)
-# SU(5)
# CHECK: ===== Loop Carried Edges Begin =====
+# CHECK-NEXT: Loop carried edges from SU(7)
+# CHECK-NEXT: Order
+# CHECK-NEXT: SU(4)
+# CHECK-NEXT: SU(5)
# CHECK-NEXT: ===== Loop Carried Edges End =====
--- |
diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep6.mir b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep6.mir
index 4281d15377141..8fe3b3d83aa94 100644
--- a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep6.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep6.mir
@@ -19,23 +19,33 @@
# }
# ```
#
-# FIXME: Currently the following dependencies are missed.
-# Loop carried edges from SU(16)
-# Order
-# SU(6)
-# SU(8)
-# SU(10)
-# SU(11)
-# Loop carried edges from SU(17)
-# Order
-# SU(10)
-# SU(11)
-# Loop carried edges from SU(19)
-# Order
-# SU(10)
-# SU(11)
+# SU(6): Load
+# SU(8): Store
+# SU(10): Store
+# SU(11): Barrier
+# SU(16): Barrier
+# SU(17): Load
+# SU(19): Load
+#
+# As the order between load/store and barrier must be preserved, the following
+# loop-carried dependnecies need to be added:
+# - SU(16) -> SU(6), SU(8), SU(10)
+# - SU(17), SU(19) -> SU(11)
+# - SU(16) -> SU(11) (barrier to barrier)
# CHECK: ===== Loop Carried Edges Begin =====
+# CHECK-NEXT: Loop carried edges from SU(16)
+# CHECK-NEXT: Order
+# CHECK-NEXT: SU(6)
+# CHECK-NEXT: SU(8)
+# CHECK-NEXT: SU(10)
+# CHECK-NEXT: SU(11)
+# CHECK-NEXT: Loop carried edges from SU(17)
+# CHECK-NEXT: Order
+# CHECK-NEXT: SU(11)
+# CHECK-NEXT: Loop carried edges from SU(19)
+# CHECK-NEXT: Order
+# CHECK-NEXT: SU(11)
# CHECK-NEXT: ===== Loop Carried Edges End =====
--- |
|

The loads/stores must not be reordered across barrier instructions. However, in MachinePipeliner, it potentially could happen since loop-carried dependencies from loads/stores to a barrier instruction were not considered. The same problem exists for barrier-to-barrier dependencies. This patch adds the handling for those cases. The implementation is based on that of
ScheduleDAGInstrs::buildSchedGraph.Split off from #135148