Skip to content

Commit 623145b

Browse files
Merge pull request #50 from Xilinx/sanubola.waw.mutator.liveness
Supply interblock liveness information to WAW rewriter
2 parents 1fdf9b0 + 512694e commit 623145b

File tree

18 files changed

+763
-60
lines changed

18 files changed

+763
-60
lines changed

llvm/lib/Target/AIE/AIEBaseSubtarget.cpp

+53-7
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,8 @@ void dumpDependencies(ScheduleDAGInstrs *DAG, SDep::Kind depType,
425425
/// live set of MBB, backtrack the DAG and update the live set. Whenever an edge
426426
/// points to a non-live write, it is updated to the subsequent live write.
427427
class WAWEdges : public ScheduleDAGMutation {
428+
429+
AIEPostRASchedStrategy *Scheduler = nullptr;
428430
// Collect all edges in a separate vector. This allows modifying SU.Preds
429431
// without invalidating iterators.
430432
SmallVector<SDep, 4> getPreds(SUnit &SU) {
@@ -446,17 +448,41 @@ class WAWEdges : public ScheduleDAGMutation {
446448
}
447449
}
448450
}
451+
452+
public:
453+
void setScheduler(AIEPostRASchedStrategy *Scheduler) {
454+
this->Scheduler = Scheduler;
455+
}
456+
449457
void apply(ScheduleDAGInstrs *DAG) override {
450458
MachineFunction &MF = DAG->MF;
451459
MachineRegisterInfo &MRI = MF.getRegInfo();
452460
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
453461
auto *RI = static_cast<const AIEBaseRegisterInfo *>(TRI);
454462
LivePhysRegs LiveRegs;
455463
LiveRegs.init(*TRI);
456-
// Reserved registers are considered always live
457-
for (MCPhysReg PhysReg : MRI.getReservedRegs().set_bits()) {
458-
if (RI->isSimplifiableReservedReg(PhysReg))
459-
LiveRegs.addReg(PhysReg);
464+
bool AddReservedRegs = true;
465+
if (Scheduler) {
466+
MachineBasicBlock *MBB = DAG->getBB();
467+
const BlockState &BS = Scheduler->getInterBlock().getBlockState(MBB);
468+
auto Region = BS.getCurrentRegion();
469+
auto BottomRegion = BS.getBottom();
470+
if (*Region.begin() == *BottomRegion.begin()) {
471+
// If the region is bottom region, liveouts of region are same as
472+
// liveouts of the MBB
473+
for (const MCPhysReg Reg : BS.LiveOuts) {
474+
LiveRegs.addReg(Reg);
475+
}
476+
AddReservedRegs = false;
477+
}
478+
}
479+
480+
if (AddReservedRegs) {
481+
// Reserved registers are considered always live
482+
for (const MCPhysReg PhysReg : MRI.getReservedRegs().set_bits()) {
483+
if (RI->isSimplifiableReservedReg(PhysReg))
484+
LiveRegs.addReg(PhysReg);
485+
}
460486
}
461487
// Stores latest live write of physical register.
462488
std::map<Register, SUnit *> PhysRegWriters;
@@ -482,6 +508,26 @@ class WAWEdges : public ScheduleDAGMutation {
482508
};
483509
};
484510

511+
// Adds WAW edges for scheduling in the context of the Scheduler.
512+
// This class extends WAWEdges to apply WAW edges using a Scheduler if available.
513+
// It overrides the apply method to retrieve the Scheduler from the DAG if a
514+
// BasicBlock is present, otherwise, it uses nullptr.
515+
class MachineSchedWAWEdges : public WAWEdges {
516+
void apply(ScheduleDAGInstrs *DAG) override {
517+
AIEPostRASchedStrategy *Scheduler =
518+
DAG->getBB() ? static_cast<AIEScheduleDAGMI *>(DAG)->getSchedImpl()
519+
: nullptr;
520+
setScheduler(Scheduler);
521+
WAWEdges::apply(DAG);
522+
}
523+
};
524+
525+
// This class extends WAWEdges to apply WAW edges without using a Scheduler.
526+
// This is useful for scenarios where the SWP (Software Pipelining) is performed
527+
// independently of the Scheduler.
528+
class SWPWAWEdges : public WAWEdges {
529+
void apply(ScheduleDAGInstrs *DAG) override { WAWEdges::apply(DAG); }
530+
};
485531
} // namespace
486532

487533
std::vector<std::unique_ptr<ScheduleDAGMutation>>
@@ -491,7 +537,7 @@ AIEBaseSubtarget::getPostRAMutationsImpl(const Triple &TT) {
491537
if (!TT.isAIE1()) {
492538
Mutations.emplace_back(std::make_unique<RegionEndEdges>());
493539
Mutations.emplace_back(std::make_unique<MemoryEdges>());
494-
Mutations.emplace_back(std::make_unique<WAWEdges>());
540+
Mutations.emplace_back(std::make_unique<MachineSchedWAWEdges>());
495541
}
496542
return Mutations;
497543
}
@@ -504,7 +550,7 @@ AIEBaseSubtarget::getInterBlockMutationsImpl(const Triple &TT) {
504550
if (!TT.isAIE1()) {
505551
Mutations.emplace_back(std::make_unique<RegionEndEdges>());
506552
Mutations.emplace_back(std::make_unique<MemoryEdges>());
507-
Mutations.emplace_back(std::make_unique<WAWEdges>());
553+
Mutations.emplace_back(std::make_unique<MachineSchedWAWEdges>());
508554
}
509555
return Mutations;
510556
}
@@ -523,7 +569,7 @@ std::vector<std::unique_ptr<ScheduleDAGMutation>>
523569
AIEBaseSubtarget::getSMSMutationsImpl(const Triple &TT) {
524570
std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
525571
if (!TT.isAIE1()) {
526-
Mutations.emplace_back(std::make_unique<WAWEdges>());
572+
Mutations.emplace_back(std::make_unique<SWPWAWEdges>());
527573
if (EnablePipelinerSchedPropagateIncomingLatencies)
528574
Mutations.emplace_back(std::make_unique<PropagateIncomingLatencies>());
529575
}

llvm/lib/Target/AIE/AIEInterBlockScheduling.cpp

+21-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
//===----------------------------------------------------------------------===//
1010

1111
#include "AIEInterBlockScheduling.h"
12+
#include "AIELiveRegs.h"
1213
#include "AIEMaxLatencyFinder.h"
1314
#include "llvm/ADT/PostOrderIterator.h"
1415
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -111,9 +112,28 @@ void InterBlockScheduling::enterFunction(MachineFunction *MF) {
111112
// Get ourselves a hazard recognizer
112113
HR = std::make_unique<AIEHazardRecognizer>(MF->getSubtarget());
113114

115+
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
116+
LiveRegs MBBLiveness(MF);
117+
const std::map<const MachineBasicBlock *, LivePhysRegs> &LiveIns =
118+
MBBLiveness.getLiveIns();
119+
114120
// Define our universe of blocks
115121
for (MachineBasicBlock &MBB : *MF) {
116-
Blocks.emplace(&MBB, &MBB);
122+
auto Itr = Blocks.emplace(&MBB, &MBB).first;
123+
BlockState &BS = Itr->second;
124+
BS.LiveOuts.init(*TRI);
125+
// Calculating LiveOuts by iterating over each successor of the MBB and
126+
// adding each successor's LiveIns to LiveOuts.
127+
for (const MachineBasicBlock *Succ : MBB.successors()) {
128+
const LivePhysRegs &MBBLiveins = LiveIns.at(Succ);
129+
for (const MCPhysReg Reg : MBBLiveins) {
130+
BS.LiveOuts.addReg(Reg);
131+
}
132+
}
133+
LLVM_DEBUG({
134+
dbgs() << MBB.getFullName() << " LiveOuts\n";
135+
BS.LiveOuts.dump();
136+
});
117137
}
118138
if (LoopAware) {
119139
// Mark epilogues of the loops we found. This is only necessary if

llvm/lib/Target/AIE/AIEInterBlockScheduling.h

+1
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ class BlockState {
165165
MachineBasicBlock *TheBlock = nullptr;
166166
FixedpointState FixPoint;
167167
BlockType Kind = BlockType::Regular;
168+
LivePhysRegs LiveOuts;
168169
void initInterBlock(const MachineSchedContext &Context);
169170

170171
// Concatenate Bundles to the current region

llvm/lib/Target/AIE/AIELiveRegs.cpp

+112
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
//===- AIELiveRegs.cpp - Liveness Analysis infrastructure -----------------===//
2+
//
3+
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
8+
//
9+
//===----------------------------------------------------------------------===//
10+
// Implementations of the classes used to support Liveness Analysis of all
11+
// physical registers, including reserved registers.
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "AIELiveRegs.h"
15+
#include "llvm/ADT/PostOrderIterator.h"
16+
#include "llvm/CodeGen/TargetSubtargetInfo.h"
17+
18+
using namespace llvm;
19+
20+
namespace llvm::AIE {
21+
22+
void LiveRegs::addToWorkList(const MachineBasicBlock *MBB) {
23+
auto It = InWorkList.find(MBB);
24+
if (It == InWorkList.end() || !It->second) {
25+
WorkList.push(MBB);
26+
It->second = true;
27+
}
28+
}
29+
30+
/// Removes the block at the front of WorkList, clears its InWorkList flag and
/// returns it. The caller must make sure WorkList is not empty.
const MachineBasicBlock *LiveRegs::popFromWorkList() {
  const MachineBasicBlock *Front = WorkList.front();
  InWorkList[Front] = false;
  WorkList.pop();
  return Front;
}
36+
37+
void LiveRegs::computeBlockLiveIns(const MachineBasicBlock *MBB,
38+
LivePhysRegs &CurrentLive) {
39+
CurrentLive.init(*TRI);
40+
41+
// Calculates CurrentLive by iterating over each successor of the current
42+
// MBB and adding each successor's LiveIns to CurrentLive.
43+
// This ensures CurrentLive is the union of LiveIns of all successor MBBs.
44+
for (const MachineBasicBlock *Succ : MBB->successors()) {
45+
for (MCPhysReg Reg : LiveIns[Succ]) {
46+
CurrentLive.addReg(Reg);
47+
}
48+
}
49+
// Calculates CurrentLive by bottom-up traversal of the MBB
50+
for (const MachineInstr &MI : llvm::reverse(*MBB)) {
51+
CurrentLive.stepBackward(MI);
52+
}
53+
}
54+
55+
/// Returns true when \p CurrentLive and \p OldLive enumerate the same
/// registers: every register of \p OldLive is contained in \p CurrentLive and
/// both sets iterate over the same number of entries.
bool LiveRegs::equal(LivePhysRegs &CurrentLive, LivePhysRegs &OldLive) {
  int OldCount = 0;
  for (const MCPhysReg Reg : OldLive) {
    // A register that disappeared already proves the sets differ.
    if (!CurrentLive.contains(Reg))
      return false;
    ++OldCount;
  }

  int CurrentCount = 0;
  for (const MCPhysReg Reg : CurrentLive) {
    (void)Reg; // Only the number of entries matters here.
    ++CurrentCount;
  }

  // OldLive is a subset of CurrentLive at this point; equal sizes make the
  // two sets identical.
  return OldCount == CurrentCount;
}
71+
72+
/// Overwrites \p OldLive with the contents of \p CurrentLive.
///
/// \param CurrentLive set to copy from.
/// \param OldLive     set to overwrite; it is re-initialized first, so its
///                    previous contents are dropped.
void LiveRegs::updateLiveRegs(LivePhysRegs &CurrentLive,
                              LivePhysRegs &OldLive) {
  OldLive.init(*TRI);
  for (const MCPhysReg LiveReg : CurrentLive)
    OldLive.addReg(LiveReg);
}
79+
80+
const std::map<const MachineBasicBlock *, LivePhysRegs> &
81+
LiveRegs::getLiveIns() const {
82+
return LiveIns;
83+
}
84+
85+
/// Computes the live-in sets of the blocks of \p MF with a worklist-driven
/// fixpoint iteration; the result is available through getLiveIns().
LiveRegs::LiveRegs(const MachineFunction *MF)
    : TRI(MF->getSubtarget().getRegisterInfo()) {
  // Seed the worklist in post-order. The order is not needed for correctness,
  // it only reduces how often blocks have to be revisited.
  for (const MachineBasicBlock *Block : post_order(MF)) {
    LiveIns[Block].init(*TRI);
    InWorkList[Block] = true;
    WorkList.push(Block);
  }

  while (!WorkList.empty()) {
    const MachineBasicBlock *Block = popFromWorkList();
    LivePhysRegs Recomputed;
    computeBlockLiveIns(Block, Recomputed);

    // Only a changed live-in set can affect the predecessors; in that case
    // record the new set and schedule them for recomputation.
    LivePhysRegs &Known = LiveIns[Block];
    if (!equal(Recomputed, Known)) {
      updateLiveRegs(Recomputed, Known);
      for (const MachineBasicBlock *Pred : Block->predecessors())
        addToWorkList(Pred);
    }
  }
}
111+
112+
} // namespace llvm::AIE

llvm/lib/Target/AIE/AIELiveRegs.h

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
//===- AIELiveRegs.h - Liveness Analysis logic -*- C++ -*------------------===//
2+
//
3+
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
8+
//
9+
//===----------------------------------------------------------------------===//
10+
//
11+
// Class providing services for Liveness Analysis.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
#ifndef LLVM_LIB_TARGET_AIE_AIELIVEREGS_H
16+
#define LLVM_LIB_TARGET_AIE_AIELIVEREGS_H
17+
18+
#include "llvm/CodeGen/LivePhysRegs.h"
19+
#include "llvm/CodeGen/MachineBasicBlock.h"
20+
#include "llvm/CodeGen/MachineFunction.h"
21+
#include "llvm/CodeGen/TargetRegisterInfo.h"
22+
#include <map>
23+
#include <queue>
24+
#include <set>
25+
26+
namespace llvm::AIE {
27+
28+
// Computes, for the blocks of a MachineFunction, the set of physical registers
// that are live on entry. The computation runs in the constructor using a
// worklist; results are read through getLiveIns().
class LiveRegs {
29+
// Mapping from Machine Basic Blocks to their livein registers.
30+
std::map<const MachineBasicBlock *, LivePhysRegs> LiveIns;
31+
32+
// Queue to manage the order of MBBs to be processed.
33+
std::queue<const MachineBasicBlock *> WorkList;
34+
35+
// Map to track whether an MBB is currently queued in the WorkList.
36+
std::map<const MachineBasicBlock *, bool> InWorkList;
37+
38+
// Pointer to Target Register Information, used for initializing livein
39+
// registers. Set by the constructor.
40+
const TargetRegisterInfo *TRI;
41+
42+
// Adds a Machine Basic Block to the work list if it is not already present.
43+
void addToWorkList(const MachineBasicBlock *MBB);
44+
45+
// Pops the front Machine Basic Block from the work list, clears its
46+
// InWorkList flag, and returns the popped MBB.
47+
const MachineBasicBlock *popFromWorkList();
48+
49+
// Computes the live-in registers for a given Machine Basic Block into
// CurrentLive, starting from the live-ins recorded for its successors.
50+
void computeBlockLiveIns(const MachineBasicBlock *MBB,
51+
LivePhysRegs &CurrentLive);
52+
53+
// Checks whether CurrentLive and OldLive contain the same registers.
54+
static bool equal(LivePhysRegs &CurrentLive, LivePhysRegs &OldLive);
55+
56+
// Re-initializes OldLive and copies the registers of CurrentLive into it.
57+
void updateLiveRegs(LivePhysRegs &CurrentLive, LivePhysRegs &OldLive);
58+
59+
public:
60+
// Returns the per-block live-in sets computed by the constructor.
const std::map<const MachineBasicBlock *, LivePhysRegs> &getLiveIns() const;
61+
// Runs the liveness computation for MF.
LiveRegs(const MachineFunction *MF);
62+
};
63+
64+
} // end namespace llvm::AIE
65+
66+
#endif // LLVM_LIB_TARGET_AIE_AIELIVEREGS_H

llvm/lib/Target/AIE/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ add_llvm_target(AIECodeGen
7676
AIEInterBlockScheduling.cpp
7777
AIEISelDAGToDAG.cpp
7878
AIELegalizerInfo.cpp
79+
AIELiveRegs.cpp
7980
AIEMachineAlignment.cpp
8081
AIEMachineBlockPlacement.cpp
8182
AIEMachineFunctionInfo.cpp

llvm/test/CodeGen/AIE/aie2/hardware-loops/nested.ll

+5-11
Original file line numberDiff line numberDiff line change
@@ -39,32 +39,26 @@ define void @nested(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef
3939
; CHECK-NEXT: .LBB0_2: // %for.body6
4040
; CHECK-NEXT: // Parent Loop BB0_1 Depth=1
4141
; CHECK-NEXT: // => This Inner Loop Header: Depth=2
42-
; CHECK-NEXT: nopa ; lshl r7, r6, r4
42+
; CHECK-NEXT: nopb ; nopa ; nops ; lshl r7, r6, r4; nopm ; nopv
4343
; CHECK-NEXT: mov dj0, r7
4444
; CHECK-NEXT: lda r7, [p4, dj0]
4545
; CHECK-NEXT: nop
4646
; CHECK-NEXT: nop
47-
; CHECK-NEXT: nop
48-
; CHECK-NEXT: nop
49-
; CHECK-NEXT: nop
50-
; CHECK-NEXT: add r6, r6, #1
51-
; CHECK-NEXT: add r2, r2, r7
5247
; CHECK-NEXT: jnzd r5, r5, p2
5348
; CHECK-NEXT: nop // Delay Slot 5
5449
; CHECK-NEXT: nop // Delay Slot 4
55-
; CHECK-NEXT: nop // Delay Slot 3
56-
; CHECK-NEXT: nop // Delay Slot 2
50+
; CHECK-NEXT: add r6, r6, #1 // Delay Slot 3
51+
; CHECK-NEXT: add r2, r2, r7 // Delay Slot 2
5752
; CHECK-NEXT: st r2, [p0, #0] // Delay Slot 1
5853
; CHECK-NEXT: .p2align 4
5954
; CHECK-NEXT: // %bb.3: // %for.cond3.for.cond.cleanup5_crit_edge
6055
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
61-
; CHECK-NEXT: nopb ; nopa ; nops ; add r3, r3, #1; nopm ; nopv
62-
; CHECK-NEXT: nopa ; jnzd r0, r0, p3
56+
; CHECK-NEXT: jnzd r0, r0, p3
6357
; CHECK-NEXT: nop // Delay Slot 5
6458
; CHECK-NEXT: nop // Delay Slot 4
6559
; CHECK-NEXT: nop // Delay Slot 3
6660
; CHECK-NEXT: nop // Delay Slot 2
67-
; CHECK-NEXT: nop // Delay Slot 1
61+
; CHECK-NEXT: add r3, r3, #1 // Delay Slot 1
6862
; CHECK-NEXT: .p2align 4
6963
; CHECK-NEXT: // %bb.4: // %for.cond.cleanup
7064
; CHECK-NEXT: nopa ; ret lr

0 commit comments

Comments
 (0)