Skip to content

Commit c52bfcb

Browse files
committed
[AIEX] Make "bottom-fixed" instructions part of the sched region
This means they will be placed by the scheduler, and once they are placed, other "free" instructions can be bundled with them.
1 parent d1b2cb5 commit c52bfcb

18 files changed

+326
-184
lines changed

llvm/lib/Target/AIE/AIEBaseSubtarget.cpp

Lines changed: 61 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -275,27 +275,14 @@ class RegionEndEdges : public ScheduleDAGMutation {
275275
assert(EdgeLatency < DelaySlots);
276276
EdgeLatency = DelaySlots + 1;
277277
}
278+
278279
// Between writing Registers (lc, le, ls) and the end of the loop,
279280
// there must be a distance of 112 bytes in terms of PM addresses.
280281
// 112 bytes correspond to 7 fully-expanded 128-bit instructions and
281282
// hence we add a latency of 8 from LoopStart to the ExitSU.
282-
// We can subtract the number of bundles that interblock pushed into
283-
// BottomInsert
284-
// FIXME: this holds as long as we insert them unconditionally. If we
285-
// integrate them with the bottom region, we just need to keep 8 away
286-
// from ExitSU
287283
if (TII->isZeroOverheadLoopSetupInstr(MI)) {
288-
unsigned PatchCycles = 8;
289-
if (DAG->getBB()) {
290-
auto *Scheduler =
291-
static_cast<AIEScheduleDAGMI *>(DAG)->getSchedImpl();
292-
auto &InterBlock = Scheduler->getInterBlock();
293-
unsigned InsertedCycles =
294-
InterBlock.getBlockState(DAG->getBB()).BottomInsert.size();
295-
PatchCycles =
296-
PatchCycles >= InsertedCycles ? PatchCycles - InsertedCycles : 0;
297-
}
298-
EdgeLatency = std::max(EdgeLatency, PatchCycles);
284+
const unsigned ZOLDistance = 8;
285+
EdgeLatency = std::max(EdgeLatency, ZOLDistance);
299286
}
300287

301288
ExitDep.setLatency(EdgeLatency);
@@ -318,6 +305,63 @@ class RegionEndEdges : public ScheduleDAGMutation {
318305
};
319306
};
320307

308+
/// This Mutator is responsible for emitting "fixed" SUnits at the top or bottom
309+
/// of the region. These special SUnits require a specific cycle and cannot be
310+
/// placed freely by the scheduler.
311+
///
312+
/// Here, these special SUnits get created from Region::top_fixed_instrs() or
313+
/// Region::bot_fixed_instrs(), and dependencies are created between "free" and
314+
/// "fixed" SUnits.
/// NOTE(review): apply() as written below only walks bot_fixed_instrs() and
/// getBotFixedBundles(); no top-fixed SUnits are created by this class.
315+
class EmitFixedSUnits : public ScheduleDAGMutation {
316+
public:
317+
void apply(ScheduleDAGInstrs *DAG) override {
318+
AIEPostRASchedStrategy *Scheduler =
319+
static_cast<AIEScheduleDAGMI *>(DAG)->getSchedImpl();
320+
auto *TII = static_cast<const AIEBaseInstrInfo *>(DAG->TII);
321+
auto *ItinData = DAG->MF.getSubtarget().getInstrItineraryData();
322+
const BlockState &BS =
323+
Scheduler->getInterBlock().getBlockState(DAG->getBB());
324+
const Region &CurRegion = BS.getCurrentRegion();
325+
326+
// First, create SUnits for all "fixed" instructions
327+
// Those will be chained from/to the EntrySU/ExitSU to ensure they are
328+
// placed in the correct cycle. The scheduler will enforce that these fixed
329+
// SUnits get placed exactly at their depth (for the Top zone) or height
330+
// (for the Bot zone).
// The bottom-fixed instructions are visited in reverse: the first SUnit
// created becomes a predecessor of ExitSU with latency 0, and every further
// SUnit becomes a predecessor of the previously created one with latency 1.
331+
SUnit *Succ = &DAG->ExitSU;
332+
for (MachineInstr &MI : reverse(CurRegion.bot_fixed_instrs())) {
333+
SUnit &FixedSU = Scheduler->addFixedSUnit(MI, /*IsTop=*/false);
334+
SDep Dep(&FixedSU, SDep::Artificial);
335+
Dep.setLatency(Succ == &DAG->ExitSU ? 0 : 1);
336+
Succ->addPred(Dep);
337+
Succ = &FixedSU;
338+
}
339+
DAG->makeMaps();
340+
341+
// Then, create dependencies between "free" and "fixed" instructions
// (The makeMaps() call above presumably refreshes the MI -> SUnit mapping so
// that DAG->getSUnit() below can resolve the new fixed SUnits -- TODO confirm.)
// For each free SUnit, AIE::findEarliestRef() is queried against the
// bottom-fixed bundles. When it yields an instruction, the free SUnit is made
// an artificial predecessor of the SUnit found for that instruction's bundle
// start, with a latency of AIE::maxLatency() of the free instruction.
342+
auto IsFreeSU = [Scheduler](const SUnit &SU) {
343+
return Scheduler->isFreeSU(SU);
344+
};
345+
ArrayRef<AIE::MachineBundle> BotFixedBundles =
346+
CurRegion.getBotFixedBundles();
347+
for (SUnit &FreeSU : make_filter_range(DAG->SUnits, IsFreeSU)) {
348+
const MachineInstr &MI = *FreeSU.getInstr();
349+
MachineInstr *FixedDepMI =
350+
AIE::findEarliestRef(MI, BotFixedBundles, BotFixedBundles.size()).MI;
351+
if (!FixedDepMI)
352+
continue;
353+
354+
SUnit *FixedDepSU =
355+
DAG->getSUnit(&*getBundleStart(FixedDepMI->getIterator()));
356+
assert(FixedDepSU && "Fixed Bundle has no corresponding SU.");
357+
SDep Dep(&FreeSU, SDep::Artificial);
358+
Dep.setLatency(
359+
AIE::maxLatency(&MI, *TII, *ItinData, /*IncludeStages=*/true));
360+
FixedDepSU->addPred(Dep, /*Required=*/true);
361+
}
362+
}
363+
};
364+
321365
/// Collect all "weak" edges in a separate vector. This allows modifying
322366
/// \p SU.Preds without invalidating iterators.
323367
SmallVector<SDep, 4> getWeakPreds(SUnit &SU) {
@@ -664,6 +708,7 @@ AIEBaseSubtarget::getPostRAMutationsImpl(const Triple &TT) {
664708
Mutations.emplace_back(std::make_unique<MemoryEdges>());
665709
Mutations.emplace_back(std::make_unique<MachineSchedWAWEdges>());
666710
Mutations.emplace_back(std::make_unique<BiasDepth>());
711+
Mutations.emplace_back(std::make_unique<EmitFixedSUnits>());
667712
}
668713
return Mutations;
669714
}

llvm/lib/Target/AIE/AIEInterBlockScheduling.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,9 @@ void InterBlockScheduling::enterRegion(MachineBasicBlock *BB,
717717
if (BS.Kind != BlockType::Loop ||
718718
BS.FixPoint.Stage == SchedulingStage::GatheringRegions) {
719719
ArrayRef<MachineBundle> TopFixedBundles;
720-
ArrayRef<MachineBundle> BotFixedBundles;
720+
ArrayRef<MachineBundle> BotFixedBundles =
721+
RegionEnd == BB->end() ? ArrayRef<MachineBundle>(BS.BottomInsert)
722+
: ArrayRef<MachineBundle>();
721723
BS.addRegion(BB, RegionBegin, RegionEnd, TopFixedBundles, BotFixedBundles);
722724
}
723725
}
@@ -830,7 +832,7 @@ void InterBlockScheduling::emitInterBlockBottom(const BlockState &BS) const {
830832
assert(PreHeader->end() == PreHeader->getFirstTerminator() &&
831833
"PreHeader is not fall-through");
832834
emitBundles(BS.BottomInsert, PreHeader, PreHeader->end(), /*Move=*/false,
833-
/*EmitNops=*/true);
835+
/*EmitNops=*/false);
834836
}
835837

836838
int InterBlockScheduling::getCyclesToRespectTiming(

llvm/lib/Target/AIE/AIEMachineScheduler.cpp

Lines changed: 105 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ void AIEPostRASchedStrategy::initializeBotScoreBoard(ScoreboardTrust Trust) {
270270
assert(!doMBBSchedRegionsTopDown());
271271
AIEHazardRecognizer *BotHazardRec = getAIEHazardRecognizer(Bot);
272272
const int Depth = BotHazardRec->getMaxLookAhead();
273+
assert(unsigned(Depth) >= BotHazardRec->getPipelineDepth());
273274

274275
/// These lambdas are an abstraction of the scoreboard manipulations,
275276
/// hiding the details of the implementation. In particular, we need to
@@ -479,27 +480,82 @@ SUnit *AIEPostRASchedStrategy::pickNodeAndCycle(
479480
}
480481

481482
int AIEPostRASchedStrategy::getMaxDeltaCycles(const SchedBoundary &Zone) const {
482-
assert(!Zone.isTop());
483-
if (Zone.getCurrCycle() >= RegionBottomUpCycles - 1)
483+
// Top-down scheduling does not support DeltaCycles
484+
if (Zone.isTop() || Zone.getCurrCycle() >= RegionBottomUpCycles - 1)
484485
return 0;
485486
return std::min({int(RegionBottomUpCycles - 1 - Zone.getCurrCycle()),
486487
int(getAIEHazardRecognizer(Zone)->getMaxLookAhead()),
487488
BottomUpDelta.getValue()});
488489
}
489490

491+
/// Returns the number of emitted instructions in the Top or Bot zone.
/// For the Top zone this is the distance from DAG->begin() to DAG->top(); for
/// the Bot zone it is the distance from DAG->bottom() to DAG->end(). If the
/// relevant boundary iterator is not valid, 0 is returned.
/// NOTE(review): this helper carries no `static` here; if it is only used in
/// this file, consider restricting its linkage -- confirm no other TU uses it.
492+
unsigned getNumEmittedInstrs(ScheduleDAGMI *DAG, bool IsTop) {
493+
if (IsTop)
494+
return DAG->top().isValid() ? std::distance(DAG->begin(), DAG->top()) : 0;
495+
return DAG->bottom().isValid() ? std::distance(DAG->bottom(), DAG->end()) : 0;
496+
}
497+
498+
/// Returns the SUnit of the next bottom-fixed instruction that still has to be
/// scheduled, or nullptr if \p Zone is the Top zone or if the number of
/// instructions already emitted at the bottom is not smaller than the number of
/// bottom-fixed bundles of the current region.
SUnit *AIEPostRASchedStrategy::getNextUnscheduledFixedInstr(
499+
const SchedBoundary &Zone) const {
500+
if (Zone.isTop())
501+
return nullptr;
502+
const Region &Reg = InterBlock.getBlockState(CurMBB).getCurrentRegion();
503+
const unsigned NumEmitted = getNumEmittedInstrs(DAG, /*IsTop=*/false);
504+
505+
// If the zone still has unscheduled fixed instructions, the next one to pick
506+
// is (DAG->bottom() - 1) for bottom-up, or DAG->top() for top-down.
// Only the bottom-up case is implemented here: the Top zone returned early.
// NOTE(review): NumEmitted counts iterator positions while the bound counts
// bundles; this presumably relies on one position per fixed bundle -- confirm.
507+
if (NumEmitted < Reg.getBotFixedBundles().size()) {
508+
MachineInstr &NextMI =
509+
*std::prev(DAG->bottom().isValid() ? DAG->bottom() : DAG->end());
510+
SUnit *NextSU = DAG->getSUnit(&NextMI);
511+
assert(NextSU);
512+
assert(NextSU->BotReadyCycle == NextSU->getHeight() &&
513+
"Fixed instruction won't be placed at the correct cycle");
514+
assert(Zone.getCurrCycle() <= NextSU->BotReadyCycle);
515+
return NextSU;
516+
}
517+
return nullptr;
518+
}
519+
520+
/// Tells whether \p SU is a fixed SUnit of the requested kind, based purely on
/// its NodeNum.
/// - IsTop: NodeNum must lie in [FirstTopFixedSU, FirstBotFixedSU); when no
///   bottom-fixed SUnit is recorded, DAG->SUnits.size() is the upper bound.
/// - !IsTop: NodeNum must lie in [FirstBotFixedSU, LastBotFixedSU].
/// Returns false when no fixed SUnit of that kind has been recorded.
bool AIEPostRASchedStrategy::isFixedSU(const SUnit &SU, bool IsTop) const {
521+
if (IsTop) {
522+
return FirstTopFixedSU && SU.NodeNum >= *FirstTopFixedSU &&
523+
SU.NodeNum < FirstBotFixedSU.value_or(DAG->SUnits.size());
524+
}
525+
return FirstBotFixedSU && SU.NodeNum >= *FirstBotFixedSU &&
526+
SU.NodeNum <= LastBotFixedSU.value(); // only reached when FirstBotFixedSU is set
527+
}
528+
529+
/// Tells whether \p SU is a "free" SUnit, i.e. its NodeNum is below both
/// FirstTopFixedSU and FirstBotFixedSU. A bound that is not set is replaced by
/// DAG->SUnits.size(), so with no fixed SUnits recorded every SUnit in
/// DAG->SUnits is free.
bool AIEPostRASchedStrategy::isFreeSU(const SUnit &SU) const {
530+
const unsigned NumUpperBound = DAG->SUnits.size();
531+
return SU.NodeNum < FirstTopFixedSU.value_or(NumUpperBound) &&
532+
SU.NodeNum < FirstBotFixedSU.value_or(NumUpperBound);
533+
}
534+
490535
bool AIEPostRASchedStrategy::isAvailableNode(SUnit &SU, SchedBoundary &Zone,
491536
bool /*VerifyReadyCycle*/) {
537+
// Note we use signed integers to avoid wrap-around behavior.
538+
const int MinDelta = -getMaxDeltaCycles(Zone);
539+
const int ReadyCycle = std::max(Zone.getCurrCycle(), SU.BotReadyCycle);
540+
const int CurrCycle = Zone.getCurrCycle();
541+
542+
// If the Zone has remaining fixed instructions, only one SU is available.
543+
if (SUnit *FixedSU = getNextUnscheduledFixedInstr(Zone)) {
544+
assert(!Zone.isTop() && "Fixed instructions only expected in Bot zone");
545+
const int DeltaCycles = CurrCycle - ReadyCycle;
546+
return FixedSU == &SU && DeltaCycles >= MinDelta;
547+
}
548+
549+
// If SU is a fixed instruction in the other zone, it isn't available
550+
if (isFixedSU(SU, !Zone.isTop()))
551+
return false;
552+
492553
// Whether or not the zone is Top or Bot, verify if SU is ready to be
493554
// scheduled in terms of cycle.
494555
if (Zone.isTop())
495556
return MachineSchedStrategy::isAvailableNode(SU, Zone,
496557
/*VerifyReadyCycle=*/true);
497558

498-
// Note we use signed integers to avoid wrap-around behavior.
499-
const int MinDelta = -getMaxDeltaCycles(Zone);
500-
const int ReadyCycle = std::max(Zone.getCurrCycle(), SU.BotReadyCycle);
501-
const int CurrCycle = Zone.getCurrCycle();
502-
503559
for (int DeltaCycles = CurrCycle - ReadyCycle; DeltaCycles >= MinDelta;
504560
--DeltaCycles) {
505561
// ReadyCycle is always greater or equal to the current cycle,
@@ -542,6 +598,10 @@ void AIEPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
542598
// from a block is the bottom one. We reset this when leaving any
543599
// region
544600
IsBottomRegion = true;
601+
602+
// The block may have a timed region; append its instructions.
603+
auto &BS = InterBlock.getBlockState(MBB);
604+
InterBlock.emitInterBlockBottom(BS);
545605
}
546606

547607
void AIEPostRASchedStrategy::commitBlockSchedule(MachineBasicBlock *BB) {
@@ -551,10 +611,11 @@ void AIEPostRASchedStrategy::commitBlockSchedule(MachineBasicBlock *BB) {
551611
// scheduling region.
552612
assert(BS.getRegions().empty() ||
553613
0 == BS.getTop().getTopFixedBundles().size());
554-
assert(BS.getRegions().empty() ||
555-
0 == BS.getBottom().getBotFixedBundles().size());
614+
assert(BS.BottomInsert.empty() ||
615+
BS.BottomInsert.size() == BS.getBottom().getBotFixedBundles().size());
556616

557617
// Safety margin, swp epilogue
618+
// Note that the prologue is handled in a different way. See enterMBB.
558619
InterBlock.emitInterBlockTop(BS);
559620

560621
if (BS.isPipelined()) {
@@ -582,8 +643,6 @@ void AIEPostRASchedStrategy::commitBlockSchedule(MachineBasicBlock *BB) {
582643
AIEHazardRecognizer::applyBundles(Region.Bundles, BS.TheBlock);
583644
}
584645
}
585-
// swp prologue
586-
InterBlock.emitInterBlockBottom(BS);
587646
}
588647

589648
void AIEPostRASchedStrategy::leaveMBB() {
@@ -634,6 +693,9 @@ void AIEPostRASchedStrategy::leaveRegion(const SUnit &ExitSU) {
634693
RegionBegin = nullptr;
635694
RegionEnd = nullptr;
636695
IsBottomRegion = false;
696+
FirstTopFixedSU = {};
697+
FirstBotFixedSU = {};
698+
LastBotFixedSU = {};
637699
BS.advanceRegion();
638700
DEBUG_BLOCKS(dbgs() << " << leaveRegion\n");
639701
}
@@ -758,6 +820,11 @@ bool AIEPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
758820
return true;
759821
}
760822

823+
SchedBoundary &Zone = getSchedZone();
824+
assert(!getNextUnscheduledFixedInstr(Zone) &&
825+
"More than one available SUnit while not all fixed instructions have "
826+
"been emitted.");
827+
761828
// Instructions with delay slots are critical and should be scheduled
762829
// as soon as they are ready.
763830
if (TryCand.SU->getInstr()->hasDelaySlot()) {
@@ -770,8 +837,6 @@ bool AIEPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
770837
return false;
771838
}
772839

773-
SchedBoundary &Zone = getSchedZone();
774-
775840
// Avoid serializing long latency dependence chains.
776841
if (Cand.Policy.ReduceLatency && Zone.isTop() &&
777842
tryLatency(TryCand, Cand, Zone)) {
@@ -1237,6 +1302,11 @@ void llvm::AIEPostRASchedStrategy::buildGraph(ScheduleDAGMI &DAG, AAResults *AA,
12371302
PressureDiffs *PDiffs,
12381303
LiveIntervals *LIS,
12391304
bool TrackLaneMasks) {
1305+
1306+
// Let's save the DAG already instead of waiting for initialize().
1307+
// Some DAG mutators might require a DAG to be set.
1308+
this->DAG = &DAG;
1309+
12401310
/// We are called after enterRegion, which will have recorded the semantic
12411311
/// order. We can't use the basic block order, since this may have changed
12421312
/// in earlier iterations of scheduling
@@ -1267,6 +1337,28 @@ void llvm::AIEPostRASchedStrategy::buildGraph(ScheduleDAGMI &DAG, AAResults *AA,
12671337
static_cast<AIEScheduleDAGMI &>(DAG).recordDbgInstrs(Region);
12681338
}
12691339

1340+
/// Creates a SUnit for the fixed instruction \p MI through DAG->initSUnit() and
/// records its number: FirstTopFixedSU for top-fixed instructions, or
/// FirstBotFixedSU / LastBotFixedSU for bottom-fixed ones.
/// \param IsTop Whether MI is fixed at the top (true) or bottom (false).
/// \returns A reference to the new element of DAG->SUnits.
/// Asserts that no top-fixed SUnit is added after a bottom-fixed one, and that
/// DAG->SUnits still has spare capacity (presumably so that adding the SUnit
/// does not reallocate the vector and invalidate existing SUnit references --
/// TODO confirm).
SUnit &AIEPostRASchedStrategy::addFixedSUnit(MachineInstr &MI, bool IsTop) {
1341+
DEBUG_BLOCKS(dbgs() << "Adding Fixed MI: " << MI);
1342+
DEBUG_BLOCKS(dbgs() << " DAG size=" << DAG->SUnits.size()
1343+
<< " capacity=" << DAG->SUnits.capacity() << "\n");
1344+
assert(!(IsTop && FirstBotFixedSU) && "Top-fixed SUnits must be added first");
1345+
assert(DAG->SUnits.size() < DAG->SUnits.capacity() &&
1346+
"SUnits need to be re-allocated.");
1347+
unsigned SUNum = DAG->initSUnit(MI).value();
1348+
SUnit &SU = DAG->SUnits[SUNum];
1349+
1350+
if (IsTop) {
1351+
if (!FirstTopFixedSU)
1352+
FirstTopFixedSU = SUNum;
1353+
} else {
1354+
if (!FirstBotFixedSU)
1355+
FirstBotFixedSU = SUNum;
1356+
LastBotFixedSU = SUNum; // not guarded by the `if`: updated for every bottom-fixed SUnit
1357+
}
1358+
1359+
return SU;
1360+
}
1361+
12701362
bool AIEScheduleDAGMI::mayAlias(SUnit *SUa, SUnit *SUb, bool UseTBAA) {
12711363
BlockState &BS = getSchedImpl()->getInterBlock().getBlockState(getBB());
12721364
if (BS.FixPoint.Stage == SchedulingStage::Pipelining) {

llvm/lib/Target/AIE/AIEMachineScheduler.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,17 @@ class AIEPostRASchedStrategy : public PostGenericScheduler {
7272
RegPressureTracker *RPTracker, PressureDiffs *PDiffs,
7373
LiveIntervals *LIS, bool TrackLaneMasks) override;
7474

75+
/// Adds a SUnit for the given fixed instruction
76+
/// \param IsTop Whether MI is fixed at the top or bottom of the region
77+
SUnit &addFixedSUnit(MachineInstr &MI, bool IsTop);
78+
79+
/// Whether \p SU is fixed in a specific cycle of the given zone.
80+
bool isFixedSU(const SUnit &SU, bool IsTop) const;
81+
82+
/// Whether \p SU is free to be scheduled anywhere in the region.
83+
/// (modulo dependencies and resource conflicts)
84+
bool isFreeSU(const SUnit &SU) const;
85+
7586
/// Explicitly process regions backwards. The first scheduled region in
7687
/// a block connects with successors.
7788
bool doMBBSchedRegionsTopDown() const override { return false; }
@@ -119,6 +130,16 @@ class AIEPostRASchedStrategy : public PostGenericScheduler {
119130
/// cycle of instructions to be scheduled.
120131
int getMaxDeltaCycles(const SchedBoundary &Zone) const;
121132

133+
/// Return the next "fixed" instruction to place down.
134+
SUnit *getNextUnscheduledFixedInstr(const SchedBoundary &Zone) const;
135+
136+
/// SU numbers for fixed instructions.
137+
/// "top" fixed SUnits belong in [FirstTopFixedSU,FirstBotFixedSU)
138+
/// "bot" fixed SUnits belong in [FirstBotFixedSU,LastBotFixedSU]
139+
std::optional<unsigned> FirstTopFixedSU;
140+
std::optional<unsigned> FirstBotFixedSU;
141+
std::optional<unsigned> LastBotFixedSU;
142+
122143
/// Keeps track of the current zone used for scheduling. See getSchedZone().
123144
bool IsTopDown = true;
124145

llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/bitwisenot.mir

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,16 @@
1313
define dso_local void @bitNot(ptr addrspace(5) noalias nocapture writeonly %d, ptr addrspace(6) noalias nocapture readonly %s, i32 noundef %n) local_unnamed_addr #0 {
1414
; CHECK-LABEL: bitNot:
1515
; CHECK: // %bb.0:
16-
; CHECK-NEXT: add.nc lc, r0, #-5
17-
; CHECK-NEXT: movxm ls, #.LBB0_2
18-
; CHECK-NEXT: movxm le, #.L_LEnd0
19-
; CHECK-NEXT: vldb wh0, [p0, #32]; nopa ; nops ; nopxm ; nopv
16+
; CHECK-NEXT: nopa ; vldb wh0, [p0, #32]; nopx ; add.nc lc, r0, #-5
17+
; CHECK-NEXT: vldb wl0, [p0], #64; movxm ls, #.LBB0_2
18+
; CHECK-NEXT: vldb wh0, [p0, #32]; movxm le, #.L_LEnd0
2019
; CHECK-NEXT: vldb wl0, [p0], #64; nopa ; nops ; nopxm ; nopv
2120
; CHECK-NEXT: vldb wh0, [p0, #32]; nopa ; nops ; nopxm ; nopv
2221
; CHECK-NEXT: vldb wl0, [p0], #64; nopa ; nops ; nopxm ; nopv
2322
; CHECK-NEXT: vldb wh0, [p0, #32]; nopa ; nops ; nopxm ; nopv
2423
; CHECK-NEXT: vldb wl0, [p0], #64; nopa ; nops ; nopxm ; nopv
25-
; CHECK-NEXT: vldb wh0, [p0, #32]; nopa ; nops ; nopxm ; nopv
26-
; CHECK-NEXT: vldb wl0, [p0], #64; nopx
27-
; CHECK-NEXT: vldb wh0, [p0, #32]; vbneg_ltz.s16 x1, r21, x0
28-
; CHECK-NEXT: vldb wl0, [p0], #64
24+
; CHECK-NEXT: vldb wh0, [p0, #32]; nopa ; nops ; nopx ; vbneg_ltz.s16 x1, r21, x0; nopv
25+
; CHECK-NEXT: vldb wl0, [p0], #64; nopa ; nops ; nopxm ; nopv
2926
; CHECK-NEXT: .LBB0_1: // %for.body
3027
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
3128
; CHECK-NEXT: vldb wh0, [p0, #32]; nopa ; vst wh1, [p1, #32]; nopx ; vbneg_ltz.s16 x1, r21, x0; nopv

0 commit comments

Comments
 (0)