-
Notifications
You must be signed in to change notification settings - Fork 41
[AIEX] Postpipeliner Solver #939
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: aie-public
Are you sure you want to change the base?
Changes from all commits
f95a2ba
5a09067
ddfa022
f3a9aeb
cce4989
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -618,7 +618,14 @@ SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) { | |
| if (BS.getRegions().size() == 1) { | ||
| auto &PostSWP = BS.getPostSWP(); | ||
| if (PostSWP.isPostPipelineCandidate(*BS.TheBlock)) { | ||
| BS.FixPoint.II = PostSWP.getResMII(*BS.TheBlock); | ||
| // A CLI --aie-postpipeliner-target-ii is a hard limit: start at | ||
| // exactly that II (bypassing --aie-postpipeliner-maxii) and let | ||
| // updatePipelining one-shot it. A pragma-driven TargetII is a soft | ||
| // hint: start at ResMII and iterate normally; the solver fallback at | ||
| // II == TargetII is handled inside the post-pipeliner. | ||
| BS.FixPoint.II = PostSWP.isTargetIIHardLimit() | ||
| ? PostSWP.getTargetII() | ||
| : PostSWP.getResMII(*BS.TheBlock); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we should simplify. When we drive an example from the command line, we want to say where we start, where we stop. Orthogonal to that, we want to say which approaches to enable. In my branch I have introduced a MinII CLI. |
||
| BS.FixPoint.IITries = 1; | ||
| return SchedulingStage::Pipelining; | ||
| } | ||
|
|
@@ -632,11 +639,17 @@ SchedulingStage InterBlockScheduling::updatePipelining(BlockState &BS) { | |
| return BS.FixPoint.Stage; | ||
| } | ||
|
|
||
| // Otherwise try a larger II. | ||
| // We cut off at larger IIs to prevent excessive compilation time. | ||
| if (++BS.FixPoint.II <= PostPipelinerMaxII && | ||
| ++BS.FixPoint.IITries <= PostPipelinerMaxTryII) { | ||
| return SchedulingStage::Pipelining; | ||
| // A CLI --aie-postpipeliner-target-ii is one-shot: try only the requested | ||
| // II, even if it exceeds --aie-postpipeliner-maxii. If that attempt | ||
| // failed, do not try any other II. A pragma-driven TargetII keeps the | ||
| // normal iteration (ResMII..MaxII). | ||
| if (!BS.getPostSWP().isTargetIIHardLimit()) { | ||
| // Otherwise try a larger II. | ||
| // We cut off at larger IIs to prevent excessive compilation time. | ||
| if (++BS.FixPoint.II <= PostPipelinerMaxII && | ||
| ++BS.FixPoint.IITries <= PostPipelinerMaxTryII) { | ||
| return SchedulingStage::Pipelining; | ||
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So, a one-shot attempt can be built from MinII and MaxII and by enabling the algorithms that you want to act on it. |
||
| } | ||
|
|
||
| auto *BB = BS.TheBlock; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ | |
| #include "llvm/CodeGen/ResourceScoreboard.h" | ||
| #include "llvm/CodeGen/ScheduleDAG.h" | ||
| #include "llvm/CodeGen/ScheduleDAGInstrs.h" | ||
| #include "llvm/MC/MCInstrItineraries.h" | ||
| #include "llvm/Transforms/Utils/LoopUtils.h" | ||
| #include <limits> | ||
| #include <string> | ||
|
|
@@ -44,9 +45,16 @@ static cl::opt<int> | |
| cl::desc("Number of runs for heuristics that converge"), | ||
| cl::init(20), cl::Hidden); | ||
|
|
||
| static cl::opt<int> PresetII("aie-postpipeliner-target-ii", | ||
| cl::desc("II for which to allow the solver"), | ||
| cl::init(0), cl::Hidden); | ||
| static cl::opt<bool> | ||
| UseSolver("aie-postpipeliner-solver", | ||
| cl::desc("Use the solver as fallback after heuristics fail"), | ||
| cl::init(false), cl::Hidden); | ||
|
|
||
| static cl::opt<int> | ||
| PresetII("aie-postpipeliner-target-ii", | ||
| cl::desc("Run solver-only at this II; bypasses MaxII and " | ||
| "skips heuristics"), | ||
| cl::init(0), cl::Hidden); | ||
|
|
||
| PipelineScheduleVisitor::~PipelineScheduleVisitor() {} | ||
|
|
||
|
|
@@ -160,16 +168,35 @@ bool PostPipeliner::isPostPipelineCandidate(MachineBasicBlock &LoopBlock) { | |
| return false; | ||
| } | ||
|
|
||
| if (PresetII) { | ||
| TargetII = PresetII; | ||
| // No solver backend compiled in: TargetII/--aie-postpipeliner-solver | ||
| // are no-ops. Keep pre-commit behavior (heuristics only). | ||
| if (!Solver::hasSolver()) { | ||
| const bool AnyRequest = | ||
| PresetII || UseSolver || getInitiationInterval(getLoopID(LoopBlock)); | ||
| if (AnyRequest) { | ||
| DEBUG_SUMMARY( | ||
| dbgs() << " PostPipeliner: ignoring TargetII/solver request, " | ||
| "no solver compiled in\n"); | ||
| } | ||
| return true; | ||
| } | ||
| auto ParsedInitiationInterval = getInitiationInterval(getLoopID(LoopBlock)); | ||
| if (ParsedInitiationInterval) { | ||
| TargetII = *ParsedInitiationInterval; | ||
| DEBUG_SUMMARY(dbgs() << " PostPipeliner: TargetII=" << TargetII << "\n"); | ||
|
|
||
| // --aie-postpipeliner-target-ii: hard one-shot. Bypasses MaxII and | ||
| // skips heuristics; only the solver runs at exactly this II. | ||
| if (PresetII) { | ||
| TargetII = PresetII; | ||
| TargetIIIsHardLimit = true; | ||
| } else if (!UseSolver) { | ||
| // Pragma soft hint: heuristics iterate normally and the solver runs | ||
| // at II == TargetII. --aie-postpipeliner-solver overrides this. | ||
| if (const auto Pragma = getInitiationInterval(getLoopID(LoopBlock))) | ||
| TargetII = *Pragma; | ||
| } | ||
|
|
||
| if (TargetII) | ||
| DEBUG_SUMMARY(dbgs() << " PostPipeliner: TargetII=" << TargetII | ||
| << (TargetIIIsHardLimit ? " (hard)" : " (soft)") | ||
| << "\n"); | ||
| return true; | ||
| } | ||
|
|
||
|
|
@@ -1431,8 +1458,7 @@ static const ConfigStrategy::Configuration Heuristics[] = { | |
| {1, false, false, 1, {Prio::NodeNum}, {}}, // pure bottom up | ||
| }; | ||
|
|
||
| bool PostPipeliner::tryApproaches() { | ||
| DEBUG_SUMMARY(dbgs() << "-- MinLength=" << MinLength << "\n"); | ||
| bool PostPipeliner::runHeuristics() { | ||
| int HeuristicIndex = 0; | ||
| for (const auto &Config : Heuristics) { | ||
| if (Heuristic >= 0 && Heuristic != HeuristicIndex++) { | ||
|
|
@@ -1459,27 +1485,45 @@ bool PostPipeliner::tryApproaches() { | |
| } | ||
| DEBUG_SUMMARY(dbgs() << " Strategy " << S.name() << " failed\n"); | ||
| } | ||
| // Last-chance heuristic: relax the iteration-count constraint. | ||
| IterCountSlackStrategy Relaxed(*DAG, Info, MinLength + II); | ||
| resetSchedule(/*FullReset=*/true); | ||
| if (scheduleWithStrategy(Relaxed)) { | ||
| return scheduleWithStrategy(Relaxed); | ||
| } | ||
|
|
||
| bool PostPipeliner::runSolverFallback() { | ||
| const SolverData Data = createSolverData(); | ||
| const int NS = MinLength / II; | ||
| if (solve(Data, NS, false)) { | ||
| return true; | ||
| } | ||
|
|
||
| // TargetII is the OK from the user to spend some time reaching this II. | ||
| // Therefore, if we haven't found a solution yet, bring in the big guns. | ||
| if (II == TargetII) { | ||
| const SolverData Data = createSolverData(); | ||
| int NS = MinLength / II; | ||
| if (solve(Data, NS, false)) { | ||
| return true; | ||
| } | ||
| if (NS == MinTripCount) { | ||
| // Only try this at the boundary case | ||
| if (solve(Data, NS + 1, true)) { | ||
| return true; | ||
| } | ||
| } | ||
| // Let's try SEF solution. | ||
| if (solve(Data, NS + 1, true)) { | ||
| return true; | ||
| } | ||
| // Marsshot: last try with full NS + 1. | ||
| return solve(Data, NS + 1, false); | ||
| } | ||
|
|
||
| bool PostPipeliner::tryApproaches() { | ||
| DEBUG_SUMMARY(dbgs() << "-- MinLength=" << MinLength << "\n"); | ||
|
|
||
| // CLI --aie-postpipeliner-target-ii: solver-only, skip heuristics. | ||
| const bool SolverOnly = TargetIIIsHardLimit; | ||
| const bool RunHeuristics = !SolverOnly; | ||
|
|
||
| // Solver runs at this II if the user asked for solver fallback at every | ||
| // II, or this II matches a TargetII (CLI hard or pragma soft hint). | ||
| const bool SolverAtThisII = | ||
| UseSolver || SolverOnly || (TargetII != 0 && II == TargetII); | ||
| // Belt-and-braces re-check: never call solve() with no backend, even | ||
| // though isPostPipelineCandidate already filtered the request out. | ||
| const bool RunSolver = Solver::hasSolver() && SolverAtThisII; | ||
|
|
||
| if (RunHeuristics && runHeuristics()) | ||
| return true; | ||
| if (RunSolver && runSolverFallback()) | ||
| return true; | ||
|
|
||
| DEBUG_SUMMARY(dbgs() << "=== II=" << II << " Failed ===\n"); | ||
| return false; | ||
|
|
@@ -1495,13 +1539,41 @@ bool PostPipeliner::solve(const SolverData &Data, int NS, bool SEFStage) { | |
| return false; | ||
| } | ||
|
|
||
| // Register \p MI's per-cycle FU footprint into \p Data so | ||
| // SWPSolver::resourceConflicts can forbid same-cycle co-occupation. | ||
| static void addResourceUses(SolverData &Data, int Id, const MachineInstr *MI, | ||
| const AIEBaseInstrInfo *TII) { | ||
| const InstrItineraryData *Itin = | ||
| MI->getMF()->getSubtarget().getInstrItineraryData(); | ||
| if (!Itin || Itin->isEmpty()) { | ||
| return; | ||
| } | ||
| const unsigned SchedClass = MI->getDesc().getSchedClass(); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No. TII->getSchedClass().
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We use the current pattern all across the codebase. |
||
| int Cycle = 0; | ||
| for (const InstrStage &IS : Itin->getStages(SchedClass)) { | ||
| const bool IsRequired = IS.getReservationKind() == InstrStage::Required; | ||
| const bool IsReserved = IS.getReservationKind() == InstrStage::Reserved; | ||
| assert(IsRequired != IsReserved && | ||
| "ReservationKind must be exactly one of Required/Reserved"); | ||
|
|
||
| assert(IS.getNextCycles() >= 0 && | ||
|
F-Stuckmann marked this conversation as resolved.
|
||
| "Negative NextCycles breaks cumulative offset"); | ||
| const unsigned FUIndex = TII->getFuncUnitIndex(IS.getUnits()); | ||
| for (unsigned C = 0; C < IS.getCycles(); C++) { | ||
| const int Off = Cycle + C; | ||
| Data.addResourceUse(Id, Off, FUIndex, IsRequired); | ||
| } | ||
| Cycle += IS.getNextCycles(); | ||
| } | ||
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. And I'm not too happy about this parallel implementation of resource queries. |
||
|
|
||
| SolverData PostPipeliner::createSolverData() { | ||
| SolverData Data; | ||
| // Add the forward dependence edges within the first iteration | ||
| for (int N = 0; N < NInstr; N++) { | ||
| const SUnit &SU = DAG->SUnits[N]; | ||
| MachineInstr *const MI = SU.getInstr(); | ||
| auto SlotKind = TII->getSlotKind(MI->getOpcode()); | ||
| const auto SlotKind = TII->getSlotKind(MI->getOpcode()); | ||
|
|
||
| const uint64_t MemoryBanks = HR.getMemoryBanks(MI); | ||
| const int Id = | ||
|
|
@@ -1512,6 +1584,7 @@ SolverData PostPipeliner::createSolverData() { | |
| assert(From < NInstr); | ||
| Data.addLatency(From, N, Dep.getSignedLatency()); | ||
| } | ||
| addResourceUses(Data, Id, MI, TII); | ||
| } | ||
|
|
||
| // Add loop-carried dependences to future iterations. The iteration | ||
|
|
@@ -1534,10 +1607,8 @@ SolverData PostPipeliner::createSolverData() { | |
| bool PostPipeliner::applySolver(const SolverData &Data, SWPSolver &Solver, | ||
| int NS, bool SEFStage) { | ||
|
|
||
| // We don't model the resource hazards. They would be very tedious to express, | ||
| // since resource uses are offset relative to the instruction cycle. We would | ||
| // need to interpret raw itinerary data, and the modulo constraints on those | ||
| // would lead to very awkard expressions. | ||
| // FU resource hazards are modeled by SWPSolver::resourceConflicts via | ||
| // SolverData::ResourceUses (Required/Reserved bits per InstrItin stage). | ||
| Solver.setScheduleSize(II, NS); | ||
| Solver.genModel(Data, SEFStage); | ||
| if (!Solver.solveModel()) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.