Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/lib/Target/AIE/AIEBaseInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,10 @@ struct AIEBaseInstrInfo : public TargetInstrInfo {
llvm_unreachable("Target didn't implement getNumReservedDelaySlots");
}

/// Map the value returned by an InstrStage's getUnits() onto the bit
/// position of the functional unit it denotes.
/// The base implementation hands the value back unchanged (narrowed to
/// unsigned), matching AIE's FUNCUNIT_REPRESENTATION(x) = (x).
virtual unsigned getFuncUnitIndex(uint64_t Units) const {
  return static_cast<unsigned>(Units);
}
Comment thread
F-Stuckmann marked this conversation as resolved.

/// Tell whether \p Opc is a JNZ instruction; this is mainly used to detect
/// the branch of a downcounting loop.
/// The base implementation answers false for every opcode.
virtual bool isJNZ(unsigned Opc) const {
  return false;
}
Expand Down
25 changes: 19 additions & 6 deletions llvm/lib/Target/AIE/AIEInterBlockScheduling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,14 @@ SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) {
if (BS.getRegions().size() == 1) {
auto &PostSWP = BS.getPostSWP();
if (PostSWP.isPostPipelineCandidate(*BS.TheBlock)) {
BS.FixPoint.II = PostSWP.getResMII(*BS.TheBlock);
// A CLI --aie-postpipeliner-target-ii is a hard limit: start at
// exactly that II (bypassing --aie-postpipeliner-maxii) and let
// updatePipelining one-shot it. A pragma-driven TargetII is a soft
// hint: start at ResMII and iterate normally; the solver fallback at
// II == TargetII is handled inside the post-pipeliner.
BS.FixPoint.II = PostSWP.isTargetIIHardLimit()
? PostSWP.getTargetII()
: PostSWP.getResMII(*BS.TheBlock);
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should simplify. When we drive an example from the command line, we want to say where we start, where we stop. Orthogonal to that, we want to say which approaches to enable. In my branch I have introduced a MinII CLI.

BS.FixPoint.IITries = 1;
return SchedulingStage::Pipelining;
}
Expand All @@ -632,11 +639,17 @@ SchedulingStage InterBlockScheduling::updatePipelining(BlockState &BS) {
return BS.FixPoint.Stage;
}

// Otherwise try a larger II.
// We cut off at larger IIs to prevent excessive compilation time.
if (++BS.FixPoint.II <= PostPipelinerMaxII &&
++BS.FixPoint.IITries <= PostPipelinerMaxTryII) {
return SchedulingStage::Pipelining;
// A CLI --aie-postpipeliner-target-ii is one-shot: try only the requested
// II, even if it exceeds --aie-postpipeliner-maxii. If that attempt
// failed, do not try any other II. A pragma-driven TargetII keeps the
// normal iteration (ResMII..MaxII).
if (!BS.getPostSWP().isTargetIIHardLimit()) {
// Otherwise try a larger II.
// We cut off at larger IIs to prevent excessive compilation time.
if (++BS.FixPoint.II <= PostPipelinerMaxII &&
++BS.FixPoint.IITries <= PostPipelinerMaxTryII) {
return SchedulingStage::Pipelining;
}
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, a one-shot attempt can be built from MinII and MaxII and enabling the algorithms that you want to act on it.

}

auto *BB = BS.TheBlock;
Expand Down
135 changes: 103 additions & 32 deletions llvm/lib/Target/AIE/AIEPostPipeliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "llvm/CodeGen/ResourceScoreboard.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <limits>
#include <string>
Expand All @@ -44,9 +45,16 @@ static cl::opt<int>
cl::desc("Number of runs for heuristics that converge"),
cl::init(20), cl::Hidden);

static cl::opt<int> PresetII("aie-postpipeliner-target-ii",
cl::desc("II for which to allow the solver"),
cl::init(0), cl::Hidden);
static cl::opt<bool>
UseSolver("aie-postpipeliner-solver",
cl::desc("Use the solver as fallback after heuristics fail"),
cl::init(false), cl::Hidden);

static cl::opt<int>
PresetII("aie-postpipeliner-target-ii",
cl::desc("Run solver-only at this II; bypasses MaxII and "
"skips heuristics"),
cl::init(0), cl::Hidden);

PipelineScheduleVisitor::~PipelineScheduleVisitor() {}

Expand Down Expand Up @@ -160,16 +168,35 @@ bool PostPipeliner::isPostPipelineCandidate(MachineBasicBlock &LoopBlock) {
return false;
}

if (PresetII) {
TargetII = PresetII;
// No solver backend compiled in: TargetII/--aie-postpipeliner-solver
// are no-ops. Keep pre-commit behavior (heuristics only).
if (!Solver::hasSolver()) {
const bool AnyRequest =
PresetII || UseSolver || getInitiationInterval(getLoopID(LoopBlock));
if (AnyRequest) {
DEBUG_SUMMARY(
dbgs() << " PostPipeliner: ignoring TargetII/solver request, "
"no solver compiled in\n");
}
return true;
}
auto ParsedInitiationInterval = getInitiationInterval(getLoopID(LoopBlock));
if (ParsedInitiationInterval) {
TargetII = *ParsedInitiationInterval;
DEBUG_SUMMARY(dbgs() << " PostPipeliner: TargetII=" << TargetII << "\n");

// --aie-postpipeliner-target-ii: hard one-shot. Bypasses MaxII and
// skips heuristics; only the solver runs at exactly this II.
if (PresetII) {
TargetII = PresetII;
TargetIIIsHardLimit = true;
} else if (!UseSolver) {
// Pragma soft hint: heuristics iterate normally and the solver runs
// at II == TargetII. --aie-postpipeliner-solver overrides this.
if (const auto Pragma = getInitiationInterval(getLoopID(LoopBlock)))
TargetII = *Pragma;
}

if (TargetII)
DEBUG_SUMMARY(dbgs() << " PostPipeliner: TargetII=" << TargetII
<< (TargetIIIsHardLimit ? " (hard)" : " (soft)")
<< "\n");
return true;
}

Expand Down Expand Up @@ -1431,8 +1458,7 @@ static const ConfigStrategy::Configuration Heuristics[] = {
{1, false, false, 1, {Prio::NodeNum}, {}}, // pure bottom up
};

bool PostPipeliner::tryApproaches() {
DEBUG_SUMMARY(dbgs() << "-- MinLength=" << MinLength << "\n");
bool PostPipeliner::runHeuristics() {
int HeuristicIndex = 0;
for (const auto &Config : Heuristics) {
if (Heuristic >= 0 && Heuristic != HeuristicIndex++) {
Expand All @@ -1459,27 +1485,45 @@ bool PostPipeliner::tryApproaches() {
}
DEBUG_SUMMARY(dbgs() << " Strategy " << S.name() << " failed\n");
}
// Last-chance heuristic: relax the iteration-count constraint.
IterCountSlackStrategy Relaxed(*DAG, Info, MinLength + II);
resetSchedule(/*FullReset=*/true);
if (scheduleWithStrategy(Relaxed)) {
return scheduleWithStrategy(Relaxed);
}

bool PostPipeliner::runSolverFallback() {
const SolverData Data = createSolverData();
const int NS = MinLength / II;
if (solve(Data, NS, false)) {
return true;
}

// TargetII is the OK from the user to spend some time reaching this II.
// Therefore, if we haven't found a solution yet, bring in the big guns.
if (II == TargetII) {
const SolverData Data = createSolverData();
int NS = MinLength / II;
if (solve(Data, NS, false)) {
return true;
}
if (NS == MinTripCount) {
// Only try this at the boundary case
if (solve(Data, NS + 1, true)) {
return true;
}
}
// Let's try SEF solution.
if (solve(Data, NS + 1, true)) {
return true;
}
// Marsshot: last try with full NS + 1.
return solve(Data, NS + 1, false);
}

bool PostPipeliner::tryApproaches() {
DEBUG_SUMMARY(dbgs() << "-- MinLength=" << MinLength << "\n");

// CLI --aie-postpipeliner-target-ii: solver-only, skip heuristics.
const bool SolverOnly = TargetIIIsHardLimit;
const bool RunHeuristics = !SolverOnly;

// Solver runs at this II if the user asked for solver fallback at every
// II, or this II matches a TargetII (CLI hard or pragma soft hint).
const bool SolverAtThisII =
UseSolver || SolverOnly || (TargetII != 0 && II == TargetII);
// Belt-and-braces re-check: never call solve() with no backend, even
// though isPostPipelineCandidate already filtered the request out.
const bool RunSolver = Solver::hasSolver() && SolverAtThisII;

if (RunHeuristics && runHeuristics())
return true;
if (RunSolver && runSolverFallback())
return true;

DEBUG_SUMMARY(dbgs() << "=== II=" << II << " Failed ===\n");
return false;
Expand All @@ -1495,13 +1539,41 @@ bool PostPipeliner::solve(const SolverData &Data, int NS, bool SEFStage) {
return false;
}

// Register \p MI's per-cycle FU footprint into \p Data so
// SWPSolver::resourceConflicts can forbid same-cycle co-occupation.
static void addResourceUses(SolverData &Data, int Id, const MachineInstr *MI,
const AIEBaseInstrInfo *TII) {
const InstrItineraryData *Itin =
MI->getMF()->getSubtarget().getInstrItineraryData();
if (!Itin || Itin->isEmpty()) {
return;
}
const unsigned SchedClass = MI->getDesc().getSchedClass();
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. TII->getSchedClass().

Copy link
Copy Markdown
Collaborator Author

@F-Stuckmann F-Stuckmann May 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use the current pattern all across the codebase.
Should I change it everywhere in the AIE target where we have TII available?

int Cycle = 0;
for (const InstrStage &IS : Itin->getStages(SchedClass)) {
const bool IsRequired = IS.getReservationKind() == InstrStage::Required;
const bool IsReserved = IS.getReservationKind() == InstrStage::Reserved;
assert(IsRequired != IsReserved &&
"ReservationKind must be exactly one of Required/Reserved");

assert(IS.getNextCycles() >= 0 &&
Comment thread
F-Stuckmann marked this conversation as resolved.
"Negative NextCycles breaks cumulative offset");
const unsigned FUIndex = TII->getFuncUnitIndex(IS.getUnits());
for (unsigned C = 0; C < IS.getCycles(); C++) {
const int Off = Cycle + C;
Data.addResourceUse(Id, Off, FUIndex, IsRequired);
}
Cycle += IS.getNextCycles();
}
}
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And I'm not too happy about this parallel implementation of resource queries.
Perhaps we can publish anyStage() in AIEHazardRecogniser.


SolverData PostPipeliner::createSolverData() {
SolverData Data;
// Add the forward dependence edges within the first iteration
for (int N = 0; N < NInstr; N++) {
const SUnit &SU = DAG->SUnits[N];
MachineInstr *const MI = SU.getInstr();
auto SlotKind = TII->getSlotKind(MI->getOpcode());
const auto SlotKind = TII->getSlotKind(MI->getOpcode());

const uint64_t MemoryBanks = HR.getMemoryBanks(MI);
const int Id =
Expand All @@ -1512,6 +1584,7 @@ SolverData PostPipeliner::createSolverData() {
assert(From < NInstr);
Data.addLatency(From, N, Dep.getSignedLatency());
}
addResourceUses(Data, Id, MI, TII);
}

// Add loop-carried dependences to future iterations. The iteration
Expand All @@ -1534,10 +1607,8 @@ SolverData PostPipeliner::createSolverData() {
bool PostPipeliner::applySolver(const SolverData &Data, SWPSolver &Solver,
int NS, bool SEFStage) {

// We don't model the resource hazards. They would be very tedious to express,
// since resource uses are offset relative to the instruction cycle. We would
// need to interpret raw itinerary data, and the modulo constraints on those
// would lead to very awkard expressions.
// FU resource hazards are modeled by SWPSolver::resourceConflicts via
// SolverData::ResourceUses (Required/Reserved bits per InstrItin stage).
Solver.setScheduleSize(II, NS);
Solver.genModel(Data, SEFStage);
if (!Solver.solveModel()) {
Expand Down
25 changes: 22 additions & 3 deletions llvm/lib/Target/AIE/AIEPostPipeliner.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,11 +247,14 @@ class PostPipeliner {
/// The minimum tripcount, read from the pragma, or from an LC initialization.
int MinTripCount = 0;

/// The II requested by a pragma. This will trigger expensive algorithms
/// like solvers or exhaustive searches to be run if the heuristic methods
/// don't find a solution.
/// User/pragma-requested II at which the solver is additionally run.
/// Stays 0 when no solver backend is compiled in.
int TargetII = 0;

/// True when TargetII is a hard CLI one-shot (skip heuristics, bypass
/// MaxII), false when it's a soft pragma hint.
bool TargetIIIsHardLimit = false;

/// The Preheader of the loop.
MachineBasicBlock *Preheader = nullptr;

Expand Down Expand Up @@ -316,6 +319,14 @@ class PostPipeliner {
/// If it returns true, a valid schedule is laid down in Info.
bool tryApproaches();

/// Run the heuristic strategies (each ConfigStrategy plus the relaxed
/// IterCountSlackStrategy fallback) at the current II.
bool runHeuristics();

/// Run the solver-based last-resort attempts at the current II:
/// (NS, !SEF), (NS+1, SEF), (NS+1, !SEF).
bool runSolverFallback();

/// Find the first available unscheduled instruction with the highest
/// priority.
int mostUrgent(PostPipelinerStrategy &Strategy);
Expand Down Expand Up @@ -346,6 +357,14 @@ class PostPipeliner {
/// \pre isPostPipelineCandidate has returned true
int getResMII(MachineBasicBlock &LoopBlock);

/// The II requested by the user or a pragma; 0 when none was set.
/// \pre isPostPipelineCandidate has returned true
int getTargetII() const {
  return TargetII;
}

/// Whether TargetII is a hard CLI one-shot (true) rather than a soft pragma
/// hint (false).
/// \pre isPostPipelineCandidate has returned true
bool isTargetIIHardLimit() const {
  return TargetIIIsHardLimit;
}

// Schedule using the given InitiationInterval. Return true when successful.
// In that case calls to the query methods below are legitimate.
bool schedule(ScheduleDAGMI &DAG, int InitiationInterval,
Expand Down
Loading