-
Couldn't load subscription status.
- Fork 29
[AIEX] Scheduler improvements #147
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8193c57
8dc4a1b
73f1cd4
71614b9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,6 +37,10 @@ static cl::opt<bool> LoopEpilogueAnalysis( | |
| "aie-loop-epilogue-analysis", cl::init(true), | ||
| cl::desc("[AIE] Perform Loop/Epilogue analysis with loop scheduling")); | ||
|
|
||
| // Number of fix-point iterations during which a non-converged latency is | ||
| // handled by raising the latency margin of one instruction at a time. Once | ||
| // exceeded, the margin is raised for all instructions at once. The value is | ||
| // also added to the iteration bound after which convergence is abandoned. | ||
| static cl::opt<int> MaxExpensiveIterations( | ||
| "aie-loop-aware-expensive-iterations", cl::init(25), | ||
| cl::desc("[AIE] Maximum number of fix-point iterations that increase the " | ||
| "latency margin of a single instruction before increasing it " | ||
| "for all instructions")); | ||
|
|
||
| namespace llvm::AIE { | ||
|
|
||
| void dumpInterBlock(const InterBlockEdges &Edges) { | ||
|
|
@@ -166,10 +170,13 @@ bool InterBlockScheduling::leaveBlock() { | |
| if (BS.Kind == BlockType::Loop && !updateFixPoint(BS)) { | ||
| BS.FixPoint.NumIters++; | ||
| // Iterate on CurrentBlock | ||
| // We will first try to increase the latency margin for one instruction at | ||
| // a time, before increasing that margin for all instructions at once. | ||
| // If we are very unlucky, we may step both the latency margin and | ||
| // the resource margin to the max. Any more indicates failure to converge, | ||
| // and we abort to prevent an infinite loop. | ||
| if (BS.FixPoint.NumIters > 2 * HR->getConflictHorizon()) { | ||
| if (BS.FixPoint.NumIters > | ||
|
||
| MaxExpensiveIterations + 2 * HR->getConflictHorizon()) { | ||
| report_fatal_error("Inter-block scheduling did not converge."); | ||
| } | ||
| return false; | ||
|
|
@@ -219,7 +226,7 @@ bool InterBlockScheduling::resourcesConverged(BlockState &BS) const { | |
| return true; | ||
| } | ||
|
|
||
| bool InterBlockScheduling::latencyConverged(BlockState &BS) const { | ||
| MachineInstr *InterBlockScheduling::latencyConverged(BlockState &BS) const { | ||
| const auto &SubTarget = BS.TheBlock->getParent()->getSubtarget(); | ||
| auto *TII = static_cast<const AIEBaseInstrInfo *>(SubTarget.getInstrInfo()); | ||
| auto *ItinData = SubTarget.getInstrItineraryData(); | ||
|
|
@@ -283,7 +290,7 @@ bool InterBlockScheduling::latencyConverged(BlockState &BS) const { | |
| << " not met (" << Distance << ")\n"); | ||
| DEBUG_LOOPAWARE(dbgs() << " " << Succ->NodeNum << ": " | ||
| << *Succ->getInstr()); | ||
| return false; | ||
| return Pred->getInstr(); | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -296,7 +303,7 @@ bool InterBlockScheduling::latencyConverged(BlockState &BS) const { | |
| // upperbound of the latency safety margin that should be provided by | ||
| // the epilogue | ||
| BS.FixPoint.MaxLatencyExtent = MaxExtent; | ||
| return true; | ||
| return nullptr; | ||
| } | ||
|
|
||
| bool InterBlockScheduling::updateFixPoint(BlockState &BS) { | ||
|
|
@@ -316,11 +323,20 @@ bool InterBlockScheduling::updateFixPoint(BlockState &BS) { | |
| // Iterate on CurMBB | ||
| return false; | ||
| } | ||
| if (!latencyConverged(BS)) { | ||
| BS.FixPoint.LatencyMargin++; | ||
|
|
||
| if (MachineInstr *MINeedsHigherCap = latencyConverged(BS)) { | ||
| auto Res = BS.FixPoint.PerMILatencyMargin.try_emplace(MINeedsHigherCap, 0); | ||
| // Increase the latency margin per instruction, unless we already iterated | ||
| // more than MaxExpensiveIterations without converging. | ||
| if (BS.FixPoint.NumIters <= MaxExpensiveIterations) { | ||
| ++Res.first->second; | ||
|
||
| } else { | ||
| BS.FixPoint.LatencyMargin++; | ||
| } | ||
| DEBUG_LOOPAWARE(dbgs() << " not converged: latency RM=" | ||
| << BS.FixPoint.ResourceMargin << " LM=>" | ||
| << BS.FixPoint.LatencyMargin << "\n"); | ||
| << BS.FixPoint.ResourceMargin | ||
| << " LM=" << BS.FixPoint.LatencyMargin | ||
| << " MIM=" << Res.first->second << "\n"); | ||
| // Iterate on CurMBB | ||
| return false; | ||
| } | ||
|
|
@@ -341,13 +357,18 @@ bool InterBlockScheduling::successorsAreScheduled( | |
| }); | ||
| } | ||
|
|
||
| std::optional<int> | ||
| InterBlockScheduling::getLatencyCap(MachineBasicBlock *BB) const { | ||
| auto &BS = getBlockState(BB); | ||
| std::optional<int> InterBlockScheduling::getLatencyCap(MachineInstr &MI) const { | ||
| auto &BS = getBlockState(MI.getParent()); | ||
| if (BS.Kind != BlockType::Loop) { | ||
| return {}; | ||
| } | ||
| return BS.FixPoint.LatencyMargin; | ||
| if (BS.FixPoint.LatencyMargin) | ||
| return BS.FixPoint.LatencyMargin; | ||
| if (const auto *It = BS.FixPoint.PerMILatencyMargin.find(&MI); | ||
| It != BS.FixPoint.PerMILatencyMargin.end()) { | ||
| return It->second; | ||
| } | ||
| return 0; | ||
| } | ||
|
|
||
| std::optional<int> | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -40,7 +40,7 @@ static cl::opt<bool> | |
| cl::desc("Track reg pressure more accurately and " | ||
| "delay some instructions to avoid spills.")); | ||
| static cl::opt<unsigned> NumCriticalFreeRegs( | ||
| "aie-premisched-near-critical-regs", cl::init(4), | ||
| "aie-premisched-near-critical-regs", cl::init(2), | ||
|
||
| cl::desc("Number of free registers below which premisched should actively " | ||
| "try to reduce the pressure.")); | ||
|
|
||
|
|
@@ -761,6 +761,33 @@ bool AIEPostRASchedStrategy::tryCandidate(SchedCandidate &Cand, | |
| return false; | ||
| } | ||
|
|
||
| /// Initialize the generic scheduler state, then recompute the pressure | ||
| /// threshold of every register pressure set. | ||
| void AIEPreRASchedStrategy::initialize(ScheduleDAGMI *DAG) { | ||
| GenericScheduler::initialize(DAG); | ||
|
|
||
| // Start from an empty cache; one entry is appended per pressure set below. | ||
| PSetThresholds.clear(); | ||
| auto *LiveDAG = static_cast<ScheduleDAGMILive *>(DAG); | ||
| const std::vector<unsigned> &RegionPeaks = | ||
| LiveDAG->getRegPressure().MaxSetPressure; | ||
| const unsigned NumPSets = RegionPeaks.size(); | ||
|
|
||
| for (unsigned PSet = 0; PSet != NumPSets; ++PSet) { | ||
| const unsigned Peak = RegionPeaks[PSet]; | ||
| unsigned Threshold = Context->RegClassInfo->getRegPressureSetLimit(PSet); | ||
|
|
||
| // When the region's maximum pressure is above the target limit, lower the | ||
| // threshold by the excess (saturating at zero) to force more conservative | ||
| // scheduling. | ||
| if (Peak > Threshold) { | ||
| const unsigned Excess = Peak - Threshold; | ||
| Threshold = (Threshold > Excess) ? Threshold - Excess : 0; | ||
| LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(PSet) | ||
| << " Decreased Threshold to " << Threshold << "\n"); | ||
| } | ||
| PSetThresholds.push_back(Threshold); | ||
| } | ||
| } | ||
|
|
||
| void AIEPreRASchedStrategy::enterRegion(MachineBasicBlock *BB, | ||
| MachineBasicBlock::iterator Begin, | ||
| MachineBasicBlock::iterator End, | ||
|
|
@@ -874,8 +901,9 @@ bool AIEPreRASchedStrategy::isAvailableNode(SUnit &SU, SchedBoundary &Zone, | |
| } | ||
|
|
||
| unsigned CurrPressure = BotRPT.getRegSetPressureAtPos()[WorstPC.getPSet()]; | ||
| if (CurrPressure + WorstPC.getUnitInc() < | ||
| TRI->getRegPressureSetLimit(*CurMBB->getParent(), WorstPC.getPSet())) { | ||
| if (CurrPressure + WorstPC.getUnitInc() + | ||
| (NumCriticalFreeRegs * WorstPC.getUnitInc()) < | ||
| PSetThresholds[WorstPC.getPSet()]) { | ||
| // Worsening pressure, but still within limits, keep node as available | ||
| return true; | ||
| } | ||
|
|
@@ -960,10 +988,11 @@ bool AIEPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, | |
| if (!PC.isValid()) | ||
| return false; | ||
| unsigned CurrPressure = BotRPT.getRegSetPressureAtPos()[PC.getPSet()]; | ||
| unsigned Threshold = | ||
| TRI->getRegPressureSetLimit(*CurMBB->getParent(), PC.getPSet()); | ||
| return Threshold <= NumCriticalFreeRegs || | ||
| CurrPressure >= Threshold - NumCriticalFreeRegs; | ||
| unsigned Threshold = PSetThresholds[PC.getPSet()]; | ||
| unsigned NumCriticalFreeUnits = | ||
| NumCriticalFreeRegs * std::abs(PC.getUnitInc()); | ||
martien-de-jong marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return Threshold <= NumCriticalFreeUnits || | ||
| CurrPressure >= Threshold - NumCriticalFreeUnits; | ||
| }; | ||
| PressureChange TryCandPC = | ||
| getPressureChange(estimatePressureDiff(*TryCand.SU, BotRPT)); | ||
|
|
@@ -972,13 +1001,12 @@ bool AIEPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, | |
| if ((IsNearCritical(TryCandPC) || IsNearCritical(CandPC)) && | ||
| tryPressure(TryCandPC, CandPC, TryCand, Cand, RegMax, TRI, DAG->MF)) | ||
| return TryCand.Reason != NoCand; | ||
| } | ||
|
|
||
| // Avoid increasing the max pressure of the entire region. | ||
| if (DAG->isTrackingPressure() && | ||
| tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand, | ||
| Cand, RegMax, TRI, DAG->MF)) | ||
| return TryCand.Reason != NoCand; | ||
| // Avoid increasing the max pressure of the entire region. | ||
|
||
| if (tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, | ||
| TryCand, Cand, RegMax, TRI, DAG->MF)) | ||
| return TryCand.Reason != NoCand; | ||
| } | ||
|
|
||
| // Fall through to original instruction order. | ||
| if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a rationale behind this number?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes and no. I feel anything over 50 is too much, and anything below 10 is not enough if we need to move a couple of instructions up by 2-3 cycles. So 25 felt like a good compromise. And this works well for loops with an II between 5 and 10 cycles, which is the territory of the PreRA pipeliner for us.