Skip to content

Commit 71614b9

Browse files
committed
[AIEX] Premisched: more conservative reg pressure reduction
- Reserve a certain number of registers, not regunits
- Be extra careful when the region max pressure exceeds limits
1 parent 73f1cd4 commit 71614b9

File tree

11 files changed

+293
-261
lines changed

11 files changed

+293
-261
lines changed

llvm/lib/Target/AIE/AIEMachineScheduler.cpp

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ static cl::opt<bool>
4040
cl::desc("Track reg pressure more accurately and "
4141
"delay some instructions to avoid spills."));
4242
static cl::opt<unsigned> NumCriticalFreeRegs(
43-
"aie-premisched-near-critical-regs", cl::init(4),
43+
"aie-premisched-near-critical-regs", cl::init(2),
4444
cl::desc("Number of free registers below which premisched should actively "
4545
"try to reduce the pressure."));
4646

@@ -761,6 +761,33 @@ bool AIEPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
761761
return false;
762762
}
763763

764+
/// Initialize the strategy for \p DAG and rebuild the per-pressure-set
/// threshold cache (PSetThresholds).
///
/// After delegating to GenericScheduler::initialize(), one threshold is stored
/// per entry of the region's MaxSetPressure vector. Each threshold starts as
/// the limit reported by RegClassInfo for that pressure set; when the region's
/// maximum pressure already exceeds that limit, the threshold is lowered by
/// the excess (clamped at zero).
///
/// \param DAG the scheduling DAG being initialized.
void AIEPreRASchedStrategy::initialize(ScheduleDAGMI *DAG) {
765+
GenericScheduler::initialize(DAG);
766+
767+
// Cache the threshold for each pressure set.
768+
// NOTE(review): the static_cast below is unchecked; it assumes DAG is
// actually a ScheduleDAGMILive. Confirm against the callers.
const std::vector<unsigned> &RegionMaxPressure =
769+
static_cast<ScheduleDAGMILive *>(DAG)->getRegPressure().MaxSetPressure;
770+
// Drop any thresholds left from a previous call before refilling.
PSetThresholds.clear();
771+
for (unsigned PSet = 0, EndPSet = RegionMaxPressure.size(); PSet < EndPSet;
772+
++PSet) {
773+
unsigned MaxPressure = RegionMaxPressure[PSet];
774+
unsigned Limit = Context->RegClassInfo->getRegPressureSetLimit(PSet);
775+
776+
// If the region has a maximum pressure that exceeds the target threshold,
777+
// artificially reduce that threshold to force more conservative scheduling.
778+
if (MaxPressure > Limit) {
779+
unsigned ExtraPressure = MaxPressure - Limit;
780+
// Subtract the excess, saturating at zero so the unsigned Limit cannot
// wrap around.
if (Limit > ExtraPressure)
781+
Limit -= ExtraPressure;
782+
else
783+
Limit = 0;
784+
LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(PSet)
785+
<< " Decreased Threshold to " << Limit << "\n");
786+
}
787+
// Entries are appended in PSet order, so the vector is indexable by
// pressure-set id.
PSetThresholds.push_back(Limit);
788+
}
789+
}
790+
764791
void AIEPreRASchedStrategy::enterRegion(MachineBasicBlock *BB,
765792
MachineBasicBlock::iterator Begin,
766793
MachineBasicBlock::iterator End,
@@ -874,8 +901,9 @@ bool AIEPreRASchedStrategy::isAvailableNode(SUnit &SU, SchedBoundary &Zone,
874901
}
875902

876903
unsigned CurrPressure = BotRPT.getRegSetPressureAtPos()[WorstPC.getPSet()];
877-
if (CurrPressure + WorstPC.getUnitInc() <
878-
TRI->getRegPressureSetLimit(*CurMBB->getParent(), WorstPC.getPSet())) {
904+
if (CurrPressure + WorstPC.getUnitInc() +
905+
(NumCriticalFreeRegs * WorstPC.getUnitInc()) <
906+
PSetThresholds[WorstPC.getPSet()]) {
879907
// Worsening pressure, but still within limits, keep node as available
880908
return true;
881909
}
@@ -960,10 +988,11 @@ bool AIEPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
960988
if (!PC.isValid())
961989
return false;
962990
unsigned CurrPressure = BotRPT.getRegSetPressureAtPos()[PC.getPSet()];
963-
unsigned Threshold =
964-
TRI->getRegPressureSetLimit(*CurMBB->getParent(), PC.getPSet());
965-
return Threshold <= NumCriticalFreeRegs ||
966-
CurrPressure >= Threshold - NumCriticalFreeRegs;
991+
unsigned Threshold = PSetThresholds[PC.getPSet()];
992+
unsigned NumCriticalFreeUnits =
993+
NumCriticalFreeRegs * std::abs(PC.getUnitInc());
994+
return Threshold <= NumCriticalFreeUnits ||
995+
CurrPressure >= Threshold - NumCriticalFreeUnits;
967996
};
968997
PressureChange TryCandPC =
969998
getPressureChange(estimatePressureDiff(*TryCand.SU, BotRPT));
@@ -972,13 +1001,12 @@ bool AIEPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
9721001
if ((IsNearCritical(TryCandPC) || IsNearCritical(CandPC)) &&
9731002
tryPressure(TryCandPC, CandPC, TryCand, Cand, RegMax, TRI, DAG->MF))
9741003
return TryCand.Reason != NoCand;
975-
}
9761004

977-
// Avoid increasing the max pressure of the entire region.
978-
if (DAG->isTrackingPressure() &&
979-
tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
980-
Cand, RegMax, TRI, DAG->MF))
981-
return TryCand.Reason != NoCand;
1005+
// Avoid increasing the max pressure of the entire region.
1006+
if (tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax,
1007+
TryCand, Cand, RegMax, TRI, DAG->MF))
1008+
return TryCand.Reason != NoCand;
1009+
}
9821010

9831011
// Fall through to original instruction order.
9841012
if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||

llvm/lib/Target/AIE/AIEMachineScheduler.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ class AIEPreRASchedStrategy : public GenericScheduler {
151151
public:
152152
AIEPreRASchedStrategy(const MachineSchedContext *C) : GenericScheduler(C) {}
153153

154+
void initialize(ScheduleDAGMI *DAG) override;
155+
154156
void enterRegion(MachineBasicBlock *BB, MachineBasicBlock::iterator Begin,
155157
MachineBasicBlock::iterator End, unsigned RegionInstrs);
156158
void leaveRegion(const SUnit &ExitSU);
@@ -182,6 +184,8 @@ class AIEPreRASchedStrategy : public GenericScheduler {
182184
/// pressure-reducing SU to be scheduled first.
183185
/// SUDelayerMap[0] = 2 means that SU(0) is waiting on SU(2).
184186
std::vector<unsigned> SUDelayerMap;
187+
188+
std::vector<unsigned> PSetThresholds;
185189
};
186190

187191
/// An extension to ScheduleDAGMI that provides callbacks on region entry/exit

llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-dyn-stackalloc.ll

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -150,34 +150,33 @@ define void @test_huge_stack(i32 noundef %n) #0 {
150150
; CHECK-NEXT: mov p2, p7
151151
; CHECK-NEXT: mov p6, p7
152152
; CHECK-NEXT: paddb [p0], m0
153-
; CHECK-NEXT: paddb [p2], #-32
153+
; CHECK-NEXT: paddb [p6], #-32
154+
; CHECK-NEXT: movxm m0, #-40032
154155
; CHECK-NEXT: st r0, [p0, #0]
155156
; CHECK-NEXT: lda r0, [p0, #0]
156-
; CHECK-NEXT: mov r16, p2
157+
; CHECK-NEXT: paddb [p2], m0
157158
; CHECK-NEXT: mov p0, sp
158-
; CHECK-NEXT: st p0, [p2, #0]
159-
; CHECK-NEXT: mov p0, p1
160-
; CHECK-NEXT: mov p2, p7
161-
; CHECK-NEXT: paddb [p2], #-24
159+
; CHECK-NEXT: mov r16, p2
160+
; CHECK-NEXT: st p0, [p6, #0]
161+
; CHECK-NEXT: mov p0, p7
162+
; CHECK-NEXT: paddb [p0], #-24
162163
; CHECK-NEXT: lshl r2, r0, r2
163-
; CHECK-NEXT: st r0, [p2], #4
164+
; CHECK-NEXT: st r0, [p0], #4
164165
; CHECK-NEXT: add r2, r2, #31
165-
; CHECK-NEXT: st r1, [p2, #0]
166-
; CHECK-NEXT: and r2, r2, r3
166+
; CHECK-NEXT: st r1, [p0, #0]
167167
; CHECK-NEXT: jl #extern_call
168-
; CHECK-NEXT: mov m0, r2 // Delay Slot 5
169-
; CHECK-NEXT: paddb [p1], m0 // Delay Slot 4
170-
; CHECK-NEXT: movxm m0, #-40032 // Delay Slot 3
171-
; CHECK-NEXT: paddb [p6], m0 // Delay Slot 2
168+
; CHECK-NEXT: mov p0, p1 // Delay Slot 5
169+
; CHECK-NEXT: and r2, r2, r3 // Delay Slot 4
170+
; CHECK-NEXT: mov m0, r2 // Delay Slot 3
171+
; CHECK-NEXT: paddb [p1], m0 // Delay Slot 2
172172
; CHECK-NEXT: mov sp, p1 // Delay Slot 1
173173
; CHECK-NEXT: nopb ; nopa ; nops ; jl #extern_call; nopv
174174
; CHECK-NEXT: nopa ; nopx // Delay Slot 5
175175
; CHECK-NEXT: nop // Delay Slot 4
176176
; CHECK-NEXT: nop // Delay Slot 3
177177
; CHECK-NEXT: nop // Delay Slot 2
178-
; CHECK-NEXT: mov p0, p6 // Delay Slot 1
179-
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; mov p0, r16; nopv
180-
; CHECK-NEXT: lda p0, [p0, #0]; nopx
178+
; CHECK-NEXT: mov p0, r16 // Delay Slot 1
179+
; CHECK-NEXT: lda p0, [p6, #0]; nopx
181180
; CHECK-NEXT: nop
182181
; CHECK-NEXT: nop
183182
; CHECK-NEXT: nop

llvm/test/CodeGen/AIE/aie2/dyn-stackalloc.ll

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -150,34 +150,33 @@ define void @test_huge_stack(i32 noundef %n) #0 {
150150
; CHECK-NEXT: mov p2, p7
151151
; CHECK-NEXT: mov p6, p7
152152
; CHECK-NEXT: paddb [p0], m0
153-
; CHECK-NEXT: paddb [p2], #-32
153+
; CHECK-NEXT: paddb [p6], #-32
154+
; CHECK-NEXT: movxm m0, #-40032
154155
; CHECK-NEXT: st r0, [p0, #0]
155156
; CHECK-NEXT: lda r0, [p0, #0]
156-
; CHECK-NEXT: mov r16, p2
157+
; CHECK-NEXT: paddb [p2], m0
157158
; CHECK-NEXT: mov p0, sp
158-
; CHECK-NEXT: st p0, [p2, #0]
159-
; CHECK-NEXT: mov p0, p1
160-
; CHECK-NEXT: mov p2, p7
161-
; CHECK-NEXT: paddb [p2], #-24
159+
; CHECK-NEXT: mov r16, p2
160+
; CHECK-NEXT: st p0, [p6, #0]
161+
; CHECK-NEXT: mov p0, p7
162+
; CHECK-NEXT: paddb [p0], #-24
162163
; CHECK-NEXT: lshl r2, r0, r2
163-
; CHECK-NEXT: st r0, [p2], #4
164+
; CHECK-NEXT: st r0, [p0], #4
164165
; CHECK-NEXT: add r2, r2, #31
165-
; CHECK-NEXT: st r1, [p2, #0]
166-
; CHECK-NEXT: and r2, r2, r3
166+
; CHECK-NEXT: st r1, [p0, #0]
167167
; CHECK-NEXT: jl #extern_call
168-
; CHECK-NEXT: mov m0, r2 // Delay Slot 5
169-
; CHECK-NEXT: paddb [p1], m0 // Delay Slot 4
170-
; CHECK-NEXT: movxm m0, #-40032 // Delay Slot 3
171-
; CHECK-NEXT: paddb [p6], m0 // Delay Slot 2
168+
; CHECK-NEXT: mov p0, p1 // Delay Slot 5
169+
; CHECK-NEXT: and r2, r2, r3 // Delay Slot 4
170+
; CHECK-NEXT: mov m0, r2 // Delay Slot 3
171+
; CHECK-NEXT: paddb [p1], m0 // Delay Slot 2
172172
; CHECK-NEXT: mov sp, p1 // Delay Slot 1
173173
; CHECK-NEXT: nopb ; nopa ; nops ; jl #extern_call; nopv
174174
; CHECK-NEXT: nopa ; nopx // Delay Slot 5
175175
; CHECK-NEXT: nop // Delay Slot 4
176176
; CHECK-NEXT: nop // Delay Slot 3
177177
; CHECK-NEXT: nop // Delay Slot 2
178-
; CHECK-NEXT: mov p0, p6 // Delay Slot 1
179-
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; mov p0, r16; nopv
180-
; CHECK-NEXT: lda p0, [p0, #0]; nopx
178+
; CHECK-NEXT: mov p0, r16 // Delay Slot 1
179+
; CHECK-NEXT: lda p0, [p6, #0]; nopx
181180
; CHECK-NEXT: nop
182181
; CHECK-NEXT: nop
183182
; CHECK-NEXT: nop

0 commit comments

Comments
 (0)