
Commit c8ab74b

Merge remote-tracking branch 'origin/main' into mlir-llvm-indirect-br
* origin/main:
  [mlir][vector] Prevent folding non memref-type gather into maskedload (llvm#135371)
  [mlir][SMT] remove custom forall/exists builder because of asan memory leak
  [bazel] Fix a typo (llvm#135460)
  [bazel] Add support for SMT Dialect (llvm#135454)
  [clang] ASTImporter: fix SubstNonTypeTemplateParmExpr source location (llvm#135450)
  [RISCV] Don't fold offsets into auipc if offset is larger than the reference global variable. (llvm#135297)
  [gn] port d1fd977
  [NFC][LLVM] Apply std::move to object being pushed back in findSymbolCommon (llvm#135290)
  [AMDGPU] Teach iterative schedulers about IGLP (llvm#134953)
2 parents 79561f0 + 357e380 commit c8ab74b

20 files changed, +1191 −258 lines changed

clang/lib/AST/ASTImporter.cpp

+2 −2
@@ -8931,14 +8931,14 @@ ExpectedStmt ASTNodeImporter::VisitSubstNonTypeTemplateParmExpr(
     SubstNonTypeTemplateParmExpr *E) {
   Error Err = Error::success();
   auto ToType = importChecked(Err, E->getType());
-  auto ToExprLoc = importChecked(Err, E->getExprLoc());
+  auto ToNameLoc = importChecked(Err, E->getNameLoc());
   auto ToAssociatedDecl = importChecked(Err, E->getAssociatedDecl());
   auto ToReplacement = importChecked(Err, E->getReplacement());
   if (Err)
     return std::move(Err);
 
   return new (Importer.getToContext()) SubstNonTypeTemplateParmExpr(
-      ToType, E->getValueKind(), ToExprLoc, ToReplacement, ToAssociatedDecl,
+      ToType, E->getValueKind(), ToNameLoc, ToReplacement, ToAssociatedDecl,
       E->getIndex(), E->getPackIndex(), E->isReferenceParameter(),
       E->getFinal());
 }
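
For readers unfamiliar with the pattern above: ASTImporter's importChecked threads a single Error through a batch of imports, and every call after the first failure is a no-op, so one check of Err at the end covers the lot; the fix itself imports the node's stored name location (getNameLoc) rather than a location obtained via getExprLoc. A minimal self-contained sketch of the error-threading idea, with a plain bool as a hypothetical stand-in for llvm::Error:

#include <iostream>
#include <string>

// Hypothetical stand-in for ASTImporter::importChecked: skip all work once an
// earlier call has failed, otherwise "import" (here: copy) the value.
template <typename T>
T importChecked(bool &Err, const T &From, bool SimulateFailure = false) {
  if (Err)                // a previous import failed; do nothing
    return T{};
  if (SimulateFailure) {  // stand-in for a real import error
    Err = true;
    return T{};
  }
  return From;            // successful "import"
}

int main() {
  bool Err = false;
  auto ToType = importChecked(Err, std::string("int"));
  auto ToNameLoc = importChecked(Err, 1234); // the location the fix imports
  if (Err) {              // one check covers the whole batch
    std::cerr << "import failed\n";
    return 1;
  }
  std::cout << ToType << " @ " << ToNameLoc << "\n";
}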

llvm/lib/DebugInfo/Symbolize/Symbolize.cpp

+1 −1
@@ -257,7 +257,7 @@ LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol,
     if (LineInfo.FileName != DILineInfo::BadString) {
       if (Opts.Demangle)
         LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
-      Result.push_back(LineInfo);
+      Result.push_back(std::move(LineInfo));
     }
   }
 
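Why this one-line change is worthwhile: DILineInfo owns std::string fields (file and function names), so copying the local into Result duplicates those heap buffers, while moving transfers them; the local is not used again afterwards. A minimal sketch with a stand-in struct rather than the real llvm::DILineInfo:

#include <string>
#include <utility>
#include <vector>

struct LineInfo {   // stand-in for llvm::DILineInfo
  std::string FileName;
  std::string FunctionName;
};

int main() {
  std::vector<LineInfo> Result;
  LineInfo LI{"a.cpp", "_Z3foov"};
  // LI is dead after this point, as in findSymbolCommon, so moving is safe
  // and avoids copying both string buffers:
  Result.push_back(std::move(LI));
}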

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

+6 −2
@@ -616,12 +616,15 @@ createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
   if (ST.shouldClusterStores())
     DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+  DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
   return DAG;
 }
 
 static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
-  return new GCNIterativeScheduler(C,
-                                   GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
+  auto *DAG = new GCNIterativeScheduler(
+      C, GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
+  DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
+  return DAG;
 }
 
 static ScheduleDAGInstrs *
@@ -632,6 +635,7 @@ createIterativeILPMachineScheduler(MachineSchedContext *C) {
   if (ST.shouldClusterStores())
     DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
   DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
+  DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
   return DAG;
 }
 

llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp

+40 −9
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "GCNIterativeScheduler.h"
+#include "AMDGPUIGroupLP.h"
 #include "GCNSchedStrategy.h"
 #include "SIMachineFunctionInfo.h"
 
@@ -118,21 +119,42 @@ void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
 }
 #endif
 
+void GCNIterativeScheduler::swapIGLPMutations(const Region &R, bool IsReentry) {
+  bool HasIGLPInstrs = false;
+  const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(TII);
+  for (MachineBasicBlock::iterator I = R.Begin; I != R.End; I++) {
+    if (SII->isIGLPMutationOnly(I->getOpcode())) {
+      HasIGLPInstrs = true;
+      break;
+    }
+  }
+
+  if (HasIGLPInstrs) {
+    SavedMutations.clear();
+    SavedMutations.swap(Mutations);
+    auto SchedPhase = IsReentry ? AMDGPU::SchedulingPhase::PreRAReentry
+                                : AMDGPU::SchedulingPhase::Initial;
+
+    addMutation(createIGroupLPDAGMutation(SchedPhase));
+  }
+}
+
 // DAG builder helper
 class GCNIterativeScheduler::BuildDAG {
   GCNIterativeScheduler &Sch;
   SmallVector<SUnit *, 8> TopRoots;
 
   SmallVector<SUnit*, 8> BotRoots;
 public:
-  BuildDAG(const Region &R, GCNIterativeScheduler &_Sch)
-    : Sch(_Sch) {
+  BuildDAG(const Region &R, GCNIterativeScheduler &_Sch, bool IsReentry = false)
+      : Sch(_Sch) {
     auto *BB = R.Begin->getParent();
     Sch.BaseClass::startBlock(BB);
     Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
-
+    Sch.swapIGLPMutations(R, IsReentry);
     Sch.buildSchedGraph(Sch.AA, nullptr, nullptr, nullptr,
                         /*TrackLaneMask*/true);
+    Sch.postProcessDAG();
     Sch.Topo.InitDAGTopologicalSorting();
     Sch.findRootsAndBiasEdges(TopRoots, BotRoots);
   }
@@ -432,13 +454,15 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
 
   auto NewOcc = TargetOcc;
   for (auto *R : Regions) {
+    // Always build the DAG to add mutations
+    BuildDAG DAG(*R, *this);
+
     if (R->MaxPressure.getOccupancy(ST) >= NewOcc)
-      break;
+      continue;
 
     LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
               printLivenessInfo(dbgs(), R->Begin, R->End, LIS));
 
-    BuildDAG DAG(*R, *this);
     const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
     const auto MaxRP = getSchedulePressure(*R, MinSchedule);
     LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
@@ -469,8 +493,11 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
   sortRegionsByPressure(TgtOcc);
   auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
 
-  if (TryMaximizeOccupancy && Occ < TgtOcc)
+  bool IsReentry = false;
+  if (TryMaximizeOccupancy && Occ < TgtOcc) {
     Occ = tryMaximizeOccupancy(TgtOcc);
+    IsReentry = true;
+  }
 
   // This is really weird but for some magic scheduling regions twice
   // gives performance improvement
@@ -489,7 +516,8 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
     LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc);
     for (auto *R : Regions) {
       OverrideLegacyStrategy Ovr(*R, LStrgy, *this);
-
+      IsReentry |= I > 0;
+      swapIGLPMutations(*R, IsReentry);
       Ovr.schedule();
       const auto RP = getRegionPressure(*R);
       LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
@@ -556,8 +584,11 @@ void GCNIterativeScheduler::scheduleILP(
   sortRegionsByPressure(TgtOcc);
   auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
 
-  if (TryMaximizeOccupancy && Occ < TgtOcc)
+  bool IsReentry = false;
+  if (TryMaximizeOccupancy && Occ < TgtOcc) {
     Occ = tryMaximizeOccupancy(TgtOcc);
+    IsReentry = true;
+  }
 
   TgtOcc = std::min(Occ, TgtOcc);
   LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
@@ -566,7 +597,7 @@
 
   unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
   for (auto *R : Regions) {
-    BuildDAG DAG(*R, *this);
+    BuildDAG DAG(*R, *this, IsReentry);
     const auto ILPSchedule = makeGCNILPScheduler(DAG.getBottomRoots(), *this);
 
     const auto RP = getSchedulePressure(*R, ILPSchedule);
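
The core trick in swapIGLPMutations above: when a region contains IGLP directives (SCHED_GROUP_BARRIER or IGLP_OPT), the scheduler parks its normal mutation list in SavedMutations and installs only the IGLP mutation, since those opcodes are documented as mutually exclusive with all non-IGLP DAG mutations. A generic sketch of that swap, using std::function as a hypothetical stand-in for ScheduleDAGMutation:

#include <functional>
#include <utility>
#include <vector>

using Mutation = std::function<void()>; // stand-in for ScheduleDAGMutation

struct IterativeSchedulerSketch {
  std::vector<Mutation> Mutations;      // mutations applied when building a DAG
  std::vector<Mutation> SavedMutations; // parked while IGLP takes over

  void swapIGLPMutations(bool RegionHasIGLPOps, Mutation IGLP) {
    if (!RegionHasIGLPOps)
      return;                             // normal mutations stay active
    SavedMutations.clear();               // drop whatever was parked last time
    SavedMutations.swap(Mutations);       // park the current list
    Mutations.push_back(std::move(IGLP)); // the IGLP mutation runs alone
  }
};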

llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h

+3
@@ -77,6 +77,8 @@ class GCNIterativeScheduler : public ScheduleDAGMILive {
   const StrategyKind Strategy;
   mutable GCNUpwardRPTracker UPTracker;
 
+  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
+
   class BuildDAG;
   class OverrideLegacyStrategy;
 
@@ -91,6 +93,7 @@ class GCNIterativeScheduler : public ScheduleDAGMILive {
     return getRegionPressure(R.Begin, R.End);
   }
 
+  void swapIGLPMutations(const Region &R, bool IsReentry);
   void setBestSchedule(Region &R,
                        ScheduleRef Schedule,
                        const GCNRegPressure &MaxRP = GCNRegPressure());

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

+5 −9
@@ -188,12 +188,6 @@ static void getRegisterPressures(
   Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
 }
 
-// Return true if the instruction is mutually exclusive with all non-IGLP DAG
-// mutations, requiring all other mutations to be disabled.
-static bool isIGLPMutationOnly(unsigned Opcode) {
-  return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
-}
-
 void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                      bool AtTop,
                                      const RegPressureTracker &RPTracker,
@@ -1161,9 +1155,10 @@ bool GCNSchedStage::initGCNRegion() {
   Unsched.reserve(DAG.NumRegionInstrs);
   if (StageID == GCNSchedStageID::OccInitialSchedule ||
       StageID == GCNSchedStageID::ILPInitialSchedule) {
+    const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG.TII);
     for (auto &I : DAG) {
       Unsched.push_back(&I);
-      if (isIGLPMutationOnly(I.getOpcode()))
+      if (SII->isIGLPMutationOnly(I.getOpcode()))
         DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
     }
   } else {
@@ -2047,8 +2042,9 @@ void GCNScheduleDAGMILive::updateRegionBoundaries(
 }
 
 static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
-  return any_of(*DAG, [](MachineBasicBlock::iterator MI) {
-    return isIGLPMutationOnly(MI->getOpcode());
+  const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG->TII);
+  return any_of(*DAG, [SII](MachineBasicBlock::iterator MI) {
+    return SII->isIGLPMutationOnly(MI->getOpcode());
   });
 }
 
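
Taken together, these hunks delete the file-local isIGLPMutationOnly helper and route both of its call sites through SIInstrInfo, matching the member function added in SIInstrInfo.h below; that relocation is what lets GCNIterativeScheduler.cpp above share the same predicate.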

llvm/lib/Target/AMDGPU/SIInstrInfo.h

+6
@@ -985,6 +985,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
 
   bool isIGLP(const MachineInstr &MI) const { return isIGLP(MI.getOpcode()); }
 
+  // Return true if the instruction is mutually exclusive with all non-IGLP DAG
+  // mutations, requiring all other mutations to be disabled.
+  bool isIGLPMutationOnly(unsigned Opcode) const {
+    return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
+  }
+
   static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
     switch (Opcode) {
     case AMDGPU::S_WAITCNT_soft:

llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp

+26 −9
@@ -35,7 +35,7 @@ class RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
   bool detectFoldable(MachineInstr &Hi, MachineInstr *&Lo);
 
   bool detectAndFoldOffset(MachineInstr &Hi, MachineInstr &Lo);
-  void foldOffset(MachineInstr &Hi, MachineInstr &Lo, MachineInstr &Tail,
+  bool foldOffset(MachineInstr &Hi, MachineInstr &Lo, MachineInstr &Tail,
                   int64_t Offset);
   bool foldLargeOffset(MachineInstr &Hi, MachineInstr &Lo,
                        MachineInstr &TailAdd, Register GSReg);
@@ -142,9 +142,21 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
 // Update the offset in Hi and Lo instructions.
 // Delete the tail instruction and update all the uses to use the
 // output from Lo.
-void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
+bool RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
                                          MachineInstr &Tail, int64_t Offset) {
   assert(isInt<32>(Offset) && "Unexpected offset");
+
+  // If Hi is an AUIPC, don't fold the offset if it is outside the bounds of
+  // the global object. The object may be within 2GB of the PC, but addresses
+  // outside of the object might not be.
+  if (Hi.getOpcode() == RISCV::AUIPC && Hi.getOperand(1).isGlobal()) {
+    const GlobalValue *GV = Hi.getOperand(1).getGlobal();
+    Type *Ty = GV->getValueType();
+    if (!Ty->isSized() || Offset < 0 ||
+        (uint64_t)Offset > GV->getDataLayout().getTypeAllocSize(Ty))
+      return false;
+  }
+
   // Put the offset back in Hi and the Lo
   Hi.getOperand(1).setOffset(Offset);
   if (Hi.getOpcode() != RISCV::AUIPC)
@@ -156,6 +168,7 @@ void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
   Tail.eraseFromParent();
   LLVM_DEBUG(dbgs() << "  Merged offset " << Offset << " into base.\n"
                     << "     " << Hi << "     " << Lo;);
+  return true;
 }
 
 // Detect patterns for large offsets that are passed into an ADD instruction.
@@ -205,7 +218,8 @@ bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
   // Handle rs1 of ADDI is X0.
   if (AddiReg == RISCV::X0) {
     LLVM_DEBUG(dbgs() << "  Offset Instrs: " << OffsetTail);
-    foldOffset(Hi, Lo, TailAdd, OffLo);
+    if (!foldOffset(Hi, Lo, TailAdd, OffLo))
+      return false;
     OffsetTail.eraseFromParent();
     return true;
   }
@@ -226,7 +240,8 @@ bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
       return false;
     LLVM_DEBUG(dbgs() << "  Offset Instrs: " << OffsetTail
                       << "                 " << OffsetLui);
-    foldOffset(Hi, Lo, TailAdd, Offset);
+    if (!foldOffset(Hi, Lo, TailAdd, Offset))
+      return false;
    OffsetTail.eraseFromParent();
     OffsetLui.eraseFromParent();
     return true;
@@ -235,7 +250,8 @@ bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
   // exists.
   LLVM_DEBUG(dbgs() << "  Offset Instr: " << OffsetTail);
   int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
-  foldOffset(Hi, Lo, TailAdd, Offset);
+  if (!foldOffset(Hi, Lo, TailAdd, Offset))
+    return false;
   OffsetTail.eraseFromParent();
   return true;
 }
@@ -294,7 +310,8 @@ bool RISCVMergeBaseOffsetOpt::foldShiftedOffset(MachineInstr &Hi,
   Offset = (uint64_t)Offset << ShAmt;
 
   LLVM_DEBUG(dbgs() << "  Offset Instr: " << OffsetTail);
-  foldOffset(Hi, Lo, TailShXAdd, Offset);
+  if (!foldOffset(Hi, Lo, TailShXAdd, Offset))
+    return false;
   OffsetTail.eraseFromParent();
   return true;
 }
@@ -327,15 +344,15 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi,
       if (TailTail.getOpcode() == RISCV::ADDI) {
        Offset += TailTail.getOperand(2).getImm();
        LLVM_DEBUG(dbgs() << "  Offset Instrs: " << Tail << TailTail);
-        foldOffset(Hi, Lo, TailTail, Offset);
+        if (!foldOffset(Hi, Lo, TailTail, Offset))
+          return false;
        Tail.eraseFromParent();
        return true;
      }
    }
 
    LLVM_DEBUG(dbgs() << "  Offset Instr: " << Tail);
-    foldOffset(Hi, Lo, Tail, Offset);
-    return true;
+    return foldOffset(Hi, Lo, Tail, Offset);
  }
  case RISCV::ADD:
    // The offset is too large to fit in the immediate field of ADDI.
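
The new guard's arithmetic in isolation: folding is refused when the global's type has no known size, the offset is negative, or the offset points past the allocation, because (per the comment in the hunk) the object may be within 2GB of the PC while addresses outside of it might not be. A standalone sketch of the predicate with hypothetical sizes:

#include <cstdint>
#include <iostream>

// Mirror of the bounds check added to foldOffset (standalone, hypothetical
// helper): true when the offset may be folded into an AUIPC-based address.
bool mayFoldIntoAuipc(bool TypeIsSized, int64_t Offset, uint64_t AllocSize) {
  if (!TypeIsSized || Offset < 0)
    return false;                       // unknown extent or before the object
  return (uint64_t)Offset <= AllocSize; // one past the end is still accepted
}

int main() {
  // e.g. a global int g[2] occupies 8 bytes:
  std::cout << mayFoldIntoAuipc(true, 4, 8) << '\n';  // 1: inside the object
  std::cout << mayFoldIntoAuipc(true, 8, 8) << '\n';  // 1: one past the end
  std::cout << mayFoldIntoAuipc(true, 16, 8) << '\n'; // 0: stays as ADD/ADDI
}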

llvm/test/CodeGen/AMDGPU/iglp.opt.reentry.ll

+2
@@ -1,4 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -O3 < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -O3 -misched=gcn-iterative-max-occupancy-experimental < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -O3 -misched=gcn-iterative-ilp < %s | FileCheck %s
 
 ; Test should not result in build failure
 ; CHECK-LABEL: shouldNotReApply
