-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Use reverse iteration in CodeGenPrepare #145484
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Pierre-vh
wants to merge
1
commit into
users/pierre-vh/rm-promote-i32-cgp
Choose a base branch
from
users/pierre-vh/reverse-iterate-cgp
base: users/pierre-vh/rm-promote-i32-cgp
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -15,6 +15,7 @@ | |||||
#include "AMDGPU.h" | ||||||
#include "AMDGPUTargetMachine.h" | ||||||
#include "SIModeRegisterDefaults.h" | ||||||
#include "llvm/ADT/SetVector.h" | ||||||
#include "llvm/Analysis/AssumptionCache.h" | ||||||
#include "llvm/Analysis/ConstantFolding.h" | ||||||
#include "llvm/Analysis/TargetLibraryInfo.h" | ||||||
|
@@ -109,6 +110,7 @@ class AMDGPUCodeGenPrepareImpl | |||||
bool FlowChanged = false; | ||||||
mutable Function *SqrtF32 = nullptr; | ||||||
mutable Function *LdexpF32 = nullptr; | ||||||
mutable SetVector<Value *> DeadVals; | ||||||
|
||||||
DenseMap<const PHINode *, bool> BreakPhiNodesCache; | ||||||
|
||||||
|
@@ -285,28 +287,19 @@ bool AMDGPUCodeGenPrepareImpl::run() { | |||||
BreakPhiNodesCache.clear(); | ||||||
bool MadeChange = false; | ||||||
|
||||||
Function::iterator NextBB; | ||||||
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; FI = NextBB) { | ||||||
BasicBlock *BB = &*FI; | ||||||
NextBB = std::next(FI); | ||||||
|
||||||
BasicBlock::iterator Next; | ||||||
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; | ||||||
I = Next) { | ||||||
Next = std::next(I); | ||||||
|
||||||
MadeChange |= visit(*I); | ||||||
|
||||||
if (Next != E) { // Control flow changed | ||||||
BasicBlock *NextInstBB = Next->getParent(); | ||||||
if (NextInstBB != BB) { | ||||||
BB = NextInstBB; | ||||||
E = BB->end(); | ||||||
FE = F.end(); | ||||||
} | ||||||
} | ||||||
for (BasicBlock &BB : reverse(F)) { | ||||||
for (Instruction &I : make_early_inc_range(reverse(BB))) { | ||||||
if (!DeadVals.contains(&I)) | ||||||
MadeChange |= visit(I); | ||||||
} | ||||||
} | ||||||
|
||||||
while (!DeadVals.empty()) { | ||||||
RecursivelyDeleteTriviallyDeadInstructions( | ||||||
DeadVals.pop_back_val(), TLI, /*MSSAU*/ nullptr, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
[&](Value *V) { DeadVals.remove(V); }); | ||||||
} | ||||||
|
||||||
return MadeChange; | ||||||
} | ||||||
|
||||||
|
@@ -426,7 +419,7 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const { | |||||
Value *NewVal = insertValues(Builder, Ty, ResultVals); | ||||||
NewVal->takeName(&I); | ||||||
I.replaceAllUsesWith(NewVal); | ||||||
I.eraseFromParent(); | ||||||
DeadVals.insert(&I); | ||||||
|
||||||
return true; | ||||||
} | ||||||
|
@@ -500,10 +493,10 @@ bool AMDGPUCodeGenPrepareImpl::foldBinOpIntoSelect(BinaryOperator &BO) const { | |||||
FoldedT, FoldedF); | ||||||
NewSelect->takeName(&BO); | ||||||
BO.replaceAllUsesWith(NewSelect); | ||||||
BO.eraseFromParent(); | ||||||
DeadVals.insert(&BO); | ||||||
if (CastOp) | ||||||
CastOp->eraseFromParent(); | ||||||
Sel->eraseFromParent(); | ||||||
DeadVals.insert(CastOp); | ||||||
DeadVals.insert(Sel); | ||||||
return true; | ||||||
} | ||||||
|
||||||
|
@@ -900,7 +893,7 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) { | |||||
if (NewVal) { | ||||||
FDiv.replaceAllUsesWith(NewVal); | ||||||
NewVal->takeName(&FDiv); | ||||||
RecursivelyDeleteTriviallyDeadInstructions(&FDiv, TLI); | ||||||
DeadVals.insert(&FDiv); | ||||||
} | ||||||
|
||||||
return true; | ||||||
|
@@ -1310,7 +1303,8 @@ within the byte are all 0. | |||||
static bool tryNarrowMathIfNoOverflow(Instruction *I, | ||||||
const SITargetLowering *TLI, | ||||||
const TargetTransformInfo &TTI, | ||||||
const DataLayout &DL) { | ||||||
const DataLayout &DL, | ||||||
SetVector<Value *> &DeadVals) { | ||||||
unsigned Opc = I->getOpcode(); | ||||||
Type *OldType = I->getType(); | ||||||
|
||||||
|
@@ -1365,7 +1359,7 @@ static bool tryNarrowMathIfNoOverflow(Instruction *I, | |||||
|
||||||
Value *Zext = Builder.CreateZExt(Arith, OldType); | ||||||
I->replaceAllUsesWith(Zext); | ||||||
I->eraseFromParent(); | ||||||
DeadVals.insert(I); | ||||||
return true; | ||||||
} | ||||||
|
||||||
|
@@ -1376,7 +1370,7 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) { | |||||
if (UseMul24Intrin && replaceMulWithMul24(I)) | ||||||
return true; | ||||||
if (tryNarrowMathIfNoOverflow(&I, ST.getTargetLowering(), | ||||||
TM.getTargetTransformInfo(F), DL)) | ||||||
TM.getTargetTransformInfo(F), DL, DeadVals)) | ||||||
return true; | ||||||
|
||||||
bool Changed = false; | ||||||
|
@@ -1441,7 +1435,7 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) { | |||||
|
||||||
if (NewDiv) { | ||||||
I.replaceAllUsesWith(NewDiv); | ||||||
I.eraseFromParent(); | ||||||
DeadVals.insert(&I); | ||||||
Changed = true; | ||||||
} | ||||||
} | ||||||
|
@@ -1497,7 +1491,7 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) { | |||||
Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy); | ||||||
Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType()); | ||||||
I.replaceAllUsesWith(ValOrig); | ||||||
I.eraseFromParent(); | ||||||
DeadVals.insert(&I); | ||||||
return true; | ||||||
} | ||||||
|
||||||
|
@@ -1539,7 +1533,7 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) { | |||||
|
||||||
Fract->takeName(&I); | ||||||
I.replaceAllUsesWith(Fract); | ||||||
RecursivelyDeleteTriviallyDeadInstructions(&I, TLI); | ||||||
DeadVals.insert(&I); | ||||||
return true; | ||||||
} | ||||||
|
||||||
|
@@ -1827,7 +1821,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) { | |||||
} | ||||||
|
||||||
I.replaceAllUsesWith(Vec); | ||||||
I.eraseFromParent(); | ||||||
DeadVals.insert(&I); | ||||||
return true; | ||||||
} | ||||||
|
||||||
|
@@ -1908,7 +1902,7 @@ bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) { | |||||
auto *Intrin = B.CreateIntrinsic( | ||||||
I.getType(), Intrinsic::amdgcn_addrspacecast_nonnull, {I.getOperand(0)}); | ||||||
I.replaceAllUsesWith(Intrin); | ||||||
I.eraseFromParent(); | ||||||
DeadVals.insert(&I); | ||||||
return true; | ||||||
} | ||||||
|
||||||
|
@@ -2005,16 +1999,10 @@ bool AMDGPUCodeGenPrepareImpl::visitFMinLike(IntrinsicInst &I) { | |||||
Value *Fract = applyFractPat(Builder, FractArg); | ||||||
Fract->takeName(&I); | ||||||
I.replaceAllUsesWith(Fract); | ||||||
|
||||||
RecursivelyDeleteTriviallyDeadInstructions(&I, TLI); | ||||||
DeadVals.insert(&I); | ||||||
return true; | ||||||
} | ||||||
|
||||||
static bool isOneOrNegOne(const Value *Val) { | ||||||
const APFloat *C; | ||||||
return match(Val, m_APFloat(C)) && C->getExactLog2Abs() == 0; | ||||||
} | ||||||
|
||||||
// Expand llvm.sqrt.f32 calls with !fpmath metadata in a semi-fast way. | ||||||
bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) { | ||||||
Type *Ty = Sqrt.getType()->getScalarType(); | ||||||
|
@@ -2035,18 +2023,6 @@ bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) { | |||||
if (ReqdAccuracy < 1.0f) | ||||||
return false; | ||||||
|
||||||
// FIXME: This is an ugly hack for this pass using forward iteration instead | ||||||
// of reverse. If it worked like a normal combiner, the rsq would form before | ||||||
// we saw a sqrt call. | ||||||
auto *FDiv = | ||||||
dyn_cast_or_null<FPMathOperator>(Sqrt.getUniqueUndroppableUser()); | ||||||
if (FDiv && FDiv->getOpcode() == Instruction::FDiv && | ||||||
FDiv->getFPAccuracy() >= 1.0f && | ||||||
canOptimizeWithRsq(FPOp, FDiv->getFastMathFlags(), SqrtFMF) && | ||||||
// TODO: We should also handle the arcp case for the fdiv with non-1 value | ||||||
isOneOrNegOne(FDiv->getOperand(0))) | ||||||
return false; | ||||||
|
||||||
Value *SrcVal = Sqrt.getOperand(0); | ||||||
bool CanTreatAsDAZ = canIgnoreDenormalInput(SrcVal, &Sqrt); | ||||||
|
||||||
|
@@ -2070,7 +2046,7 @@ bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) { | |||||
Value *NewSqrt = insertValues(Builder, Sqrt.getType(), ResultVals); | ||||||
NewSqrt->takeName(&Sqrt); | ||||||
Sqrt.replaceAllUsesWith(NewSqrt); | ||||||
Sqrt.eraseFromParent(); | ||||||
DeadVals.insert(&Sqrt); | ||||||
return true; | ||||||
} | ||||||
|
||||||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this needs to be a set; the iteration shouldn't visit the same instruction twice (at least, the other IR combiner passes all seem to use vectors).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't use a set because I thought we could visit an instruction twice; I used one because I call the recursive delete function, and I need to handle the case where it deletes another instruction that is also in that set (otherwise we would later visit a dangling pointer).
I can use a vector + find instead, of course, if you prefer.