Skip to content

[LV] Use VPReductionRecipe for partial reductions #146073

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: users/SamTebbs33/fhahn-vpbundle-recipe
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,8 @@ class TargetTransformInfo {
/// Get the kind of extension that an instruction represents.
LLVM_ABI static PartialReductionExtendKind
getPartialReductionExtendKind(Instruction *I);
LLVM_ABI static PartialReductionExtendKind
getPartialReductionExtendKind(Instruction::CastOps CastOpc);

/// Construct a TTI object using a type implementing the \c Concept
/// API below.
Expand Down
19 changes: 15 additions & 4 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1000,11 +1000,22 @@ InstructionCost TargetTransformInfo::getShuffleCost(
}

TargetTransformInfo::PartialReductionExtendKind
TargetTransformInfo::getPartialReductionExtendKind(Instruction *I) {
if (isa<SExtInst>(I))
return PR_SignExtend;
if (isa<ZExtInst>(I))
TargetTransformInfo::getPartialReductionExtendKind(
Instruction::CastOps CastOpc) {
switch (CastOpc) {
case Instruction::CastOps::ZExt:
return PR_ZeroExtend;
case Instruction::CastOps::SExt:
return PR_SignExtend;
default:
return PR_None;
}
}

/// Classify the extension performed by \p I. Cast instructions are forwarded
/// to the CastOps overload using their opcode; any other instruction yields
/// PR_None.
TargetTransformInfo::PartialReductionExtendKind
TargetTransformInfo::getPartialReductionExtendKind(Instruction *I) {
  const auto *CastI = dyn_cast<CastInst>(I);
  return CastI ? getPartialReductionExtendKind(CastI->getOpcode()) : PR_None;
}

Expand Down
34 changes: 20 additions & 14 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7050,7 +7050,8 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
}
// The VPlan-based cost model is more accurate for partial reduction and
// comparing against the legacy cost isn't desirable.
if (isa<VPPartialReductionRecipe>(&R))
if (auto *VPR = dyn_cast<VPReductionRecipe>(&R);
VPR && VPR->isPartialReduction())
return true;

/// If a VPlan transform folded a recipe to one producing a single-scalar,
Expand Down Expand Up @@ -8280,11 +8281,14 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));

// If the PHI is used by a partial reduction, set the scale factor.
unsigned ScaleFactor =
getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
PhiRecipe = new VPReductionPHIRecipe(
Phi, RdxDesc, *StartV, CM.isInLoopReduction(Phi),
CM.useOrderedReductions(RdxDesc), ScaleFactor);
bool UseInLoopReduction = CM.isInLoopReduction(Phi);
bool UseOrderedReductions = CM.useOrderedReductions(RdxDesc);
auto ScaleFactor =
(UseOrderedReductions || UseInLoopReduction)
? 0
: getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
PhiRecipe = new VPReductionPHIRecipe(Phi, RdxDesc, *StartV,
UseOrderedReductions, ScaleFactor);
} else {
// TODO: Currently fixed-order recurrences are modeled as chains of
// first-order recurrences. If there are no users of the intermediate
Expand Down Expand Up @@ -8348,7 +8352,8 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
VPValue *Accumulator = Operands[1];
VPRecipeBase *BinOpRecipe = BinOp->getDefiningRecipe();
if (isa<VPReductionPHIRecipe>(BinOpRecipe) ||
isa<VPPartialReductionRecipe>(BinOpRecipe))
(isa<VPReductionRecipe>(BinOpRecipe) &&
cast<VPReductionRecipe>(BinOpRecipe)->isPartialReduction()))
std::swap(BinOp, Accumulator);

unsigned ReductionOpcode = Reduction->getOpcode();
Expand All @@ -8369,12 +8374,10 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
"Expected an ADD or SUB operation for predicated partial "
"reductions (because the neutral element in the mask is zero)!");
Cond = getBlockInMask(Builder.getInsertBlock());
VPValue *Zero =
Plan.getOrAddLiveIn(ConstantInt::get(Reduction->getType(), 0));
BinOp = Builder.createSelect(Cond, BinOp, Zero, Reduction->getDebugLoc());
}
return new VPPartialReductionRecipe(ReductionOpcode, Accumulator, BinOp, Cond,
ScaleFactor, Reduction);

return new VPReductionRecipe(RecurKind::Add, FastMathFlags(), Reduction,
Accumulator, BinOp, Cond, false, ScaleFactor);
}

void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
Expand Down Expand Up @@ -9154,9 +9157,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
FastMathFlags FMFs = isa<FPMathOperator>(CurrentLinkI)
? RdxDesc.getFastMathFlags()
: FastMathFlags();
bool UseOrderedReductions = CM.useOrderedReductions(RdxDesc);
unsigned VFScaleFactor = !UseOrderedReductions;
auto *RedRecipe = new VPReductionRecipe(
Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc());
UseOrderedReductions, VFScaleFactor, CurrentLinkI->getDebugLoc());
// Append the recipe to the end of the VPBasicBlock because we need to
// ensure that it comes after all of its inputs, including CondOp.
// Delete CurrentLink as it will be invalid if its operand is replaced
Expand Down Expand Up @@ -9190,8 +9195,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// Don't output selects for partial reductions because they have an output
// with fewer lanes than the VF. So the operands of the select would have
// different numbers of lanes. Partial reductions mask the input instead.
auto *RR = dyn_cast<VPReductionRecipe>(OrigExitingVPV->getDefiningRecipe());
if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
!isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) {
(!RR || !RR->isPartialReduction())) {
VPValue *Cond = RecipeBuilder.getBlockInMask(PhiR->getParent());
std::optional<FastMathFlags> FMFs =
PhiTy->isFloatingPointTy()
Expand Down
143 changes: 56 additions & 87 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenIntOrFpInductionSC:
case VPRecipeBase::VPWidenPointerInductionSC:
case VPRecipeBase::VPReductionPHISC:
case VPRecipeBase::VPPartialReductionSC:
return true;
case VPRecipeBase::VPBranchOnMaskSC:
case VPRecipeBase::VPInterleaveSC:
Expand Down Expand Up @@ -2182,34 +2181,37 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Descriptor for the reduction.
const RecurrenceDescriptor &RdxDesc;

/// The phi is part of an in-loop reduction.
bool IsInLoop;

/// The phi is part of an ordered reduction. Requires the reduction to be
/// in-loop.
bool IsOrdered;

/// When expanding the reduction PHI, the plan's VF element count is divided
/// by this factor to form the reduction phi's VF.
unsigned VFScaleFactor = 1;
/// The scaling factor, relative to the VF, that this recipe's output is
/// divided by.
/// For reductions that are neither in-loop nor partial this is equal to 1.
/// For in-loop reductions this is equal to 0, to specify that the factor is
/// the VF itself (which may not be known yet).
/// For partial reductions this is a value greater than 1.
unsigned VFScaleFactor;

public:
/// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
/// RdxDesc.
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
VPValue &Start, bool IsInLoop = false,
bool IsOrdered = false, unsigned VFScaleFactor = 1)
VPValue &Start, bool IsOrdered = false,
unsigned VFScaleFactor = 1)
: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered),
VFScaleFactor(VFScaleFactor) {
assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
RdxDesc(RdxDesc), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
assert((!IsOrdered || isInLoop()) &&
"IsOrdered requires the reduction to be in-loop");
assert(((!isInLoop() && !IsOrdered) || isInLoop()) &&
"Invalid VFScaleFactor");
}

~VPReductionPHIRecipe() override = default;

VPReductionPHIRecipe *clone() override {
auto *R = new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()),
RdxDesc, *getOperand(0), IsInLoop,
IsOrdered, VFScaleFactor);
auto *R =
new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
*getOperand(0), IsOrdered, VFScaleFactor);
R->addOperand(getBackedgeValue());
return R;
}
Expand All @@ -2235,8 +2237,10 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Returns true, if the phi is part of an ordered reduction.
bool isOrdered() const { return IsOrdered; }

/// Returns true, if the phi is part of an in-loop reduction.
bool isInLoop() const { return IsInLoop; }
/// Returns true if the phi is part of an in-loop reduction.
bool isInLoop() const { return VFScaleFactor == 0; }

/// Returns true if the phi is part of a partial reduction, i.e. its
/// VFScaleFactor is greater than 1.
bool isPartialReduction() const { return VFScaleFactor > 1; }

/// Returns true if the recipe only uses the first lane of operand \p Op.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
Expand Down Expand Up @@ -2409,23 +2413,32 @@ class VPInterleaveRecipe : public VPRecipeBase {
Instruction *getInsertPos() const { return IG->getInsertPos(); }
};

/// A recipe to represent inloop reduction operations, performing a reduction on
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
/// A recipe to represent inloop, ordered or partial reduction operations. It
/// performs a reduction on a vector operand into a scalar (vector in the case
/// of a partial reduction) value, and adds the result to a chain. The Operands
/// are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPRecipeWithIRFlags {
/// The recurrence kind for the reduction in question.
RecurKind RdxKind;
bool IsOrdered;
/// Whether the reduction is conditional.
bool IsConditional = false;
/// The scaling factor, relative to the VF, that this recipe's output is
/// divided by.
/// For outer-loop reductions this is equal to 1.
/// For in-loop reductions this is equal to 0, to specify that this is equal
/// to the VF (which may not be known yet).
/// For partial-reductions this is equal to another scalar value.
unsigned VFScaleFactor;

protected:
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
FastMathFlags FMFs, Instruction *I,
ArrayRef<VPValue *> Operands, VPValue *CondOp,
bool IsOrdered, DebugLoc DL)
bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL)
: VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
IsOrdered(IsOrdered) {
IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
assert((!IsOrdered || VFScaleFactor == 0) && "Invalid scale factor");
if (CondOp) {
IsConditional = true;
addOperand(CondOp);
Expand All @@ -2436,24 +2449,24 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
public:
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
bool IsOrdered, DebugLoc DL = {})
bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL = {})
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
IsOrdered, DL) {}
IsOrdered, VFScaleFactor, DL) {}

VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
bool IsOrdered, DebugLoc DL = {})
bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL = {})
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
IsOrdered, DL) {}
IsOrdered, VFScaleFactor, DL) {}

~VPReductionRecipe() override = default;

VPReductionRecipe *clone() override {
return new VPReductionRecipe(RdxKind, getFastMathFlags(),
getUnderlyingInstr(), getChainOp(), getVecOp(),
getCondOp(), IsOrdered, getDebugLoc());
return new VPReductionRecipe(
RdxKind, getFastMathFlags(), getUnderlyingInstr(), getChainOp(),
getVecOp(), getCondOp(), IsOrdered, VFScaleFactor, getDebugLoc());
}

static inline bool classof(const VPRecipeBase *R) {
Expand Down Expand Up @@ -2485,6 +2498,8 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
bool isOrdered() const { return IsOrdered; };
/// Return true if the in-loop reduction is conditional.
bool isConditional() const { return IsConditional; };
/// Return true if the reduction is a partial reduction.
bool isPartialReduction() const { return VFScaleFactor > 1; }
/// The VPValue of the scalar Chain being accumulated.
VPValue *getChainOp() const { return getOperand(0); }
/// The VPValue of the vector value to be reduced.
Expand All @@ -2493,65 +2508,8 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
VPValue *getCondOp() const {
return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
}
};

/// A recipe for forming partial reductions. In the loop, an accumulator and
/// vector operand are added together and passed to the next iteration as the
/// next accumulator. After the loop body, the accumulator is reduced to a
/// scalar value.
class VPPartialReductionRecipe : public VPReductionRecipe {
unsigned Opcode;

/// The divisor by which the VF of this recipe's output should be divided
/// during execution.
unsigned VFScaleFactor;

public:
VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0,
VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
: VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond,
VFScaleFactor, ReductionInst) {}
VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
VPValue *Cond, unsigned ScaleFactor,
Instruction *ReductionInst = nullptr)
: VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
FastMathFlags(), ReductionInst,
ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
Opcode(Opcode), VFScaleFactor(ScaleFactor) {
[[maybe_unused]] auto *AccumulatorRecipe =
getChainOp()->getDefiningRecipe();
assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
"Unexpected operand order for partial reduction recipe");
}
~VPPartialReductionRecipe() override = default;

VPPartialReductionRecipe *clone() override {
return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
getCondOp(), VFScaleFactor,
getUnderlyingInstr());
}

VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)

/// Generate the reduction in the loop.
void execute(VPTransformState &State) override;

/// Return the cost of this VPPartialReductionRecipe.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

/// Get the binary op's opcode.
unsigned getOpcode() const { return Opcode; }

/// Get the factor that the VF of this recipe's output should be scaled by.
unsigned getVFScaleFactor() const { return VFScaleFactor; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe to represent inloop reduction operations with vector-predication
Expand All @@ -2567,7 +2525,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
R.getFastMathFlags(),
cast_or_null<Instruction>(R.getUnderlyingValue()),
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
R.isOrdered(), DL) {}
R.isOrdered(), 0, DL) {}

~VPReductionEVLRecipe() override = default;

Expand Down Expand Up @@ -2744,6 +2702,12 @@ class VPSingleDefBundleRecipe : public VPSingleDefRecipe {
/// vector operands, performing a reduction.add on the result, and adding
/// the scalar result to a chain.
MulAccumulateReduction,
/// Represent an inloop multiply-accumulate reduction, multiplying the
/// extended vector operands, negating the multiplication, performing a
/// reduction.add on the result, and adding the scalar result to a chain.
ExtNegatedMulAccumulateReduction,
};

/// Type of the bundle.
Expand All @@ -2768,6 +2732,11 @@ class VPSingleDefBundleRecipe : public VPSingleDefRecipe {
VPWidenRecipe *Mul, VPReductionRecipe *Red)
: VPSingleDefBundleRecipe(BundleTypes::ExtMulAccumulateReduction,
{Ext0, Ext1, Mul, Red}) {}
/// Create a bundle of type ExtNegatedMulAccumulateReduction from the recipes
/// \p Ext0, \p Ext1, \p Mul, \p Sub and \p Red, which are forwarded to the
/// delegated constructor in that order.
VPSingleDefBundleRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
VPWidenRecipe *Mul, VPWidenRecipe *Sub,
VPReductionRecipe *Red)
: VPSingleDefBundleRecipe(BundleTypes::ExtNegatedMulAccumulateReduction,
{Ext0, Ext1, Mul, Sub, Red}) {}

~VPSingleDefBundleRecipe() override {
SmallPtrSet<VPRecipeBase *, 4> Seen;
Expand Down
Loading
Loading