Skip to content

[VectorCombine] Scalarize vector intrinsics with scalar arguments #146530

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 31 additions & 28 deletions llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
Expand Down Expand Up @@ -1091,12 +1091,14 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
return false;

// TODO: Allow intrinsics with different argument types
// TODO: Allow intrinsics with scalar arguments
if (II && (!isTriviallyVectorizable(II->getIntrinsicID()) ||
!all_of(II->args(), [&II](Value *Arg) {
return Arg->getType() == II->getType();
})))
return false;
if (II) {
if (!isTriviallyVectorizable(II->getIntrinsicID()))
return false;
for (auto [Idx, Arg] : enumerate(II->args()))
if (Arg->getType() != II->getType() &&
!isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Idx, &TTI))
return false;
}

// Do not convert the vector condition of a vector select into a scalar
// condition. That may cause problems for codegen because of differences in
Expand All @@ -1109,19 +1111,18 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {

// Match constant vectors or scalars being inserted into constant vectors:
// vec_op [VecC0 | (inselt VecC0, V0, Index)], ...
SmallVector<Constant *> VecCs;
SmallVector<Value *> ScalarOps;
SmallVector<Value *> VecCs, ScalarOps;
std::optional<uint64_t> Index;

auto Ops = II ? II->args() : I.operands();
for (Value *Op : Ops) {
for (auto [OpNum, Op] : enumerate(Ops)) {
Constant *VecC;
Value *V;
uint64_t InsIdx = 0;
VectorType *OpTy = cast<VectorType>(Op->getType());
if (match(Op, m_InsertElt(m_Constant(VecC), m_Value(V),
m_ConstantInt(InsIdx)))) {
if (match(Op.get(), m_InsertElt(m_Constant(VecC), m_Value(V),
m_ConstantInt(InsIdx)))) {
// Bail if any inserts are out of bounds.
VectorType *OpTy = cast<VectorType>(Op->getType());
if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
return false;
// All inserts must have the same index.
Expand All @@ -1132,7 +1133,11 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
return false;
VecCs.push_back(VecC);
ScalarOps.push_back(V);
} else if (match(Op, m_Constant(VecC))) {
} else if (II && isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
OpNum, &TTI)) {
VecCs.push_back(Op.get());
ScalarOps.push_back(Op.get());
} else if (match(Op.get(), m_Constant(VecC))) {
VecCs.push_back(VecC);
ScalarOps.push_back(nullptr);
} else {
Expand Down Expand Up @@ -1176,25 +1181,27 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
// Fold the vector constants in the original vectors into a new base vector to
// get more accurate cost modelling.
Value *NewVecC = nullptr;
TargetFolder Folder(*DL);
if (CI)
NewVecC = ConstantFoldCompareInstOperands(CI->getPredicate(), VecCs[0],
VecCs[1], *DL);
NewVecC = Folder.FoldCmp(CI->getPredicate(), VecCs[0], VecCs[1]);
else if (UO)
NewVecC = ConstantFoldUnaryOpOperand(Opcode, VecCs[0], *DL);
NewVecC =
Folder.FoldUnOpFMF(UO->getOpcode(), VecCs[0], UO->getFastMathFlags());
else if (BO)
NewVecC = ConstantFoldBinaryOpOperands(Opcode, VecCs[0], VecCs[1], *DL);
NewVecC = Folder.FoldBinOp(BO->getOpcode(), VecCs[0], VecCs[1]);
else if (II->arg_size() == 2)
NewVecC = ConstantFoldBinaryIntrinsic(II->getIntrinsicID(), VecCs[0],
VecCs[1], II->getType(), II);
NewVecC = Folder.FoldBinaryIntrinsic(II->getIntrinsicID(), VecCs[0],
VecCs[1], II->getType(), &I);
Comment on lines +1184 to +1194
Copy link
Contributor Author

@lukel97 lukel97 Jul 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've switched the API here to use TargetFolder, which basically calls the same underlying ConstantFoldFoo functions. The only difference is that it also checks whether the arguments are constants, which allows us to make VecCs hold Values instead of Constants.


// Get cost estimate for the insert element. This cost will factor into
// both sequences.
InstructionCost OldCost = VectorOpCost;
InstructionCost NewCost =
ScalarOpCost + TTI.getVectorInstrCost(Instruction::InsertElement, VecTy,
CostKind, *Index, NewVecC);
for (auto [Op, VecC, Scalar] : zip(Ops, VecCs, ScalarOps)) {
if (!Scalar)
for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
if (!Scalar || (II && isVectorIntrinsicWithScalarOpAtArg(
II->getIntrinsicID(), Idx, &TTI)))
continue;
InstructionCost InsertCost = TTI.getVectorInstrCost(
Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);
Expand Down Expand Up @@ -1238,16 +1245,12 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {

// Create a new base vector if the constant folding failed.
if (!NewVecC) {
SmallVector<Value *> VecCValues;
VecCValues.reserve(VecCs.size());
append_range(VecCValues, VecCs);
if (CI)
NewVecC = Builder.CreateCmp(CI->getPredicate(), VecCs[0], VecCs[1]);
else if (UO || BO)
NewVecC = Builder.CreateNAryOp(Opcode, VecCValues);
NewVecC = Builder.CreateNAryOp(Opcode, VecCs);
else
NewVecC =
Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), VecCValues);
NewVecC = Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), VecCs);
}
Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, *Index);
replaceValue(I, *Insert);
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -152,12 +152,12 @@ define <vscale x 4 x float> @fma_scalable(float %x, float %y, float %z) {
ret <vscale x 4 x float> %v
}

; TODO: We should be able to scalarize this if we preserve the scalar argument.
define <4 x float> @scalar_argument(float %x) {
; CHECK-LABEL: define <4 x float> @scalar_argument(
; CHECK-SAME: float [[X:%.*]]) {
; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i32 0
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[X_INSERT]], i32 42)
; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.powi.f32.i32(float [[X]], i32 42)
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 42)
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x float> [[V]]
;
%x.insert = insertelement <4 x float> poison, float %x, i32 0
Expand Down
Loading