Skip to content

Commit 09adb45

Browse files
committed
Include support for Add/Mul/Or/And/Xor Binary Operations
1 parent 44a3268 commit 09adb45

File tree

3 files changed

+97
-42
lines changed

3 files changed

+97
-42
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 90 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -2996,10 +2996,11 @@ bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
29962996

29972997
std::queue<Value *> InstWorklist;
29982998
Value *InitEEV = nullptr;
2999-
Intrinsic::ID CommonOp = 0;
30002999

3001-
bool IsFirstCallInst = true;
3002-
bool ShouldBeCallInst = true;
3000+
unsigned int CommonCallOp = 0, CommonBinOp = 0;
3001+
3002+
bool IsFirstCallOrBinInst = true;
3003+
bool ShouldBeCallOrBinInst = true;
30033004

30043005
SmallVector<Value *, 3> PrevVecV(3, nullptr);
30053006
int64_t ShuffleMaskHalf = -1, ExpectedShuffleMaskHalf = 1;
@@ -3032,24 +3033,24 @@ bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
30323033
return false;
30333034

30343035
if (auto *CallI = dyn_cast<CallInst>(CI)) {
3035-
if (!ShouldBeCallInst || !PrevVecV[2])
3036+
if (!ShouldBeCallOrBinInst || !PrevVecV[2])
30363037
return false;
30373038

3038-
if (!IsFirstCallInst &&
3039+
if (!IsFirstCallOrBinInst &&
30393040
any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
30403041
return false;
30413042

3042-
if (CallI != (IsFirstCallInst ? PrevVecV[2] : PrevVecV[0]))
3043+
if (CallI != (IsFirstCallOrBinInst ? PrevVecV[2] : PrevVecV[0]))
30433044
return false;
3044-
IsFirstCallInst = false;
3045+
IsFirstCallOrBinInst = false;
30453046

30463047
auto *II = dyn_cast<IntrinsicInst>(CallI);
30473048
if (!II)
30483049
return false;
30493050

3050-
if (!CommonOp)
3051-
CommonOp = II->getIntrinsicID();
3052-
if (II->getIntrinsicID() != CommonOp)
3051+
if (!CommonCallOp)
3052+
CommonCallOp = II->getIntrinsicID();
3053+
if (II->getIntrinsicID() != CommonCallOp)
30533054
return false;
30543055

30553056
switch (II->getIntrinsicID()) {
@@ -3066,14 +3067,52 @@ bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
30663067
default:
30673068
return false;
30683069
}
3069-
ShouldBeCallInst ^= 1;
3070+
ShouldBeCallOrBinInst ^= 1;
3071+
3072+
if (!isa<ShuffleVectorInst>(PrevVecV[1]))
3073+
std::swap(PrevVecV[0], PrevVecV[1]);
3074+
InstWorklist.push(PrevVecV[1]);
3075+
InstWorklist.push(PrevVecV[0]);
3076+
} else if (auto *BinOp = dyn_cast<BinaryOperator>(CI)) {
3077+
if (!ShouldBeCallOrBinInst || !PrevVecV[2])
3078+
return false;
3079+
3080+
if (!IsFirstCallOrBinInst &&
3081+
any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
3082+
return false;
3083+
3084+
if (BinOp != (IsFirstCallOrBinInst ? PrevVecV[2] : PrevVecV[0]))
3085+
return false;
3086+
IsFirstCallOrBinInst = false;
3087+
3088+
if (!CommonBinOp)
3089+
CommonBinOp = CI->getOpcode();
3090+
if (CI->getOpcode() != CommonBinOp)
3091+
return false;
3092+
3093+
switch (CI->getOpcode()) {
3094+
case BinaryOperator::Add:
3095+
case BinaryOperator::Mul:
3096+
case BinaryOperator::Or:
3097+
case BinaryOperator::And:
3098+
case BinaryOperator::Xor: {
3099+
auto *Op0 = BinOp->getOperand(0);
3100+
auto *Op1 = BinOp->getOperand(1);
3101+
PrevVecV[0] = Op0;
3102+
PrevVecV[1] = Op1;
3103+
break;
3104+
}
3105+
default:
3106+
return false;
3107+
}
3108+
ShouldBeCallOrBinInst ^= 1;
30703109

30713110
if (!isa<ShuffleVectorInst>(PrevVecV[1]))
30723111
std::swap(PrevVecV[0], PrevVecV[1]);
30733112
InstWorklist.push(PrevVecV[1]);
30743113
InstWorklist.push(PrevVecV[0]);
30753114
} else if (auto *SVInst = dyn_cast<ShuffleVectorInst>(CI)) {
3076-
if (ShouldBeCallInst ||
3115+
if (ShouldBeCallOrBinInst ||
30773116
any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
30783117
return false;
30793118

@@ -3100,13 +3139,13 @@ bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
31003139
ShuffleMaskHalf *= 2;
31013140
if (ExpectedShuffleMaskHalf == VecSize)
31023141
break;
3103-
ShouldBeCallInst ^= 1;
3142+
ShouldBeCallOrBinInst ^= 1;
31043143
} else {
31053144
return false;
31063145
}
31073146
}
31083147

3109-
if (ShouldBeCallInst)
3148+
if (ShouldBeCallOrBinInst)
31103149
return false;
31113150

31123151
assert(VecSize != -1 && ExpectedShuffleMaskHalf == VecSize &&
@@ -3121,21 +3160,43 @@ bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
31213160
assert(FinalVecVTy && "Expected non-null value for Vector Type");
31223161

31233162
Intrinsic::ID ReducedOp = 0;
3124-
switch (CommonOp) {
3125-
case Intrinsic::umin:
3126-
ReducedOp = Intrinsic::vector_reduce_umin;
3127-
break;
3128-
case Intrinsic::umax:
3129-
ReducedOp = Intrinsic::vector_reduce_umax;
3130-
break;
3131-
case Intrinsic::smin:
3132-
ReducedOp = Intrinsic::vector_reduce_smin;
3133-
break;
3134-
case Intrinsic::smax:
3135-
ReducedOp = Intrinsic::vector_reduce_smax;
3136-
break;
3137-
default:
3138-
return false;
3163+
if (CommonCallOp) {
3164+
switch (CommonCallOp) {
3165+
case Intrinsic::umin:
3166+
ReducedOp = Intrinsic::vector_reduce_umin;
3167+
break;
3168+
case Intrinsic::umax:
3169+
ReducedOp = Intrinsic::vector_reduce_umax;
3170+
break;
3171+
case Intrinsic::smin:
3172+
ReducedOp = Intrinsic::vector_reduce_smin;
3173+
break;
3174+
case Intrinsic::smax:
3175+
ReducedOp = Intrinsic::vector_reduce_smax;
3176+
break;
3177+
default:
3178+
return false;
3179+
}
3180+
} else if (CommonBinOp) {
3181+
switch (CommonBinOp) {
3182+
case BinaryOperator::Add:
3183+
ReducedOp = Intrinsic::vector_reduce_add;
3184+
break;
3185+
case BinaryOperator::Mul:
3186+
ReducedOp = Intrinsic::vector_reduce_mul;
3187+
break;
3188+
case BinaryOperator::Or:
3189+
ReducedOp = Intrinsic::vector_reduce_or;
3190+
break;
3191+
case BinaryOperator::And:
3192+
ReducedOp = Intrinsic::vector_reduce_and;
3193+
break;
3194+
case BinaryOperator::Xor:
3195+
ReducedOp = Intrinsic::vector_reduce_xor;
3196+
break;
3197+
default:
3198+
return false;
3199+
}
31393200
}
31403201

31413202
InstructionCost OrigCost = 0;

llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -280,14 +280,12 @@ define i1 @cmp_lt_gt(double %a, double %b, double %c) {
280280
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[MUL]], i64 0
281281
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> zeroinitializer
282282
; CHECK-NEXT: [[TMP7:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP6]]
283-
; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], splat (double 0x3EB0C6F7A0B5ED8D)
284-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> poison, <2 x i32> <i32 1, i32 poison>
285-
; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i1> [[TMP8]], [[SHIFT]]
286-
; CHECK-NEXT: [[OR_COND:%.*]] = extractelement <2 x i1> [[TMP9]], i64 0
283+
; CHECK-NEXT: [[TMP8:%.*]] = fcmp uge <2 x double> [[TMP7]], splat (double 0x3EB0C6F7A0B5ED8D)
284+
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i1> [[TMP8]] to i2
285+
; CHECK-NEXT: [[OR_COND:%.*]] = icmp eq i2 [[TMP9]], 0
287286
; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], splat (double 1.000000e+00)
288-
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <2 x i1> [[TMP10]], <2 x i1> poison, <2 x i32> <i32 1, i32 poison>
289-
; CHECK-NEXT: [[TMP11:%.*]] = or <2 x i1> [[TMP10]], [[SHIFT2]]
290-
; CHECK-NEXT: [[OR_COND1_NOT:%.*]] = extractelement <2 x i1> [[TMP11]], i64 0
287+
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i1> [[TMP10]] to i2
288+
; CHECK-NEXT: [[OR_COND1_NOT:%.*]] = icmp ne i2 [[TMP11]], 0
291289
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND]], i1 false, i1 [[OR_COND1_NOT]]
292290
; CHECK-NEXT: ret i1 [[RETVAL_0]]
293291
;

llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,7 @@ define i1 @fcmp_and_v2f64(<2 x double> %a) {
1313
;
1414
; AVX-LABEL: @fcmp_and_v2f64(
1515
; AVX-NEXT: [[TMP1:%.*]] = fcmp olt <2 x double> [[A:%.*]], <double 4.200000e+01, double -8.000000e+00>
16-
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <2 x i32> <i32 1, i32 poison>
17-
; AVX-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[SHIFT]]
18-
; AVX-NEXT: [[R:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0
16+
; AVX-NEXT: [[R:%.*]] = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> [[TMP1]])
1917
; AVX-NEXT: ret i1 [[R]]
2018
;
2119
%e1 = extractelement <2 x double> %a, i32 0
@@ -117,9 +115,7 @@ define i1 @fcmp_and_v2f64_multiuse(<2 x double> %a) {
117115
; AVX-NEXT: [[E1:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
118116
; AVX-NEXT: call void @use(double [[E1]])
119117
; AVX-NEXT: [[TMP1:%.*]] = fcmp olt <2 x double> [[A]], <double 4.200000e+01, double -8.000000e+00>
120-
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <2 x i32> <i32 1, i32 poison>
121-
; AVX-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[SHIFT]]
122-
; AVX-NEXT: [[R:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0
118+
; AVX-NEXT: [[R:%.*]] = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> [[TMP1]])
123119
; AVX-NEXT: call void @use(i1 [[R]])
124120
; AVX-NEXT: ret i1 [[R]]
125121
;

0 commit comments

Comments
 (0)