Skip to content

Commit 185ae46

Browse files
committed
Fix issue 50142 by adding AnyTrueCombine
This introduces the fold of (any_true (setcc <X>, 0, eq)) to (not (all_true <X>)), allowing a potential extra fold of (not (not ...)). Introduces test simd-setcc-reductions and readjusts simd-vecreduce-bool.
1 parent cdd9b6e commit 185ae46

File tree

3 files changed

+51
-36
lines changed

3 files changed

+51
-36
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3239,6 +3239,42 @@ static SDValue performBitcastCombine(SDNode *N,
32393239
return SDValue();
32403240
}
32413241

3242+
/// Fold an any_true reduction of an equal-to-zero compare into the negation
/// of an all_true reduction of the compared vector:
///
///   any_true (setcc <X>, 0, eq)  =>  not (all_true <X>)
///
/// "Some lane of X equals zero" is the complement of "every lane of X is
/// non-zero", so the compare and its zero operand can be dropped.
///
/// \param N   The node to combine; must be an ISD::INTRINSIC_WO_CHAIN node.
/// \param DAG The DAG in which the replacement nodes are created.
/// \returns The replacement value, with the same type as result 0 of \p N, or
///          an empty SDValue when the pattern does not match.
static SDValue performAnyTrueCombine(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
  if (N->getConstantOperandVal(0) != Intrinsic::wasm_anytrue)
    return SDValue();

  SDValue SetCC = N->getOperand(1);
  if (SetCC.getOpcode() != ISD::SETCC)
    return SDValue();

  // Only an equality compare against an all-zero right-hand side qualifies.
  SDValue LHS = SetCC->getOperand(0);
  SDValue RHS = SetCC->getOperand(1);
  ISD::CondCode Cond = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
  if (Cond != ISD::SETEQ || !isNullOrNullSplat(RHS))
    return SDValue();

  // The lane count must be one of those for which 128 / NumElts yields an
  // 8-, 16-, 32- or 64-bit integer lane.
  EVT LT = LHS.getValueType();
  unsigned NumElts = LT.getVectorNumElements();
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return SDValue();

  // The operand is sign-extended or truncated below so that it fills exactly
  // 128 bits. Sign extension keeps zero lanes zero and non-zero lanes
  // non-zero, but truncation can turn a non-zero lane into zero and thereby
  // change the reduction result. Refuse to fold when the lanes are wider than
  // the target lane width.
  unsigned LaneBits = 128 / NumElts;
  if (LT.getScalarSizeInBits() > LaneBits)
    return SDValue();

  SDLoc DL(N);
  EVT Width = MVT::getIntegerVT(LaneBits);
  SDValue Widened =
      DAG.getSExtOrTrunc(LHS, DL, LT.changeVectorElementType(Width));
  SDValue AllTrue = DAG.getNode(
      ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
      {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32), Widened});

  // Negate in i1 so that only the boolean bit is flipped, then convert to the
  // result type of the node being replaced.
  SDValue Ret = DAG.getZExtOrTrunc(AllTrue, DL, MVT::i1);
  Ret = DAG.getNOT(DL, Ret, MVT::i1);
  return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
}
3277+
32423278
static SDValue performSETCCCombine(SDNode *N,
32433279
TargetLowering::DAGCombinerInfo &DCI) {
32443280
auto &DAG = DCI.DAG;
@@ -3400,8 +3436,11 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
34003436
return performVectorTruncZeroCombine(N, DCI);
34013437
case ISD::TRUNCATE:
34023438
return performTruncateCombine(N, DCI);
3403-
case ISD::INTRINSIC_WO_CHAIN:
3439+
case ISD::INTRINSIC_WO_CHAIN: {
3440+
if (auto AnyTrueCombine = performAnyTrueCombine(N, DCI.DAG))
3441+
return AnyTrueCombine;
34043442
return performLowerPartialReduction(N, DCI.DAG);
3443+
}
34053444
case ISD::MUL:
34063445
return performMulCombine(N, DCI.DAG);
34073446
}

llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll

Lines changed: 8 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,8 @@ define i32 @all_true_16_i8(<16 x i8> %v) {
77
; CHECK-LABEL: all_true_16_i8:
88
; CHECK: .functype all_true_16_i8 (v128) -> (i32)
99
; CHECK-NEXT: # %bb.0:
10-
; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
11-
; CHECK-NEXT: i8x16.eq $push1=, $0, $pop0
12-
; CHECK-NEXT: v128.any_true $push2=, $pop1
13-
; CHECK-NEXT: i32.const $push3=, -1
14-
; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
15-
; CHECK-NEXT: i32.const $push5=, 1
16-
; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
17-
; CHECK-NEXT: return $pop6
10+
; CHECK-NEXT: i8x16.all_true $push0=, $0
11+
; CHECK-NEXT: return $pop0
1812
%1 = icmp eq <16 x i8> %v, zeroinitializer
1913
%2 = bitcast <16 x i1> %1 to i16
2014
%3 = icmp eq i16 %2, 0
@@ -27,14 +21,8 @@ define i32 @all_true_4_i32(<4 x i32> %v) {
2721
; CHECK-LABEL: all_true_4_i32:
2822
; CHECK: .functype all_true_4_i32 (v128) -> (i32)
2923
; CHECK-NEXT: # %bb.0:
30-
; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0
31-
; CHECK-NEXT: i32x4.eq $push1=, $0, $pop0
32-
; CHECK-NEXT: v128.any_true $push2=, $pop1
33-
; CHECK-NEXT: i32.const $push3=, -1
34-
; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
35-
; CHECK-NEXT: i32.const $push5=, 1
36-
; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
37-
; CHECK-NEXT: return $pop6
24+
; CHECK-NEXT: i32x4.all_true $push0=, $0
25+
; CHECK-NEXT: return $pop0
3826
%1 = icmp eq <4 x i32> %v, zeroinitializer
3927
%2 = bitcast <4 x i1> %1 to i4
4028
%3 = icmp eq i4 %2, 0
@@ -47,14 +35,8 @@ define i32 @all_true_8_i16(<8 x i16> %v) {
4735
; CHECK-LABEL: all_true_8_i16:
4836
; CHECK: .functype all_true_8_i16 (v128) -> (i32)
4937
; CHECK-NEXT: # %bb.0:
50-
; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0
51-
; CHECK-NEXT: i16x8.eq $push1=, $0, $pop0
52-
; CHECK-NEXT: v128.any_true $push2=, $pop1
53-
; CHECK-NEXT: i32.const $push3=, -1
54-
; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
55-
; CHECK-NEXT: i32.const $push5=, 1
56-
; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
57-
; CHECK-NEXT: return $pop6
38+
; CHECK-NEXT: i16x8.all_true $push0=, $0
39+
; CHECK-NEXT: return $pop0
5840
%1 = icmp eq <8 x i16> %v, zeroinitializer
5941
%2 = bitcast <8 x i1> %1 to i8
6042
%3 = icmp eq i8 %2, 0
@@ -88,14 +70,8 @@ define i32 @all_true_2_i64(<2 x i64> %v) {
8870
; CHECK-LABEL: all_true_2_i64:
8971
; CHECK: .functype all_true_2_i64 (v128) -> (i32)
9072
; CHECK-NEXT: # %bb.0:
91-
; CHECK-NEXT: v128.const $push0=, 0, 0
92-
; CHECK-NEXT: i64x2.eq $push1=, $0, $pop0
93-
; CHECK-NEXT: v128.any_true $push2=, $pop1
94-
; CHECK-NEXT: i32.const $push3=, -1
95-
; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3
96-
; CHECK-NEXT: i32.const $push5=, 1
97-
; CHECK-NEXT: i32.and $push6=, $pop4, $pop5
98-
; CHECK-NEXT: return $pop6
73+
; CHECK-NEXT: i64x2.all_true $push0=, $0
74+
; CHECK-NEXT: return $pop0
9975
%1 = icmp eq <2 x i64> %v, zeroinitializer
10076
%2 = bitcast <2 x i1> %1 to i2
10177
%3 = icmp eq i2 %2, 0

llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,9 +1086,9 @@ define i1 @test_cmp_v16i8(<16 x i8> %x) {
10861086
; CHECK-LABEL: test_cmp_v16i8:
10871087
; CHECK: .functype test_cmp_v16i8 (v128) -> (i32)
10881088
; CHECK-NEXT: # %bb.0:
1089-
; CHECK-NEXT: v128.const $push0=, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1090-
; CHECK-NEXT: i8x16.eq $push1=, $0, $pop0
1091-
; CHECK-NEXT: v128.any_true $push2=, $pop1
1089+
; CHECK-NEXT: i8x16.all_true $push0=, $0
1090+
; CHECK-NEXT: i32.const $push1=, 1
1091+
; CHECK-NEXT: i32.xor $push2=, $pop0, $pop1
10921092
; CHECK-NEXT: return $pop2
10931093
%zero = icmp eq <16 x i8> %x, zeroinitializer
10941094
%ret = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %zero)

0 commit comments

Comments
 (0)