Skip to content

Commit 0f35a70

Browse files
committed
[SelectionDAG] Deal with POISON for INSERT_VECTOR_ELT/INSERT_SUBVECTOR (part 1)
As reported in #141034 SelectionDAG::getNode had some unexpected behaviors when trying to create vectors with UNDEF elements. Since we treat both UNDEF and POISON as undefined (when using isUndef()) we can't just fold away INSERT_VECTOR_ELT/INSERT_SUBVECTOR based on isUndef(), as that could make the resulting vector more poisonous. Same kind of bug existed in DAGCombiner::visitINSERT_SUBVECTOR. Here are some examples: This fold was done even if vec[idx] was POISON: INSERT_VECTOR_ELT vec, UNDEF, idx -> vec This fold was done even if any of vec[idx..idx+size] was POISON: INSERT_SUBVECTOR vec, UNDEF, idx -> vec This fold was done even if the elements not extracted from vec could be POISON: sub = EXTRACT_SUBVECTOR vec, idx INSERT_SUBVECTOR UNDEF, sub, idx -> vec With this patch we avoid such folds unless we can prove that the result isn't more poisonous when eliminating the insert. This patch in itself result in some regressions. Goal is to try to deal with those regressions in follow up commits. Fixes #141034
1 parent eda3161 commit 0f35a70

17 files changed

+757
-248
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 52 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23182,6 +23182,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
2318223182
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
2318323183

2318423184
// Insert into out-of-bounds element is undefined.
23185+
// Code below relies on that we handle this special case early.
2318523186
if (IndexC && VT.isFixedLengthVector() &&
2318623187
IndexC->getZExtValue() >= VT.getVectorNumElements())
2318723188
return DAG.getUNDEF(VT);
@@ -23192,14 +23193,28 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
2319223193
InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
2319323194
return InVec;
2319423195

23195-
if (!IndexC) {
23196-
// If this is variable insert to undef vector, it might be better to splat:
23197-
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
23198-
if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
23199-
return DAG.getSplat(VT, DL, InVal);
23200-
return SDValue();
23196+
// If this is variable insert to undef vector, it might be better to splat:
23197+
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
23198+
if (!IndexC && InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
23199+
return DAG.getSplat(VT, DL, InVal);
23200+
23201+
// Try to drop insert of UNDEF/POISON elements. This is also done in getNode,
23202+
// but we also do it as a DAG combine since for example simplifications into
23203+
// SPLAT_VECTOR/BUILD_VECTOR may turn poison elements into undef/zero etc, and
23204+
// then suddenly the InVec is guaranteed to not be poison.
23205+
if (InVal.isUndef()) {
23206+
if (IndexC && VT.isFixedLengthVector()) {
23207+
APInt EltMask = APInt::getOneBitSet(VT.getVectorNumElements(),
23208+
IndexC->getZExtValue());
23209+
if (DAG.isGuaranteedNotToBePoison(InVec, EltMask))
23210+
return InVec;
23211+
}
23212+
return DAG.getFreeze(InVec);
2320123213
}
2320223214

23215+
if (!IndexC)
23216+
return SDValue();
23217+
2320323218
if (VT.isScalableVector())
2320423219
return SDValue();
2320523220

@@ -27639,18 +27654,42 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
2763927654
SDValue N2 = N->getOperand(2);
2764027655
uint64_t InsIdx = N->getConstantOperandVal(2);
2764127656

27642-
// If inserting an UNDEF, just return the original vector.
27643-
if (N1.isUndef())
27644-
return N0;
27657+
// If inserting an UNDEF, just return the original vector (unless it makes the
27658+
// result more poisonous).
27659+
if (N1.isUndef()) {
27660+
if (N1.getOpcode() == ISD::POISON)
27661+
return N0;
27662+
if (VT.isFixedLengthVector()) {
27663+
unsigned SubVecNumElts = N1.getValueType().getVectorNumElements();
27664+
APInt EltMask = APInt::getBitsSet(VT.getVectorNumElements(), InsIdx,
27665+
InsIdx + SubVecNumElts);
27666+
if (DAG.isGuaranteedNotToBePoison(N0, EltMask))
27667+
return N0;
27668+
}
27669+
return DAG.getFreeze(N0);
27670+
}
2764527671

27646-
// If this is an insert of an extracted vector into an undef vector, we can
27647-
// just use the input to the extract if the types match, and can simplify
27672+
// If this is an insert of an extracted vector into an undef/poison vector, we
27673+
// can just use the input to the extract if the types match, and can simplify
2764827674
// in some cases even if they don't.
2764927675
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
2765027676
N1.getOperand(1) == N2) {
27677+
EVT N1VT = N1.getValueType();
2765127678
EVT SrcVT = N1.getOperand(0).getValueType();
27652-
if (SrcVT == VT)
27653-
return N1.getOperand(0);
27679+
if (SrcVT == VT) {
27680+
// Need to ensure that result isn't more poisonous if skipping both the
27681+
// extract+insert.
27682+
if (N0.getOpcode() == ISD::POISON)
27683+
return N1.getOperand(0);
27684+
if (VT.isFixedLengthVector() && N1VT.isFixedLengthVector()) {
27685+
unsigned SubVecNumElts = N1VT.getVectorNumElements();
27686+
APInt EltMask = APInt::getBitsSet(VT.getVectorNumElements(), InsIdx,
27687+
InsIdx + SubVecNumElts);
27688+
if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0), ~EltMask))
27689+
return N1.getOperand(0);
27690+
} else if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0)))
27691+
return N1.getOperand(0);
27692+
}
2765427693
// TODO: To remove the zero check, need to adjust the offset to
2765527694
// a multiple of the new src type.
2765627695
if (isNullConstant(N2)) {

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7952,23 +7952,42 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
79527952
// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except
79537953
// for scalable vectors where we will generate appropriate code to
79547954
// deal with out-of-bounds cases correctly.
7955-
if (N3C && N1.getValueType().isFixedLengthVector() &&
7956-
N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
7955+
if (N3C && VT.isFixedLengthVector() &&
7956+
N3C->getZExtValue() >= VT.getVectorNumElements())
79577957
return getUNDEF(VT);
79587958

79597959
// Undefined index can be assumed out-of-bounds, so that's UNDEF too.
79607960
if (N3.isUndef())
79617961
return getUNDEF(VT);
79627962

7963-
// If the inserted element is an UNDEF, just use the input vector.
7964-
if (N2.isUndef())
7963+
// If inserting poison, just use the input vector.
7964+
if (N2.getOpcode() == ISD::POISON)
79657965
return N1;
79667966

7967+
// Inserting undef into undef/poison is still undef.
7968+
if (N2.getOpcode() == ISD::UNDEF && N1.isUndef())
7969+
return getUNDEF(VT);
7970+
7971+
// If the inserted element is an UNDEF, just use the input vector.
7972+
// But not if skipping the insert could make the result more poisonous.
7973+
if (N2.isUndef()) {
7974+
if (N3C && VT.isFixedLengthVector()) {
7975+
APInt EltMask =
7976+
APInt::getOneBitSet(VT.getVectorNumElements(), N3C->getZExtValue());
7977+
if (isGuaranteedNotToBePoison(N1, EltMask))
7978+
return N1;
7979+
} else if (isGuaranteedNotToBePoison(N1))
7980+
return N1;
7981+
}
79677982
break;
79687983
}
79697984
case ISD::INSERT_SUBVECTOR: {
7970-
// Inserting undef into undef is still undef.
7971-
if (N1.isUndef() && N2.isUndef())
7985+
// If inserting poison, just use the input vector,
7986+
if (N2.getOpcode() == ISD::POISON)
7987+
return N1;
7988+
7989+
// Inserting undef into undef/poison is still undef.
7990+
if (N2.getOpcode() == ISD::UNDEF && N1.isUndef())
79727991
return getUNDEF(VT);
79737992

79747993
EVT N2VT = N2.getValueType();
@@ -7997,11 +8016,37 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
79978016
if (VT == N2VT)
79988017
return N2;
79998018

8000-
// If this is an insert of an extracted vector into an undef vector, we
8001-
// can just use the input to the extract.
8019+
// If this is an insert of an extracted vector into an undef/poison vector,
8020+
// we can just use the input to the extract. But not if skipping the
8021+
// extract+insert could make the result more poisonous.
80028022
if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
8003-
N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
8004-
return N2.getOperand(0);
8023+
N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT) {
8024+
if (N1.getOpcode() == ISD::POISON)
8025+
return N2.getOperand(0);
8026+
if (VT.isFixedLengthVector() && N2VT.isFixedLengthVector()) {
8027+
unsigned LoBit = N3->getAsZExtVal();
8028+
unsigned HiBit = LoBit + N2VT.getVectorNumElements();
8029+
APInt EltMask =
8030+
APInt::getBitsSet(VT.getVectorNumElements(), LoBit, HiBit);
8031+
if (isGuaranteedNotToBePoison(N2.getOperand(0), ~EltMask))
8032+
return N2.getOperand(0);
8033+
} else if (isGuaranteedNotToBePoison(N2.getOperand(0)))
8034+
return N2.getOperand(0);
8035+
}
8036+
8037+
// If the inserted subvector is UNDEF, just use the input vector.
8038+
// But not if skipping the insert could make the result more poisonous.
8039+
if (N2.isUndef()) {
8040+
if (VT.isFixedLengthVector()) {
8041+
unsigned LoBit = N3->getAsZExtVal();
8042+
unsigned HiBit = LoBit + N2VT.getVectorNumElements();
8043+
APInt EltMask =
8044+
APInt::getBitsSet(VT.getVectorNumElements(), LoBit, HiBit);
8045+
if (isGuaranteedNotToBePoison(N1, EltMask))
8046+
return N1;
8047+
} else if (isGuaranteedNotToBePoison(N1))
8048+
return N1;
8049+
}
80058050
break;
80068051
}
80078052
case ISD::BITCAST:

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3433,8 +3433,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
34333433
break;
34343434
}
34353435
case ISD::INSERT_SUBVECTOR: {
3436-
// Demand any elements from the subvector and the remainder from the src its
3437-
// inserted into.
3436+
// Demand any elements from the subvector and the remainder from the src it
3437+
// is inserted into.
34383438
SDValue Src = Op.getOperand(0);
34393439
SDValue Sub = Op.getOperand(1);
34403440
uint64_t Idx = Op.getConstantOperandVal(2);
@@ -3443,6 +3443,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
34433443
APInt DemandedSrcElts = DemandedElts;
34443444
DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
34453445

3446+
// If none of the sub operand elements are demanded, bypass the insert.
3447+
if (!DemandedSubElts)
3448+
return TLO.CombineTo(Op, Src);
3449+
34463450
APInt SubUndef, SubZero;
34473451
if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
34483452
Depth + 1))

llvm/test/CodeGen/AArch64/arm64-build-vector.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ define void @widen_f16_build_vector(ptr %addr) {
5757
; CHECK-LABEL: widen_f16_build_vector:
5858
; CHECK: // %bb.0:
5959
; CHECK-NEXT: mov w8, #13294 // =0x33ee
60-
; CHECK-NEXT: movk w8, #13294, lsl #16
61-
; CHECK-NEXT: str w8, [x0]
60+
; CHECK-NEXT: dup v0.4h, w8
61+
; CHECK-NEXT: str s0, [x0]
6262
; CHECK-NEXT: ret
6363
store <2 x half> <half 0xH33EE, half 0xH33EE>, ptr %addr, align 2
6464
ret void

llvm/test/CodeGen/AArch64/concat-vector-add-combine.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -94,16 +94,14 @@ define i32 @combine_undef_add_8xi32(i32 %a, i32 %b, i32 %c, i32 %d) local_unname
9494
; CHECK-LABEL: combine_undef_add_8xi32:
9595
; CHECK: // %bb.0:
9696
; CHECK-NEXT: fmov s1, w0
97-
; CHECK-NEXT: movi v0.2d, #0000000000000000
97+
; CHECK-NEXT: dup v0.4s, w8
9898
; CHECK-NEXT: mov v1.s[1], w1
99-
; CHECK-NEXT: uhadd v0.4h, v0.4h, v0.4h
10099
; CHECK-NEXT: mov v1.s[2], w2
101100
; CHECK-NEXT: mov v1.s[3], w3
102-
; CHECK-NEXT: xtn v2.4h, v1.4s
103-
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
104-
; CHECK-NEXT: uhadd v1.4h, v2.4h, v1.4h
105-
; CHECK-NEXT: mov v1.d[1], v0.d[0]
106-
; CHECK-NEXT: uaddlv s0, v1.8h
101+
; CHECK-NEXT: uzp2 v2.8h, v1.8h, v0.8h
102+
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
103+
; CHECK-NEXT: uhadd v0.8h, v0.8h, v2.8h
104+
; CHECK-NEXT: uaddlv s0, v0.8h
107105
; CHECK-NEXT: fmov w0, s0
108106
; CHECK-NEXT: ret
109107
%a1 = insertelement <8 x i32> poison, i32 %a, i32 0

0 commit comments

Comments
 (0)