Skip to content

Commit 0b8179b

Browse files
[ValueTracking] Improve Bitcast handling to match SDAG (#125935)
Closes #125228
1 parent c0c7146 commit 0b8179b

File tree

3 files changed: 34 additions and 18 deletions

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1346,6 +1346,8 @@ static void computeKnownBitsFromOperator(const Operator *I,
13461346
isa<ScalableVectorType>(I->getType()))
13471347
break;
13481348

1349+
unsigned NumElts = DemandedElts.getBitWidth();
1350+
bool IsLE = Q.DL.isLittleEndian();
13491351
// Look through a cast from narrow vector elements to wider type.
13501352
// Examples: v4i32 -> v2i64, v3i8 -> i24
13511353
unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
@@ -1364,7 +1366,6 @@ static void computeKnownBitsFromOperator(const Operator *I,
13641366
//
13651367
// The known bits of each sub-element are then inserted into place
13661368
// (dependent on endian) to form the full result of known bits.
1367-
unsigned NumElts = DemandedElts.getBitWidth();
13681369
unsigned SubScale = BitWidth / SubBitWidth;
13691370
APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
13701371
for (unsigned i = 0; i != NumElts; ++i) {
@@ -1376,10 +1377,32 @@ static void computeKnownBitsFromOperator(const Operator *I,
13761377
for (unsigned i = 0; i != SubScale; ++i) {
13771378
computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc, Q,
13781379
Depth + 1);
1379-
unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
1380+
unsigned ShiftElt = IsLE ? i : SubScale - 1 - i;
13801381
Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
13811382
}
13821383
}
1384+
// Look through a cast from wider vector elements to narrow type.
1385+
// Examples: v2i64 -> v4i32
1386+
if (SubBitWidth % BitWidth == 0) {
1387+
unsigned SubScale = SubBitWidth / BitWidth;
1388+
KnownBits KnownSrc(SubBitWidth);
1389+
APInt SubDemandedElts =
1390+
APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale);
1391+
computeKnownBits(I->getOperand(0), SubDemandedElts, KnownSrc, Q,
1392+
Depth + 1);
1393+
1394+
Known.Zero.setAllBits();
1395+
Known.One.setAllBits();
1396+
for (unsigned i = 0; i != SubScale; ++i) {
1397+
if (DemandedElts[i]) {
1398+
unsigned Shifts = IsLE ? i : NumElts - 1 - i;
1399+
unsigned Offset = (Shifts % SubScale) * BitWidth;
1400+
Known = Known.intersectWith(KnownSrc.extractBits(BitWidth, Offset));
1401+
if (Known.isUnknown())
1402+
break;
1403+
}
1404+
}
1405+
}
13831406
break;
13841407
}
13851408
case Instruction::SExt: {

llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3732,7 +3732,6 @@ define <4 x i64> @test_avx2_psrl_0() {
37323732
ret <4 x i64> %16
37333733
}
37343734

3735-
; FIXME: Failure to peek through bitcasts to ensure psllq shift amount is within bounds.
37363735
define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) {
37373736
; CHECK-LABEL: @PR125228(
37383737
; CHECK-NEXT: [[MASK:%.*]] = and <2 x i64> [[S:%.*]], splat (i64 63)
@@ -3741,7 +3740,8 @@ define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) {
37413740
; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[MASK]] to <16 x i8>
37423741
; CHECK-NEXT: [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
37433742
; CHECK-NEXT: [[CAST3:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64>
3744-
; CHECK-NEXT: [[SLL1:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V]], <2 x i64> [[CAST3]])
3743+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[CAST3]], <2 x i64> poison, <2 x i32> zeroinitializer
3744+
; CHECK-NEXT: [[SLL1:%.*]] = shl <2 x i64> [[V]], [[TMP2]]
37453745
; CHECK-NEXT: [[SHUFP_UNCASTED:%.*]] = shufflevector <2 x i64> [[SLL0]], <2 x i64> [[SLL1]], <2 x i32> <i32 0, i32 3>
37463746
; CHECK-NEXT: ret <2 x i64> [[SHUFP_UNCASTED]]
37473747
;

llvm/test/Transforms/InstCombine/bitcast-known-bits.ll

Lines changed: 7 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,7 @@ define <16 x i8> @knownbits_bitcast_masked_shift(<16 x i8> %arg1, <16 x i8> %arg
1212
; CHECK-NEXT: [[BITCAST4:%.*]] = bitcast <16 x i8> [[OR]] to <8 x i16>
1313
; CHECK-NEXT: [[SHL5:%.*]] = shl nuw <8 x i16> [[BITCAST4]], splat (i16 2)
1414
; CHECK-NEXT: [[BITCAST6:%.*]] = bitcast <8 x i16> [[SHL5]] to <16 x i8>
15-
; CHECK-NEXT: [[AND7:%.*]] = and <16 x i8> [[BITCAST6]], splat (i8 -52)
16-
; CHECK-NEXT: ret <16 x i8> [[AND7]]
15+
; CHECK-NEXT: ret <16 x i8> [[BITCAST6]]
1716
;
1817
%and = and <16 x i8> %arg1, splat (i8 3)
1918
%and3 = and <16 x i8> %arg2, splat (i8 48)
@@ -33,8 +32,7 @@ define <16 x i8> @knownbits_shuffle_masked_nibble_shift(<16 x i8> %arg) {
3332
; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16>
3433
; CHECK-NEXT: [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4)
3534
; CHECK-NEXT: [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8>
36-
; CHECK-NEXT: [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16)
37-
; CHECK-NEXT: ret <16 x i8> [[AND3]]
35+
; CHECK-NEXT: ret <16 x i8> [[BITCAST2]]
3836
;
3937
%and = and <16 x i8> %arg, splat (i8 15)
4038
%shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -53,8 +51,7 @@ define <16 x i8> @knownbits_reverse_shuffle_masked_shift(<16 x i8> %arg) {
5351
; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16>
5452
; CHECK-NEXT: [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4)
5553
; CHECK-NEXT: [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8>
56-
; CHECK-NEXT: [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16)
57-
; CHECK-NEXT: ret <16 x i8> [[AND3]]
54+
; CHECK-NEXT: ret <16 x i8> [[BITCAST2]]
5855
;
5956
%and = and <16 x i8> %arg, splat (i8 15)
6057
%shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
@@ -70,8 +67,7 @@ define <16 x i8> @knownbits_extract_bit(<8 x i16> %arg) {
7067
; CHECK-SAME: <8 x i16> [[ARG:%.*]]) {
7168
; CHECK-NEXT: [[LSHR:%.*]] = lshr <8 x i16> [[ARG]], splat (i16 15)
7269
; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast <8 x i16> [[LSHR]] to <16 x i8>
73-
; CHECK-NEXT: [[AND:%.*]] = and <16 x i8> [[BITCAST1]], splat (i8 1)
74-
; CHECK-NEXT: ret <16 x i8> [[AND]]
70+
; CHECK-NEXT: ret <16 x i8> [[BITCAST1]]
7571
;
7672
%lshr = lshr <8 x i16> %arg, splat (i16 15)
7773
%bitcast1 = bitcast <8 x i16> %lshr to <16 x i8>
@@ -88,7 +84,8 @@ define { i32, i1 } @knownbits_popcount_add_with_overflow(<2 x i64> %arg1, <2 x i
8884
; CHECK-NEXT: [[CALL9:%.*]] = tail call range(i64 0, 65) <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[ARG2]])
8985
; CHECK-NEXT: [[BITCAST10:%.*]] = bitcast <2 x i64> [[CALL9]] to <4 x i32>
9086
; CHECK-NEXT: [[EXTRACTELEMENT11:%.*]] = extractelement <4 x i32> [[BITCAST10]], i64 0
91-
; CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[EXTRACTELEMENT]], i32 [[EXTRACTELEMENT11]])
87+
; CHECK-NEXT: [[CALL12:%.*]] = add nuw nsw i32 [[EXTRACTELEMENT]], [[EXTRACTELEMENT11]]
88+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 poison, i1 false }, i32 [[CALL12]], 0
9289
; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
9390
;
9491
%call = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %arg1)
@@ -110,11 +107,7 @@ define <16 x i8> @knownbits_shuffle_add_shift_v32i8(<16 x i8> %arg1, <8 x i16> %
110107
; CHECK-NEXT: [[BITCAST11:%.*]] = bitcast <8 x i16> [[SHL10]] to <16 x i8>
111108
; CHECK-NEXT: [[ADD12:%.*]] = add <16 x i8> [[BITCAST11]], [[BITCAST7]]
112109
; CHECK-NEXT: [[ADD14:%.*]] = add <16 x i8> [[ADD12]], [[ARG1]]
113-
; CHECK-NEXT: [[BITCAST14:%.*]] = bitcast <16 x i8> [[ADD12]] to <8 x i16>
114-
; CHECK-NEXT: [[SHL15:%.*]] = shl <8 x i16> [[BITCAST14]], splat (i16 8)
115-
; CHECK-NEXT: [[BITCAST16:%.*]] = bitcast <8 x i16> [[SHL15]] to <16 x i8>
116-
; CHECK-NEXT: [[ADD13:%.*]] = add <16 x i8> [[ADD14]], [[BITCAST16]]
117-
; CHECK-NEXT: ret <16 x i8> [[ADD13]]
110+
; CHECK-NEXT: ret <16 x i8> [[ADD14]]
118111
;
119112
%shl6 = shl <8 x i16> %arg2, splat (i16 8)
120113
%bitcast7 = bitcast <8 x i16> %shl6 to <16 x i8>

0 commit comments

Comments
 (0)