diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 73320b556f825..a17417cb5189c 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1346,8 +1346,6 @@ static void computeKnownBitsFromOperator(const Operator *I, isa(I->getType())) break; - unsigned NumElts = DemandedElts.getBitWidth(); - bool IsLE = Q.DL.isLittleEndian(); // Look through a cast from narrow vector elements to wider type. // Examples: v4i32 -> v2i64, v3i8 -> v24 unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits(); @@ -1366,6 +1364,7 @@ static void computeKnownBitsFromOperator(const Operator *I, // // The known bits of each sub-element are then inserted into place // (dependent on endian) to form the full result of known bits. + unsigned NumElts = DemandedElts.getBitWidth(); unsigned SubScale = BitWidth / SubBitWidth; APInt SubDemandedElts = APInt::getZero(NumElts * SubScale); for (unsigned i = 0; i != NumElts; ++i) { @@ -1377,32 +1376,10 @@ static void computeKnownBitsFromOperator(const Operator *I, for (unsigned i = 0; i != SubScale; ++i) { computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc, Q, Depth + 1); - unsigned ShiftElt = IsLE ? i : SubScale - 1 - i; + unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i; Known.insertBits(KnownSrc, ShiftElt * SubBitWidth); } } - // Look through a cast from wider vector elements to narrow type. - // Examples: v2i64 -> v4i32 - if (SubBitWidth % BitWidth == 0) { - unsigned SubScale = SubBitWidth / BitWidth; - KnownBits KnownSrc(SubBitWidth); - APInt SubDemandedElts = - APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale); - computeKnownBits(I->getOperand(0), SubDemandedElts, KnownSrc, Q, - Depth + 1); - - Known.Zero.setAllBits(); - Known.One.setAllBits(); - for (unsigned i = 0; i != SubScale; ++i) { - if (DemandedElts[i]) { - unsigned Shifts = IsLE ? i : NumElts - 1 - i; - unsigned Offset = (Shifts % SubScale) * BitWidth; - Known = Known.intersectWith(KnownSrc.extractBits(BitWidth, Offset)); - if (Known.isUnknown()) - break; - } - } - } break; } case Instruction::SExt: { diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll index cc252ae53803b..db56080a3ea2b 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll @@ -3732,6 +3732,7 @@ define <4 x i64> @test_avx2_psrl_0() { ret <4 x i64> %16 } +; FIXME: Failure to peek through bitcasts to ensure psllq shift amount is within bounds. define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) { ; CHECK-LABEL: @PR125228( ; CHECK-NEXT: [[MASK:%.*]] = and <2 x i64> [[S:%.*]], splat (i64 63) @@ -3740,8 +3741,7 @@ define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) { ; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[MASK]] to <16 x i8> ; CHECK-NEXT: [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[CAST3:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[CAST3]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[SLL1:%.*]] = shl <2 x i64> [[V]], [[TMP2]] +; CHECK-NEXT: [[SLL1:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V]], <2 x i64> [[CAST3]]) ; CHECK-NEXT: [[SHUFP_UNCASTED:%.*]] = shufflevector <2 x i64> [[SLL0]], <2 x i64> [[SLL1]], <2 x i32> ; CHECK-NEXT: ret <2 x i64> [[SHUFP_UNCASTED]] ; diff --git a/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll b/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll index 65b43df752f76..3e47e775e3a28 100644 --- a/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll +++ b/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll @@ -12,7 +12,8 @@ define <16 x i8> @knownbits_bitcast_masked_shift(<16 x i8> %arg1, <16 x i8> %arg ; CHECK-NEXT: [[BITCAST4:%.*]] = bitcast <16 x i8> [[OR]] to <8 x i16> ; CHECK-NEXT: [[SHL5:%.*]] = shl nuw <8 x i16> [[BITCAST4]], splat (i16 2) ; CHECK-NEXT: [[BITCAST6:%.*]] = bitcast <8 x i16> [[SHL5]] to <16 x i8> -; CHECK-NEXT: ret <16 x i8> [[BITCAST6]] +; CHECK-NEXT: [[AND7:%.*]] = and <16 x i8> [[BITCAST6]], splat (i8 -52) +; CHECK-NEXT: ret <16 x i8> [[AND7]] ; %and = and <16 x i8> %arg1, splat (i8 3) %and3 = and <16 x i8> %arg2, splat (i8 48) @@ -32,7 +33,8 @@ define <16 x i8> @knownbits_shuffle_masked_nibble_shift(<16 x i8> %arg) { ; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16> ; CHECK-NEXT: [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4) ; CHECK-NEXT: [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8> -; CHECK-NEXT: ret <16 x i8> [[BITCAST2]] +; CHECK-NEXT: [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16) +; CHECK-NEXT: ret <16 x i8> [[AND3]] ; %and = and <16 x i8> %arg, splat (i8 15) %shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32> @@ -51,7 +53,8 @@ define <16 x i8> @knownbits_reverse_shuffle_masked_shift(<16 x i8> %arg) { ; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16> ; CHECK-NEXT: [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4) ; CHECK-NEXT: [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8> -; CHECK-NEXT: ret <16 x i8> [[BITCAST2]] +; CHECK-NEXT: [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16) +; CHECK-NEXT: ret <16 x i8> [[AND3]] ; %and = and <16 x i8> %arg, splat (i8 15) %shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32> @@ -67,7 +70,8 @@ define <16 x i8> @knownbits_extract_bit(<8 x i16> %arg) { ; CHECK-SAME: <8 x i16> [[ARG:%.*]]) { ; CHECK-NEXT: [[LSHR:%.*]] = lshr <8 x i16> [[ARG]], splat (i16 15) ; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast <8 x i16> [[LSHR]] to <16 x i8> -; CHECK-NEXT: ret <16 x i8> [[BITCAST1]] +; CHECK-NEXT: [[AND:%.*]] = and <16 x i8> [[BITCAST1]], splat (i8 1) +; CHECK-NEXT: ret <16 x i8> [[AND]] ; %lshr = lshr <8 x i16> %arg, splat (i16 15) %bitcast1 = bitcast <8 x i16> %lshr to <16 x i8> @@ -84,8 +88,7 @@ define { i32, i1 } @knownbits_popcount_add_with_overflow(<2 x i64> %arg1, <2 x i ; CHECK-NEXT: [[CALL9:%.*]] = tail call range(i64 0, 65) <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[ARG2]]) ; CHECK-NEXT: [[BITCAST10:%.*]] = bitcast <2 x i64> [[CALL9]] to <4 x i32> ; CHECK-NEXT: [[EXTRACTELEMENT11:%.*]] = extractelement <4 x i32> [[BITCAST10]], i64 0 -; CHECK-NEXT: [[CALL12:%.*]] = add nuw nsw i32 [[EXTRACTELEMENT]], [[EXTRACTELEMENT11]] -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 poison, i1 false }, i32 [[CALL12]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[EXTRACTELEMENT]], i32 [[EXTRACTELEMENT11]]) ; CHECK-NEXT: ret { i32, i1 } [[TMP1]] ; %call = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %arg1) @@ -107,7 +110,11 @@ define <16 x i8> @knownbits_shuffle_add_shift_v32i8(<16 x i8> %arg1, <8 x i16> % ; CHECK-NEXT: [[BITCAST11:%.*]] = bitcast <8 x i16> [[SHL10]] to <16 x i8> ; CHECK-NEXT: [[ADD12:%.*]] = add <16 x i8> [[BITCAST11]], [[BITCAST7]] ; CHECK-NEXT: [[ADD14:%.*]] = add <16 x i8> [[ADD12]], [[ARG1]] -; CHECK-NEXT: ret <16 x i8> [[ADD14]] +; CHECK-NEXT: [[BITCAST14:%.*]] = bitcast <16 x i8> [[ADD12]] to <8 x i16> +; CHECK-NEXT: [[SHL15:%.*]] = shl <8 x i16> [[BITCAST14]], splat (i16 8) +; CHECK-NEXT: [[BITCAST16:%.*]] = bitcast <8 x i16> [[SHL15]] to <16 x i8> +; CHECK-NEXT: [[ADD13:%.*]] = add <16 x i8> [[ADD14]], [[BITCAST16]] +; CHECK-NEXT: ret <16 x i8> [[ADD13]] ; %shl6 = shl <8 x i16> %arg2, splat (i16 8) %bitcast7 = bitcast <8 x i16> %shl6 to <16 x i8>