diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index a17417cb5189c..2c4d55eea1dda 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1346,6 +1346,8 @@ static void computeKnownBitsFromOperator(const Operator *I,
         isa<ScalableVectorType>(I->getType()))
       break;
 
+    unsigned NumElts = DemandedElts.getBitWidth();
+    bool IsLE = Q.DL.isLittleEndian();
     // Look through a cast from narrow vector elements to wider type.
     // Examples: v4i32 -> v2i64, v3i8 -> i24
     unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
@@ -1364,7 +1366,6 @@ static void computeKnownBitsFromOperator(const Operator *I,
       //
       // The known bits of each sub-element are then inserted into place
       // (dependent on endian) to form the full result of known bits.
-      unsigned NumElts = DemandedElts.getBitWidth();
       unsigned SubScale = BitWidth / SubBitWidth;
       APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
       for (unsigned i = 0; i != NumElts; ++i) {
@@ -1376,10 +1377,32 @@ static void computeKnownBitsFromOperator(const Operator *I,
       for (unsigned i = 0; i != SubScale; ++i) {
         computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc, Q,
                          Depth + 1);
-        unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
+        unsigned ShiftElt = IsLE ? i : SubScale - 1 - i;
         Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
       }
     }
+    // Look through a cast from wider vector elements to narrow type.
+    // Examples: v2i64 -> v4i32
+    if (SubBitWidth % BitWidth == 0) {
+      unsigned SubScale = SubBitWidth / BitWidth;
+      KnownBits KnownSrc(SubBitWidth);
+      APInt SubDemandedElts =
+          APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale);
+      computeKnownBits(I->getOperand(0), SubDemandedElts, KnownSrc, Q,
+                       Depth + 1);
+
+      Known.Zero.setAllBits();
+      Known.One.setAllBits();
+      for (unsigned i = 0; i != NumElts; ++i) {
+        if (DemandedElts[i]) {
+          unsigned Shifts = IsLE ? i : NumElts - 1 - i;
+          unsigned Offset = (Shifts % SubScale) * BitWidth;
+          Known = Known.intersectWith(KnownSrc.extractBits(BitWidth, Offset));
+          if (Known.isUnknown())
+            break;
+        }
+      }
+    }
     break;
   }
   case Instruction::SExt: {
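
The wider-to-narrower path above extracts, for each demanded narrow element, the BitWidth-sized chunk at that element's offset inside the (shared) known bits of the wide source elements, then intersects the results. Below is a standalone sketch of that logic, not part of the patch: it assumes little endian and models llvm::KnownBits with a plain struct; the file, struct, and helper names are invented for illustration.

// knownbits_narrow_sketch.cpp -- models the v2i64 -> v4i32 direction.
#include <cstdint>
#include <cstdio>

struct Known {
  uint64_t Zero = 0, One = 0; // set bit => that bit is known 0 / known 1
  Known intersectWith(Known O) const { return {Zero & O.Zero, One & O.One}; }
};

// Pull Width bits of known-ness out of K, starting at bit Offset.
static Known extractBits(Known K, unsigned Width, unsigned Offset) {
  uint64_t M = (Width == 64) ? ~0ull : ((1ull << Width) - 1);
  return {(K.Zero >> Offset) & M, (K.One >> Offset) & M};
}

int main() {
  // Wide i64 source element: the top byte of each 32-bit half is known zero.
  Known KnownSrc;
  KnownSrc.Zero = 0xFF000000FF000000ull;

  // Bitcast i64 -> 2 x i32 (little endian), both narrow elements demanded.
  // Start from "all bits known" and intersect the chunk for each demanded
  // element, mirroring the setAllBits()/intersectWith() loop in the patch.
  unsigned BitWidth = 32, SubScale = 2;
  Known Result{~0ull, ~0ull};
  for (unsigned i = 0; i != 2; ++i) {
    unsigned Offset = (i % SubScale) * BitWidth;
    Result = Result.intersectWith(extractBits(KnownSrc, BitWidth, Offset));
  }
  // Both i32 elements keep their top byte known zero: prints Zero=0xff000000.
  printf("Zero=%#010llx One=%#010llx\n",
         (unsigned long long)Result.Zero, (unsigned long long)Result.One);
  return 0;
}

The early `if (Known.isUnknown()) break;` in the patch is just a shortcut out of the same intersection loop once no common knowledge remains.
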
diff --git a/llvm/test/Transforms/Inline/bitcast-knownbits.ll b/llvm/test/Transforms/Inline/bitcast-knownbits.ll
new file mode 100644
index 0000000000000..07c677126c039
--- /dev/null
+++ b/llvm/test/Transforms/Inline/bitcast-knownbits.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=always-inline -S | FileCheck %s
+
+define <2 x i64> @vpx_lpf_horizontal_4_sse2(<2 x i64> %0) {
+; CHECK-LABEL: define <2 x i64> @vpx_lpf_horizontal_4_sse2(
+; CHECK-SAME: <2 x i64> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> zeroinitializer, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <8 x i16>
+; CHECK-NEXT:    [[TMP6:%.*]] = ashr <8 x i16> [[TMP5]], splat (i16 1)
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %2 = call fastcc <2 x i64> @_mm_unpackhi_epi8(<2 x i64> %0)
+  %3 = call fastcc <2 x i64> @_mm_srai_epi16(<2 x i64> %2)
+  ret <2 x i64> %3
+}
+
+define fastcc <2 x i64> @_mm_unpackhi_epi8(<2 x i64> %0) #0 {
+; CHECK-LABEL: define fastcc <2 x i64> @_mm_unpackhi_epi8(
+; CHECK-SAME: <2 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> zeroinitializer, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
+;
+  %2 = bitcast <2 x i64> %0 to <16 x i8>
+  %3 = shufflevector <16 x i8> %2, <16 x i8> zeroinitializer, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  %4 = bitcast <16 x i8> %3 to <2 x i64>
+  ret <2 x i64> %4
+}
+
+define fastcc <2 x i64> @_mm_srai_epi16(<2 x i64> %0) #0 {
+; CHECK-LABEL: define fastcc <2 x i64> @_mm_srai_epi16(
+; CHECK-SAME: <2 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <8 x i16>
+; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i16> [[TMP2]], splat (i16 1)
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %2 = bitcast <2 x i64> %0 to <8 x i16>
+  %3 = ashr <8 x i16> %2, splat (i16 1)
+  ret <2 x i64> zeroinitializer
+}
+
+attributes #0 = { alwaysinline }
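
The fact this test leans on (my reading of it, with invented names, not something the patch states): interleaving data bytes with zeroinitializer means every little-endian i16 lane of the result is a zero-extended byte, so the new bitcast handling can prove each lane's high byte is zero while the inlined body is simplified. A minimal standalone check of that underlying arithmetic:

// interleave_knownbits_demo.cpp -- models the _mm_unpackhi_epi8 test above.
#include <cassert>
#include <cstdint>

int main() {
  uint8_t data[8] = {0x80, 0x7F, 0x01, 0x02, 0x03, 0x04, 0x05, 0xFF};
  for (int i = 0; i != 8; ++i) {
    // unpackhi with zeroinitializer yields byte pairs (data[i], 0), which a
    // little-endian bitcast to <8 x i16> reads as a zero-extended byte.
    uint16_t lane = (uint16_t)data[i];
    assert((lane & 0xFF00u) == 0);          // high byte is known zero
    assert(((int16_t)lane >> 1) >= 0);      // so ashr by 1 stays non-negative
  }
  return 0;
}
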
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
index db56080a3ea2b..cc252ae53803b 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
@@ -3732,7 +3732,6 @@ define <4 x i64> @test_avx2_psrl_0() {
   ret <4 x i64> %16
 }
 
-; FIXME: Failure to peek through bitcasts to ensure psllq shift amount is within bounds.
 define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) {
 ; CHECK-LABEL: @PR125228(
 ; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i64> [[S:%.*]], splat (i64 63)
@@ -3741,7 +3740,8 @@ define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) {
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast <2 x i64> [[MASK]] to <16 x i8>
 ; CHECK-NEXT:    [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[CAST3:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64>
-; CHECK-NEXT:    [[SLL1:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V]], <2 x i64> [[CAST3]])
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[CAST3]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[SLL1:%.*]] = shl <2 x i64> [[V]], [[TMP2]]
 ; CHECK-NEXT:    [[SHUFP_UNCASTED:%.*]] = shufflevector <2 x i64> [[SLL0]], <2 x i64> [[SLL1]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    ret <2 x i64> [[SHUFP_UNCASTED]]
 ;
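
Why the FIXME could be dropped (an inference from the diff, not stated in it): the shift amount reaching @llvm.x86.sse2.psll.q is an element of (%s & 63) routed through a <16 x i8> byte shuffle and back. With the wide-to-narrow handling, computeKnownBits can follow that round trip and prove every bit above bit 5 of the amount is zero, so the amount is at most 63 and the intrinsic is equivalent to a plain shl. A tiny standalone check of that bound, names invented:

// psllq_bound_check.cpp -- the known-zero mask that justifies the shl fold.
#include <cassert>
#include <cstdint>

int main() {
  // After (s & 63), each i64 lane has KnownZero = ~63. Splitting the lane
  // into 8 little-endian bytes keeps that knowledge per byte:
  //   byte 0: bits 6-7 known zero; bytes 1-7: fully known zero.
  uint64_t KnownZero = ~0x3Full;
  for (unsigned Byte = 0; Byte != 8; ++Byte) {
    uint8_t ByteKnownZero = (KnownZero >> (8 * Byte)) & 0xFF;
    assert(Byte == 0 ? ByteKnownZero == 0xC0 : ByteKnownZero == 0xFF);
  }
  // Reassembling those bytes into an i64 shift amount keeps bits 6..63
  // known zero, so the largest possible amount is 63, strictly below 64.
  uint64_t MaxAmount = ~KnownZero;
  assert(MaxAmount == 63 && MaxAmount < 64);
  return 0;
}
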
diff --git a/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll b/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll
index 3e47e775e3a28..65b43df752f76 100644
--- a/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll
+++ b/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll
@@ -12,8 +12,7 @@ define <16 x i8> @knownbits_bitcast_masked_shift(<16 x i8> %arg1, <16 x i8> %arg
 ; CHECK-NEXT:    [[BITCAST4:%.*]] = bitcast <16 x i8> [[OR]] to <8 x i16>
 ; CHECK-NEXT:    [[SHL5:%.*]] = shl nuw <8 x i16> [[BITCAST4]], splat (i16 2)
 ; CHECK-NEXT:    [[BITCAST6:%.*]] = bitcast <8 x i16> [[SHL5]] to <16 x i8>
-; CHECK-NEXT:    [[AND7:%.*]] = and <16 x i8> [[BITCAST6]], splat (i8 -52)
-; CHECK-NEXT:    ret <16 x i8> [[AND7]]
+; CHECK-NEXT:    ret <16 x i8> [[BITCAST6]]
 ;
   %and = and <16 x i8> %arg1, splat (i8 3)
   %and3 = and <16 x i8> %arg2, splat (i8 48)
@@ -33,8 +32,7 @@ define <16 x i8> @knownbits_shuffle_masked_nibble_shift(<16 x i8> %arg) {
 ; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16>
 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4)
 ; CHECK-NEXT:    [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8>
-; CHECK-NEXT:    [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16)
-; CHECK-NEXT:    ret <16 x i8> [[AND3]]
+; CHECK-NEXT:    ret <16 x i8> [[BITCAST2]]
 ;
   %and = and <16 x i8> %arg, splat (i8 15)
   %shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32>
@@ -53,8 +51,7 @@ define <16 x i8> @knownbits_reverse_shuffle_masked_shift(<16 x i8> %arg) {
 ; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16>
 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4)
 ; CHECK-NEXT:    [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8>
-; CHECK-NEXT:    [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16)
-; CHECK-NEXT:    ret <16 x i8> [[AND3]]
+; CHECK-NEXT:    ret <16 x i8> [[BITCAST2]]
 ;
   %and = and <16 x i8> %arg, splat (i8 15)
   %shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32>
@@ -70,8 +67,7 @@ define <16 x i8> @knownbits_extract_bit(<8 x i16> %arg) {
 ; CHECK-SAME: <8 x i16> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <8 x i16> [[ARG]], splat (i16 15)
 ; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast <8 x i16> [[LSHR]] to <16 x i8>
-; CHECK-NEXT:    [[AND:%.*]] = and <16 x i8> [[BITCAST1]], splat (i8 1)
-; CHECK-NEXT:    ret <16 x i8> [[AND]]
+; CHECK-NEXT:    ret <16 x i8> [[BITCAST1]]
 ;
   %lshr = lshr <8 x i16> %arg, splat (i16 15)
   %bitcast1 = bitcast <8 x i16> %lshr to <16 x i8>
@@ -88,7 +84,8 @@ define { i32, i1 } @knownbits_popcount_add_with_overflow(<2 x i64> %arg1, <2 x i
 ; CHECK-NEXT:    [[CALL9:%.*]] = tail call range(i64 0, 65) <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[ARG2]])
 ; CHECK-NEXT:    [[BITCAST10:%.*]] = bitcast <2 x i64> [[CALL9]] to <4 x i32>
 ; CHECK-NEXT:    [[EXTRACTELEMENT11:%.*]] = extractelement <4 x i32> [[BITCAST10]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[EXTRACTELEMENT]], i32 [[EXTRACTELEMENT11]])
+; CHECK-NEXT:    [[CALL12:%.*]] = add nuw nsw i32 [[EXTRACTELEMENT]], [[EXTRACTELEMENT11]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 poison, i1 false }, i32 [[CALL12]], 0
 ; CHECK-NEXT:    ret { i32, i1 } [[TMP1]]
 ;
   %call = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %arg1)
@@ -110,11 +107,7 @@ define <16 x i8> @knownbits_shuffle_add_shift_v32i8(<16 x i8> %arg1, <8 x i16> %
 ; CHECK-NEXT:    [[BITCAST11:%.*]] = bitcast <8 x i16> [[SHL10]] to <16 x i8>
 ; CHECK-NEXT:    [[ADD12:%.*]] = add <16 x i8> [[BITCAST11]], [[BITCAST7]]
 ; CHECK-NEXT:    [[ADD14:%.*]] = add <16 x i8> [[ADD12]], [[ARG1]]
-; CHECK-NEXT:    [[BITCAST14:%.*]] = bitcast <16 x i8> [[ADD12]] to <8 x i16>
-; CHECK-NEXT:    [[SHL15:%.*]] = shl <8 x i16> [[BITCAST14]], splat (i16 8)
-; CHECK-NEXT:    [[BITCAST16:%.*]] = bitcast <8 x i16> [[SHL15]] to <16 x i8>
-; CHECK-NEXT:    [[ADD13:%.*]] = add <16 x i8> [[ADD14]], [[BITCAST16]]
-; CHECK-NEXT:    ret <16 x i8> [[ADD13]]
+; CHECK-NEXT:    ret <16 x i8> [[ADD14]]
 ;
   %shl6 = shl <8 x i16> %arg2, splat (i16 8)
   %bitcast7 = bitcast <8 x i16> %shl6 to <16 x i8>
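
The dropped `and ... splat` lines above are the point of the change: once known bits flow through both bitcast directions, those masks are provably no-ops. A standalone, exhaustive check of that claim for the nibble-shift pattern (file and variable names invented for this demo):

// nibble_shift_mask_demo.cpp -- verifies that after masking each byte with
// 0x0F, bitcasting little-endian byte pairs to i16, shifting left by 4, and
// bitcasting back, every result byte already has its low nibble clear, so a
// further "and" with 0xF0 (splat (i8 -16)) changes nothing.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned lo = 0; lo != 256; ++lo) {
    for (unsigned hi = 0; hi != 256; ++hi) {
      uint8_t b0 = lo & 0x0F, b1 = hi & 0x0F;        // and <16 x i8> ..., 15
      uint16_t lane = (uint16_t)(b0 | (b1 << 8));    // little-endian bitcast
      lane = (uint16_t)(lane << 4);                  // shl nuw <8 x i16>, 4
      uint8_t r0 = lane & 0xFF, r1 = lane >> 8;      // bitcast back to bytes
      assert((r0 & 0x0F) == 0 && (r1 & 0x0F) == 0);  // low nibbles known zero
      assert((r0 & 0xF0) == r0 && (r1 & 0xF0) == r1); // so the mask is a no-op
    }
  }
  return 0;
}
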