diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e33d111167c04..fa2a6758e98df 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1456,6 +1456,43 @@ InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
   return new ShuffleVectorInst(NewIntrinsic, Mask);
 }
 
+/// If all arguments of the intrinsic are reverses, try to pull the reverse
+/// after the intrinsic.
+Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) {
+  if (!isTriviallyVectorizable(II->getIntrinsicID()))
+    return nullptr;
+
+  // At least 1 operand must be a reverse with 1 use because we are creating 2
+  // instructions.
+  if (none_of(II->args(), [](Value *V) {
+        return match(V, m_OneUse(m_VecReverse(m_Value())));
+      }))
+    return nullptr;
+
+  Value *X;
+  Constant *C;
+  SmallVector<Value *> NewArgs;
+  for (Use &Arg : II->args()) {
+    if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
+                                           Arg.getOperandNo(), nullptr))
+      NewArgs.push_back(Arg);
+    else if (match(&Arg, m_VecReverse(m_Value(X))))
+      NewArgs.push_back(X);
+    else if (isSplatValue(Arg))
+      NewArgs.push_back(Arg);
+    else if (match(&Arg, m_ImmConstant(C)))
+      NewArgs.push_back(Builder.CreateVectorReverse(C));
+    else
+      return nullptr;
+  }
+
+  // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
+  Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
+  Instruction *NewIntrinsic = Builder.CreateIntrinsic(
+      II->getType(), II->getIntrinsicID(), NewArgs, FPI);
+  return Builder.CreateVectorReverse(NewIntrinsic);
+}
+
 /// Fold the following cases and accepts bswap and bitreverse intrinsics:
 ///       bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
 ///       bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
@@ -3867,6 +3904,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
     return Shuf;
 
+  if (Value *Reverse = foldReversedIntrinsicOperands(II))
+    return replaceInstUsesWith(*II, Reverse);
+
   // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
   // context, so it is handled in visitCallBase and we should trigger it.
   return visitCallBase(*II);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 8c9de862fe8f2..a8645521fe053 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -148,6 +148,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *foldItoFPtoI(CastInst &FI);
   Instruction *visitSelectInst(SelectInst &SI);
   Instruction *foldShuffledIntrinsicOperands(IntrinsicInst *II);
+  Value *foldReversedIntrinsicOperands(IntrinsicInst *II);
   Instruction *visitCallInst(CallInst &CI);
   Instruction *visitInvokeInst(InvokeInst &II);
   Instruction *visitCallBrInst(CallBrInst &CBI);
diff --git a/llvm/test/Transforms/InstCombine/vector-reverse.ll b/llvm/test/Transforms/InstCombine/vector-reverse.ll
index c9c68d2241b34..9eb941d7b1c84 100644
--- a/llvm/test/Transforms/InstCombine/vector-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reverse.ll
@@ -17,6 +17,18 @@ define <vscale x 4 x i32> @binop_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
   ret <vscale x 4 x i32> %add
 }
 
+define <vscale x 4 x i32> @binop_intrinsic_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @binop_intrinsic_reverse(
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[A_REV:%.*]], <vscale x 4 x i32> [[B_REV:%.*]])
+; CHECK-NEXT:    [[SMAX:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[SMAX]]
+;
+  %a.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  %b.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
+  %smax = call <vscale x 4 x i32> @llvm.smax(<vscale x 4 x i32> %a.rev, <vscale x 4 x i32> %b.rev)
+  ret <vscale x 4 x i32> %smax
+}
+
 ; %a.rev has multiple uses
 define <vscale x 4 x i32> @binop_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @binop_reverse_1(
@@ -33,6 +45,22 @@ define <vscale x 4 x i32> @binop_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
   ret <vscale x 4 x i32> %add
 }
 
+; %a.rev has multiple uses
+define <vscale x 4 x i32> @binop_intrinsic_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @binop_intrinsic_reverse_1(
+; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
+; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[B_REV]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[B1:%.*]])
+; CHECK-NEXT:    [[SMAX:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[SMAX]]
+;
+  %a.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  %b.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
+  call void @use_nxv4i32(<vscale x 4 x i32> %a.rev)
+  %smax = call <vscale x 4 x i32> @llvm.smax(<vscale x 4 x i32> %a.rev, <vscale x 4 x i32> %b.rev)
+  ret <vscale x 4 x i32> %smax
+}
+
 ; %b.rev has multiple uses
 define <vscale x 4 x i32> @binop_reverse_2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @binop_reverse_2(
@@ -67,6 +95,24 @@ define <vscale x 4 x i32> @binop_reverse_3(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
   ret <vscale x 4 x i32> %add
 }
 
+; %a.rev and %b.rev have multiple uses
+define <vscale x 4 x i32> @binop_intrinsic_reverse_3(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @binop_intrinsic_reverse_3(
+; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
+; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
+; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[A_REV]])
+; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[B_REV]])
+; CHECK-NEXT:    [[SMAX:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[A_REV]], <vscale x 4 x i32> [[B_REV]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[SMAX]]
+;
+  %a.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  %b.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
+  call void @use_nxv4i32(<vscale x 4 x i32> %a.rev)
+  call void @use_nxv4i32(<vscale x 4 x i32> %b.rev)
+  %smax = call <vscale x 4 x i32> @llvm.smax(<vscale x 4 x i32> %a.rev, <vscale x 4 x i32> %b.rev)
+  ret <vscale x 4 x i32> %smax
+}
+
 ; %a.rev used as both operands
 define <vscale x 4 x i32> @binop_reverse_4(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: @binop_reverse_4(
@@ -184,6 +230,17 @@ define <vscale x 4 x float> @unop_reverse_1(<vscale x 4 x float> %a) {
   ret <vscale x 4 x float> %neg
 }
 
+define <vscale x 4 x float> @unop_intrinsic_reverse(<vscale x 4 x float> %a) {
+; CHECK-LABEL: @unop_intrinsic_reverse(
+; CHECK-NEXT:    [[NEG:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]])
+; CHECK-NEXT:    [[ABS:%.*]] = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[NEG]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[ABS]]
+;
+  %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+  %abs = call <vscale x 4 x float> @llvm.fabs(<vscale x 4 x float> %a.rev)
+  ret <vscale x 4 x float> %abs
+}
+
 define <vscale x 4 x i1> @icmp_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @icmp_reverse(
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
@@ -629,6 +686,18 @@ define <vscale x 4 x float> @reverse_binop_reverse(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
   ret <vscale x 4 x float> %add.rev
 }
 
+define <vscale x 4 x float> @reverse_binop_intrinsic_reverse(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: @reverse_binop_intrinsic_reverse(
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]], <vscale x 4 x float> [[B_REV:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[ADD]]
+;
+  %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+  %b.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %b)
+  %maxnum = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> %a.rev, <vscale x 4 x float> %b.rev)
+  %maxnum.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %maxnum)
+  ret <vscale x 4 x float> %maxnum.rev
+}
+
 define <vscale x 4 x float> @reverse_binop_reverse_splat_RHS(<vscale x 4 x float> %a, float %b) {
 ; CHECK-LABEL: @reverse_binop_reverse_splat_RHS(
 ; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
@@ -659,6 +728,63 @@ define <vscale x 4 x float> @reverse_binop_reverse_splat_LHS(<vscale x 4 x float> %a, float %b) {
   ret <vscale x 4 x float> %div.rev
 }
 
+define <vscale x 4 x float> @reverse_binop_reverse_intrinsic_splat_RHS(<vscale x 4 x float> %a, float %b) {
+; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_RHS(
+; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
+; CHECK-NEXT:    [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[B_INSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[MAXNUM:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]], <vscale x 4 x float> [[B_SPLAT]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[MAXNUM]]
+;
+  %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+  %b.insert = insertelement <vscale x 4 x float> poison, float %b, i32 0
+  %b.splat = shufflevector <vscale x 4 x float> %b.insert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+  %maxnum = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> %a.rev, <vscale x 4 x float> %b.splat)
+  %maxnum.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %maxnum)
+  ret <vscale x 4 x float> %maxnum.rev
+}
+
+define <vscale x 4 x float> @reverse_binop_reverse_intrinsic_splat_LHS(<vscale x 4 x float> %a, float %b) {
+; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_LHS(
+; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
+; CHECK-NEXT:    [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[B_INSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[MAXNUM:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[B_SPLAT]], <vscale x 4 x float> [[A_REV:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[MAXNUM]]
+;
+  %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+  %b.insert = insertelement <vscale x 4 x float> poison, float %b, i32 0
+  %b.splat = shufflevector <vscale x 4 x float> %b.insert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+  %maxnum = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> %b.splat, <vscale x 4 x float> %a.rev)
+  %maxnum.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %maxnum)
+  ret <vscale x 4 x float> %maxnum.rev
+}
+
+; Negative test: Make sure that splats with poison aren't considered splats
+define <4 x float> @reverse_binop_reverse_intrinsic_splat_with_poison(<4 x float> %a) {
+; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_with_poison(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> )
+; CHECK-NEXT:    [[MAXNUM:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[MAXNUM_REV:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM]])
+; CHECK-NEXT:    ret <4 x float> [[MAXNUM_REV]]
+;
+  %a.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %a)
+  %maxnum = call <4 x float> @llvm.maxnum.v4f32(<4 x float> , <4 x float> %a.rev)
+  %maxnum.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %maxnum)
+  ret <4 x float> %maxnum.rev
+}
+
+define <4 x float> @reverse_binop_reverse_intrinsic_constant_RHS(<4 x float> %a) {
+; CHECK-LABEL: @reverse_binop_reverse_intrinsic_constant_RHS(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> )
+; CHECK-NEXT:    [[MAXNUM:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[MAXNUM_REV:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM]])
+; CHECK-NEXT:    ret <4 x float> [[MAXNUM_REV]]
+;
+  %a.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %a)
+  %maxnum = call <4 x float> @llvm.maxnum.v4f32(<4 x float> , <4 x float> %a.rev)
+  %maxnum.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %maxnum)
+  ret <4 x float> %maxnum.rev
+}
+
 define <vscale x 4 x i1> @reverse_fcmp_reverse(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: @reverse_fcmp_reverse(
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast olt <vscale x 4 x float> [[A:%.*]], [[B:%.*]]
@@ -695,6 +821,27 @@ define <vscale x 4 x float> @reverse_unop_reverse(<vscale x 4 x float> %a) {
   ret <vscale x 4 x float> %neg.rev
 }
 
+define <vscale x 4 x float> @reverse_unop_intrinsic_reverse(<vscale x 4 x float> %a) {
+; CHECK-LABEL: @reverse_unop_intrinsic_reverse(
+; CHECK-NEXT:    [[ABS:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[ABS]]
+;
+  %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+  %abs = call <vscale x 4 x float> @llvm.fabs(<vscale x 4 x float> %a.rev)
+  %abs.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %abs)
+  ret <vscale x 4 x float> %abs.rev
+}
+
+define <vscale x 4 x float> @reverse_unop_intrinsic_reverse_scalar_arg(<vscale x 4 x float> %a, i32 %power) {
+; CHECK-LABEL: @reverse_unop_intrinsic_reverse_scalar_arg(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> [[A:%.*]], i32 [[POWER:%.*]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+  %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32(<vscale x 4 x float> %a.rev, i32 %power)
+  %powi.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %powi)
+  ret <vscale x 4 x float> %powi.rev
+}
 
 declare void @use_nxv4i1(<vscale x 4 x i1>)
 declare void @use_nxv4i32(<vscale x 4 x i32>)
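
Note (illustration only, not part of the patch): the new fold can be exercised with a small standalone input run through `opt -passes=instcombine -S`. The function name @smax_of_reverse_and_splat and the post-fold value names below are made up for this sketch; the expected result follows from the reverse-operand and splat-operand cases handled in foldReversedIntrinsicOperands and mirrors the tests above.

; Input: an intrinsic whose operands are a reversed vector and a splat.
define <vscale x 4 x i32> @smax_of_reverse_and_splat(<vscale x 4 x i32> %a, i32 %b) {
  %a.rev = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
  %b.ins = insertelement <vscale x 4 x i32> poison, i32 %b, i64 0
  %b.splat = shufflevector <vscale x 4 x i32> %b.ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %smax = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> %a.rev, <vscale x 4 x i32> %b.splat)
  ret <vscale x 4 x i32> %smax
}
; Expected shape after the fold: the splat operand is kept as-is, %a is used
; unreversed, and a single reverse is emitted on the intrinsic's result:
;   %smax = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b.splat)
;   %rev = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %smax)
;   ret <vscale x 4 x i32> %rev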