From 1ae4ddc06ca571c0630016e117eae4e00ae88e6f Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 30 Jun 2025 14:40:23 +0100 Subject: [PATCH 1/4] Precommit tests --- .../Transforms/InstCombine/vector-reverse.ll | 145 ++++++++++++++++++ 1 file changed, 145 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/vector-reverse.ll b/llvm/test/Transforms/InstCombine/vector-reverse.ll index c9c68d2241b34..d1d8c259e7c8e 100644 --- a/llvm/test/Transforms/InstCombine/vector-reverse.ll +++ b/llvm/test/Transforms/InstCombine/vector-reverse.ll @@ -17,6 +17,19 @@ define @binop_reverse( %a, %add } +define @binop_intrinsic_reverse( %a, %b) { +; CHECK-LABEL: @binop_intrinsic_reverse( +; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.vector.reverse.nxv4i32( [[A:%.*]]) +; CHECK-NEXT: [[B_REV:%.*]] = tail call @llvm.vector.reverse.nxv4i32( [[B:%.*]]) +; CHECK-NEXT: [[ADD:%.*]] = call @llvm.smax.nxv4i32( [[A_REV]], [[B_REV]]) +; CHECK-NEXT: ret [[ADD]] +; + %a.rev = tail call @llvm.vector.reverse.nxv4i32( %a) + %b.rev = tail call @llvm.vector.reverse.nxv4i32( %b) + %smax = call @llvm.smax( %a.rev, %b.rev) + ret %smax +} + ; %a.rev has multiple uses define @binop_reverse_1( %a, %b) { ; CHECK-LABEL: @binop_reverse_1( @@ -33,6 +46,22 @@ define @binop_reverse_1( %a, %add } +; %a.rev has multiple uses +define @binop_intrinsic_reverse_1( %a, %b) { +; CHECK-LABEL: @binop_intrinsic_reverse_1( +; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.vector.reverse.nxv4i32( [[A:%.*]]) +; CHECK-NEXT: [[B_REV:%.*]] = tail call @llvm.vector.reverse.nxv4i32( [[B:%.*]]) +; CHECK-NEXT: call void @use_nxv4i32( [[A_REV]]) +; CHECK-NEXT: [[SMAX:%.*]] = call @llvm.smax.nxv4i32( [[A_REV]], [[B_REV]]) +; CHECK-NEXT: ret [[SMAX]] +; + %a.rev = tail call @llvm.vector.reverse.nxv4i32( %a) + %b.rev = tail call @llvm.vector.reverse.nxv4i32( %b) + call void @use_nxv4i32( %a.rev) + %smax = call @llvm.smax( %a.rev, %b.rev) + ret %smax +} + ; %b.rev has multiple uses define @binop_reverse_2( %a, %b) { ; CHECK-LABEL: 
@binop_reverse_2( @@ -67,6 +96,24 @@ define @binop_reverse_3( %a, %add } +; %a.rev and %b.rev have multiple uses +define @binop_intrinsic_reverse_3( %a, %b) { +; CHECK-LABEL: @binop_intrinsic_reverse_3( +; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.vector.reverse.nxv4i32( [[A:%.*]]) +; CHECK-NEXT: [[B_REV:%.*]] = tail call @llvm.vector.reverse.nxv4i32( [[B:%.*]]) +; CHECK-NEXT: call void @use_nxv4i32( [[A_REV]]) +; CHECK-NEXT: call void @use_nxv4i32( [[B_REV]]) +; CHECK-NEXT: [[SMAX:%.*]] = call @llvm.smax.nxv4i32( [[A_REV]], [[B_REV]]) +; CHECK-NEXT: ret [[SMAX]] +; + %a.rev = tail call @llvm.vector.reverse.nxv4i32( %a) + %b.rev = tail call @llvm.vector.reverse.nxv4i32( %b) + call void @use_nxv4i32( %a.rev) + call void @use_nxv4i32( %b.rev) + %smax = call @llvm.smax( %a.rev, %b.rev) + ret %smax +} + ; %a.rev used as both operands define @binop_reverse_4( %a) { ; CHECK-LABEL: @binop_reverse_4( @@ -184,6 +231,17 @@ define @unop_reverse_1( %a) { ret %neg } +define @unop_intrinsic_reverse( %a) { +; CHECK-LABEL: @unop_intrinsic_reverse( +; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[A:%.*]]) +; CHECK-NEXT: [[NEG:%.*]] = call @llvm.fabs.nxv4f32( [[A_REV]]) +; CHECK-NEXT: ret [[NEG]] +; + %a.rev = tail call @llvm.vector.reverse.nxv4f32( %a) + %abs = call @llvm.fabs( %a.rev) + ret %abs +} + define @icmp_reverse( %a, %b) { ; CHECK-LABEL: @icmp_reverse( ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq [[A:%.*]], [[B:%.*]] @@ -629,6 +687,21 @@ define @reverse_binop_reverse( %a, %add.rev } +define @reverse_binop_intrinsic_reverse( %a, %b) { +; CHECK-LABEL: @reverse_binop_intrinsic_reverse( +; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[A:%.*]]) +; CHECK-NEXT: [[B_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[B:%.*]]) +; CHECK-NEXT: [[ADD:%.*]] = call @llvm.maxnum.nxv4f32( [[A_REV]], [[B_REV]]) +; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[ADD]]) +; CHECK-NEXT: ret [[MAXNUM_REV]] +; + %a.rev = 
tail call @llvm.vector.reverse.nxv4f32( %a) + %b.rev = tail call @llvm.vector.reverse.nxv4f32( %b) + %maxnum = call @llvm.maxnum.nxv4f32( %a.rev, %b.rev) + %maxnum.rev = tail call @llvm.vector.reverse.nxv4f32( %maxnum) + ret %maxnum.rev +} + define @reverse_binop_reverse_splat_RHS( %a, float %b) { ; CHECK-LABEL: @reverse_binop_reverse_splat_RHS( ; CHECK-NEXT: [[B_INSERT:%.*]] = insertelement poison, float [[B:%.*]], i64 0 @@ -659,6 +732,53 @@ define @reverse_binop_reverse_splat_LHS( %div.rev } +define @reverse_binop_reverse_intrinsic_splat_RHS( %a, float %b) { +; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_RHS( +; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[A:%.*]]) +; CHECK-NEXT: [[B_INSERT:%.*]] = insertelement poison, float [[B:%.*]], i64 0 +; CHECK-NEXT: [[B_SPLAT:%.*]] = shufflevector [[B_INSERT]], poison, zeroinitializer +; CHECK-NEXT: [[MAXNUM:%.*]] = call @llvm.maxnum.nxv4f32( [[A_REV]], [[B_SPLAT]]) +; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[MAXNUM]]) +; CHECK-NEXT: ret [[MAXNUM_REV]] +; + %a.rev = tail call @llvm.vector.reverse.nxv4f32( %a) + %b.insert = insertelement poison, float %b, i32 0 + %b.splat = shufflevector %b.insert, poison, zeroinitializer + %maxnum = call @llvm.maxnum.nxv4f32( %a.rev, %b.splat) + %maxnum.rev = tail call @llvm.vector.reverse.nxv4f32( %maxnum) + ret %maxnum.rev +} + +define @reverse_binop_reverse_intrinsic_splat_LHS( %a, float %b) { +; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_LHS( +; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[A:%.*]]) +; CHECK-NEXT: [[B_INSERT:%.*]] = insertelement poison, float [[B:%.*]], i64 0 +; CHECK-NEXT: [[B_SPLAT:%.*]] = shufflevector [[B_INSERT]], poison, zeroinitializer +; CHECK-NEXT: [[MAXNUM:%.*]] = call @llvm.maxnum.nxv4f32( [[B_SPLAT]], [[A_REV]]) +; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[MAXNUM]]) +; CHECK-NEXT: ret [[MAXNUM_REV]] +; + %a.rev = tail call 
@llvm.vector.reverse.nxv4f32( %a) + %b.insert = insertelement poison, float %b, i32 0 + %b.splat = shufflevector %b.insert, poison, zeroinitializer + %maxnum = call @llvm.maxnum.nxv4f32( %b.splat, %a.rev) + %maxnum.rev = tail call @llvm.vector.reverse.nxv4f32( %maxnum) + ret %maxnum.rev +} + +define <4 x float> @reverse_binop_reverse_intrinsic_constant_RHS(<4 x float> %a) { +; CHECK-LABEL: @reverse_binop_reverse_intrinsic_constant_RHS( +; CHECK-NEXT: [[MAXNUM_REV1:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM1:%.*]]) +; CHECK-NEXT: [[MAXNUM:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[MAXNUM_REV1]], <4 x float> ) +; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM]]) +; CHECK-NEXT: ret <4 x float> [[MAXNUM_REV]] +; + %a.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %a) + %maxnum = call <4 x float> @llvm.maxnum.v4f32(<4 x float> , <4 x float> %a.rev) + %maxnum.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %maxnum) + ret <4 x float> %maxnum.rev +} + define @reverse_fcmp_reverse( %a, %b) { ; CHECK-LABEL: @reverse_fcmp_reverse( ; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast olt [[A:%.*]], [[B:%.*]] @@ -695,6 +815,31 @@ define @reverse_unop_reverse( %a) { ret %neg.rev } +define @reverse_unop_intrinsic_reverse( %a) { +; CHECK-LABEL: @reverse_unop_intrinsic_reverse( +; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[A:%.*]]) +; CHECK-NEXT: [[ABS:%.*]] = call @llvm.fabs.nxv4f32( [[A_REV]]) +; CHECK-NEXT: [[ABS_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[ABS]]) +; CHECK-NEXT: ret [[ABS_REV]] +; + %a.rev = tail call @llvm.vector.reverse.nxv4f32( %a) + %abs = call @llvm.fabs( %a.rev) + %abs.rev = tail call @llvm.vector.reverse.nxv4f32( %abs) + ret %abs.rev +} + +define @reverse_unop_intrinsic_reverse_scalar_arg( %a, i32 %power) { +; CHECK-LABEL: @reverse_unop_intrinsic_reverse_scalar_arg( +; CHECK-NEXT: [[A:%.*]] = tail call 
@llvm.vector.reverse.nxv4f32( [[A1:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.powi.nxv4f32.i32( [[A]], i32 [[POWER:%.*]]) +; CHECK-NEXT: [[POWI_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[TMP1]]) +; CHECK-NEXT: ret [[POWI_REV]] +; + %a.rev = tail call @llvm.vector.reverse.nxv4f32( %a) + %powi = call @llvm.powi.nxv4f32( %a.rev, i32 %power) + %powi.rev = tail call @llvm.vector.reverse.nxv4f32( %powi) + ret %powi.rev +} declare void @use_nxv4i1() declare void @use_nxv4i32() From e9ab4347982f813989237353d3c0b4bcf28cb455 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 30 Jun 2025 17:27:41 +0100 Subject: [PATCH 2/4] [InstCombine] Pull vector reverse through intrinsics This is the intrinsic version of #146349, and handles fabs as well as other intrinsics. It's largely a copy of InstCombinerImpl::foldShuffledIntrinsicOperands but a bit simpler since we don't need to find a common mask. Creating a separate function seems to be cleaner than trying to shoehorn it into the existing one. --- .../InstCombine/InstCombineCalls.cpp | 42 +++++++++++++++ .../InstCombine/InstCombineInternal.h | 1 + .../Transforms/InstCombine/vector-reverse.ll | 54 ++++++++----------- 3 files changed, 64 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index e33d111167c04..9d07a4531d0a3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1456,6 +1456,45 @@ InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) { return new ShuffleVectorInst(NewIntrinsic, Mask); } +/// If all arguments of the intrinsic are reverses, try to pull the reverse +/// after the intrinsic. 
+Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) { + if (!isTriviallyVectorizable(II->getIntrinsicID()) || + !II->getCalledFunction()->isSpeculatable()) + return nullptr; + + // At least 1 operand must be a reverse with 1 use because we are creating 2 + // instructions. + if (none_of(II->args(), [](Value *V) { + return match(V, m_OneUse(m_VecReverse(m_Value()))); + })) + return nullptr; + + Value *X; + Constant *C; + SmallVector<Value *> NewArgs; + for (Use &Arg : II->args()) { + if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), + Arg.getOperandNo(), nullptr)) + NewArgs.push_back(Arg); + else if (match(&Arg, m_VecReverse(m_Value(X)))) + NewArgs.push_back(X); + else if (Value *Splat = getSplatValue(Arg)) + NewArgs.push_back(Builder.CreateVectorSplat( + cast<VectorType>(Arg->getType())->getElementCount(), Splat)); + else if (match(&Arg, m_ImmConstant(C))) + NewArgs.push_back(Builder.CreateVectorReverse(C)); + else + return nullptr; + } + + // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...) + Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr; + Instruction *NewIntrinsic = Builder.CreateIntrinsic( + II->getType(), II->getIntrinsicID(), NewArgs, FPI); + return Builder.CreateVectorReverse(NewIntrinsic); +} + /// Fold the following cases and accepts bswap and bitreverse intrinsics: /// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y)) /// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse) @@ -3867,6 +3906,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (Instruction *Shuf = foldShuffledIntrinsicOperands(II)) return Shuf; + if (Value *Reverse = foldReversedIntrinsicOperands(II)) + return replaceInstUsesWith(*II, Reverse); + // Some intrinsics (like experimental_gc_statepoint) can be used in invoke // context, so it is handled in visitCallBase and we should trigger it. 
return visitCallBase(*II); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 8c9de862fe8f2..a8645521fe053 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -148,6 +148,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final Instruction *foldItoFPtoI(CastInst &FI); Instruction *visitSelectInst(SelectInst &SI); Instruction *foldShuffledIntrinsicOperands(IntrinsicInst *II); + Value *foldReversedIntrinsicOperands(IntrinsicInst *II); Instruction *visitCallInst(CallInst &CI); Instruction *visitInvokeInst(InvokeInst &II); Instruction *visitCallBrInst(CallBrInst &CBI); diff --git a/llvm/test/Transforms/InstCombine/vector-reverse.ll b/llvm/test/Transforms/InstCombine/vector-reverse.ll index d1d8c259e7c8e..cbf6b37692cac 100644 --- a/llvm/test/Transforms/InstCombine/vector-reverse.ll +++ b/llvm/test/Transforms/InstCombine/vector-reverse.ll @@ -19,10 +19,9 @@ define @binop_reverse( %a, @binop_intrinsic_reverse( %a, %b) { ; CHECK-LABEL: @binop_intrinsic_reverse( -; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.vector.reverse.nxv4i32( [[A:%.*]]) -; CHECK-NEXT: [[B_REV:%.*]] = tail call @llvm.vector.reverse.nxv4i32( [[B:%.*]]) -; CHECK-NEXT: [[ADD:%.*]] = call @llvm.smax.nxv4i32( [[A_REV]], [[B_REV]]) -; CHECK-NEXT: ret [[ADD]] +; CHECK-NEXT: [[ADD:%.*]] = call @llvm.smax.nxv4i32( [[A_REV:%.*]], [[B_REV:%.*]]) +; CHECK-NEXT: [[SMAX:%.*]] = call @llvm.vector.reverse.nxv4i32( [[ADD]]) +; CHECK-NEXT: ret [[SMAX]] ; %a.rev = tail call @llvm.vector.reverse.nxv4i32( %a) %b.rev = tail call @llvm.vector.reverse.nxv4i32( %b) @@ -49,10 +48,10 @@ define @binop_reverse_1( %a, @binop_intrinsic_reverse_1( %a, %b) { ; CHECK-LABEL: @binop_intrinsic_reverse_1( -; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.vector.reverse.nxv4i32( [[A:%.*]]) ; CHECK-NEXT: [[B_REV:%.*]] = tail call @llvm.vector.reverse.nxv4i32( [[B:%.*]]) -; CHECK-NEXT: call 
void @use_nxv4i32( [[A_REV]]) -; CHECK-NEXT: [[SMAX:%.*]] = call @llvm.smax.nxv4i32( [[A_REV]], [[B_REV]]) +; CHECK-NEXT: call void @use_nxv4i32( [[B_REV]]) +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.smax.nxv4i32( [[B]], [[B1:%.*]]) +; CHECK-NEXT: [[SMAX:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP1]]) ; CHECK-NEXT: ret [[SMAX]] ; %a.rev = tail call @llvm.vector.reverse.nxv4i32( %a) @@ -233,9 +232,9 @@ define @unop_reverse_1( %a) { define @unop_intrinsic_reverse( %a) { ; CHECK-LABEL: @unop_intrinsic_reverse( -; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[A:%.*]]) -; CHECK-NEXT: [[NEG:%.*]] = call @llvm.fabs.nxv4f32( [[A_REV]]) -; CHECK-NEXT: ret [[NEG]] +; CHECK-NEXT: [[NEG:%.*]] = call @llvm.fabs.nxv4f32( [[A_REV:%.*]]) +; CHECK-NEXT: [[ABS:%.*]] = call @llvm.vector.reverse.nxv4f32( [[NEG]]) +; CHECK-NEXT: ret [[ABS]] ; %a.rev = tail call @llvm.vector.reverse.nxv4f32( %a) %abs = call @llvm.fabs( %a.rev) @@ -689,11 +688,8 @@ define @reverse_binop_reverse( %a, @reverse_binop_intrinsic_reverse( %a, %b) { ; CHECK-LABEL: @reverse_binop_intrinsic_reverse( -; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[A:%.*]]) -; CHECK-NEXT: [[B_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[B:%.*]]) -; CHECK-NEXT: [[ADD:%.*]] = call @llvm.maxnum.nxv4f32( [[A_REV]], [[B_REV]]) -; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[ADD]]) -; CHECK-NEXT: ret [[MAXNUM_REV]] +; CHECK-NEXT: [[ADD:%.*]] = call @llvm.maxnum.nxv4f32( [[A_REV:%.*]], [[B_REV:%.*]]) +; CHECK-NEXT: ret [[ADD]] ; %a.rev = tail call @llvm.vector.reverse.nxv4f32( %a) %b.rev = tail call @llvm.vector.reverse.nxv4f32( %b) @@ -734,12 +730,10 @@ define @reverse_binop_reverse_splat_LHS( @reverse_binop_reverse_intrinsic_splat_RHS( %a, float %b) { ; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_RHS( -; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[A:%.*]]) ; CHECK-NEXT: [[B_INSERT:%.*]] = insertelement poison, 
float [[B:%.*]], i64 0 ; CHECK-NEXT: [[B_SPLAT:%.*]] = shufflevector [[B_INSERT]], poison, zeroinitializer -; CHECK-NEXT: [[MAXNUM:%.*]] = call @llvm.maxnum.nxv4f32( [[A_REV]], [[B_SPLAT]]) -; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[MAXNUM]]) -; CHECK-NEXT: ret [[MAXNUM_REV]] +; CHECK-NEXT: [[MAXNUM:%.*]] = call @llvm.maxnum.nxv4f32( [[A_REV:%.*]], [[B_SPLAT]]) +; CHECK-NEXT: ret [[MAXNUM]] ; %a.rev = tail call @llvm.vector.reverse.nxv4f32( %a) %b.insert = insertelement poison, float %b, i32 0 @@ -751,12 +745,10 @@ define @reverse_binop_reverse_intrinsic_splat_RHS( @reverse_binop_reverse_intrinsic_splat_LHS( %a, float %b) { ; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_LHS( -; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[A:%.*]]) ; CHECK-NEXT: [[B_INSERT:%.*]] = insertelement poison, float [[B:%.*]], i64 0 ; CHECK-NEXT: [[B_SPLAT:%.*]] = shufflevector [[B_INSERT]], poison, zeroinitializer -; CHECK-NEXT: [[MAXNUM:%.*]] = call @llvm.maxnum.nxv4f32( [[B_SPLAT]], [[A_REV]]) -; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[MAXNUM]]) -; CHECK-NEXT: ret [[MAXNUM_REV]] +; CHECK-NEXT: [[MAXNUM:%.*]] = call @llvm.maxnum.nxv4f32( [[B_SPLAT]], [[A_REV:%.*]]) +; CHECK-NEXT: ret [[MAXNUM]] ; %a.rev = tail call @llvm.vector.reverse.nxv4f32( %a) %b.insert = insertelement poison, float %b, i32 0 @@ -768,8 +760,8 @@ define @reverse_binop_reverse_intrinsic_splat_LHS( @reverse_binop_reverse_intrinsic_constant_RHS(<4 x float> %a) { ; CHECK-LABEL: @reverse_binop_reverse_intrinsic_constant_RHS( -; CHECK-NEXT: [[MAXNUM_REV1:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM1:%.*]]) -; CHECK-NEXT: [[MAXNUM:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[MAXNUM_REV1]], <4 x float> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> ) +; CHECK-NEXT: [[MAXNUM:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x 
float> poison, <4 x i32> ; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM]]) ; CHECK-NEXT: ret <4 x float> [[MAXNUM_REV]] ; @@ -817,10 +809,8 @@ define @reverse_unop_reverse( %a) { define @reverse_unop_intrinsic_reverse( %a) { ; CHECK-LABEL: @reverse_unop_intrinsic_reverse( -; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[A:%.*]]) -; CHECK-NEXT: [[ABS:%.*]] = call @llvm.fabs.nxv4f32( [[A_REV]]) -; CHECK-NEXT: [[ABS_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[ABS]]) -; CHECK-NEXT: ret [[ABS_REV]] +; CHECK-NEXT: [[ABS:%.*]] = call @llvm.fabs.nxv4f32( [[A_REV:%.*]]) +; CHECK-NEXT: ret [[ABS]] ; %a.rev = tail call @llvm.vector.reverse.nxv4f32( %a) %abs = call @llvm.fabs( %a.rev) @@ -830,10 +820,8 @@ define @reverse_unop_intrinsic_reverse( define @reverse_unop_intrinsic_reverse_scalar_arg( %a, i32 %power) { ; CHECK-LABEL: @reverse_unop_intrinsic_reverse_scalar_arg( -; CHECK-NEXT: [[A:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[A1:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.powi.nxv4f32.i32( [[A]], i32 [[POWER:%.*]]) -; CHECK-NEXT: [[POWI_REV:%.*]] = tail call @llvm.vector.reverse.nxv4f32( [[TMP1]]) -; CHECK-NEXT: ret [[POWI_REV]] +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.powi.nxv4f32.i32( [[A:%.*]], i32 [[POWER:%.*]]) +; CHECK-NEXT: ret [[TMP1]] ; %a.rev = tail call @llvm.vector.reverse.nxv4f32( %a) %powi = call @llvm.powi.nxv4f32( %a.rev, i32 %power) From 0b093262272dc809fee821116241b07265e47320 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 1 Jul 2025 10:59:58 +0100 Subject: [PATCH 3/4] Remove isSpeculatable check --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 9d07a4531d0a3..7dc7244c3e8aa 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1459,8 +1459,7 @@ InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) { /// If all arguments of the intrinsic are reverses, try to pull the reverse /// after the intrinsic. Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) { - if (!isTriviallyVectorizable(II->getIntrinsicID()) || - !II->getCalledFunction()->isSpeculatable()) + if (!isTriviallyVectorizable(II->getIntrinsicID())) return nullptr; // At least 1 operand must be a reverse with 1 use because we are creating 2 From e0acdf38c5c3dcc021fe1a6ffe34115fb02bc321 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 1 Jul 2025 11:28:15 +0100 Subject: [PATCH 4/4] Don't create new splats, use isSplatValue --- .../Transforms/InstCombine/InstCombineCalls.cpp | 5 ++--- llvm/test/Transforms/InstCombine/vector-reverse.ll | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 7dc7244c3e8aa..fa2a6758e98df 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1478,9 +1478,8 @@ Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) { NewArgs.push_back(Arg); else if (match(&Arg, m_VecReverse(m_Value(X)))) NewArgs.push_back(X); - else if (Value *Splat = getSplatValue(Arg)) - NewArgs.push_back(Builder.CreateVectorSplat( - cast<VectorType>(Arg->getType())->getElementCount(), Splat)); + else if (isSplatValue(Arg)) + NewArgs.push_back(Arg); else if (match(&Arg, m_ImmConstant(C))) NewArgs.push_back(Builder.CreateVectorReverse(C)); else diff --git a/llvm/test/Transforms/InstCombine/vector-reverse.ll b/llvm/test/Transforms/InstCombine/vector-reverse.ll index cbf6b37692cac..9eb941d7b1c84 100644 --- a/llvm/test/Transforms/InstCombine/vector-reverse.ll +++ b/llvm/test/Transforms/InstCombine/vector-reverse.ll @@ -758,6 
+758,20 @@ define @reverse_binop_reverse_intrinsic_splat_LHS( %maxnum.rev } +; Negative test: Make sure that splats with poison aren't considered splats +define <4 x float> @reverse_binop_reverse_intrinsic_splat_with_poison(<4 x float> %a) { +; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_with_poison( +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> ) +; CHECK-NEXT: [[MAXNUM:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM]]) +; CHECK-NEXT: ret <4 x float> [[MAXNUM_REV]] +; + %a.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %a) + %maxnum = call <4 x float> @llvm.maxnum.v4f32(<4 x float> , <4 x float> %a.rev) + %maxnum.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %maxnum) + ret <4 x float> %maxnum.rev +} + define <4 x float> @reverse_binop_reverse_intrinsic_constant_RHS(<4 x float> %a) { ; CHECK-LABEL: @reverse_binop_reverse_intrinsic_constant_RHS( ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> )