diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 95479373b4393..f948eb0e4c824 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7764,8 +7764,9 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
         (CM.foldTailByMasking() || !GEP || !GEP->isInBounds())
             ? GEPNoWrapFlags::none()
             : GEPNoWrapFlags::inBounds();
-    VectorPtr = new VPVectorEndPointerRecipe(
-        Ptr, &Plan.getVF(), getLoadStoreType(I), Flags, I->getDebugLoc());
+    VectorPtr =
+        new VPVectorEndPointerRecipe(Ptr, &Plan.getVF(), getLoadStoreType(I),
+                                     /*Stride*/ -1, Flags, I->getDebugLoc());
   } else {
     VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
                                           GEP ? GEP->getNoWrapFlags()
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 61b5ccd85bc6e..b7ec259ff24f6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1706,17 +1706,22 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
 
 /// A recipe to compute a pointer to the last element of each part of a widened
 /// memory access for widened memory accesses of IndexedTy. Used for
-/// VPWidenMemoryRecipes that are reversed.
+/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
 class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags,
                                  public VPUnrollPartAccessor<2> {
   Type *IndexedTy;
 
+  /// The constant stride of the pointer computed by this recipe.
+  int64_t Stride;
+
 public:
   VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
-                           GEPNoWrapFlags GEPFlags, DebugLoc DL)
+                           int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
       : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
                             ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
-        IndexedTy(IndexedTy) {}
+        IndexedTy(IndexedTy), Stride(Stride) {
+    assert(Stride != 0 && "Stride cannot be zero");
+  }
 
   VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
 
@@ -1748,7 +1753,8 @@ class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags,
 
   VPVectorEndPointerRecipe *clone() override {
     return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
-                                        getGEPNoWrapFlags(), getDebugLoc());
+                                        Stride, getGEPNoWrapFlags(),
+                                        getDebugLoc());
   }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 472b5700bd358..db9a282493ee2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2326,12 +2326,12 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
-static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
+static Type *getGEPIndexTy(bool IsScalable, bool IsReverse, bool IsUnitStride,
                            unsigned CurrentPart, IRBuilderBase &Builder) {
   // Use i32 for the gep index type when the value is constant,
   // or query DataLayout for a more suitable index type otherwise.
   const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
-  return IsScalable && (IsReverse || CurrentPart > 0)
+  return !IsUnitStride || (IsScalable && (IsReverse || CurrentPart > 0))
             ? DL.getIndexType(Builder.getPtrTy(0))
             : Builder.getInt32Ty();
 }
@@ -2339,18 +2339,21 @@ static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
 
 void VPVectorEndPointerRecipe::execute(VPTransformState &State) {
   auto &Builder = State.Builder;
   unsigned CurrentPart = getUnrollPart(*this);
+  bool IsUnitStride = Stride == 1 || Stride == -1;
   Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
-                                CurrentPart, Builder);
+                                IsUnitStride, CurrentPart, Builder);
   // The wide store needs to start at the last vector element.
   Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
   if (IndexTy != RunTimeVF->getType())
     RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
-  // NumElt = -CurrentPart * RunTimeVF
+  // NumElt = Stride * CurrentPart * RunTimeVF
   Value *NumElt = Builder.CreateMul(
-      ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
-  // LastLane = 1 - RunTimeVF
-  Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
+      ConstantInt::get(IndexTy, Stride * (int64_t)CurrentPart), RunTimeVF);
+  // LastLane = Stride * (RunTimeVF - 1)
+  Value *LastLane = Builder.CreateSub(RunTimeVF, ConstantInt::get(IndexTy, 1));
+  if (Stride != 1)
+    LastLane = Builder.CreateMul(ConstantInt::get(IndexTy, Stride), LastLane);
   Value *Ptr = State.get(getOperand(0), VPLane(0));
   Value *ResultPtr =
       Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
@@ -2375,7 +2378,7 @@ void VPVectorPointerRecipe::execute(VPTransformState &State) {
   auto &Builder = State.Builder;
   unsigned CurrentPart = getUnrollPart(*this);
   Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
-                                CurrentPart, Builder);
+                                /*IsUnitStride*/ true, CurrentPart, Builder);
   Value *Ptr = State.get(getOperand(0), VPLane(0));
 
   Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
@@ -3341,25 +3344,6 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
   if (auto *I = dyn_cast<Instruction>(ResAddr))
     State.setDebugLocFrom(I->getDebugLoc());
 
-  // If the group is reverse, adjust the index to refer to the last vector lane
-  // instead of the first. We adjust the index from the first vector lane,
-  // rather than directly getting the pointer for lane VF - 1, because the
-  // pointer operand of the interleaved access is supposed to be uniform.
-  if (Group->isReverse()) {
-    Value *RuntimeVF =
-        getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
-    Value *Index =
-        State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
-    Index = State.Builder.CreateMul(Index,
-                                    State.Builder.getInt32(Group->getFactor()));
-    Index = State.Builder.CreateNeg(Index);
-
-    bool InBounds = false;
-    if (auto *Gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
-      InBounds = Gep->isInBounds();
-    ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);
-  }
-
   State.setDebugLocFrom(getDebugLoc());
 
   Value *PoisonVec = PoisonValue::get(VecTy);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7298b24fb559f..3410031bb67ab 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2482,23 +2482,23 @@ void VPlanTransforms::createInterleaveGroups(
     auto *InsertPos =
         cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IRInsertPos));
 
+    bool InBounds = false;
+    if (auto *Gep = dyn_cast<GetElementPtrInst>(
+            getLoadStorePointerOperand(IRInsertPos)->stripPointerCasts()))
+      InBounds = Gep->isInBounds();
+
     // Get or create the start address for the interleave group.
     auto *Start =
         cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getMember(0)));
     VPValue *Addr = Start->getAddr();
     VPRecipeBase *AddrDef = Addr->getDefiningRecipe();
     if (AddrDef && !VPDT.properlyDominates(AddrDef, InsertPos)) {
-      // TODO: Hoist Addr's defining recipe (and any operands as needed) to
-      // InsertPos or sink loads above zero members to join it.
-      bool InBounds = false;
-      if (auto *Gep = dyn_cast<GetElementPtrInst>(
-              getLoadStorePointerOperand(IRInsertPos)->stripPointerCasts()))
-        InBounds = Gep->isInBounds();
-
       // We cannot re-use the address of member zero because it does not
       // dominate the insert position. Instead, use the address of the insert
       // position and create a PtrAdd adjusting it to the address of member
       // zero.
+      // TODO: Hoist Addr's defining recipe (and any operands as needed) to
+      // InsertPos or sink loads above zero members to join it.
       assert(IG->getIndex(IRInsertPos) != 0 &&
              "index of insert position shouldn't be zero");
       auto &DL = IRInsertPos->getDataLayout();
@@ -2512,6 +2512,21 @@ void VPlanTransforms::createInterleaveGroups(
       Addr = InBounds ? B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV)
                       : B.createPtrAdd(InsertPos->getAddr(), OffsetVPV);
     }
+    // If the group is reverse, adjust the index to refer to the last vector
+    // lane instead of the first. We adjust the index from the first vector
+    // lane, rather than directly getting the pointer for lane VF - 1, because
+    // the pointer operand of the interleaved access is supposed to be uniform.
+    if (IG->isReverse()) {
+      auto *GEP = dyn_cast<GetElementPtrInst>(
+          getLoadStorePointerOperand(IRInsertPos)->stripPointerCasts());
+      auto *ReversePtr = new VPVectorEndPointerRecipe(
+          Addr, &Plan.getVF(), getLoadStoreType(IRInsertPos),
+          -(int64_t)IG->getFactor(),
+          InBounds ? GEPNoWrapFlags::inBounds() : GEPNoWrapFlags::none(),
+          InsertPos->getDebugLoc());
+      ReversePtr->insertBefore(InsertPos);
+      Addr = ReversePtr;
+    }
     auto *VPIG = new VPInterleaveRecipe(IG, Addr, StoredValues,
                                         InsertPos->getMask(), NeedsMaskForGaps,
                                         InsertPos->getDebugLoc());
     VPIG->insertBefore(InsertPos);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index 8c2958769a615..b349c55d3e09a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -367,10 +367,8 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], ptr [[A:%.*]], i64 [[OFFSET_IDX]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i32 [[TMP5]], 3
-; CHECK-NEXT: [[TMP7:%.*]] = sub nsw i32 2, [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP0]], 3
+; CHECK-NEXT: [[TMP8:%.*]] = sub nsw i64 2, [[TMP6]]
 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP8]]
 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP9]], align 4
 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
@@ -381,10 +379,8 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n
 ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <vscale x 4 x i32> [[REVERSE]], [[VEC_IND]]
 ; CHECK-NEXT: [[TMP13:%.*]] =
sub nsw [[REVERSE1]], [[VEC_IND]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B:%.*]], i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i32 [[TMP15]], 3 -; CHECK-NEXT: [[TMP17:%.*]] = sub nsw i32 2, [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP0]], 3 +; CHECK-NEXT: [[TMP18:%.*]] = sub nsw i64 2, [[TMP15]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP18]] ; CHECK-NEXT: [[REVERSE2:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP12]]) ; CHECK-NEXT: [[REVERSE3:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP13]]) @@ -1577,10 +1573,8 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A, ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_XYZT:%.*]], ptr [[A:%.*]], i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i32 [[TMP6]], 4 -; CHECK-NEXT: [[TMP8:%.*]] = sub nsw i32 4, [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = sub nsw i64 4, [[TMP6]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 [[TMP9]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP10]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , , , } @llvm.vector.deinterleave4.nxv16i32( [[WIDE_VEC]]) @@ -1597,10 +1591,8 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A, ; CHECK-NEXT: [[TMP19:%.*]] = mul nsw [[REVERSE4]], [[VEC_IND]] ; CHECK-NEXT: [[TMP20:%.*]] = shl nuw nsw [[REVERSE5]], [[VEC_IND]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_XYZT]], ptr [[B:%.*]], i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i32 [[TMP22]], 4 -; CHECK-NEXT: [[TMP24:%.*]] = sub nsw i32 4, [[TMP23]] -; CHECK-NEXT: [[TMP25:%.*]] = sext i32 [[TMP24]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP25:%.*]] = sub nsw i64 4, [[TMP22]] ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP25]] ; CHECK-NEXT: [[REVERSE6:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP17]]) ; CHECK-NEXT: [[REVERSE7:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP18]]) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll index 09b274de30214..29b27cdb7556d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll @@ -40,7 +40,8 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV64-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP8]] ; RV64-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP5]] -; RV64-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP5]] +; RV64-NEXT: [[TMP22:%.*]] = sub i64 [[TMP5]], 1 +; RV64-NEXT: [[TMP11:%.*]] = mul i64 -1, [[TMP22]] ; RV64-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 [[TMP10]] ; RV64-NEXT: [[TMP13:%.*]] = 
getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP11]] ; RV64-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP13]], align 4 @@ -48,7 +49,8 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV64-NEXT: [[TMP14:%.*]] = add [[REVERSE]], splat (i32 1) ; RV64-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]] ; RV64-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP5]] -; RV64-NEXT: [[TMP17:%.*]] = sub i64 1, [[TMP5]] +; RV64-NEXT: [[TMP23:%.*]] = sub i64 [[TMP5]], 1 +; RV64-NEXT: [[TMP17:%.*]] = mul i64 -1, [[TMP23]] ; RV64-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 [[TMP16]] ; RV64-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 [[TMP17]] ; RV64-NEXT: [[REVERSE1:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP14]]) @@ -98,7 +100,8 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV32-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP8]] ; RV32-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP5]] to i32 ; RV32-NEXT: [[TMP11:%.*]] = mul i32 0, [[TMP10]] -; RV32-NEXT: [[TMP12:%.*]] = sub i32 1, [[TMP10]] +; RV32-NEXT: [[TMP24:%.*]] = sub i32 [[TMP10]], 1 +; RV32-NEXT: [[TMP12:%.*]] = mul i32 -1, [[TMP24]] ; RV32-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 [[TMP11]] ; RV32-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 [[TMP12]] ; RV32-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 @@ -107,7 +110,8 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV32-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]] ; RV32-NEXT: [[TMP17:%.*]] = trunc i64 [[TMP5]] to i32 ; RV32-NEXT: [[TMP18:%.*]] = mul i32 0, [[TMP17]] -; RV32-NEXT: [[TMP19:%.*]] = sub i32 1, [[TMP17]] +; RV32-NEXT: [[TMP25:%.*]] = sub i32 [[TMP17]], 1 +; RV32-NEXT: [[TMP19:%.*]] = mul i32 -1, [[TMP25]] ; RV32-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 [[TMP18]] ; RV32-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 [[TMP19]] ; RV32-NEXT: [[REVERSE1:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP15]]) @@ -157,11 +161,13 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV64-UF2-NEXT: [[TMP9:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP9]] ; RV64-UF2-NEXT: [[TMP11:%.*]] = mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP12:%.*]] = sub i64 1, [[TMP5]] +; RV64-UF2-NEXT: [[TMP32:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP12:%.*]] = mul i64 -1, [[TMP32]] ; RV64-UF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP11]] ; RV64-UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP12]] ; RV64-UF2-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP5]] +; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP16:%.*]] = mul i64 -1, [[TMP33]] ; RV64-UF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP15]] ; RV64-UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP16]] ; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 @@ -172,11 +178,13 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV64-UF2-NEXT: [[TMP20:%.*]] = add [[REVERSE2]], splat (i32 1) ; RV64-UF2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]] ; RV64-UF2-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP23:%.*]] = 
sub i64 1, [[TMP5]] +; RV64-UF2-NEXT: [[TMP34:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP23:%.*]] = mul i64 -1, [[TMP34]] ; RV64-UF2-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP22]] ; RV64-UF2-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[TMP23]] ; RV64-UF2-NEXT: [[TMP26:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP27:%.*]] = sub i64 1, [[TMP5]] +; RV64-UF2-NEXT: [[TMP35:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP27:%.*]] = mul i64 -1, [[TMP35]] ; RV64-UF2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP26]] ; RV64-UF2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i64 [[TMP27]] ; RV64-UF2-NEXT: [[REVERSE3:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP19]]) @@ -246,7 +254,8 @@ define void @vector_reverse_f32(ptr noalias %A, ptr noalias %B) { ; RV64-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP8]] ; RV64-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP5]] -; RV64-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP5]] +; RV64-NEXT: [[TMP22:%.*]] = sub i64 [[TMP5]], 1 +; RV64-NEXT: [[TMP11:%.*]] = mul i64 -1, [[TMP22]] ; RV64-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[TMP10]] ; RV64-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP11]] ; RV64-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP13]], align 4 @@ -254,7 +263,8 @@ define void @vector_reverse_f32(ptr noalias %A, ptr noalias %B) { ; RV64-NEXT: [[TMP14:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) ; RV64-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] ; RV64-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP5]] -; RV64-NEXT: [[TMP17:%.*]] = sub i64 1, [[TMP5]] +; RV64-NEXT: [[TMP23:%.*]] = sub i64 [[TMP5]], 1 +; RV64-NEXT: [[TMP17:%.*]] = mul i64 -1, [[TMP23]] ; RV64-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[TMP16]] ; RV64-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[TMP17]] ; RV64-NEXT: [[REVERSE1:%.*]] = call @llvm.vector.reverse.nxv4f32( [[TMP14]]) @@ -304,7 +314,8 @@ define void @vector_reverse_f32(ptr noalias %A, ptr noalias %B) { ; RV32-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP8]] ; RV32-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP5]] to i32 ; RV32-NEXT: [[TMP11:%.*]] = mul i32 0, [[TMP10]] -; RV32-NEXT: [[TMP12:%.*]] = sub i32 1, [[TMP10]] +; RV32-NEXT: [[TMP24:%.*]] = sub i32 [[TMP10]], 1 +; RV32-NEXT: [[TMP12:%.*]] = mul i32 -1, [[TMP24]] ; RV32-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i32 [[TMP11]] ; RV32-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 [[TMP12]] ; RV32-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 @@ -313,7 +324,8 @@ define void @vector_reverse_f32(ptr noalias %A, ptr noalias %B) { ; RV32-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] ; RV32-NEXT: [[TMP17:%.*]] = trunc i64 [[TMP5]] to i32 ; RV32-NEXT: [[TMP18:%.*]] = mul i32 0, [[TMP17]] -; RV32-NEXT: [[TMP19:%.*]] = sub i32 1, [[TMP17]] +; RV32-NEXT: [[TMP25:%.*]] = sub i32 [[TMP17]], 1 +; RV32-NEXT: [[TMP19:%.*]] = mul i32 -1, [[TMP25]] ; RV32-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 [[TMP18]] ; RV32-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP19]] ; RV32-NEXT: [[REVERSE1:%.*]] = call @llvm.vector.reverse.nxv4f32( [[TMP15]]) @@ -363,11 +375,13 @@ define void 
@vector_reverse_f32(ptr noalias %A, ptr noalias %B) { ; RV64-UF2-NEXT: [[TMP9:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP9]] ; RV64-UF2-NEXT: [[TMP11:%.*]] = mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP12:%.*]] = sub i64 1, [[TMP5]] +; RV64-UF2-NEXT: [[TMP32:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP12:%.*]] = mul i64 -1, [[TMP32]] ; RV64-UF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP11]] ; RV64-UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP12]] ; RV64-UF2-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP5]] +; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP16:%.*]] = mul i64 -1, [[TMP33]] ; RV64-UF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP15]] ; RV64-UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP16]] ; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 @@ -378,11 +392,13 @@ define void @vector_reverse_f32(ptr noalias %A, ptr noalias %B) { ; RV64-UF2-NEXT: [[TMP20:%.*]] = fadd [[REVERSE2]], splat (float 1.000000e+00) ; RV64-UF2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] ; RV64-UF2-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP23:%.*]] = sub i64 1, [[TMP5]] +; RV64-UF2-NEXT: [[TMP34:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP23:%.*]] = mul i64 -1, [[TMP34]] ; RV64-UF2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[TMP22]] ; RV64-UF2-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[TMP23]] ; RV64-UF2-NEXT: [[TMP26:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP27:%.*]] = sub i64 1, [[TMP5]] +; RV64-UF2-NEXT: [[TMP35:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP27:%.*]] = mul i64 -1, [[TMP35]] ; RV64-UF2-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[TMP26]] ; RV64-UF2-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP27]] ; RV64-UF2-NEXT: [[REVERSE3:%.*]] = call @llvm.vector.reverse.nxv4f32( [[TMP19]]) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index dd8b7d6ea7e42..b4e49a60e0887 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -334,22 +334,24 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %22 = zext i32 %21 to i64 ; CHECK-NEXT: %23 = getelementptr inbounds i32, ptr %B, i64 %22 ; CHECK-NEXT: %24 = mul i64 0, %18 -; CHECK-NEXT: %25 = sub i64 1, %18 -; CHECK-NEXT: %26 = getelementptr inbounds i32, ptr %23, i64 %24 -; CHECK-NEXT: %27 = getelementptr inbounds i32, ptr %26, i64 %25 -; CHECK-NEXT: %wide.load = load , ptr %27, align 4 +; CHECK-NEXT: %25 = sub i64 %18, 1 +; CHECK-NEXT: %26 = mul i64 -1, %25 +; CHECK-NEXT: %27 = getelementptr inbounds i32, ptr %23, i64 %24 +; CHECK-NEXT: %28 = getelementptr inbounds i32, ptr %27, i64 %26 +; CHECK-NEXT: %wide.load = load , ptr %28, align 4 ; CHECK-NEXT: %reverse = call @llvm.vector.reverse.nxv4i32( %wide.load) -; CHECK-NEXT: %28 = add %reverse, splat (i32 1) -; CHECK-NEXT: %29 = getelementptr inbounds i32, ptr %A, i64 %22 -; CHECK-NEXT: %30 = mul i64 0, %18 -; CHECK-NEXT: %31 = sub i64 1, %18 -; CHECK-NEXT: %32 = getelementptr 
inbounds i32, ptr %29, i64 %30 -; CHECK-NEXT: %33 = getelementptr inbounds i32, ptr %32, i64 %31 -; CHECK-NEXT: %reverse4 = call @llvm.vector.reverse.nxv4i32( %28) -; CHECK-NEXT: store %reverse4, ptr %33, align 4 +; CHECK-NEXT: %29 = add %reverse, splat (i32 1) +; CHECK-NEXT: %30 = getelementptr inbounds i32, ptr %A, i64 %22 +; CHECK-NEXT: %31 = mul i64 0, %18 +; CHECK-NEXT: %32 = sub i64 %18, 1 +; CHECK-NEXT: %33 = mul i64 -1, %32 +; CHECK-NEXT: %34 = getelementptr inbounds i32, ptr %30, i64 %31 +; CHECK-NEXT: %35 = getelementptr inbounds i32, ptr %34, i64 %33 +; CHECK-NEXT: %reverse4 = call @llvm.vector.reverse.nxv4i32( %29) +; CHECK-NEXT: store %reverse4, ptr %35, align 4 ; CHECK-NEXT: %index.next = add nuw i64 %index, %18 -; CHECK-NEXT: %34 = icmp eq i64 %index.next, %n.vec -; CHECK-NEXT: br i1 %34, , label %vector.body +; CHECK-NEXT: %36 = icmp eq i64 %index.next, %n.vec +; CHECK-NEXT: br i1 %36, , label %vector.body ; CHECK-NEXT: LV: created middle.block ; CHECK-NEXT: LV: draw edge from vector.body ; CHECK-NEXT: LV: vectorizing VPBB: middle.block in BB: middle.block @@ -380,8 +382,8 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %i.0 = add nsw i32 %i.0.in8, -1 ; CHECK-NEXT: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom -; CHECK-NEXT: %35 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: %add9 = add i32 %35, 1 +; CHECK-NEXT: %37 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: %add9 = add i32 %37, 1 ; CHECK-NEXT: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom ; CHECK-NEXT: store i32 %add9, ptr %arrayidx3, align 4 ; CHECK-NEXT: %cmp = icmp ugt i64 %indvars.iv, 1 @@ -743,22 +745,24 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %22 = zext i32 %21 to i64 ; CHECK-NEXT: %23 = getelementptr inbounds float, ptr %B, i64 %22 ; CHECK-NEXT: %24 = mul i64 0, %18 -; CHECK-NEXT: %25 = sub i64 1, %18 -; CHECK-NEXT: %26 = getelementptr inbounds float, ptr %23, i64 %24 -; CHECK-NEXT: %27 = getelementptr inbounds float, ptr %26, i64 %25 -; CHECK-NEXT: %wide.load = load , ptr %27, align 4 +; CHECK-NEXT: %25 = sub i64 %18, 1 +; CHECK-NEXT: %26 = mul i64 -1, %25 +; CHECK-NEXT: %27 = getelementptr inbounds float, ptr %23, i64 %24 +; CHECK-NEXT: %28 = getelementptr inbounds float, ptr %27, i64 %26 +; CHECK-NEXT: %wide.load = load , ptr %28, align 4 ; CHECK-NEXT: %reverse = call @llvm.vector.reverse.nxv4f32( %wide.load) -; CHECK-NEXT: %28 = fadd %reverse, splat (float 1.000000e+00) -; CHECK-NEXT: %29 = getelementptr inbounds float, ptr %A, i64 %22 -; CHECK-NEXT: %30 = mul i64 0, %18 -; CHECK-NEXT: %31 = sub i64 1, %18 -; CHECK-NEXT: %32 = getelementptr inbounds float, ptr %29, i64 %30 -; CHECK-NEXT: %33 = getelementptr inbounds float, ptr %32, i64 %31 -; CHECK-NEXT: %reverse4 = call @llvm.vector.reverse.nxv4f32( %28) -; CHECK-NEXT: store %reverse4, ptr %33, align 4 +; CHECK-NEXT: %29 = fadd %reverse, splat (float 1.000000e+00) +; CHECK-NEXT: %30 = getelementptr inbounds float, ptr %A, i64 %22 +; CHECK-NEXT: %31 = mul i64 0, %18 +; CHECK-NEXT: %32 = sub i64 %18, 1 +; CHECK-NEXT: %33 = mul i64 -1, %32 +; CHECK-NEXT: %34 = getelementptr inbounds float, ptr %30, i64 %31 +; CHECK-NEXT: %35 = getelementptr inbounds float, ptr %34, i64 %33 +; CHECK-NEXT: %reverse4 = call @llvm.vector.reverse.nxv4f32( %29) +; CHECK-NEXT: store %reverse4, ptr %35, align 4 ; CHECK-NEXT: %index.next = add nuw i64 %index, %18 -; CHECK-NEXT: %34 = icmp eq i64 
%index.next, %n.vec -; CHECK-NEXT: br i1 %34, , label %vector.body +; CHECK-NEXT: %36 = icmp eq i64 %index.next, %n.vec +; CHECK-NEXT: br i1 %36, , label %vector.body ; CHECK-NEXT: LV: created middle.block ; CHECK-NEXT: LV: draw edge from vector.body ; CHECK-NEXT: LV: vectorizing VPBB: middle.block in BB: middle.block @@ -789,8 +793,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %i.0 = add nsw i32 %i.0.in8, -1 ; CHECK-NEXT: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom -; CHECK-NEXT: %35 = load float, ptr %arrayidx, align 4 -; CHECK-NEXT: %conv1 = fadd float %35, 1.000000e+00 +; CHECK-NEXT: %37 = load float, ptr %arrayidx, align 4 +; CHECK-NEXT: %conv1 = fadd float %37, 1.000000e+00 ; CHECK-NEXT: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom ; CHECK-NEXT: store float %conv1, ptr %arrayidx3, align 4 ; CHECK-NEXT: %cmp = icmp ugt i64 %indvars.iv, 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll index 96db5bf4e9acc..91d94e52d0990 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -33,7 +33,8 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]] ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP18]] -; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP18]] +; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[TMP18]], 1 +; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 -1, [[TMP11]] ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP9]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP10]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP5]]) @@ -41,7 +42,8 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]] ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP19]] -; IF-EVL-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP19]] +; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 [[TMP19]], 1 +; IF-EVL-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP23]] ; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP13]], i64 [[TMP14]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP15]] ; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], splat (i1 true), i32 [[TMP5]]) @@ -136,7 +138,8 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP26:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP26]] -; IF-EVL-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP26]] +; IF-EVL-NEXT: [[TMP15:%.*]] = sub i64 [[TMP26]], 1 +; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP15]] ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]] ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]] ; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] 
= call @llvm.experimental.vp.reverse.nxv4i1( [[TMP14]], splat (i1 true), i32 [[TMP5]]) @@ -145,7 +148,8 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP27]] -; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 1, [[TMP27]] +; IF-EVL-NEXT: [[TMP30:%.*]] = sub i64 [[TMP27]], 1 +; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 -1, [[TMP30]] ; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]] ; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]] ; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], splat (i1 true), i32 [[TMP5]]) @@ -261,7 +265,8 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP9]] -; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP9]] +; IF-EVL-NEXT: [[TMP29:%.*]] = sub i64 [[TMP9]], 1 +; IF-EVL-NEXT: [[TMP11:%.*]] = mul i64 -1, [[TMP29]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP10]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP11]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP6]]) @@ -271,7 +276,8 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[OFFSET_IDX]] ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP16]] -; IF-EVL-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP16]] +; IF-EVL-NEXT: [[TMP30:%.*]] = sub i64 [[TMP16]], 1 +; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP30]] ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP17]] ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 [[TMP18]] ; IF-EVL-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv16i8( [[WIDE_MASKED_GATHER]], splat (i1 true), i32 [[TMP6]]) @@ -279,7 +285,8 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[OFFSET_IDX]] ; IF-EVL-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 0, [[TMP22]] -; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 1, [[TMP22]] +; IF-EVL-NEXT: [[TMP31:%.*]] = sub i64 [[TMP22]], 1 +; IF-EVL-NEXT: [[TMP24:%.*]] = mul i64 -1, [[TMP31]] ; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP21]], i64 [[TMP23]] ; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP25]], i64 [[TMP24]] ; IF-EVL-NEXT: [[VP_REVERSE2:%.*]] = call @llvm.experimental.vp.reverse.nxv16i8( [[WIDE_MASKED_GATHER]], splat (i1 true), i32 [[TMP6]]) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll index 5c94ce180578f..984b64c55ce16 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll @@ -34,7 +34,8 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { ; 
CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = sub i64 1, [[TMP15]] +; CHECK-NEXT: [[TMP23:%.*]] = sub i64 [[TMP15]], 1 +; CHECK-NEXT: [[TMP17:%.*]] = mul i64 -1, [[TMP23]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i64, ptr [[ARRAYIDX13]], i64 [[TMP16]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[TMP18]], i64 [[TMP17]] ; CHECK-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv2i64( zeroinitializer, splat (i1 true), i32 [[TMP11]])
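
The standalone sketch below is not part of the patch; it is a minimal C++ model of the element offsets that VPVectorEndPointerRecipe::execute emits after this change (NumElt = Stride * Part * VF, LastLane = Stride * (VF - 1)), using a fixed VF and invented helper names (numEltOffset, lastLaneOffset) purely for illustration. For Stride == -1 it reproduces the old reversed-access offsets (-Part * VF and 1 - VF), and for a reversed interleave group the recipe is created with Stride == -Factor, which matches the "sub nsw i64 2, [[TMP6]]" style offsets in the updated tests.

// Standalone sketch (not LLVM code): models the element offsets emitted by
// VPVectorEndPointerRecipe::execute for a given Stride, unroll part and VF,
// and checks that Stride == -1 matches the old unit-stride reverse formulas.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <initializer_list>

// Offset (in elements) applied by the first GEP: Stride * Part * VF.
static int64_t numEltOffset(int64_t Stride, int64_t Part, int64_t VF) {
  return Stride * Part * VF;
}

// Offset applied by the second GEP: Stride * (VF - 1), i.e. the distance from
// the uniform scalar address to the last lane of the part.
static int64_t lastLaneOffset(int64_t Stride, int64_t VF) {
  return Stride * (VF - 1);
}

int main() {
  // Reversed wide load/store (Stride == -1) reproduces the old computation:
  // NumElt = -Part * VF and LastLane = 1 - VF.
  for (int64_t Part = 0; Part < 4; ++Part)
    for (int64_t VF : {2, 4, 8, 16}) {
      assert(numEltOffset(-1, Part, VF) == -Part * VF);
      assert(lastLaneOffset(-1, VF) == 1 - VF);
    }

  // Reversed interleave group with factor 2 (Stride == -2), VF = 4, part 0:
  // the start pointer lands 2 * (4 - 1) = 6 elements before the scalar
  // address, so the wide load covers element offsets [-6, 1] of the buffer.
  int64_t Off = numEltOffset(-2, 0, 4) + lastLaneOffset(-2, 4);
  printf("factor-2 reverse, VF=4, part 0: offset = %lld elements\n",
         (long long)Off);
  return 0;
}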