diff --git a/llvm/lib/Target/AIE/AIECombine.td b/llvm/lib/Target/AIE/AIECombine.td index 525917029985..f6a745fdb30c 100644 --- a/llvm/lib/Target/AIE/AIECombine.td +++ b/llvm/lib/Target/AIE/AIECombine.td @@ -88,6 +88,12 @@ def combine_vector_shuffle_extract_subvec : GICombineRule< [{ return matchShuffleToExtractSubvec(*${root}, MRI, (const AIEBaseInstrInfo &)B.getTII(), ${matchinfo}); }]), (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; +def combine_vector_shuffle_to_copy : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_SHUFFLE_VECTOR): $root, + [{ return matchShuffleToCopy(*${root}, MRI, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + def AIE2PreLegalizerCombiner : GICombiner<"AIE2PreLegalizerCombinerImpl", [ combine_unpad_vector, combine_pad_vector, all_combines, combine_S20NarrowingOpt, @@ -99,6 +105,7 @@ def AIE2PreLegalizerCombiner def AIE2PPreLegalizerCombiner : GICombiner<"AIE2PPreLegalizerCombinerImpl", [ combine_unpad_vector, combine_pad_vector, + combine_vector_shuffle_to_copy, combine_vector_shuffle_extract_subvec, all_combines, combine_S20NarrowingOpt, combine_globalval_offset, diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.cpp b/llvm/lib/Target/AIE/AIECombinerHelper.cpp index 549acafe91b0..9731b7f517da 100644 --- a/llvm/lib/Target/AIE/AIECombinerHelper.cpp +++ b/llvm/lib/Target/AIE/AIECombinerHelper.cpp @@ -52,6 +52,69 @@ cl::opt CombineVecShiftByZero( "aie-combine-vec-shift-by-zero", cl::init(true), cl::Hidden, cl::desc("Combine vectors shift by zero into copies.")); +bool MaskMatch::isValidMask(ArrayRef Mask) const { + bool FirstNotUndef = true; + for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) { + if (Mask[Idx] == -1) + continue; + + // Find the start value of the mask + if (FirstNotUndef) { + // Get the start value + const unsigned MaskStart = Mask[Idx] - (Period == 0 ? 
Idx : Idx % Period); + + if (MaskStart != Height) + return false; + + FirstNotUndef = false; + } + + // Check not undef values (not -1) of the mask + if ((unsigned)Mask[Idx] != getMaskValue(Idx)) + return false; + } + + return true; +} + +bool MaskMatch::isMaskWithAllUndefs(ArrayRef Mask) { + for (unsigned I = 0; I < Mask.size(); ++I) { + if (Mask[I] != -1) + return false; + } + return true; +} + +std::optional MaskMatch::getHeight(ArrayRef Mask, + unsigned Period) { + for (unsigned I = 0; I < Mask.size(); ++I) { + if (Mask[I] != -1) + return Mask[I] - (Period == 0 ? I : I % Period); + } + return std::nullopt; +} + +/// This function returns the unique index in the shuffle mask \p Mask if the +/// unique index exists. +std::optional MaskMatch::getUniqueIndex(ArrayRef Mask) { + std::optional UniqOpIdx; + for (unsigned I = 0; I < Mask.size(); I++) { + int Idx = Mask[I]; + if (Idx == -1) + continue; + + if (!UniqOpIdx) { + UniqOpIdx = Idx; + continue; + } + + if (UniqOpIdx != Idx) { + return std::nullopt; + } + } + return UniqOpIdx; +} + MachineInstr *findPreIncMatch(MachineInstr &MemI, MachineRegisterInfo &MRI, CombinerHelper &Helper, AIELoadStoreCombineMatchData &MatchData, @@ -1765,6 +1828,15 @@ bool llvm::matchBroadcastElement(MachineInstr &MI, MachineRegisterInfo &MRI, return true; } +/// \returns true if it is possible to combine the shuffle vector to VSEL. 
+/// E.g.: +/// From : %0:_(<16 x s32>) = COPY $x0 +/// %1:_(<16 x s32>) = COPY $x1 +/// %2:_(<16 x s32>) = G_SHUFFLE_VECTOR %0(<16 x s32>), %1(<16 x s32>), +/// shufflemask(0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, +/// 31) +/// To : %3:_(s32) = G_CONSTANT i32 65520 +/// %4:_(<16 x s32>) = G_AIE_VSEL %0, %1, %3(s32) bool llvm::matchShuffleToVSel(MachineInstr &MI, MachineRegisterInfo &MRI, const AIEBaseInstrInfo &TII, BuildFnTy &MatchInfo) { @@ -1785,13 +1857,10 @@ bool llvm::matchShuffleToVSel(MachineInstr &MI, MachineRegisterInfo &MRI, if (NumDstElems != NumSrcElems) return false; - // Check that the shuffle mask can be converted into VSel mask: - // The mask contains only -1 - if (std::all_of(Mask.begin(), Mask.end(), - [&](int Value) { return Value == -1; })) { + if (MaskMatch::isMaskWithAllUndefs(Mask)) return false; - } + // Check that the shuffle mask can be converted into VSel mask: // 1. The shuffle mask doesn't contain indices that correspond to the same // index in Src1 and Src2, i.e., for each i only the i-th element from Src1 or // the i-th element from Src2 is used. @@ -1820,27 +1889,6 @@ bool llvm::matchShuffleToVSel(MachineInstr &MI, MachineRegisterInfo &MRI, return true; } -/// This function returns the unique index in the shuffle mask \p Mask if the -/// unique index exists. -static std::optional<int> getUniqueIndex(ArrayRef<int> Mask) { - std::optional<int> UniqOpIdx; - for (unsigned I = 0; I < Mask.size(); I++) { - int Idx = Mask[I]; - if (Idx < 0) - continue; - - if (!UniqOpIdx) { - UniqOpIdx = Idx; - continue; - } - - if (UniqOpIdx != Idx) { - return std::nullopt; - } - } - return UniqOpIdx; -} - /// \returns true if it is possible to combine the shuffle vector with a mask /// that extracts an element from the first source vector and broadcasts /// it. 
E.g.: @@ -1856,7 +1904,7 @@ static bool matchShuffleToVecEltBroadcast(MachineInstr &MI, BuildFnTy &MatchInfo) { ArrayRef Mask = MI.getOperand(3).getShuffleMask(); - std::optional UniqOpIdx = getUniqueIndex(Mask); + std::optional UniqOpIdx = MaskMatch::getUniqueIndex(Mask); if (!UniqOpIdx) return false; @@ -1873,24 +1921,6 @@ static bool matchShuffleToVecEltBroadcast(MachineInstr &MI, return true; } -/// A sequential mask with \p StartValue and \p NumElems is generated. If \p -/// Mask is equivalent to the generated sequential mask, the method returns -/// true. Otherwise, false. -static bool checkSequentialMask(const ArrayRef Mask, unsigned StartValue, - unsigned NumElems) { - if (Mask.size() != NumElems) - return false; - - auto SeqMask = createSequentialMask(StartValue, NumElems, 0); - - for (unsigned I = 0; I < NumElems; ++I) { - if (Mask[I] != -1 && Mask[I] != SeqMask[I]) - return false; - } - - return true; -} - /// Check prerequisites to extract a subvector static bool checkExtractSubvectorPrerequisites(const AIEBaseInstrInfo &TII, const LLT DstTy, @@ -2124,12 +2154,15 @@ bool llvm::matchShuffleToExtractSubvec(MachineInstr &MI, if (NumSrc1Elems % NumDstElems != 0) return false; + if (MaskMatch::isMaskWithAllUndefs(Mask)) + return false; + const unsigned NumSubVectors = NumSrc1Elems / NumDstElems; auto GetSubvecExtractIdx = [=, &Mask]() -> std::optional { for (unsigned SubVecIdx = 0; SubVecIdx < NumSubVectors; ++SubVecIdx) { - if (checkSequentialMask(Mask, SubVecIdx * NumDstElems, NumDstElems)) { + MaskMatch SequentialMask{/*Height*/ SubVecIdx * NumDstElems}; + if (SequentialMask.isValidMask(Mask)) return SubVecIdx; - } } return std::nullopt; @@ -2189,30 +2222,17 @@ static bool matchShuffleToSubvecBroadcast(MachineInstr &MI, if (Mask[0] != -1 && Mask[0] % SplatMaskLen != 0) return std::nullopt; - // Find the start value of the splat mask and check that the mask is valid - bool ValidMask = true; - int SplatMaskStart = -1; - for (unsigned I = 0; I < MaskSize; 
++I) { - if (Mask[I] == -1) - continue; - - if (SplatMaskStart == -1) { - // First Mask[I]!=-1 - // Get the start value - SplatMaskStart = Mask[I] - I % SplatMaskLen; - - if (SplatMaskStart % SplatMaskLen != 0) - return std::nullopt; - - } else if ((unsigned)Mask[I] != SplatMaskStart + I % SplatMaskLen) { - // Check the rest not undef values (not -1) of the mask - ValidMask = false; - break; - } - } + // Get Height (start value) + std::optional<unsigned> Height = + MaskMatch::getHeight(Mask, /*Period*/ SplatMaskLen); + if (!Height) + return std::nullopt; - if (ValidMask) - return std::make_pair(SplatMaskStart, SplatMaskLen); + // Check the mask + MaskMatch SequentialPeriodicMask{/*Height*/ Height.value(), + /*Period*/ SplatMaskLen}; + if (SequentialPeriodicMask.isValidMask(Mask)) + return std::make_pair(Height.value(), SplatMaskLen); } return std::nullopt; }; @@ -2272,10 +2292,11 @@ static bool matchShuffleToVecBroadcast(MachineInstr &MI, return false; } - for (unsigned I = 0; I < Mask.size(); ++I) { - if (Mask[I] != -1 && (unsigned)Mask[I] != I % NumSrcElems) - return false; - } + // Check the mask + MaskMatch SequentialPeriodicMask{/*Height*/ 0, + /*Period*/ NumSrcElems}; + if (!SequentialPeriodicMask.isValidMask(Mask)) + return false; MatchInfo = [=, &MRI](MachineIRBuilder &B) { buildBroadcastVector(B, MRI, Src1Reg, DstReg); @@ -2288,6 +2309,12 @@ bool llvm::matchShuffleToBroadcast(MachineInstr &MI, MachineRegisterInfo &MRI, const AIEBaseInstrInfo &TII, BuildFnTy &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); + + if (MaskMatch::isMaskWithAllUndefs(Mask)) + return false; + if (matchShuffleToVecBroadcast(MI, MRI, TII, MatchInfo)) return true; if (matchShuffleToVecEltBroadcast(MI, MRI, TII, MatchInfo)) @@ -2296,3 +2323,41 @@ bool llvm::matchShuffleToBroadcast(MachineInstr &MI, MachineRegisterInfo &MRI, return true; return false; } + +/// Match something like this: +/// %1:_(<16 x s32>) = COPY $x0 +/// %2:_(<16 x s32>) = G_IMPLICIT_DEF +/// %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), +/// shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) + +/// To convert to: +/// %0:_(<16 x s32>) = COPY $x0 +bool llvm::matchShuffleToCopy(MachineInstr &MI, MachineRegisterInfo &MRI, + BuildFnTy &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + + const Register DstReg = MI.getOperand(0).getReg(); + const Register Src1Reg = MI.getOperand(1).getReg(); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); + + const LLT DstTy = MRI.getType(DstReg); + const LLT Src1Ty = MRI.getType(Src1Reg); + if (DstTy != Src1Ty) + return false; + + const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1; + if (Mask.size() != NumSrcElems) + return false; + + if (MaskMatch::isMaskWithAllUndefs(Mask)) + return false; + + // Check that the mask is sequential + MaskMatch SequentialMask{/*Height*/ 0}; + if (!SequentialMask.isValidMask(Mask)) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(DstReg, Src1Reg); }; + + return true; +} diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.h b/llvm/lib/Target/AIE/AIECombinerHelper.h index c8da15a4d838..75ada55c0692 100644 --- a/llvm/lib/Target/AIE/AIECombinerHelper.h +++ b/llvm/lib/Target/AIE/AIECombinerHelper.h @@ -34,6 +34,34 @@ struct AIELoadStoreCombineMatchData { bool RemoveInstr; }; +/// The mask is represented by a sawtooth function F with Period, Height and +/// Amplitude, i.e., F(idx + Period) = F(idx), where F(idx) = Height + idx * +/// Amplitude for 0 <= idx < Period (a Period of 0 means no repetition). 
+/// Example: mask = (4, 6, 8, 4, 6, 8) <=> Height=4, Amplitude=2, Period=3 +class MaskMatch { +public: + MaskMatch(unsigned MaskHeight, unsigned MaskPeriod = 0, int MaskAmplitude = 1) + : Period{MaskPeriod}, Height{MaskHeight}, Amplitude{MaskAmplitude} {} + + bool isValidMask(ArrayRef Mask) const; + unsigned getHeight() const { return Height; } + + static bool isMaskWithAllUndefs(ArrayRef Mask); + static std::optional getHeight(ArrayRef Mask, unsigned Period); + static std::optional getUniqueIndex(ArrayRef Mask); + +protected: + unsigned getMaskValue(unsigned Idx) const { + unsigned BaseIdx = Period == 0 ? Idx : Idx % Period; + return Height + BaseIdx * Amplitude; + } + + unsigned Period = 0; + unsigned Height = 0; + /// Negative amplitude can be used for reverse mask patterns. + int Amplitude = 1; +}; + /// Look for any PtrAdd instruction that use the same base as \a MI that can be /// combined with it and stores it in \a MatchData /// \return true if an instruction is found @@ -211,6 +239,9 @@ bool matchShuffleToExtractSubvec(MachineInstr &MI, MachineRegisterInfo &MRI, const AIEBaseInstrInfo &TII, BuildFnTy &MatchInfo); +bool matchShuffleToCopy(MachineInstr &MI, MachineRegisterInfo &MRI, + BuildFnTy &MatchInfo); + } // namespace llvm #endif diff --git a/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir b/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir index 52d31cadddb1..4b58797ba7f3 100644 --- a/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir +++ b/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir @@ -776,9 +776,7 @@ body: | ; CHECK: liveins: $dm0, $dm1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s32>) = COPY $dm0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s32>) = COPY $dm1 - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<64 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<64 x s32>), [[COPY1]], shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef) - ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<64 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY]](<64 x s32>) %1:_(<64 x s32>) = COPY $dm0 %2:_(<64 x s32>) = COPY $dm1 %0:_(<64 x s32>) = G_SHUFFLE_VECTOR %1(<64 x s32>), %2(<64 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1) @@ -907,24 +905,88 @@ body: | %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), shufflemask(4, 5, 6, 7, 8, 9, 10, 11) PseudoRET implicit $lr, implicit %0 ... -# Note: currently it is combined to G_AIE_VSEL but it should be combined to COPY -# which is not implemented yet. + +# Combine G_SHUFFLE_VECTOR into COPY --- -name: shuffle_vector_to_copy +name: shuffle_vector_to_copy_scalar +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: shuffle_vector_to_copy_scalar + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[C]](s32) + %1:_(s32) = G_CONSTANT i32 0 + %2:_(s32) = G_CONSTANT i32 1 + %0:_(s32) = G_SHUFFLE_VECTOR %1(s32), %2(s32), shufflemask(0) + PseudoRET implicit $lr, implicit %0 +... 
+--- +name: shuffle_vector_to_copy_vec tracksRegLiveness: true body: | bb.1: liveins: $x0, $x1 - ; CHECK-LABEL: name: shuffle_vector_to_copy + ; CHECK-LABEL: name: shuffle_vector_to_copy_vec ; CHECK: liveins: $x0, $x1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AIE_VSEL:%[0-9]+]]:_(<16 x s32>) = G_AIE_VSEL [[COPY]], [[COPY1]], [[C]](s32) - ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_VSEL]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY]](<16 x s32>) %1:_(<16 x s32>) = COPY $x0 %2:_(<16 x s32>) = COPY $x1 %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) PseudoRET implicit $lr, implicit %0 ... +# Note: currently not commutable +--- +name: shufflevector_to_copy_commutable +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $l0, $l1 + ; CHECK-LABEL: name: shufflevector_to_copy_commutable + ; CHECK: liveins: $l0, $l1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $l0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $l1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(2, 3) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<2 x s32>) + %0:_(<2 x s32>) = COPY $l0 + %1:_(<2 x s32>) = COPY $l1 + %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(2,3) + PseudoRET implicit $lr, implicit %2 +... 
+--- +name: shuffle_vector_to_copy_vec_with_undef +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: shuffle_vector_to_copy_vec_with_undef + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY]](<16 x s32>) + %1:_(<16 x s32>) = COPY $x0 + %2:_(<16 x s32>) = COPY $x1 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), shufflemask(0, 1, 2, -1, 4, 5, 6, 7, 8, -1, 10, 11, 12, 13, 14, 15) + PseudoRET implicit $lr, implicit %0 +... +--- +name: shuffle_vector_to_copy_vec_no_combine +tracksRegLiveness: true +body: | + bb.1: + liveins: $wl0, $wl1 + ; CHECK-LABEL: name: shuffle_vector_to_copy_vec_no_combine + ; CHECK: liveins: $wl0, $wl1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<8 x s32>), [[COPY1]], shufflemask(0, 1, 2, 3, 4, 5, 6, 2) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<8 x s32>) + %1:_(<8 x s32>) = COPY $wl0 + %2:_(<8 x s32>) = COPY $wl1 + %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1(<8 x s32>), %2(<8 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 2) + PseudoRET implicit $lr, implicit %0 +... diff --git a/llvm/test/CodeGen/AIE/aie2p/shufflevec.ll b/llvm/test/CodeGen/AIE/aie2p/shufflevec.ll index bf0176dbde2b..5e1dda8fc490 100644 --- a/llvm/test/CodeGen/AIE/aie2p/shufflevec.ll +++ b/llvm/test/CodeGen/AIE/aie2p/shufflevec.ll @@ -521,3 +521,47 @@ entry: %shuffle = shufflevector <64 x i32> %a, <64 x i32> %b, <32 x i32> ret <32 x i32> %shuffle } + +; Combine G_SHUFFLE_VECTOR into COPY. Note: shufflevector doesn't accept scalar arguments. 
+define <16 x i32> @shuffle_vector_to_copy_vec(<16 x i32> noundef %a, <16 x i32> noundef %b) { +; CHECK-LABEL: shuffle_vector_to_copy_vec: +; CHECK: .p2align 4 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: ret lr +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: nop // Delay Slot 3 +; CHECK-NEXT: vmov x0, x2 // Delay Slot 2 +; CHECK-NEXT: nop // Delay Slot 1 +entry: + %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> + ret <16 x i32> %shuffle +} + +define <8 x i32> @shuffle_vector_to_copy_no_combine(<8 x i32> noundef %a, <8 x i32> noundef %b) { +; CHECK-LABEL: shuffle_vector_to_copy_no_combine: +; CHECK: .p2align 4 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: nopa ; nopx ; vextract.32 r1, x2, #5, vaddsign1 +; CHECK-NEXT: vextract.32 r2, x2, #6, vaddsign1 +; CHECK-NEXT: vextract.32 r3, x2, #7, vaddsign1 +; CHECK-NEXT: vextract.32 r4, x4, #0, vaddsign1 +; CHECK-NEXT: vextract.32 r5, x4, #1, vaddsign1 +; CHECK-NEXT: vextract.32 r6, x4, #2, vaddsign1 +; CHECK-NEXT: vextract.32 r0, x2, #4, vaddsign1 +; CHECK-NEXT: vextract.32 r7, x4, #3, vaddsign1 +; CHECK-NEXT: vpush.hi.32 x0, x0, r0 +; CHECK-NEXT: vpush.hi.32 x0, x0, r1 +; CHECK-NEXT: vpush.hi.32 x0, x0, r2 +; CHECK-NEXT: vpush.hi.32 x0, x0, r3 +; CHECK-NEXT: vpush.hi.32 x0, x0, r4 +; CHECK-NEXT: ret lr +; CHECK-NEXT: vpush.hi.32 x0, x0, r5 // Delay Slot 5 +; CHECK-NEXT: vpush.hi.32 x0, x0, r6 // Delay Slot 4 +; CHECK-NEXT: vpush.hi.32 x0, x0, r7 // Delay Slot 3 +; CHECK-NEXT: vmov wl0, wh0 // Delay Slot 2 +; CHECK-NEXT: nop // Delay Slot 1 +entry: + %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %shuffle +}