diff --git a/llvm/lib/Target/AIE/AIECombine.td b/llvm/lib/Target/AIE/AIECombine.td index 525917029985..f6a745fdb30c 100644 --- a/llvm/lib/Target/AIE/AIECombine.td +++ b/llvm/lib/Target/AIE/AIECombine.td @@ -88,6 +88,12 @@ def combine_vector_shuffle_extract_subvec : GICombineRule< [{ return matchShuffleToExtractSubvec(*${root}, MRI, (const AIEBaseInstrInfo &)B.getTII(), ${matchinfo}); }]), (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; +def combine_vector_shuffle_to_copy : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_SHUFFLE_VECTOR): $root, + [{ return matchShuffleToCopy(*${root}, MRI, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + def AIE2PreLegalizerCombiner : GICombiner<"AIE2PreLegalizerCombinerImpl", [ combine_unpad_vector, combine_pad_vector, all_combines, combine_S20NarrowingOpt, @@ -99,6 +105,7 @@ def AIE2PreLegalizerCombiner def AIE2PPreLegalizerCombiner : GICombiner<"AIE2PPreLegalizerCombinerImpl", [ combine_unpad_vector, combine_pad_vector, + combine_vector_shuffle_to_copy, combine_vector_shuffle_extract_subvec, all_combines, combine_S20NarrowingOpt, combine_globalval_offset, diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.cpp b/llvm/lib/Target/AIE/AIECombinerHelper.cpp index 549acafe91b0..9731b7f517da 100644 --- a/llvm/lib/Target/AIE/AIECombinerHelper.cpp +++ b/llvm/lib/Target/AIE/AIECombinerHelper.cpp @@ -52,6 +52,69 @@ cl::opt CombineVecShiftByZero( "aie-combine-vec-shift-by-zero", cl::init(true), cl::Hidden, cl::desc("Combine vectors shift by zero into copies.")); +bool MaskMatch::isValidMask(ArrayRef Mask) const { + bool FirstNotUndef = true; + for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) { + if (Mask[Idx] == -1) + continue; + + // Find the start value of the mask + if (FirstNotUndef) { + // Get the start value + const unsigned MaskStart = Mask[Idx] - (Period == 0 ? 
Idx : Idx % Period); + + if (MaskStart != Height) + return false; + + FirstNotUndef = false; + } + + // Check not undef values (not -1) of the mask + if ((unsigned)Mask[Idx] != getMaskValue(Idx)) + return false; + } + + return true; +} + +bool MaskMatch::isMaskWithAllUndefs(ArrayRef Mask) { + for (unsigned I = 0; I < Mask.size(); ++I) { + if (Mask[I] != -1) + return false; + } + return true; +} + +std::optional MaskMatch::getHeight(ArrayRef Mask, + unsigned Period) { + for (unsigned I = 0; I < Mask.size(); ++I) { + if (Mask[I] != -1) + return Mask[I] - (Period == 0 ? I : I % Period); + } + return std::nullopt; +} + +/// This function returns the unique index in the shuffle mask \p Mask if the +/// unique index exists. +std::optional MaskMatch::getUniqueIndex(ArrayRef Mask) { + std::optional UniqOpIdx; + for (unsigned I = 0; I < Mask.size(); I++) { + int Idx = Mask[I]; + if (Idx == -1) + continue; + + if (!UniqOpIdx) { + UniqOpIdx = Idx; + continue; + } + + if (UniqOpIdx != Idx) { + return std::nullopt; + } + } + return UniqOpIdx; +} + MachineInstr *findPreIncMatch(MachineInstr &MemI, MachineRegisterInfo &MRI, CombinerHelper &Helper, AIELoadStoreCombineMatchData &MatchData, @@ -1765,6 +1828,15 @@ bool llvm::matchBroadcastElement(MachineInstr &MI, MachineRegisterInfo &MRI, return true; } +/// \returns true if it is possible to combine the shuffle vector to VSEL. 
+/// E.g.: +/// From : %0:_(<16 x s32>) = COPY $x0 +/// %1:_(<16 x s32>) = COPY $x1 +/// %2:_(<16 x s32>) = G_SHUFFLE_VECTOR %0(<16 x s32>), %1(<16 x s32>), +/// shufflemask(0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, +/// 31) +/// To : %3:_(s32) = G_CONSTANT i32 65520 +/// %4:_(<16 x s32>) = G_AIE_VSEL %0, %1, %3(s32) bool llvm::matchShuffleToVSel(MachineInstr &MI, MachineRegisterInfo &MRI, const AIEBaseInstrInfo &TII, BuildFnTy &MatchInfo) { @@ -1785,13 +1857,10 @@ bool llvm::matchShuffleToVSel(MachineInstr &MI, MachineRegisterInfo &MRI, if (NumDstElems != NumSrcElems) return false; - // Check that the shuffle mask can be converted into VSel mask: - // The mask contains only -1 - if (std::all_of(Mask.begin(), Mask.end(), - [&](int Value) { return Value == -1; })) { + if (MaskMatch::isMaskWithAllUndefs(Mask)) return false; - } + // Check that the shuffle mask can be converted into VSel mask: // 1. The shuffle mask doesn't contain indices that correspond to the same // index in Src1 and Src2, i.e., for each i only the i-th element from Src1 or // the i-th element from Src2 is used. @@ -1820,27 +1889,6 @@ bool llvm::matchShuffleToVSel(MachineInstr &MI, MachineRegisterInfo &MRI, return true; } -/// This function returns the unique index in the shuffle mask \p Mask if the -/// unique index exists. -static std::optional<int> getUniqueIndex(ArrayRef<int> Mask) { - std::optional<int> UniqOpIdx; - for (unsigned I = 0; I < Mask.size(); I++) { - int Idx = Mask[I]; - if (Idx < 0) - continue; - - if (!UniqOpIdx) { - UniqOpIdx = Idx; - continue; - } - - if (UniqOpIdx != Idx) { - return std::nullopt; - } - } - return UniqOpIdx; -} - /// \returns true if it is possible to combine the shuffle vector with a mask /// that extracts an element from the first source vector and broadcasts /// it. 
E.g.: @@ -1856,7 +1904,7 @@ static bool matchShuffleToVecEltBroadcast(MachineInstr &MI, BuildFnTy &MatchInfo) { ArrayRef Mask = MI.getOperand(3).getShuffleMask(); - std::optional UniqOpIdx = getUniqueIndex(Mask); + std::optional UniqOpIdx = MaskMatch::getUniqueIndex(Mask); if (!UniqOpIdx) return false; @@ -1873,24 +1921,6 @@ static bool matchShuffleToVecEltBroadcast(MachineInstr &MI, return true; } -/// A sequential mask with \p StartValue and \p NumElems is generated. If \p -/// Mask is equivalent to the generated sequential mask, the method returns -/// true. Otherwise, false. -static bool checkSequentialMask(const ArrayRef Mask, unsigned StartValue, - unsigned NumElems) { - if (Mask.size() != NumElems) - return false; - - auto SeqMask = createSequentialMask(StartValue, NumElems, 0); - - for (unsigned I = 0; I < NumElems; ++I) { - if (Mask[I] != -1 && Mask[I] != SeqMask[I]) - return false; - } - - return true; -} - /// Check prerequisites to extract a subvector static bool checkExtractSubvectorPrerequisites(const AIEBaseInstrInfo &TII, const LLT DstTy, @@ -2124,12 +2154,15 @@ bool llvm::matchShuffleToExtractSubvec(MachineInstr &MI, if (NumSrc1Elems % NumDstElems != 0) return false; + if (MaskMatch::isMaskWithAllUndefs(Mask)) + return false; + const unsigned NumSubVectors = NumSrc1Elems / NumDstElems; auto GetSubvecExtractIdx = [=, &Mask]() -> std::optional { for (unsigned SubVecIdx = 0; SubVecIdx < NumSubVectors; ++SubVecIdx) { - if (checkSequentialMask(Mask, SubVecIdx * NumDstElems, NumDstElems)) { + MaskMatch SequentialMask{/*Height*/ SubVecIdx * NumDstElems}; + if (SequentialMask.isValidMask(Mask)) return SubVecIdx; - } } return std::nullopt; @@ -2189,30 +2222,17 @@ static bool matchShuffleToSubvecBroadcast(MachineInstr &MI, if (Mask[0] != -1 && Mask[0] % SplatMaskLen != 0) return std::nullopt; - // Find the start value of the splat mask and check that the mask is valid - bool ValidMask = true; - int SplatMaskStart = -1; - for (unsigned I = 0; I < MaskSize; 
++I) { - if (Mask[I] == -1) - continue; - - if (SplatMaskStart == -1) { - // First Mask[I]!=-1 - // Get the start value - SplatMaskStart = Mask[I] - I % SplatMaskLen; - - if (SplatMaskStart % SplatMaskLen != 0) - return std::nullopt; - - } else if ((unsigned)Mask[I] != SplatMaskStart + I % SplatMaskLen) { - // Check the rest not undef values (not -1) of the mask - ValidMask = false; - break; - } - } + // Get Height (start value) + std::optional<unsigned> Height = + MaskMatch::getHeight(Mask, /*Period*/ SplatMaskLen); + if (!Height) + return std::nullopt; - if (ValidMask) - return std::make_pair(SplatMaskStart, SplatMaskLen); + // Check the mask + MaskMatch SequentialPeriodicMask{/*Height*/ Height.value(), + /*Period*/ SplatMaskLen}; + if (SequentialPeriodicMask.isValidMask(Mask)) + return std::make_pair(Height.value(), SplatMaskLen); } return std::nullopt; }; @@ -2272,10 +2292,11 @@ static bool matchShuffleToVecBroadcast(MachineInstr &MI, return false; } - for (unsigned I = 0; I < Mask.size(); ++I) { - if (Mask[I] != -1 && (unsigned)Mask[I] != I % NumSrcElems) - return false; - } + // Check the mask + MaskMatch SequentialPeriodicMask{/*Height*/ 0, + /*Period*/ NumSrcElems}; + if (!SequentialPeriodicMask.isValidMask(Mask)) + return false; MatchInfo = [=, &MRI](MachineIRBuilder &B) { buildBroadcastVector(B, MRI, Src1Reg, DstReg); @@ -2288,6 +2309,12 @@ bool llvm::matchShuffleToBroadcast(MachineInstr &MI, MachineRegisterInfo &MRI, const AIEBaseInstrInfo &TII, BuildFnTy &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); + + if (MaskMatch::isMaskWithAllUndefs(Mask)) + return false; + if (matchShuffleToVecBroadcast(MI, MRI, TII, MatchInfo)) return true; if (matchShuffleToVecEltBroadcast(MI, MRI, TII, MatchInfo)) @@ -2296,3 +2323,41 @@ bool llvm::matchShuffleToBroadcast(MachineInstr &MI, MachineRegisterInfo &MRI, return true; return false; } + +/// Match something like this: +/// %1:_(<16 x s32>) = COPY $x0 +/// %2:_(<16 x s32>) = G_IMPLICIT_DEF +/// %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), +/// shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) + +/// To convert to: +/// %0:_(<16 x s32>) = COPY $x0 +bool llvm::matchShuffleToCopy(MachineInstr &MI, MachineRegisterInfo &MRI, + BuildFnTy &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + + const Register DstReg = MI.getOperand(0).getReg(); + const Register Src1Reg = MI.getOperand(1).getReg(); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); + + const LLT DstTy = MRI.getType(DstReg); + const LLT Src1Ty = MRI.getType(Src1Reg); + if (DstTy != Src1Ty) + return false; + + const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1; + if (Mask.size() != NumSrcElems) + return false; + + if (MaskMatch::isMaskWithAllUndefs(Mask)) + return false; + + // Check that the mask is sequential + MaskMatch SequentialMask{/*Height*/ 0}; + if (!SequentialMask.isValidMask(Mask)) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(DstReg, Src1Reg); }; + + return true; +} diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.h b/llvm/lib/Target/AIE/AIECombinerHelper.h index c8da15a4d838..75ada55c0692 100644 --- a/llvm/lib/Target/AIE/AIECombinerHelper.h +++ b/llvm/lib/Target/AIE/AIECombinerHelper.h @@ -34,6 +34,34 @@ struct AIELoadStoreCombineMatchData { bool RemoveInstr; }; +/// The mask is represented by a sawtooth function F with Period, Height and +/// Amplitude, i.e., F(idx + Period) = F(idx), where F(idx) = Height + idx * +/// Amplitude for 0 <= idx < Period (a Period of 0 means no repetition). 
+/// Example: mask = (4, 6, 8, 4, 6, 8) <=> Height=4, Amplitude=2, Period=3 +class MaskMatch { +public: + MaskMatch(unsigned MaskHeight, unsigned MaskPeriod = 0, int MaskAmplitude = 1) + : Period{MaskPeriod}, Height{MaskHeight}, Amplitude{MaskAmplitude} {} + + bool isValidMask(ArrayRef Mask) const; + unsigned getHeight() const { return Height; } + + static bool isMaskWithAllUndefs(ArrayRef Mask); + static std::optional getHeight(ArrayRef Mask, unsigned Period); + static std::optional getUniqueIndex(ArrayRef Mask); + +protected: + unsigned getMaskValue(unsigned Idx) const { + unsigned BaseIdx = Period == 0 ? Idx : Idx % Period; + return Height + BaseIdx * Amplitude; + } + + unsigned Period = 0; + unsigned Height = 0; + /// Negative amplitude can be used for reverse mask patterns. + int Amplitude = 1; +}; + /// Look for any PtrAdd instruction that use the same base as \a MI that can be /// combined with it and stores it in \a MatchData /// \return true if an instruction is found @@ -211,6 +239,9 @@ bool matchShuffleToExtractSubvec(MachineInstr &MI, MachineRegisterInfo &MRI, const AIEBaseInstrInfo &TII, BuildFnTy &MatchInfo); +bool matchShuffleToCopy(MachineInstr &MI, MachineRegisterInfo &MRI, + BuildFnTy &MatchInfo); + } // namespace llvm #endif diff --git a/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir b/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir index 52d31cadddb1..4b58797ba7f3 100644 --- a/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir +++ b/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir @@ -776,9 +776,7 @@ body: | ; CHECK: liveins: $dm0, $dm1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s32>) = COPY $dm0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s32>) = COPY $dm1 - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<64 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<64 x s32>), [[COPY1]], shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef) - ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<64 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY]](<64 x s32>) %1:_(<64 x s32>) = COPY $dm0 %2:_(<64 x s32>) = COPY $dm1 %0:_(<64 x s32>) = G_SHUFFLE_VECTOR %1(<64 x s32>), %2(<64 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1) @@ -907,24 +905,88 @@ body: | %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), shufflemask(4, 5, 6, 7, 8, 9, 10, 11) PseudoRET implicit $lr, implicit %0 ... -# Note: currently it is combined to G_AIE_VSEL but it should be combined to COPY -# which is not implemented yet. + +# Combine G_SHUFFLE_VECTOR into COPY --- -name: shuffle_vector_to_copy +name: shuffle_vector_to_copy_scalar +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: shuffle_vector_to_copy_scalar + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[C]](s32) + %1:_(s32) = G_CONSTANT i32 0 + %2:_(s32) = G_CONSTANT i32 1 + %0:_(s32) = G_SHUFFLE_VECTOR %1(s32), %2(s32), shufflemask(0) + PseudoRET implicit $lr, implicit %0 +... 
+--- +name: shuffle_vector_to_copy_vec tracksRegLiveness: true body: | bb.1: liveins: $x0, $x1 - ; CHECK-LABEL: name: shuffle_vector_to_copy + ; CHECK-LABEL: name: shuffle_vector_to_copy_vec ; CHECK: liveins: $x0, $x1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[AIE_VSEL:%[0-9]+]]:_(<16 x s32>) = G_AIE_VSEL [[COPY]], [[COPY1]], [[C]](s32) - ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_VSEL]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY]](<16 x s32>) %1:_(<16 x s32>) = COPY $x0 %2:_(<16 x s32>) = COPY $x1 %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) PseudoRET implicit $lr, implicit %0 ... +# Note: currently not commutable +--- +name: shufflevector_to_copy_commutable +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $l0, $l1 + ; CHECK-LABEL: name: shufflevector_to_copy_commutable + ; CHECK: liveins: $l0, $l1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $l0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $l1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(2, 3) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<2 x s32>) + %0:_(<2 x s32>) = COPY $l0 + %1:_(<2 x s32>) = COPY $l1 + %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(2,3) + PseudoRET implicit $lr, implicit %2 +... 
+--- +name: shuffle_vector_to_copy_vec_with_undef +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: shuffle_vector_to_copy_vec_with_undef + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY]](<16 x s32>) + %1:_(<16 x s32>) = COPY $x0 + %2:_(<16 x s32>) = COPY $x1 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), shufflemask(0, 1, 2, -1, 4, 5, 6, 7, 8, -1, 10, 11, 12, 13, 14, 15) + PseudoRET implicit $lr, implicit %0 +... +--- +name: shuffle_vector_to_copy_vec_no_combine +tracksRegLiveness: true +body: | + bb.1: + liveins: $wl0, $wl1 + ; CHECK-LABEL: name: shuffle_vector_to_copy_vec_no_combine + ; CHECK: liveins: $wl0, $wl1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<8 x s32>), [[COPY1]], shufflemask(0, 1, 2, 3, 4, 5, 6, 2) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<8 x s32>) + %1:_(<8 x s32>) = COPY $wl0 + %2:_(<8 x s32>) = COPY $wl1 + %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1(<8 x s32>), %2(<8 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 2) + PseudoRET implicit $lr, implicit %0 +... diff --git a/llvm/test/CodeGen/AIE/aie2p/shufflevec.ll b/llvm/test/CodeGen/AIE/aie2p/shufflevec.ll index bf0176dbde2b..5e1dda8fc490 100644 --- a/llvm/test/CodeGen/AIE/aie2p/shufflevec.ll +++ b/llvm/test/CodeGen/AIE/aie2p/shufflevec.ll @@ -521,3 +521,47 @@ entry: %shuffle = shufflevector <64 x i32> %a, <64 x i32> %b, <32 x i32> ret <32 x i32> %shuffle } + +; Combine G_SHUFFLE_VECTOR into COPY. Note: shufflevector doesn't accept scalar arguments. 
+define <16 x i32> @shuffle_vector_to_copy_vec(<16 x i32> noundef %a, <16 x i32> noundef %b) { +; CHECK-LABEL: shuffle_vector_to_copy_vec: +; CHECK: .p2align 4 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: ret lr +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: nop // Delay Slot 3 +; CHECK-NEXT: vmov x0, x2 // Delay Slot 2 +; CHECK-NEXT: nop // Delay Slot 1 +entry: + %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> + ret <16 x i32> %shuffle +} + +define <8 x i32> @shuffle_vector_to_copy_no_combine(<8 x i32> noundef %a, <8 x i32> noundef %b) { +; CHECK-LABEL: shuffle_vector_to_copy_no_combine: +; CHECK: .p2align 4 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: nopa ; nopx ; vextract.32 r1, x2, #5, vaddsign1 +; CHECK-NEXT: vextract.32 r2, x2, #6, vaddsign1 +; CHECK-NEXT: vextract.32 r3, x2, #7, vaddsign1 +; CHECK-NEXT: vextract.32 r4, x4, #0, vaddsign1 +; CHECK-NEXT: vextract.32 r5, x4, #1, vaddsign1 +; CHECK-NEXT: vextract.32 r6, x4, #2, vaddsign1 +; CHECK-NEXT: vextract.32 r0, x2, #4, vaddsign1 +; CHECK-NEXT: vextract.32 r7, x4, #3, vaddsign1 +; CHECK-NEXT: vpush.hi.32 x0, x0, r0 +; CHECK-NEXT: vpush.hi.32 x0, x0, r1 +; CHECK-NEXT: vpush.hi.32 x0, x0, r2 +; CHECK-NEXT: vpush.hi.32 x0, x0, r3 +; CHECK-NEXT: vpush.hi.32 x0, x0, r4 +; CHECK-NEXT: ret lr +; CHECK-NEXT: vpush.hi.32 x0, x0, r5 // Delay Slot 5 +; CHECK-NEXT: vpush.hi.32 x0, x0, r6 // Delay Slot 4 +; CHECK-NEXT: vpush.hi.32 x0, x0, r7 // Delay Slot 3 +; CHECK-NEXT: vmov wl0, wh0 // Delay Slot 2 +; CHECK-NEXT: nop // Delay Slot 1 +entry: + %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %shuffle +}