Xilinx · ValentijnvdBeek · Jun 27, 2024 · martien-de-jong · Jun 28, 2024 · ValentijnvdBeek
@@ -524,36 +524,24 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
   }
 
   // {1, 2, ..., |DstVector|} -> G_UNMERGE_VALUES
-  // Extracts the first chunk of the same size of the destination vector from
-  // the source
-  GeneratorType FirstQuarter = adderGenerator(0, DstNumElts - 1, 1);
-  if (matchCombineShuffleVector(MI, FirstQuarter, DstNumElts - 1)) {
-    // This optimization does not work if the target type is not a power of two,
-    // this can happen in some backends that support uneven vector types. We
-    // also need to make sure that the vector can be split into two.
-    if (SrcTy == DstTy || ((SrcNumElts / 2) % 2) != 0 ||
-        SrcNumElts % DstNumElts != 0)
-      return false;
-    ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
-    const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
-    createUnmergeValue(MI, TargetReg, DstReg, 0, 0, SrcNumElts);
-    MI.eraseFromParent();
-    return true;
-  }
-
-  // {|DstVector|, |DstVector|+1, ..., 2 * |DstVector|} -> G_UNMERGE_VALUES
-  // Extracts the second chunk of the same size of the destination vector from
-  // the source
-  GeneratorType SecondQuarter =
-      adderGenerator(DstNumElts, (DstNumElts * 2) - 1, 1);
-  if (matchCombineShuffleVector(MI, SecondQuarter, DstNumElts - 1)) {
-    if (((SrcNumElts / 2) % 2) != 0 || SrcNumElts % DstNumElts != 0)
-      return false;
-    ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
-    const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
-    createUnmergeValue(MI, TargetReg, DstReg, 1, 0, SrcNumElts);
-    MI.eraseFromParent();
-    return true;
+  // Try each DstNumElts-wide chunk of the source. The counters are `unsigned`
+  // because uint8_t would wrap for sources with more than 255 elements.
+  for (unsigned Current = 0, Total = SrcNumElts; Current < Total; Current++) {
+    uint32_t Start = Current * DstNumElts, End = Start + DstNumElts - 1;
+    GeneratorType Generator = adderGenerator(Start, End, 1);
+    if (matchCombineShuffleVector(MI, Generator, DstNumElts - 1)) {
+      // Bail out if the types are equal, if half the source has an odd element
+      // count (some backends support uneven vector types), or if the source
+      // cannot be divided evenly into destination-sized chunks.
+      if (SrcTy == DstTy || ((SrcNumElts / 2) % 2) != 0 ||
+          SrcNumElts % DstNumElts != 0)
+        return false;
+      ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+      const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
+      createUnmergeValue(MI, TargetReg, DstReg, Current, 0, SrcNumElts);
+      MI.eraseFromParent();
+      return true;
+    }
   }
 
   // After this point, it is assumed our shufflevectors work on vectors that can

@@ -557,6 +557,86 @@ body:             |
     PseudoRET implicit $lr, implicit %0
 ...
 
+---
+name:            extract_vector_256_1024_q1 # mask 8-15: second <8 x s32> quarter of the <32 x s32> source
+legalized:       false
+body:             |
+  bb.1.entry:
+    liveins: $y2
+    ; CHECK-LABEL: name: extract_vector_256_1024_q1
+    ; CHECK: liveins: $y2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>)
+    ; CHECK-NEXT: $wl0 = COPY [[UV3]](<8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0
+    %1:_(<32 x s32>) = COPY $y2
+    %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(8, 9, 10, 11, 12, 13, 14, 15)
+    $wl0 = COPY %0:_(<8 x s32>)
+    PseudoRET implicit $lr, implicit $wl0
+...
+
+---
+name:            extract_vector_256_1024_q2 # mask 16-23: third <8 x s32> quarter of the <32 x s32> source
+legalized:       false
+body:             |
+  bb.1.entry:
+    liveins: $y2
+    ; CHECK-LABEL: name: extract_vector_256_1024_q2
+    ; CHECK: liveins: $y2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV1]](<16 x s32>)
+    ; CHECK-NEXT: $wl0 = COPY [[UV2]](<8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0
+    %1:_(<32 x s32>) = COPY $y2
+    %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(16, 17, 18, 19, 20, 21, 22, 23)
+    $wl0 = COPY %0:_(<8 x s32>)
+    PseudoRET implicit $lr, implicit $wl0
+...
+
+---
+name:            extract_vector_256_1024_q3 # mask 24-31: fourth (last) <8 x s32> quarter of the <32 x s32> source
+legalized:       false
+body:             |
+  bb.1.entry:
+    liveins: $y2
+    ; CHECK-LABEL: name: extract_vector_256_1024_q3
+    ; CHECK: liveins: $y2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV1]](<16 x s32>)
+    ; CHECK-NEXT: $wl0 = COPY [[UV3]](<8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0
+    %1:_(<32 x s32>) = COPY $y2
+    %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(24, 25, 26, 27, 28, 29, 30, 31)
+    $wl0 = COPY %0:_(<8 x s32>)
+    PseudoRET implicit $lr, implicit $wl0
+...
+
+---
+name:            extract_vector_256_1024_q4 # mask 0-7: first <8 x s32> quarter of the <32 x s32> source, despite the q4 suffix
+legalized:       false
+body:             |
+  bb.1.entry:
+    liveins: $y2
+    ; CHECK-LABEL: name: extract_vector_256_1024_q4
+    ; CHECK: liveins: $y2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>)
+    ; CHECK-NEXT: $wl0 = COPY [[UV2]](<8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0
+    %1:_(<32 x s32>) = COPY $y2
+    %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7)
+    $wl0 = COPY %0:_(<8 x s32>)
+    PseudoRET implicit $lr, implicit $wl0
+...
+
 ---
 name:            insert_vector_16_elements
 legalized:       false