diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 101534105116..fd56422b8d1d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -524,36 +524,24 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
   }
 
   // {1, 2, ..., |DstVector|} -> G_UNMERGE_VALUES
-  // Extracts the first chunk of the same size of the destination vector from
-  // the source
-  GeneratorType FirstQuarter = adderGenerator(0, DstNumElts - 1, 1);
-  if (matchCombineShuffleVector(MI, FirstQuarter, DstNumElts - 1)) {
-    // This optimization does not work if the target type is not a power of two,
-    // this can happen in some backends that support uneven vector types. We
-    // also need to make sure that the vector can be split into two.
-    if (SrcTy == DstTy || ((SrcNumElts / 2) % 2) != 0 ||
-        SrcNumElts % DstNumElts != 0)
-      return false;
-    ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
-    const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
-    createUnmergeValue(MI, TargetReg, DstReg, 0, 0, SrcNumElts);
-    MI.eraseFromParent();
-    return true;
-  }
-
-  // {|DstVector|, |DstVector|+1, ..., 2 * |DstVector|} -> G_UNMERGE_VALUES
-  // Extracts the second chunk of the same size of the destination vector from
-  // the source
-  GeneratorType SecondQuarter =
-      adderGenerator(DstNumElts, (DstNumElts * 2) - 1, 1);
-  if (matchCombineShuffleVector(MI, SecondQuarter, DstNumElts - 1)) {
-    if (((SrcNumElts / 2) % 2) != 0 || SrcNumElts % DstNumElts != 0)
-      return false;
-    ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
-    const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
-    createUnmergeValue(MI, TargetReg, DstReg, 1, 0, SrcNumElts);
-    MI.eraseFromParent();
-    return true;
+  // Extracts the chunks of the same size of the destination vector from the
+  // source
+  for (uint8_t Current = 0, Total = SrcNumElts; Current < Total; Current++) {
+    uint32_t Start = Current * DstNumElts, End = Start + DstNumElts - 1;
+    GeneratorType Generator = adderGenerator(Start, End, 1);
+    if (matchCombineShuffleVector(MI, Generator, DstNumElts - 1)) {
+      // This optimization does not work if the target type is not a power of
+      // two, this can happen in some backends that support uneven vector types.
+      // We also need to make sure that the vector can be split into two.
+      if (SrcTy == DstTy || ((SrcNumElts / 2) % 2) != 0 ||
+          SrcNumElts % DstNumElts != 0)
+        return false;
+      ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+      const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
+      createUnmergeValue(MI, TargetReg, DstReg, Current, 0, SrcNumElts);
+      MI.eraseFromParent();
+      return true;
+    }
   }
 
   // After this point, it is assumed our shufflevectors work on vectors that can
diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector.mir
index 5089018c5a9c..1c2355203761 100644
--- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector.mir
+++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector.mir
@@ -557,6 +557,86 @@ body: |
     PseudoRET implicit $lr, implicit %0
 ...
+---
+name: extract_vector_256_1024_q1
+legalized: false
+body: |
+  bb.1.entry:
+    liveins: $y2
+    ; CHECK-LABEL: name: extract_vector_256_1024_q1
+    ; CHECK: liveins: $y2
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>)
+    ; CHECK-NEXT: $wl0 = COPY [[UV3]](<8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0
+    %1:_(<32 x s32>) = COPY $y2
+    %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(8, 9, 10, 11, 12, 13, 14, 15)
+    $wl0 = COPY %0:_(<8 x s32>)
+    PseudoRET implicit $lr, implicit $wl0
+...
+
+---
+name: extract_vector_256_1024_q2
+legalized: false
+body: |
+  bb.1.entry:
+    liveins: $y2
+    ; CHECK-LABEL: name: extract_vector_256_1024_q2
+    ; CHECK: liveins: $y2
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV1]](<16 x s32>)
+    ; CHECK-NEXT: $wl0 = COPY [[UV2]](<8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0
+    %1:_(<32 x s32>) = COPY $y2
+    %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(16, 17, 18, 19, 20, 21, 22, 23)
+    $wl0 = COPY %0:_(<8 x s32>)
+    PseudoRET implicit $lr, implicit $wl0
+...
+
+---
+name: extract_vector_256_1024_q3
+legalized: false
+body: |
+  bb.1.entry:
+    liveins: $y2
+    ; CHECK-LABEL: name: extract_vector_256_1024_q3
+    ; CHECK: liveins: $y2
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV1]](<16 x s32>)
+    ; CHECK-NEXT: $wl0 = COPY [[UV3]](<8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0
+    %1:_(<32 x s32>) = COPY $y2
+    %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(24, 25, 26, 27, 28, 29, 30, 31)
+    $wl0 = COPY %0:_(<8 x s32>)
+    PseudoRET implicit $lr, implicit $wl0
+...
+
+---
+name: extract_vector_256_1024_q4
+legalized: false
+body: |
+  bb.1.entry:
+    liveins: $y2
+    ; CHECK-LABEL: name: extract_vector_256_1024_q4
+    ; CHECK: liveins: $y2
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>)
+    ; CHECK-NEXT: $wl0 = COPY [[UV2]](<8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0
+    %1:_(<32 x s32>) = COPY $y2
+    %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7)
+    $wl0 = COPY %0:_(<8 x s32>)
+    PseudoRET implicit $lr, implicit $wl0
+...
+
 ---
 name: insert_vector_16_elements
 legalized: false