diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.cpp b/llvm/lib/Target/AIE/AIECombinerHelper.cpp
index 2b168f92d5df..4eb3a73aeac5 100644
--- a/llvm/lib/Target/AIE/AIECombinerHelper.cpp
+++ b/llvm/lib/Target/AIE/AIECombinerHelper.cpp
@@ -1791,13 +1791,13 @@ bool llvm::matchShuffleToVSel(
   const LLT DstTy = MRI.getType(DstReg);
   const LLT Src1Ty = MRI.getType(Src1Reg);
 
-  if (Src1Ty.getSizeInBits() != 512)
+  if (Src1Ty.getSizeInBits() != 512 || Src1Ty.getScalarSizeInBits() == 64)
     return false;
 
   const unsigned NumDstElems = DstTy.getNumElements();
   const unsigned NumSrcElems = Src1Ty.getNumElements();
-  assert(NumDstElems == NumSrcElems &&
-         "Expected same number of elements in dst and src vector types");
+  if (NumDstElems != NumSrcElems)
+    return false;
 
   // Check that the shuffle mask can be converted into VSel mask:
   // 1. The shuffle mask doesn't contain indices that correspond to the same
@@ -1805,18 +1805,43 @@
   //    the i-th element from Src2 is used.
   // 2. The mask indices modulo the number of elements are in strictly ascending
   //    order.
-  int PrevIdx = Mask[0] % NumSrcElems;
   const size_t NumElems = Mask.size();
-  for (unsigned I = 1; I < NumElems; I++) {
-    int CurrIdx = Mask[I] % NumSrcElems;
+  unsigned I = 0;
+  int PrevIdx = -1;
+  while (I < NumElems && Mask[I] == -1) {
+    ++I;
+    ++PrevIdx;
+  }
+
+  // The mask contains only -1 (undef) indices.
+  if (I == NumElems)
+    return false;
+
+  int CurrIdx = Mask[I] % NumSrcElems;
+  if (CurrIdx <= PrevIdx)
+    return false;
+
+  PrevIdx = CurrIdx;
+  ++I;
+  for (; I < NumElems; ++I) {
+    if (Mask[I] == -1) {
+      ++PrevIdx;
+      continue;
+    }
+
+    CurrIdx = Mask[I] % NumSrcElems;
     if (CurrIdx <= PrevIdx)
       return false;
+
     PrevIdx = CurrIdx;
   }
 
   // Create the mask
   unsigned long long DstMask = 0;
-  for (unsigned I = 0; I < NumElems; I++) {
+  for (I = 0; I < NumElems; I++) {
     int Idx = Mask[I];
+    if (Idx == -1)
+      continue;
+
     if (Idx >= (int)NumSrcElems) {
       unsigned long long ElemMask = 1 << I;
       DstMask |= ElemMask;
diff --git a/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir b/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir
index 844e900bdbb2..7f2129e63ae0 100644
--- a/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AIE/GlobalISel/prelegalizercombiner-shuffle-vector.mir
@@ -120,31 +120,86 @@ body: |
     PseudoRET implicit $lr, implicit %0
 ...
 ---
+name: shuffle_vector_all_undef
+alignment: 16
+exposesReturnsTwice: false
+legalized: false
+body: |
+  bb.1.entry:
+    liveins: $x2
+
+    ; CHECK-LABEL: name: shuffle_vector_all_undef
+    ; CHECK: liveins: $x2
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[DEF]](<16 x s32>)
+    %1:_(<16 x s32>) = COPY $x2
+    %8:_(<16 x s32>) = G_IMPLICIT_DEF
+    %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %8(<16 x s32>), %1, shufflemask(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)
+    PseudoRET implicit $lr, implicit %0
+...
+---
 name: shuffle_vector_vsel_valid_mask_ordered_indices
 alignment: 16
 exposesReturnsTwice: false
 legalized: false
 body: |
   bb.1.entry:
-    liveins: $r0, $wl4, $x2
+    liveins: $x2
 
     ; CHECK-LABEL: name: shuffle_vector_vsel_valid_mask_ordered_indices
-    ; CHECK: liveins: $r0, $wl4, $x2
+    ; CHECK: liveins: $x2
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x2
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4
-    ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY1]](<8 x s32>)
-    ; CHECK-NEXT: [[AIE_PAD_VECTOR_UNDEF:%[0-9]+]]:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF [[AIE_UNPAD_VECTOR]](<4 x s32>)
-    ; CHECK-NEXT: [[AIE_VSEL:%[0-9]+]]:_(<16 x s32>) = G_AIE_VSEL [[AIE_PAD_VECTOR_UNDEF]], [[COPY]], 65520
-    ; CHECK-NEXT: $x0 = COPY [[AIE_VSEL]](<16 x s32>)
-    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[AIE_VSEL:%[0-9]+]]:_(<16 x s32>) = G_AIE_VSEL [[DEF]], [[COPY]], 65520
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_VSEL]](<16 x s32>)
     %1:_(<16 x s32>) = COPY $x2
-    %4:_(<8 x s32>) = COPY $wl4
-    %3:_(<4 x s32>) = G_AIE_UNPAD_VECTOR %4(<8 x s32>)
-    %8:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF %3(<4 x s32>)
+    %8:_(<16 x s32>) = G_IMPLICIT_DEF
     %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %8(<16 x s32>), %1, shufflemask(0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
-    $x0 = COPY %0(<16 x s32>)
-    PseudoRET implicit $lr, implicit $x0
+    PseudoRET implicit $lr, implicit %0
+...
+---
+name: shuffle_vector_vsel_valid_mask_ordered_indices_with_undef
+alignment: 16
+exposesReturnsTwice: false
+legalized: false
+body: |
+  bb.1.entry:
+    liveins: $x2
+
+    ; CHECK-LABEL: name: shuffle_vector_vsel_valid_mask_ordered_indices_with_undef
+    ; CHECK: liveins: $x2
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x2
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[AIE_VSEL:%[0-9]+]]:_(<16 x s32>) = G_AIE_VSEL [[DEF]], [[COPY]], 65520
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_VSEL]](<16 x s32>)
+    %1:_(<16 x s32>) = COPY $x2
+    %8:_(<16 x s32>) = G_IMPLICIT_DEF
+    %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %8(<16 x s32>), %1, shufflemask(-1, -1, -1, -1, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
+    PseudoRET implicit $lr, implicit %0
+...
+---
+name: shuffle_vector_vsel_valid_mask_with_undef
+alignment: 16
+exposesReturnsTwice: false
+legalized: false
+body: |
+  bb.1.entry:
+    liveins: $x2
+
+    ; CHECK-LABEL: name: shuffle_vector_vsel_valid_mask_with_undef
+    ; CHECK: liveins: $x2
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x2
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[AIE_VSEL:%[0-9]+]]:_(<16 x s32>) = G_AIE_VSEL [[DEF]], [[COPY]], 64992
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_VSEL]](<16 x s32>)
+    %1:_(<16 x s32>) = COPY $x2
+    %8:_(<16 x s32>) = G_IMPLICIT_DEF
+    %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %8(<16 x s32>), %1, shufflemask(0, 1, 2, -1, -1, 21, 22, 23, 24, -1, 26, 27, 28, 29, 30, 31)
+    PseudoRET implicit $lr, implicit %0
 ...
 ---
 name: shuffle_vector_vsel_valid_mask_mixed_indices
@@ -153,25 +208,61 @@ exposesReturnsTwice: false
 legalized: false
 body: |
   bb.1.entry:
-    liveins: $r0, $wl4, $x2
+    liveins: $x2
 
     ; CHECK-LABEL: name: shuffle_vector_vsel_valid_mask_mixed_indices
-    ; CHECK: liveins: $r0, $wl4, $x2
+    ; CHECK: liveins: $x2
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x2
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4
-    ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY1]](<8 x s32>)
-    ; CHECK-NEXT: [[AIE_PAD_VECTOR_UNDEF:%[0-9]+]]:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF [[AIE_UNPAD_VECTOR]](<4 x s32>)
-    ; CHECK-NEXT: [[AIE_VSEL:%[0-9]+]]:_(<16 x s32>) = G_AIE_VSEL [[AIE_PAD_VECTOR_UNDEF]], [[COPY]], 65521
-    ; CHECK-NEXT: $x0 = COPY [[AIE_VSEL]](<16 x s32>)
-    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[AIE_VSEL:%[0-9]+]]:_(<16 x s32>) = G_AIE_VSEL [[DEF]], [[COPY]], 65521
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_VSEL]](<16 x s32>)
     %1:_(<16 x s32>) = COPY $x2
-    %4:_(<8 x s32>) = COPY $wl4
-    %3:_(<4 x s32>) = G_AIE_UNPAD_VECTOR %4(<8 x s32>)
-    %8:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF %3(<4 x s32>)
+    %8:_(<16 x s32>) = G_IMPLICIT_DEF
     %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %8(<16 x s32>), %1, shufflemask(16, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
-    $x0 = COPY %0(<16 x s32>)
-    PseudoRET implicit $lr, implicit $x0
+    PseudoRET implicit $lr, implicit %0
+...
+---
+name: shuffle_vector_vsel_valid_mask_mixed_indices_with_undef
+alignment: 16
+exposesReturnsTwice: false
+legalized: false
+body: |
+  bb.1.entry:
+    liveins: $x2
+
+    ; CHECK-LABEL: name: shuffle_vector_vsel_valid_mask_mixed_indices_with_undef
+    ; CHECK: liveins: $x2
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x2
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[AIE_VSEL:%[0-9]+]]:_(<16 x s32>) = G_AIE_VSEL [[DEF]], [[COPY]], 64465
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_VSEL]](<16 x s32>)
+    %1:_(<16 x s32>) = COPY $x2
+    %8:_(<16 x s32>) = G_IMPLICIT_DEF
+    %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %8(<16 x s32>), %1, shufflemask(16, 1, -1, 3, 20, -1, 22, 23, 24, 25, -1, 27, 28, 29, 30, 31)
+    PseudoRET implicit $lr, implicit %0
+...
+---
+name: shuffle_vector_vsel_invalid_mask
+alignment: 16
+exposesReturnsTwice: false
+legalized: false
+body: |
+  bb.1.entry:
+    liveins: $x2
+
+    ; CHECK-LABEL: name: shuffle_vector_vsel_invalid_mask
+    ; CHECK: liveins: $x2
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x2
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[DEF]](<16 x s32>), [[COPY]], shufflemask(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<16 x s32>)
+    %1:_(<16 x s32>) = COPY $x2
+    %8:_(<16 x s32>) = G_IMPLICIT_DEF
+    %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %8(<16 x s32>), %1, shufflemask(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
+    PseudoRET implicit $lr, implicit %0
 ...
 ---
 name: shuffle_vector_vsel_invalid_mask_with_repeated_index
@@ -180,25 +271,19 @@ exposesReturnsTwice: false
 legalized: false
 body: |
   bb.1.entry:
-    liveins: $r0, $wl4, $x2
+    liveins: $x2
 
     ; CHECK-LABEL: name: shuffle_vector_vsel_invalid_mask_with_repeated_index
-    ; CHECK: liveins: $r0, $wl4, $x2
+    ; CHECK: liveins: $x2
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x2
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4
-    ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY1]](<8 x s32>)
-    ; CHECK-NEXT: [[AIE_PAD_VECTOR_UNDEF:%[0-9]+]]:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF [[AIE_UNPAD_VECTOR]](<4 x s32>)
-    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[AIE_PAD_VECTOR_UNDEF]](<16 x s32>), [[COPY]], shufflemask(0, 1, 2, 3, 16, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
-    ; CHECK-NEXT: $x0 = COPY [[SHUF]](<16 x s32>)
-    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[DEF]](<16 x s32>), [[COPY]], shufflemask(0, 1, 2, 3, 16, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<16 x s32>)
     %1:_(<16 x s32>) = COPY $x2
-    %4:_(<8 x s32>) = COPY $wl4
-    %3:_(<4 x s32>) = G_AIE_UNPAD_VECTOR %4(<8 x s32>)
-    %8:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF %3(<4 x s32>)
+    %8:_(<16 x s32>) = G_IMPLICIT_DEF
     %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %8(<16 x s32>), %1, shufflemask(0, 1, 2, 3, 16, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
-    $x0 = COPY %0(<16 x s32>)
-    PseudoRET implicit $lr, implicit $x0
+    PseudoRET implicit $lr, implicit %0
 ...
 ---
 name: shuffle_vector_vsel_invalid_mask_with_unordered_indices
@@ -207,23 +292,38 @@ exposesReturnsTwice: false
 legalized: false
 body: |
   bb.1.entry:
-    liveins: $r0, $wl4, $x2
+    liveins: $x2
 
     ; CHECK-LABEL: name: shuffle_vector_vsel_invalid_mask_with_unordered_indices
-    ; CHECK: liveins: $r0, $wl4, $x2
+    ; CHECK: liveins: $x2
    ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x2
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4
-    ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY1]](<8 x s32>)
-    ; CHECK-NEXT: [[AIE_PAD_VECTOR_UNDEF:%[0-9]+]]:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF [[AIE_UNPAD_VECTOR]](<4 x s32>)
-    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[AIE_PAD_VECTOR_UNDEF]](<16 x s32>), [[COPY]], shufflemask(1, 0, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
-    ; CHECK-NEXT: $x0 = COPY [[SHUF]](<16 x s32>)
-    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[DEF]](<16 x s32>), [[COPY]], shufflemask(1, 0, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<16 x s32>)
     %1:_(<16 x s32>) = COPY $x2
-    %4:_(<8 x s32>) = COPY $wl4
-    %3:_(<4 x s32>) = G_AIE_UNPAD_VECTOR %4(<8 x s32>)
-    %8:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF %3(<4 x s32>)
+    %8:_(<16 x s32>) = G_IMPLICIT_DEF
     %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %8(<16 x s32>), %1, shufflemask(1, 0, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
-    $x0 = COPY %0(<16 x s32>)
-    PseudoRET implicit $lr, implicit $x0
+    PseudoRET implicit $lr, implicit %0
+...
+---
+name: shuffle_vector_vsel_invalid_s64
+alignment: 16
+exposesReturnsTwice: false
+legalized: false
+body: |
+  bb.1.entry:
+    liveins: $x2
+
+    ; CHECK-LABEL: name: shuffle_vector_vsel_invalid_s64
+    ; CHECK: liveins: $x2
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s64>) = COPY $x2
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s64>) = G_SHUFFLE_VECTOR [[DEF]](<8 x s64>), [[COPY]], shufflemask(8, 1, 2, 3, 12, 13, 14, 15)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<8 x s64>)
+    %1:_(<8 x s64>) = COPY $x2
+    %8:_(<8 x s64>) = G_IMPLICIT_DEF
+    %0:_(<8 x s64>) = G_SHUFFLE_VECTOR %8(<8 x s64>), %1, shufflemask(8, 1, 2, 3, 12, 13, 14, 15)
+    PseudoRET implicit $lr, implicit %0
 ...
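
For review convenience, the mask logic above can be exercised outside of LLVM. The following is a minimal standalone sketch, not the patch itself: computeVSelMask is a hypothetical helper that mirrors the matcher's two rules (undef entries advance the expected position; indices modulo the source element count must be strictly ascending) and then builds the select mask, in which bit I is set exactly when output position I reads its element from the second source.

// Standalone sketch of matchShuffleToVSel's mask handling (illustration only).
#include <cstdint>
#include <cstdio>
#include <vector>

// Returns true and fills DstMask when Mask can be encoded as a VSEL mask.
// NumSrcElems is the element count of each source vector.
static bool computeVSelMask(const std::vector<int> &Mask, unsigned NumSrcElems,
                            uint64_t &DstMask) {
  const size_t NumElems = Mask.size();
  size_t I = 0;
  int PrevIdx = -1;
  // Leading undef (-1) entries only advance the expected position.
  while (I < NumElems && Mask[I] == -1) {
    ++I;
    ++PrevIdx;
  }
  // An all-undef mask is left to other combines.
  if (I == NumElems)
    return false;
  // Indices modulo NumSrcElems must be strictly ascending, so that output
  // position J always reads element J of one of the two sources.
  for (; I < NumElems; ++I) {
    if (Mask[I] == -1) {
      ++PrevIdx; // Undef may take either source's element at this position.
      continue;
    }
    const int CurrIdx = Mask[I] % (int)NumSrcElems;
    if (CurrIdx <= PrevIdx)
      return false;
    PrevIdx = CurrIdx;
  }
  // Bit J selects the second source when the shuffle index refers to it.
  DstMask = 0;
  for (size_t J = 0; J < NumElems; ++J)
    if (Mask[J] >= (int)NumSrcElems)
      DstMask |= 1ULL << J; // 1ULL: masks can exceed 32 bits for narrow elements.
  return true;
}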
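The immediates expected by the tests above drop out of this sketch directly; for instance, shufflemask(-1, -1, -1, -1, 20, 21, ..., 31) selects the second source at positions 4 through 15, giving 0xFFF0 = 65520. A small driver (again, illustration only):

int main() {
  uint64_t M = 0;

  // shuffle_vector_vsel_valid_mask_ordered_indices_with_undef: bits 4..15.
  std::vector<int> Ordered = {-1, -1, -1, -1, 20, 21, 22, 23,
                              24, 25, 26, 27, 28, 29, 30, 31};
  if (computeVSelMask(Ordered, 16, M))
    std::printf("%llu\n", (unsigned long long)M); // 65520 (0xFFF0)

  // shuffle_vector_vsel_valid_mask_with_undef: bits 5..8 and 10..15.
  std::vector<int> WithUndef = {0, 1, 2, -1, -1, 21, 22, 23,
                                24, -1, 26, 27, 28, 29, 30, 31};
  if (computeVSelMask(WithUndef, 16, M))
    std::printf("%llu\n", (unsigned long long)M); // 64992

  // shuffle_vector_vsel_invalid_mask: index 1 repeats, so no VSEL is formed.
  std::vector<int> Repeated = {0, 1, 1, 1, 1, 1, 1, 1,
                               1, 1, 1, 1, 1, 1, 1, 1};
  std::printf("%s\n", computeVSelMask(Repeated, 16, M) ? "vsel" : "no vsel");
  return 0;
}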