Skip to content

Commit

Permalink
[AIEX] Combine G_SHUFFLE_VECTOR to COPY
Browse files Browse the repository at this point in the history
  • Loading branch information
katerynamuts committed Feb 11, 2025
1 parent a6c19a6 commit 7fab9f6
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 11 deletions.
7 changes: 7 additions & 0 deletions llvm/lib/Target/AIE/AIECombine.td
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ def combine_vector_shuffle_extract_subvec : GICombineRule<
[{ return matchShuffleToExtractSubvec(*${root}, MRI, (const AIEBaseInstrInfo &)B.getTII(), ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;

// Rewrite a G_SHUFFLE_VECTOR using the build function prepared by
// matchShuffleToCopy. The rule only fires when that matcher returns true.
def combine_vector_shuffle_to_copy : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_SHUFFLE_VECTOR): $root,
[{ return matchShuffleToCopy(*${root}, MRI, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;

def AIE2PreLegalizerCombiner
: GICombiner<"AIE2PreLegalizerCombinerImpl", [ combine_unpad_vector, combine_pad_vector,
all_combines, combine_S20NarrowingOpt,
Expand All @@ -99,6 +105,7 @@ def AIE2PreLegalizerCombiner

def AIE2PPreLegalizerCombiner
: GICombiner<"AIE2PPreLegalizerCombinerImpl", [ combine_unpad_vector, combine_pad_vector,
combine_vector_shuffle_to_copy,
combine_vector_shuffle_extract_subvec,
all_combines, combine_S20NarrowingOpt,
combine_globalval_offset,
Expand Down
40 changes: 40 additions & 0 deletions llvm/lib/Target/AIE/AIECombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2266,3 +2266,43 @@ bool llvm::matchShuffleToBroadcast(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
return false;
}

/// Match a G_SHUFFLE_VECTOR that merely forwards its first source operand and
/// prepare its replacement by a COPY of that operand.
///
/// The match succeeds only when all of the following hold:
///  - the destination has exactly the same type as the first source;
///  - the mask has one entry per source element (one entry for a scalar);
///  - every mask entry is either undef (-1) or equal to its own position;
///  - at least one mask entry is not undef.
///
/// \param MI        The G_SHUFFLE_VECTOR instruction to inspect.
/// \param MRI       Register info used to query the operand types.
/// \param MatchInfo On success, set to a builder callback that emits
///                  `DstReg = COPY Src1Reg`. Left untouched on failure.
/// \returns true if the shuffle can be replaced by a COPY.
bool llvm::matchShuffleToCopy(MachineInstr &MI, MachineRegisterInfo &MRI,
                              BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);

  const Register DstReg = MI.getOperand(0).getReg();
  const Register Src1Reg = MI.getOperand(1).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  // A COPY is only legal between registers of the very same type.
  const LLT Src1Ty = MRI.getType(Src1Reg);
  if (MRI.getType(DstReg) != Src1Ty)
    return false;

  // Scalars are treated as single-element vectors.
  unsigned NumSrcElems = 1;
  if (Src1Ty.isVector())
    NumSrcElems = Src1Ty.getNumElements();
  if (Mask.size() != NumSrcElems)
    return false;

  // Single pass over the mask: reject any defined lane that does not select
  // its own position, and remember whether a defined lane was seen at all.
  bool SawDefinedLane = false;
  for (unsigned Lane = 0; Lane != NumSrcElems; ++Lane) {
    const int Selected = Mask[Lane];
    if (Selected == -1)
      continue;
    if (Selected != static_cast<int>(Lane))
      return false;
    SawDefinedLane = true;
  }

  // A mask consisting only of undef entries is deliberately left alone.
  if (!SawDefinedLane)
    return false;

  MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(DstReg, Src1Reg); };
  return true;
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/AIE/AIECombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,9 @@ bool matchShuffleToExtractSubvec(MachineInstr &MI, MachineRegisterInfo &MRI,
const AIEBaseInstrInfo &TII,
BuildFnTy &MatchInfo);

/// Match a G_SHUFFLE_VECTOR whose destination type equals the type of its
/// first source and whose mask maps every non-undef lane to its own position
/// (with at least one non-undef lane). On success, \p MatchInfo is set to a
/// builder callback that emits a COPY of the first source into the
/// destination register.
bool matchShuffleToCopy(MachineInstr &MI, MachineRegisterInfo &MRI,
BuildFnTy &MatchInfo);

} // namespace llvm

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -776,9 +776,7 @@ body: |
; CHECK: liveins: $dm0, $dm1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s32>) = COPY $dm0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s32>) = COPY $dm1
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<64 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<64 x s32>), [[COPY1]], shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<64 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY]](<64 x s32>)
%1:_(<64 x s32>) = COPY $dm0
%2:_(<64 x s32>) = COPY $dm1
%0:_(<64 x s32>) = G_SHUFFLE_VECTOR %1(<64 x s32>), %2(<64 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)
Expand Down Expand Up @@ -907,24 +905,88 @@ body: |
%0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), shufflemask(4, 5, 6, 7, 8, 9, 10, 11)
PseudoRET implicit $lr, implicit %0
...
# Note: currently it is combined to G_AIE_VSEL but it should be combined to COPY
# which is not implemented yet.

# Combine G_SHUFFLE_VECTOR into COPY
---
name: shuffle_vector_to_copy_scalar
tracksRegLiveness: true
body: |
bb.1:
; CHECK-LABEL: name: shuffle_vector_to_copy_scalar
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[C]](s32)
%1:_(s32) = G_CONSTANT i32 0
%2:_(s32) = G_CONSTANT i32 1
%0:_(s32) = G_SHUFFLE_VECTOR %1(s32), %2(s32), shufflemask(0)
PseudoRET implicit $lr, implicit %0
...
---
name: shuffle_vector_to_copy
name: shuffle_vector_to_copy_vec
tracksRegLiveness: true
body: |
bb.1:
liveins: $x0, $x1
; CHECK-LABEL: name: shuffle_vector_to_copy
; CHECK-LABEL: name: shuffle_vector_to_copy_vec
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[AIE_VSEL:%[0-9]+]]:_(<16 x s32>) = G_AIE_VSEL [[COPY]], [[COPY1]], [[C]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_VSEL]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY]](<16 x s32>)
%1:_(<16 x s32>) = COPY $x0
%2:_(<16 x s32>) = COPY $x1
%0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
PseudoRET implicit $lr, implicit %0
...
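# Note: the combine is currently not commutable: a mask that selects only the
# second source (e.g. shufflemask(2, 3) below) is left as a G_SHUFFLE_VECTOR.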
---
name: shufflevector_to_copy_commutable
alignment: 4
tracksRegLiveness: true
body: |
bb.0:
liveins: $l0, $l1
; CHECK-LABEL: name: shufflevector_to_copy_commutable
; CHECK: liveins: $l0, $l1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $l0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $l1
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(2, 3)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<2 x s32>)
%0:_(<2 x s32>) = COPY $l0
%1:_(<2 x s32>) = COPY $l1
%2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(2,3)
PseudoRET implicit $lr, implicit %2
...
# Identity mask over the first source with undef (-1) lanes at positions 3 and
# 9: the checks expect the G_SHUFFLE_VECTOR to be replaced by the first source.
---
name: shuffle_vector_to_copy_vec_with_undef
tracksRegLiveness: true
body: |
bb.1:
liveins: $x0, $x1
; CHECK-LABEL: name: shuffle_vector_to_copy_vec_with_undef
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY]](<16 x s32>)
%1:_(<16 x s32>) = COPY $x0
%2:_(<16 x s32>) = COPY $x1
%0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), shufflemask(0, 1, 2, -1, 4, 5, 6, 7, 8, -1, 10, 11, 12, 13, 14, 15)
PseudoRET implicit $lr, implicit %0
...
# Negative test: shufflemask(1, 2) is not the identity mask of the first
# source, so the checks expect the G_SHUFFLE_VECTOR to remain in place.
---
name: shuffle_vector_to_copy_vec_no_combine
tracksRegLiveness: true
body: |
bb.1:
liveins: $l0, $l1
; CHECK-LABEL: name: shuffle_vector_to_copy_vec_no_combine
; CHECK: liveins: $l0, $l1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $l0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $l1
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(1, 2)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<2 x s32>)
%1:_(<2 x s32>) = COPY $l0
%2:_(<2 x s32>) = COPY $l1
%0:_(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2(<2 x s32>), shufflemask(1, 2)
PseudoRET implicit $lr, implicit %0
...
44 changes: 44 additions & 0 deletions llvm/test/CodeGen/AIE/aie2p/shufflevec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -521,3 +521,47 @@ entry:
%shuffle = shufflevector <64 x i32> %a, <64 x i32> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <32 x i32> %shuffle
}

; Combine G_SHUFFLE_VECTOR into COPY. Note: shufflevector doesn't accept scalar arguments.
; Positive case: the mask is the identity over %a with lane 3 undef. The checks
; expect a single vmov and no per-element shuffle code.
define <16 x i32> @shuffle_vector_to_copy_vec(<16 x i32> noundef %a, <16 x i32> noundef %b) {
; CHECK-LABEL: shuffle_vector_to_copy_vec:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ret lr
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: vmov x0, x2 // Delay Slot 2
; CHECK-NEXT: nop // Delay Slot 1
entry:
%shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i32> %shuffle
}

; Negative case: the mask <4, ..., 11> is not the identity over %a (it takes
; the upper half of %a followed by the lower half of %b), so no copy is formed
; and the checks expect element-wise vextract/vpush code instead.
define <8 x i32> @shuffle_vector_to_copy_no_combine(<8 x i32> noundef %a, <8 x i32> noundef %b) {
; CHECK-LABEL: shuffle_vector_to_copy_no_combine:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; nopx ; vextract.32 r1, x2, #5, vaddsign1
; CHECK-NEXT: vextract.32 r2, x2, #6, vaddsign1
; CHECK-NEXT: vextract.32 r3, x2, #7, vaddsign1
; CHECK-NEXT: vextract.32 r4, x4, #0, vaddsign1
; CHECK-NEXT: vextract.32 r5, x4, #1, vaddsign1
; CHECK-NEXT: vextract.32 r6, x4, #2, vaddsign1
; CHECK-NEXT: vextract.32 r0, x2, #4, vaddsign1
; CHECK-NEXT: vextract.32 r7, x4, #3, vaddsign1
; CHECK-NEXT: vpush.hi.32 x0, x0, r0
; CHECK-NEXT: vpush.hi.32 x0, x0, r1
; CHECK-NEXT: vpush.hi.32 x0, x0, r2
; CHECK-NEXT: vpush.hi.32 x0, x0, r3
; CHECK-NEXT: vpush.hi.32 x0, x0, r4
; CHECK-NEXT: ret lr
; CHECK-NEXT: vpush.hi.32 x0, x0, r5 // Delay Slot 5
; CHECK-NEXT: vpush.hi.32 x0, x0, r6 // Delay Slot 4
; CHECK-NEXT: vpush.hi.32 x0, x0, r7 // Delay Slot 3
; CHECK-NEXT: vmov wl0, wh0 // Delay Slot 2
; CHECK-NEXT: nop // Delay Slot 1
entry:
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
ret <8 x i32> %shuffle
}

0 comments on commit 7fab9f6

Please sign in to comment.