Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit de96091

Browse files
committed Feb 5, 2025
[AIE2P] Implemented VLDB.UNPACK combine
1 parent 9c1a780 commit de96091

14 files changed

+3270
-16
lines changed
 

‎llvm/lib/Target/AIE/AIE2InstructionSelector.cpp

+12-3
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,11 @@ class AIE2InstructionSelector : public AIEBaseInstructionSelector {
148148
std::optional<APInt> Immediate, bool IsSigned,
149149
bool Is32Lanes);
150150
bool canCombinePACK(MachineInstr &MemOp, MachineInstr &CombOp);
151+
bool canCombineUNPACKLoad(MachineInstr &MemOp, MachineInstr &CombOp,
152+
MachineRegisterInfo &MRI);
153+
std::optional<LoadStoreOpcodes> getCombinedOpcodeUNPACKLoad(
154+
const MachineInstr &MemOp, const MachineInstr &CombOp,
155+
std::optional<APInt> Immediate, MachineRegisterInfo &MRI);
151156

152157
// const AIE2TargetMachine &TM;
153158
const AIE2InstrInfo &TII;
@@ -819,7 +824,8 @@ bool AIE2InstructionSelector::selectVPACK(MachineInstr &I,
819824
return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
820825
}
821826

822-
std::optional<LoadStoreOpcodes> getCombinedOpcodeUNPACKLoad(
827+
std::optional<LoadStoreOpcodes>
828+
AIE2InstructionSelector::getCombinedOpcodeUNPACKLoad(
823829
const MachineInstr &MemOp, const MachineInstr &CombOp,
824830
std::optional<APInt> Immediate, MachineRegisterInfo &MRI) {
825831

@@ -910,8 +916,9 @@ std::optional<LoadStoreOpcodes> getCombinedOpcodeUNPACKLoad(
910916
return {};
911917
}
912918

913-
bool canCombineUNPACKLoad(MachineInstr &MemOp, MachineInstr &CombOp,
914-
MachineRegisterInfo &MRI) {
919+
bool AIE2InstructionSelector::canCombineUNPACKLoad(MachineInstr &MemOp,
920+
MachineInstr &CombOp,
921+
MachineRegisterInfo &MRI) {
915922
const std::optional<APInt> NoImmediate = {};
916923
return getCombinedOpcodeUNPACKLoad(MemOp, CombOp, NoImmediate, MRI)
917924
.has_value();
@@ -950,6 +957,8 @@ bool AIE2InstructionSelector::selectG_AIE_LOAD_UNPACK(
950957
std::optional<LoadStoreOpcodes> LSO = getCombinedOpcodeUNPACKLoad(
951958
AMI->MemI, UNPACKI, AMI->ImmediateOffset, MRI);
952959

960+
assert(LSO && "Unexpected VLDB.UNPACK combine failure");
961+
953962
Register DstReg = UNPACKI.getOperand(0).getReg();
954963
Register SignReg = UNPACKI.getOperand(3).getReg();
955964

‎llvm/lib/Target/AIE/AIEBaseInstructionSelector.cpp

+8-9
Original file line numberDiff line numberDiff line change
@@ -714,14 +714,6 @@ bool AIEBaseInstructionSelector::selectG_AIE_LOAD_CONV(
714714
NewInstr.cloneMemRefs(AMI->MemI);
715715

716716
CONVI.eraseFromParent();
717-
718-
// Erasing the load instruction breaks later on in the selection code. That is
719-
// because an iterator is kept on erased instructions. This breaks while
720-
// trying to eliminate a trivially dead instruction which requires access to
721-
// its memory operands which have been erased, thus leading to a seg fault. To
722-
// remedy this, we keep the load to be removed by the trivial dead code
723-
// elimination and we make sure to assign new virtual register definitions to
724-
// its live operands to respect SSA.
725717
makeDeadMI(*LoadOp, MRI);
726718

727719
return constrainSelectedInstRegOperands(*NewInstr.getInstr(), TII, TRI, RBI);
@@ -750,7 +742,14 @@ AIEBaseInstructionSelector::getCombinedOpcodeCONVLoad(
750742
}
751743

752744
// Make an instruction trivially dead by creating and distributing new virtual
753-
// registers to its defs
745+
// registers to its defs.
746+
// Erasing the load instruction directly breaks later selection code, because
747+
// an iterator is still held on the erased instruction. The crash occurs when
748+
// trivially dead instructions are eliminated: that step accesses the
749+
// instruction's memory operands, which have been erased, causing a seg fault.
750+
// To remedy this, we keep the load so that trivial dead code elimination
751+
// removes it, and we assign new virtual register definitions to its live
752+
// operands to respect SSA.
754753
void AIEBaseInstructionSelector::makeDeadMI(MachineInstr &MI,
755754
MachineRegisterInfo &MRI) {
756755
if (MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) {

‎llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp

+313-4
Large diffs are not rendered by default.

‎llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-indexed-vldb_unpack.mir

+673
Large diffs are not rendered by default.

‎llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-indexed-vst_pack.mir

+25
Original file line numberDiff line numberDiff line change
@@ -454,3 +454,28 @@ body: |
454454
%11:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %0:vregbank(<128 x s8>), %6:gprregbank(s32)
455455
G_AIE_OFFSET_STORE %11:vregbank(<128 x s4>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<128 x s4>))
456456
...
457+
458+
---
459+
name: VST_PACK_D8_D16_COPY_BITCAST
460+
alignment: 16
461+
legalized: true
462+
regBankSelected: true
463+
body: |
464+
bb.1.entry:
465+
liveins: $p0, $r0, $x0
466+
; CHECK-LABEL: name: VST_PACK_D8_D16_COPY_BITCAST
467+
; CHECK: liveins: $p0, $r0, $x0
468+
; CHECK-NEXT: {{ $}}
469+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0
470+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
471+
; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
472+
; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 96, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<8 x s32>))
473+
%0:vregbank(<32 x s16>) = COPY $x0
474+
%2:ptrregbank(p0) = COPY $p0
475+
%6:gprregbank(s32) = G_CONSTANT i32 0
476+
%5:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32)
477+
%3:modregbank(s20) = G_CONSTANT i20 96
478+
%100:vregbank(<8 x s32>) = G_BITCAST %5(<32 x s8>)
479+
%110:vregbank(<8 x s32>) = COPY %100(<8 x s32>)
480+
G_AIE_OFFSET_STORE %110:vregbank(<8 x s32>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<8 x s32>))
481+
...

‎llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-indexed-vst_srs.mir

+27
Original file line numberDiff line numberDiff line change
@@ -1526,3 +1526,30 @@ body: |
15261526
G_AIE_OFFSET_STORE %107, %0, %11 :: (store (<32 x s16>) into stack - 64)
15271527
G_AIE_OFFSET_STORE %108, %0, %12 :: (store (<32 x s16>) into stack - 64)
15281528
...
1529+
1530+
---
1531+
name: VST_SRS_2x_v16_acc32_signed_BITCAST
1532+
alignment: 16
1533+
legalized: true
1534+
regBankSelected: true
1535+
body: |
1536+
bb.1.entry:
1537+
liveins: $m0, $p0, $r1
1538+
; CHECK-LABEL: name: VST_SRS_2x_v16_acc32_signed_BITCAST
1539+
; CHECK: liveins: $m0, $p0, $r1
1540+
; CHECK-NEXT: {{ $}}
1541+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
1542+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:edj = COPY $m0
1543+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r1
1544+
; CHECK-NEXT: $crsrsmode = MOV_scalar_imm11_pseudo 0
1545+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:es = COPY [[COPY2]]
1546+
; CHECK-NEXT: VST_SRS_4x_dm_sts_srs_cm_idx_srsSign0 %5:acc1024, [[COPY3]], [[COPY]], [[COPY1]], implicit-def $srsrs_of, implicit $crrnd, implicit $crsrsmode, implicit $crsat, implicit $srssign0 :: (store (<8 x s32>))
1547+
%0:ptrregbank(p0) = COPY $p0
1548+
%7:modregbank(s20) = COPY $m0
1549+
%101:gprregbank(s32) = COPY $r1
1550+
%102:gprregbank(s32) = G_CONSTANT i32 0
1551+
%103:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.I256.v32.acc32.srs), %100:accregbank(<16 x s64>), %101:gprregbank(s32), %102:gprregbank(s32)
1552+
%144:vregbank(<8 x s32>) = G_BITCAST %103(<32 x s8>)
1553+
%201:vregbank(<8 x s32>) = COPY %144(<8 x s32>)
1554+
G_AIE_OFFSET_STORE %201(<8 x s32>), %0, %7 :: (store (<8 x s32>))
1555+
...
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
#
3+
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
8+
9+
# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s
10+
11+
12+
# This tests that we don't combine if one of the load's defs (data) is used after the VUNPACK instruction (no single use)
13+
14+
---
15+
name: VLD_UNPACK_use_after
16+
alignment: 16
17+
legalized: true
18+
regBankSelected: true
19+
body: |
20+
bb.1.entry:
21+
liveins: $p0
22+
; CHECK-LABEL: name: VLD_UNPACK_use_after
23+
; CHECK: liveins: $p0
24+
; CHECK-NEXT: {{ $}}
25+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
26+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
27+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]]
28+
; CHECK-NEXT: [[VLDA_dmw_lda_w_pstm_nrm:%[0-9]+]]:vec256, [[VLDA_dmw_lda_w_pstm_nrm1:%[0-9]+]]:ep = VLDA_dmw_lda_w_pstm_nrm [[COPY]], [[COPY2]] :: (load (<32 x s8>))
29+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 0
30+
; CHECK-NEXT: [[VUNPACK_mv_unpack_w_unpackSign1_:%[0-9]+]]:vec512 = VUNPACK_mv_unpack_w_unpackSign1 [[VLDA_dmw_lda_w_pstm_nrm]], implicit $crunpacksize, implicit $unpacksign1
31+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VUNPACK_mv_unpack_w_unpackSign1_]], implicit [[VLDA_dmw_lda_w_pstm_nrm]]
32+
%0:ptrregbank(p0) = COPY $p0
33+
%1:gprregbank(s32) = COPY $r0
34+
%7:modregbank(s20) = G_TRUNC %1
35+
%102:gprregbank(s32) = G_CONSTANT i32 1
36+
%25:vregbank(<32 x s8>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<32 x s8>))
37+
%103:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I512.I8.I4), %25:vregbank(<32 x s8>), %102:gprregbank(s32)
38+
%12:vregbank(<32 x s8>) = COPY %25
39+
PseudoRET implicit $lr, implicit %103, implicit %12
40+
...
41+
42+
# This tests that we don't combine if one of the load's defs (data) is used between the load and the VUNPACK instruction (no single use)
43+
44+
---
45+
name: VLD_UNPACK_use_between
46+
alignment: 16
47+
legalized: true
48+
regBankSelected: true
49+
body: |
50+
bb.1.entry:
51+
liveins: $p0
52+
; CHECK-LABEL: name: VLD_UNPACK_use_between
53+
; CHECK: liveins: $p0
54+
; CHECK-NEXT: {{ $}}
55+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
56+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
57+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]]
58+
; CHECK-NEXT: [[VLDA_dmw_lda_w_pstm_nrm:%[0-9]+]]:vec256, [[VLDA_dmw_lda_w_pstm_nrm1:%[0-9]+]]:ep = VLDA_dmw_lda_w_pstm_nrm [[COPY]], [[COPY2]] :: (load (<32 x s8>))
59+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 0
60+
; CHECK-NEXT: [[VUNPACK_mv_unpack_w_unpackSign1_:%[0-9]+]]:vec512 = VUNPACK_mv_unpack_w_unpackSign1 [[VLDA_dmw_lda_w_pstm_nrm]], implicit $crunpacksize, implicit $unpacksign1
61+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VUNPACK_mv_unpack_w_unpackSign1_]], implicit [[VLDA_dmw_lda_w_pstm_nrm]]
62+
%0:ptrregbank(p0) = COPY $p0
63+
%1:gprregbank(s32) = COPY $r0
64+
%7:modregbank(s20) = G_TRUNC %1
65+
%102:gprregbank(s32) = G_CONSTANT i32 1
66+
%25:vregbank(<32 x s8>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<32 x s8>))
67+
%12:vregbank(<32 x s8>) = COPY %25
68+
%103:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I512.I8.I4), %25:vregbank(<32 x s8>), %102:gprregbank(s32)
69+
PseudoRET implicit $lr, implicit %103, implicit %12
70+
...
71+
72+
---
73+
name: VLD_UNPACK_side_effects_between
74+
alignment: 16
75+
legalized: true
76+
regBankSelected: true
77+
body: |
78+
bb.1.entry:
79+
liveins: $p0
80+
; CHECK-LABEL: name: VLD_UNPACK_side_effects_between
81+
; CHECK: liveins: $p0
82+
; CHECK-NEXT: {{ $}}
83+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
84+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
85+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]]
86+
; CHECK-NEXT: [[VLDA_dmw_lda_w_pstm_nrm:%[0-9]+]]:vec256, [[VLDA_dmw_lda_w_pstm_nrm1:%[0-9]+]]:ep = VLDA_dmw_lda_w_pstm_nrm [[COPY]], [[COPY2]] :: (load (<32 x s8>))
87+
; CHECK-NEXT: $srssign1 = COPY [[COPY1]]
88+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 0
89+
; CHECK-NEXT: [[VUNPACK_mv_unpack_w_unpackSign1_:%[0-9]+]]:vec512 = VUNPACK_mv_unpack_w_unpackSign1 [[VLDA_dmw_lda_w_pstm_nrm]], implicit $crunpacksize, implicit $unpacksign1
90+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VUNPACK_mv_unpack_w_unpackSign1_]]
91+
%0:ptrregbank(p0) = COPY $p0
92+
%1:gprregbank(s32) = COPY $r0
93+
%7:modregbank(s20) = G_TRUNC %1
94+
%102:gprregbank(s32) = G_CONSTANT i32 1
95+
%25:vregbank(<32 x s8>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<32 x s8>))
96+
%110:gprregbank(s32) = G_CONSTANT i32 11
97+
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.set.ctrl.reg), %110:gprregbank(s32), %1:gprregbank(s32)
98+
%103:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I512.I8.I4), %25:vregbank(<32 x s8>), %102:gprregbank(s32)
99+
PseudoRET implicit $lr, implicit %103
100+
...

‎llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-postinc-2d-vldb_unpack.mir

+393
Large diffs are not rendered by default.

‎llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-postinc-3d-vldb_unpack.mir

+477
Large diffs are not rendered by default.

‎llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-postinc-vldb_unpack.mir

+761
Large diffs are not rendered by default.

‎llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vlda_conv.mir

+44
Original file line numberDiff line numberDiff line change
@@ -620,3 +620,47 @@ body: |
620620
%3:accregbank(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2p.v32bf16.to.v32accfloat), %2:vregbank(<32 x s16>)
621621
PseudoRET implicit $lr, implicit %3
622622
...
623+
624+
---
625+
name: VLDA_CONV_COPY
626+
legalized: true
627+
regBankSelected: true
628+
tracksRegLiveness: true
629+
body: |
630+
bb.0:
631+
liveins: $p0, $r0
632+
; CHECK-LABEL: name: VLDA_CONV_COPY
633+
; CHECK: liveins: $p0, $r0
634+
; CHECK-NEXT: {{ $}}
635+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
636+
; CHECK-NEXT: [[VLDA_CONV_fp32_bf16_dmw_lda_ups_bf_idx_imm:%[0-9]+]]:acc512 = VLDA_CONV_fp32_bf16_dmw_lda_ups_bf_idx_imm [[COPY]], 0 :: (load (<16 x s16>))
637+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDA_CONV_fp32_bf16_dmw_lda_ups_bf_idx_imm]]
638+
%0:ptrregbank(p0) = COPY $p0
639+
%1:vregbank(<16 x s16>) = G_LOAD %0:ptrregbank(p0) :: (load (<16 x s16>))
640+
%2:vregbank(<16 x s16>) = COPY %1
641+
%3:accregbank(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2p.v16bf16.to.v16accfloat), %2:vregbank(<16 x s16>)
642+
PseudoRET implicit $lr, implicit %3
643+
...
644+
645+
---
646+
name: VLDA_CONV_COPY_no_single_use
647+
legalized: true
648+
regBankSelected: true
649+
tracksRegLiveness: true
650+
body: |
651+
bb.0:
652+
liveins: $p0, $r0
653+
; CHECK-LABEL: name: VLDA_CONV_COPY_no_single_use
654+
; CHECK: liveins: $p0, $r0
655+
; CHECK-NEXT: {{ $}}
656+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
657+
; CHECK-NEXT: [[VLDA_dmw_lda_w_idx_imm:%[0-9]+]]:vec256 = VLDA_dmw_lda_w_idx_imm [[COPY]], 0 :: (load (<16 x s16>))
658+
; CHECK-NEXT: [[VCONV_fp32_bf16_mv_ups_wbf:%[0-9]+]]:acc512 = VCONV_fp32_bf16_mv_ups_wbf [[VLDA_dmw_lda_w_idx_imm]]
659+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VCONV_fp32_bf16_mv_ups_wbf]], implicit [[VLDA_dmw_lda_w_idx_imm]]
660+
%0:ptrregbank(p0) = COPY $p0
661+
%1:vregbank(<16 x s16>) = G_LOAD %0:ptrregbank(p0) :: (load (<16 x s16>))
662+
%2:vregbank(<16 x s16>) = COPY %1
663+
%3:accregbank(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2p.v16bf16.to.v16accfloat), %2:vregbank(<16 x s16>)
664+
PseudoRET implicit $lr, implicit %3, implicit %2
665+
...
666+

‎llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vlda_ups.mir

+26
Original file line numberDiff line numberDiff line change
@@ -630,3 +630,29 @@ body: |
630630
%7:accregbank(<32 x s64>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.acc32.v64.I512.ups), %5:vregbank(<16 x s32>), %2:gprregbank(s32), %6:gprregbank(s32)
631631
PseudoRET implicit $lr, implicit %7
632632
...
633+
634+
---
635+
name: VLDA_UPS_2x_acc32_v16_COPY
636+
alignment: 16
637+
legalized: true
638+
regBankSelected: true
639+
body: |
640+
bb.1.entry:
641+
liveins: $p0, $r0
642+
; CHECK-LABEL: name: VLDA_UPS_2x_acc32_v16_COPY
643+
; CHECK: liveins: $p0, $r0
644+
; CHECK-NEXT: {{ $}}
645+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
646+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
647+
; CHECK-NEXT: $crupsmode = MOV_scalar_imm11_pseudo 0
648+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:es = COPY [[COPY1]]
649+
; CHECK-NEXT: [[VLDA_UPS_2x_dmw_lda_ups_w2b_idx_imm_upsSign0_:%[0-9]+]]:acc512 = VLDA_UPS_2x_dmw_lda_ups_w2b_idx_imm_upsSign0 [[COPY2]], [[COPY]], 0, implicit-def $srups_of, implicit $crsat, implicit $crupsmode, implicit $upssign0 :: (load (<16 x s16>))
650+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDA_UPS_2x_dmw_lda_ups_w2b_idx_imm_upsSign0_]]
651+
%1:ptrregbank(p0) = COPY $p0
652+
%2:gprregbank(s32) = COPY $r0
653+
%6:gprregbank(s32) = G_CONSTANT i32 0
654+
%5:vregbank(<16 x s16>) = G_LOAD %1:ptrregbank(p0) :: (load (<16 x s16>))
655+
%8:vregbank(<16 x s16>) = COPY %5
656+
%7:accregbank(<8 x s64>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.acc32.v16.I256.ups), %8:vregbank(<16 x s16>), %2:gprregbank(s32), %6:gprregbank(s32)
657+
PseudoRET implicit $lr, implicit %7
658+
...
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,337 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
#
3+
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
8+
9+
# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s
10+
11+
---
12+
name: VLDB_UNPACK_I512_I8_I4_signed
13+
alignment: 16
14+
legalized: true
15+
regBankSelected: true
16+
body: |
17+
bb.1.entry:
18+
liveins: $p0
19+
; CHECK-LABEL: name: VLDB_UNPACK_I512_I8_I4_signed
20+
; CHECK: liveins: $p0
21+
; CHECK-NEXT: {{ $}}
22+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
23+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 0
24+
; CHECK-NEXT: [[VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign1_:%[0-9]+]]:vec512 = VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign1 [[COPY]], 0, implicit $crunpacksize, implicit $unpacksign1 :: (load (<32 x s8>))
25+
; CHECK-NEXT: $x0 = COPY [[VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign1_]]
26+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
27+
%1:ptrregbank(p0) = COPY $p0
28+
%6:gprregbank(s32) = G_CONSTANT i32 1
29+
%5:vregbank(<32 x s8>) = G_LOAD %1:ptrregbank(p0) :: (load (<32 x s8>))
30+
%7:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I512.I8.I4), %5:vregbank(<32 x s8>), %6:gprregbank(s32)
31+
$x0 = COPY %7:vregbank(<32 x s16>)
32+
PseudoRET implicit $lr, implicit $x0
33+
...
34+
35+
---
36+
name: VLDB_UNPACK_I512_I16_I8_signed
37+
alignment: 16
38+
legalized: true
39+
regBankSelected: true
40+
body: |
41+
bb.1.entry:
42+
liveins: $p0
43+
; CHECK-LABEL: name: VLDB_UNPACK_I512_I16_I8_signed
44+
; CHECK: liveins: $p0
45+
; CHECK-NEXT: {{ $}}
46+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
47+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 1
48+
; CHECK-NEXT: [[VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign1_:%[0-9]+]]:vec512 = VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign1 [[COPY]], 0, implicit $crunpacksize, implicit $unpacksign1 :: (load (<32 x s8>))
49+
; CHECK-NEXT: $x0 = COPY [[VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign1_]]
50+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
51+
%1:ptrregbank(p0) = COPY $p0
52+
%6:gprregbank(s32) = G_CONSTANT i32 1
53+
%5:vregbank(<32 x s8>) = G_LOAD %1:ptrregbank(p0) :: (load (<32 x s8>))
54+
%7:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I512.I16.I8), %5:vregbank(<32 x s8>), %6:gprregbank(s32)
55+
$x0 = COPY %7:vregbank(<32 x s16>)
56+
PseudoRET implicit $lr, implicit $x0
57+
...
58+
59+
---
60+
name: VLDB_UNPACK_I1024_I8_I4_signed
61+
alignment: 16
62+
legalized: true
63+
regBankSelected: true
64+
body: |
65+
bb.1.entry:
66+
liveins: $p0
67+
; CHECK-LABEL: name: VLDB_UNPACK_I1024_I8_I4_signed
68+
; CHECK: liveins: $p0
69+
; CHECK-NEXT: {{ $}}
70+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
71+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 0
72+
; CHECK-NEXT: [[VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign1_:%[0-9]+]]:vec1024 = VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign1 [[COPY]], 0, implicit $crunpacksize, implicit $unpacksign1 :: (load (<32 x s16>))
73+
; CHECK-NEXT: $y0 = COPY [[VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign1_]]
74+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $y0
75+
%1:ptrregbank(p0) = COPY $p0
76+
%6:gprregbank(s32) = G_CONSTANT i32 1
77+
%5:vregbank(<32 x s16>) = G_LOAD %1:ptrregbank(p0) :: (load (<32 x s16>))
78+
%7:vregbank(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I1024.I8.I4), %5:vregbank(<32 x s16>), %6:gprregbank(s32)
79+
$y0 = COPY %7:vregbank(<32 x s32>)
80+
PseudoRET implicit $lr, implicit $y0
81+
...
82+
83+
---
84+
name: VLDB_UNPACK_I1024_I16_I8_signed
85+
alignment: 16
86+
legalized: true
87+
regBankSelected: true
88+
body: |
89+
bb.1.entry:
90+
liveins: $p0
91+
; CHECK-LABEL: name: VLDB_UNPACK_I1024_I16_I8_signed
92+
; CHECK: liveins: $p0
93+
; CHECK-NEXT: {{ $}}
94+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
95+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 1
96+
; CHECK-NEXT: [[VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign1_:%[0-9]+]]:vec1024 = VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign1 [[COPY]], 0, implicit $crunpacksize, implicit $unpacksign1 :: (load (<32 x s16>))
97+
; CHECK-NEXT: $y0 = COPY [[VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign1_]]
98+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $y0
99+
%1:ptrregbank(p0) = COPY $p0
100+
%6:gprregbank(s32) = G_CONSTANT i32 1
101+
%5:vregbank(<32 x s16>) = G_LOAD %1:ptrregbank(p0) :: (load (<32 x s16>))
102+
%7:vregbank(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I1024.I16.I8), %5:vregbank(<32 x s16>), %6:gprregbank(s32)
103+
$y0 = COPY %7:vregbank(<32 x s32>)
104+
PseudoRET implicit $lr, implicit $y0
105+
...
106+
107+
---
108+
name: VLDB_UNPACK_I512_I8_I4_unsigned
109+
alignment: 16
110+
legalized: true
111+
regBankSelected: true
112+
body: |
113+
bb.1.entry:
114+
liveins: $p0
115+
; CHECK-LABEL: name: VLDB_UNPACK_I512_I8_I4_unsigned
116+
; CHECK: liveins: $p0
117+
; CHECK-NEXT: {{ $}}
118+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
119+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 0
120+
; CHECK-NEXT: [[VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign0_:%[0-9]+]]:vec512 = VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign0 [[COPY]], 0, implicit $crunpacksize, implicit $unpacksign0 :: (load (<32 x s8>))
121+
; CHECK-NEXT: $x0 = COPY [[VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign0_]]
122+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
123+
%1:ptrregbank(p0) = COPY $p0
124+
%6:gprregbank(s32) = G_CONSTANT i32 0
125+
%5:vregbank(<32 x s8>) = G_LOAD %1:ptrregbank(p0) :: (load (<32 x s8>))
126+
%7:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I512.I8.I4), %5:vregbank(<32 x s8>), %6:gprregbank(s32)
127+
$x0 = COPY %7:vregbank(<32 x s16>)
128+
PseudoRET implicit $lr, implicit $x0
129+
...
130+
131+
---
132+
name: VLDB_UNPACK_I512_I16_I8_unsigned
133+
alignment: 16
134+
legalized: true
135+
regBankSelected: true
136+
body: |
137+
bb.1.entry:
138+
liveins: $p0
139+
; CHECK-LABEL: name: VLDB_UNPACK_I512_I16_I8_unsigned
140+
; CHECK: liveins: $p0
141+
; CHECK-NEXT: {{ $}}
142+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
143+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 1
144+
; CHECK-NEXT: [[VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign0_:%[0-9]+]]:vec512 = VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign0 [[COPY]], 0, implicit $crunpacksize, implicit $unpacksign0 :: (load (<32 x s8>))
145+
; CHECK-NEXT: $x0 = COPY [[VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign0_]]
146+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
147+
%1:ptrregbank(p0) = COPY $p0
148+
%6:gprregbank(s32) = G_CONSTANT i32 0
149+
%5:vregbank(<32 x s8>) = G_LOAD %1:ptrregbank(p0) :: (load (<32 x s8>))
150+
%7:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I512.I16.I8), %5:vregbank(<32 x s8>), %6:gprregbank(s32)
151+
$x0 = COPY %7:vregbank(<32 x s16>)
152+
PseudoRET implicit $lr, implicit $x0
153+
...
154+
155+
---
156+
name: VLDB_UNPACK_I1024_I8_I4_unsigned
157+
alignment: 16
158+
legalized: true
159+
regBankSelected: true
160+
body: |
161+
bb.1.entry:
162+
liveins: $p0
163+
; CHECK-LABEL: name: VLDB_UNPACK_I1024_I8_I4_unsigned
164+
; CHECK: liveins: $p0
165+
; CHECK-NEXT: {{ $}}
166+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
167+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 0
168+
; CHECK-NEXT: [[VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign0_:%[0-9]+]]:vec1024 = VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign0 [[COPY]], 0, implicit $crunpacksize, implicit $unpacksign0 :: (load (<32 x s16>))
169+
; CHECK-NEXT: $y0 = COPY [[VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign0_]]
170+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $y0
171+
%1:ptrregbank(p0) = COPY $p0
172+
%6:gprregbank(s32) = G_CONSTANT i32 0
173+
%5:vregbank(<32 x s16>) = G_LOAD %1:ptrregbank(p0) :: (load (<32 x s16>))
174+
%7:vregbank(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I1024.I8.I4), %5:vregbank(<32 x s16>), %6:gprregbank(s32)
175+
$y0 = COPY %7:vregbank(<32 x s32>)
176+
PseudoRET implicit $lr, implicit $y0
177+
...
178+
179+
---
180+
name: VLDB_UNPACK_I1024_I16_I8_unsigned
181+
alignment: 16
182+
legalized: true
183+
regBankSelected: true
184+
body: |
185+
bb.1.entry:
186+
liveins: $p0
187+
; CHECK-LABEL: name: VLDB_UNPACK_I1024_I16_I8_unsigned
188+
; CHECK: liveins: $p0
189+
; CHECK-NEXT: {{ $}}
190+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
191+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 1
192+
; CHECK-NEXT: [[VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign0_:%[0-9]+]]:vec1024 = VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign0 [[COPY]], 0, implicit $crunpacksize, implicit $unpacksign0 :: (load (<32 x s16>))
193+
; CHECK-NEXT: $y0 = COPY [[VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign0_]]
194+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $y0
195+
%1:ptrregbank(p0) = COPY $p0
196+
%6:gprregbank(s32) = G_CONSTANT i32 0
197+
%5:vregbank(<32 x s16>) = G_LOAD %1:ptrregbank(p0) :: (load (<32 x s16>))
198+
%7:vregbank(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I1024.I16.I8), %5:vregbank(<32 x s16>), %6:gprregbank(s32)
199+
$y0 = COPY %7:vregbank(<32 x s32>)
200+
PseudoRET implicit $lr, implicit $y0
201+
...
202+
203+
---
204+
name: VLDB_UNPACK_I512_I8_I4_dynamic
205+
alignment: 16
206+
legalized: true
207+
regBankSelected: true
208+
body: |
209+
bb.1.entry:
210+
liveins: $p0
211+
; CHECK-LABEL: name: VLDB_UNPACK_I512_I8_I4_dynamic
212+
; CHECK: liveins: $p0
213+
; CHECK-NEXT: {{ $}}
214+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
215+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
216+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 0
217+
; CHECK-NEXT: $unpacksign0 = COPY [[COPY1]]
218+
; CHECK-NEXT: [[VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign0_:%[0-9]+]]:vec512 = VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign0 [[COPY]], 0, implicit $crunpacksize, implicit $unpacksign0 :: (load (<32 x s8>))
219+
; CHECK-NEXT: $unpacksign0 = MOV_scalar_imm11_pseudo 0
220+
; CHECK-NEXT: $x0 = COPY [[VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign0_]]
221+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
222+
%1:ptrregbank(p0) = COPY $p0
223+
%6:gprregbank(s32) = COPY $r0
224+
%5:vregbank(<32 x s8>) = G_LOAD %1:ptrregbank(p0) :: (load (<32 x s8>))
225+
%7:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I512.I8.I4), %5:vregbank(<32 x s8>), %6:gprregbank(s32)
226+
$x0 = COPY %7:vregbank(<32 x s16>)
227+
PseudoRET implicit $lr, implicit $x0
228+
...
229+
230+
---
231+
name: VLDB_UNPACK_I512_I16_I8_dynamic
232+
alignment: 16
233+
legalized: true
234+
regBankSelected: true
235+
body: |
236+
bb.1.entry:
237+
liveins: $p0
238+
; CHECK-LABEL: name: VLDB_UNPACK_I512_I16_I8_dynamic
239+
; CHECK: liveins: $p0
240+
; CHECK-NEXT: {{ $}}
241+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
242+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
243+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 1
244+
; CHECK-NEXT: $unpacksign0 = COPY [[COPY1]]
245+
; CHECK-NEXT: [[VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign0_:%[0-9]+]]:vec512 = VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign0 [[COPY]], 0, implicit $crunpacksize, implicit $unpacksign0 :: (load (<32 x s8>))
246+
; CHECK-NEXT: $unpacksign0 = MOV_scalar_imm11_pseudo 0
247+
; CHECK-NEXT: $x0 = COPY [[VLDB_UNPACK_dmw_ldb_unpack_idx_imm_unpackSign0_]]
248+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
249+
%1:ptrregbank(p0) = COPY $p0
250+
%6:gprregbank(s32) = COPY $r0
251+
%5:vregbank(<32 x s8>) = G_LOAD %1:ptrregbank(p0) :: (load (<32 x s8>))
252+
%7:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I512.I16.I8), %5:vregbank(<32 x s8>), %6:gprregbank(s32)
253+
$x0 = COPY %7:vregbank(<32 x s16>)
254+
PseudoRET implicit $lr, implicit $x0
255+
...
256+
257+
---
258+
name: VLDB_UNPACK_I1024_I8_I4_dynamic
259+
alignment: 16
260+
legalized: true
261+
regBankSelected: true
262+
body: |
263+
bb.1.entry:
264+
liveins: $p0
265+
; CHECK-LABEL: name: VLDB_UNPACK_I1024_I8_I4_dynamic
266+
; CHECK: liveins: $p0
267+
; CHECK-NEXT: {{ $}}
268+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
269+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
270+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 0
271+
; CHECK-NEXT: $unpacksign0 = COPY [[COPY1]]
272+
; CHECK-NEXT: [[VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign0_:%[0-9]+]]:vec1024 = VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign0 [[COPY]], 0, implicit $crunpacksize, implicit $unpacksign0 :: (load (<32 x s16>))
273+
; CHECK-NEXT: $unpacksign0 = MOV_scalar_imm11_pseudo 0
274+
; CHECK-NEXT: $y0 = COPY [[VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign0_]]
275+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $y0
276+
%1:ptrregbank(p0) = COPY $p0
277+
%6:gprregbank(s32) = COPY $r0
278+
%5:vregbank(<32 x s16>) = G_LOAD %1:ptrregbank(p0) :: (load (<32 x s16>))
279+
%7:vregbank(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I1024.I8.I4), %5:vregbank(<32 x s16>), %6:gprregbank(s32)
280+
$y0 = COPY %7:vregbank(<32 x s32>)
281+
PseudoRET implicit $lr, implicit $y0
282+
...
283+
284+
---
285+
name: VLDB_UNPACK_I1024_I16_I8_dynamic
286+
alignment: 16
287+
legalized: true
288+
regBankSelected: true
289+
body: |
290+
bb.1.entry:
291+
liveins: $p0, $r0
292+
; CHECK-LABEL: name: VLDB_UNPACK_I1024_I16_I8_dynamic
293+
; CHECK: liveins: $p0, $r0
294+
; CHECK-NEXT: {{ $}}
295+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
296+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
297+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 1
298+
; CHECK-NEXT: $unpacksign0 = COPY [[COPY1]]
299+
; CHECK-NEXT: [[VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign0_:%[0-9]+]]:vec1024 = VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign0 [[COPY]], 0, implicit $crunpacksize, implicit $unpacksign0 :: (load (<32 x s16>))
300+
; CHECK-NEXT: $unpacksign0 = MOV_scalar_imm11_pseudo 0
301+
; CHECK-NEXT: $y0 = COPY [[VLDB_UNPACK_dmx_ldb_unpack_idx_imm_unpackSign0_]]
302+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $y0
303+
%1:ptrregbank(p0) = COPY $p0
304+
%6:gprregbank(s32) = COPY $r0
305+
%5:vregbank(<32 x s16>) = G_LOAD %1:ptrregbank(p0) :: (load (<32 x s16>))
306+
%7:vregbank(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I1024.I16.I8), %5:vregbank(<32 x s16>), %6:gprregbank(s32)
307+
$y0 = COPY %7:vregbank(<32 x s32>)
308+
PseudoRET implicit $lr, implicit $y0
309+
...
310+
311+
---
312+
name: VLDB_UNPACK_I512_I8_I4_COPY
313+
alignment: 16
314+
legalized: true
315+
regBankSelected: true
316+
body: |
317+
bb.1.entry:
318+
liveins: $p0, $m0
319+
; CHECK-LABEL: name: VLDB_UNPACK_I512_I8_I4_COPY
320+
; CHECK: liveins: $p0, $m0
321+
; CHECK-NEXT: {{ $}}
322+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
323+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:edj = COPY $m0
324+
; CHECK-NEXT: $crunpacksize = MOV_scalar_imm11_pseudo 0
325+
; CHECK-NEXT: [[VLDB_UNPACK_dmw_ldb_unpack_idx_unpackSign1_:%[0-9]+]]:vec512 = VLDB_UNPACK_dmw_ldb_unpack_idx_unpackSign1 [[COPY]], [[COPY1]], implicit $crunpacksize, implicit $unpacksign1 :: (load (<32 x s8>))
326+
; CHECK-NEXT: $x0 = COPY [[VLDB_UNPACK_dmw_ldb_unpack_idx_unpackSign1_]]
327+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
328+
%1:ptrregbank(p0) = COPY $p0
329+
%8:modregbank(s20) = COPY $m0
330+
%6:gprregbank(s32) = G_CONSTANT i32 1
331+
%5:vregbank(<32 x s8>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %8:modregbank(s20) :: (load (<32 x s8>))
332+
%9:vregbank(<32 x s8>) = COPY %5
333+
%7:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2p.unpack.I512.I8.I4), %9:vregbank(<32 x s8>), %6:gprregbank(s32)
334+
$x0 = COPY %7:vregbank(<32 x s16>)
335+
PseudoRET implicit $lr, implicit $x0
336+
...
337+

‎llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vst_conv.mir

+74
Original file line numberDiff line numberDiff line change
@@ -509,3 +509,77 @@ body: |
509509
%21:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %0, %8 :: (store (<16 x s32>))
510510
PseudoRET implicit $lr
511511
...
512+
513+
---
# Instruction-selection test (positive case): the result of the
# llvm.aie2p.v16accfloat.to.v16bf16 conversion (%104) reaches a G_STORE through
# a G_BITCAST (%150) followed by a COPY (%200). Each of %104, %150 and %200 has
# exactly one use in this function.
# The CHECK lines expect the whole chain to be selected as one
# VST_CONV_bf16_fp32_dmw_sts_srs_bf_idx_imm store taking the accumulator
# directly, with no separate conversion instruction.
514+
name: VST_CONV_COPY_BITCAST
515+
legalized: true
516+
regBankSelected: true
517+
tracksRegLiveness: true
518+
body: |
519+
bb.0:
520+
liveins: $p0, $bmll0
521+
; CHECK-LABEL: name: VST_CONV_COPY_BITCAST
522+
; CHECK: liveins: $p0, $bmll0
523+
; CHECK-NEXT: {{ $}}
524+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
525+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc512 = COPY $bmll0
526+
; CHECK-NEXT: VST_CONV_bf16_fp32_dmw_sts_srs_bf_idx_imm [[COPY1]], [[COPY]], 0, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd :: (store (<8 x s32>))
527+
; CHECK-NEXT: PseudoRET implicit $lr
528+
%0:ptrregbank(p0) = COPY $p0
529+
%100:accregbank(<8 x s64>) = COPY $bmll0
530+
%104:vregbank(<16 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.v16accfloat.to.v16bf16), %100:accregbank(<8 x s64>)
531+
%150:vregbank(<8 x s32>) = G_BITCAST %104(<16 x s16>)
532+
%200:vregbank(<8 x s32>) = COPY %150(<8 x s32>)
533+
G_STORE %200, %0 :: (store (<8 x s32>))
534+
PseudoRET implicit $lr
535+
...
536+
537+
---
# Instruction-selection test (negative case): same conversion -> G_BITCAST ->
# COPY -> G_STORE chain as VST_CONV_COPY_BITCAST, except that the G_BITCAST
# result %150 has a second use (it is also an implicit operand of PseudoRET).
# The CHECK lines expect the conversion and the store to stay separate:
# a VCONV_bf16_fp32_mv_w_srs_bf followed by a plain VST_dmw_sts_w_idx_imm,
# with the VCONV result also feeding PseudoRET.
539+
name: VST_CONV_COPY_BITCAST_no_single_use_bit_cast
540+
legalized: true
541+
regBankSelected: true
542+
tracksRegLiveness: true
543+
body: |
544+
bb.0:
545+
liveins: $p0, $bmll0
546+
; CHECK-LABEL: name: VST_CONV_COPY_BITCAST_no_single_use_bit_cast
547+
; CHECK: liveins: $p0, $bmll0
548+
; CHECK-NEXT: {{ $}}
549+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
550+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc512 = COPY $bmll0
551+
; CHECK-NEXT: [[VCONV_bf16_fp32_mv_w_srs_bf:%[0-9]+]]:vec256 = VCONV_bf16_fp32_mv_w_srs_bf [[COPY1]], implicit-def dead $srf2fflags, implicit $crf2fmask, implicit $crrnd
552+
; CHECK-NEXT: VST_dmw_sts_w_idx_imm [[VCONV_bf16_fp32_mv_w_srs_bf]], [[COPY]], 0 :: (store (<8 x s32>))
553+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VCONV_bf16_fp32_mv_w_srs_bf]]
554+
%0:ptrregbank(p0) = COPY $p0
555+
%100:accregbank(<16 x s32>) = COPY $bmll0
556+
%104:vregbank(<16 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.v16accfloat.to.v16bf16), %100:accregbank(<16 x s32>)
557+
%150:vregbank(<8 x s32>) = G_BITCAST %104(<16 x s16>)
558+
%200:vregbank(<8 x s32>) = COPY %150(<8 x s32>)
559+
G_STORE %200, %0 :: (store (<8 x s32>))
560+
PseudoRET implicit $lr, implicit %150
561+
...
561+
562+
---
# Instruction-selection test (negative case): same conversion -> G_BITCAST ->
# COPY -> G_STORE chain as VST_CONV_COPY_BITCAST, except that the COPY result
# %200 has a second use (it is also an implicit operand of PseudoRET).
# The CHECK lines expect the conversion and the store to stay separate:
# a VCONV_bf16_fp32_mv_w_srs_bf followed by a plain VST_dmw_sts_w_idx_imm,
# with the VCONV result also feeding PseudoRET.
564+
name: VST_CONV_COPY_BITCAST_no_single_use_copy
565+
legalized: true
566+
regBankSelected: true
567+
tracksRegLiveness: true
568+
body: |
569+
bb.0:
570+
liveins: $p0, $bmll0
571+
; CHECK-LABEL: name: VST_CONV_COPY_BITCAST_no_single_use_copy
572+
; CHECK: liveins: $p0, $bmll0
573+
; CHECK-NEXT: {{ $}}
574+
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
575+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc512 = COPY $bmll0
576+
; CHECK-NEXT: [[VCONV_bf16_fp32_mv_w_srs_bf:%[0-9]+]]:vec256 = VCONV_bf16_fp32_mv_w_srs_bf [[COPY1]], implicit-def dead $srf2fflags, implicit $crf2fmask, implicit $crrnd
577+
; CHECK-NEXT: VST_dmw_sts_w_idx_imm [[VCONV_bf16_fp32_mv_w_srs_bf]], [[COPY]], 0 :: (store (<8 x s32>))
578+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VCONV_bf16_fp32_mv_w_srs_bf]]
579+
%0:ptrregbank(p0) = COPY $p0
580+
%100:accregbank(<16 x s32>) = COPY $bmll0
581+
%104:vregbank(<16 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.v16accfloat.to.v16bf16), %100:accregbank(<16 x s32>)
582+
%150:vregbank(<8 x s32>) = G_BITCAST %104(<16 x s16>)
583+
%200:vregbank(<8 x s32>) = COPY %150(<8 x s32>)
584+
G_STORE %200, %0 :: (store (<8 x s32>))
585+
PseudoRET implicit $lr, implicit %200
586+
...

0 commit comments

Comments
 (0)
Please sign in to comment.