diff --git a/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp b/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp index 958e73f5c477..2ff6d009b31b 100644 --- a/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp +++ b/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// /// \file @@ -149,6 +149,11 @@ class AIE2InstructionSelector : public AIEBaseInstructionSelector { getCombinedOpcodeSRSUPS(const MachineInstr &MemOp, const MachineInstr &CombOp, std::optional Immediate, bool IsSigned); bool canCombineSRSUPS(MachineInstr &MemOp, MachineInstr &CombOp); + std::optional + getCombinedOpcodePACK(const MachineInstr &MemOp, const MachineInstr &CombOp, + std::optional Immediate, bool IsSigned, + bool Is32Lanes); + bool canCombinePACK(MachineInstr &MemOp, MachineInstr &CombOp); // const AIE2TargetMachine &TM; const AIE2InstrInfo &TII; @@ -3036,10 +3041,9 @@ LoadStoreOpcodes AIE2InstructionSelector::getLoadStoreOpcode( llvm_unreachable("Invalid combined instruction"); } -std::optional -getCombinedOpcodePACK(const MachineInstr &MemOp, const MachineInstr &CombOp, - std::optional Immediate, bool IsSigned, - bool Is32Lanes) { +std::optional AIE2InstructionSelector::getCombinedOpcodePACK( + const MachineInstr &MemOp, const MachineInstr &CombOp, + std::optional Immediate, bool IsSigned, bool Is32Lanes) { const bool AlwaysFitsImmediateRange = true; if (CombOp.getOpcode() != AIE2::G_INTRINSIC_W_SIDE_EFFECTS || @@ -3189,7 +3193,8 @@ getCombinedOpcodePACK(const MachineInstr &MemOp, const MachineInstr &CombOp, return {}; } -bool canCombinePACK(MachineInstr &MemOp, MachineInstr &CombOp) { +bool 
AIE2InstructionSelector::canCombinePACK(MachineInstr &MemOp, + MachineInstr &CombOp) { std::optional NoImmediate = {}; bool IsSigned = true; diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp index 20e69e154fc7..0373e78c7d8e 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp @@ -103,6 +103,7 @@ class AIE2PInstructionSelector : public AIEBaseInstructionSelector { bool select1024BitG_AIE_LOAD_STORE(MachineInstr &I, LoadStoreOpcodes &LSO, AddressingModeInfo &AMI, MachineRegisterInfo &MRI); + bool selectG_AIE_STORE_PACK(MachineInstr &StoreI, MachineRegisterInfo &MRI); bool selectSetI128(MachineInstr &I, MachineOperand &DstReg, MachineOperand &SrcReg, MachineRegisterInfo &MRI); bool selectExtractI128(MachineInstr &I, Register DstReg, Register SrcReg, @@ -130,6 +131,11 @@ class AIE2PInstructionSelector : public AIEBaseInstructionSelector { std::optional Immediate, bool IsSigned); bool canCombineUPS(MachineInstr &LoadOp, MachineInstr &UPSI, MachineRegisterInfo &MRI); + std::optional + getCombinedOpcodePACK(const MachineInstr &MemOp, const MachineInstr &CombOp, + std::optional Immediate, bool IsSigned); + bool canCombinePACK(MachineInstr &MemOp, MachineInstr &CombOp, + MachineRegisterInfo &MRI); const AIE2PInstrInfo &TII; const AIE2PRegisterInfo &TRI; @@ -2377,8 +2383,8 @@ bool AIE2PInstructionSelector::selectG_AIE_LOAD_STORE( MachineInstr &I, MachineRegisterInfo &MRI) { // First try to match CONV, SRS and PACK combine - if (selectG_AIE_STORE_CONV(I, MRI) /*|| selectG_AIE_STORE_SRS(I, MRI) || - selectG_AIE_STORE_PACK(I, MRI)*/) + if (selectG_AIE_STORE_CONV(I, MRI) /*|| selectG_AIE_STORE_SRS(I, MRI)*/ || + selectG_AIE_STORE_PACK(I, MRI)) return true; std::optional AMI = getOrDefineAddressingRegister(I, MRI); @@ -2492,6 +2498,285 @@ AIE2PInstructionSelector::getCombinedOpcodeCONVLoad( return LoadStoreOpcodes{ISelOpcode, 
FitsImmediateRange, /*OffsetOpcode=*/{}};
 }
 
+/// Pick the VST.PACK instruction that fuses the store \p MemOp with the pack
+/// intrinsic \p CombOp.
+///
+/// \param MemOp     Store to fuse: G_STORE, G_AIE_OFFSET_STORE or one of the
+///                  G_AIE_POSTINC[_2D|_3D]_STORE forms.
+/// \param CombOp    Candidate producer; only the aie2p_pack_I512_* and
+///                  aie2p_pack_I1024_* intrinsics are accepted.
+/// \param Immediate Constant offset/increment, if any. Only consulted for the
+///                  offset and 1D post-increment stores.
+/// \param IsSigned  Selects the *_packSign1 (true) or *_packSign0 (false)
+///                  opcode variant.
+/// \returns the selected opcode together with whether the immediate form was
+///          chosen, or std::nullopt when the pair cannot be combined.
+std::optional<LoadStoreOpcodes> AIE2PInstructionSelector::getCombinedOpcodePACK(
+    const MachineInstr &MemOp, const MachineInstr &CombOp,
+    std::optional<APInt> Immediate, bool IsSigned) {
+  if (CombOp.getOpcode() != AIE2P::G_INTRINSIC_W_SIDE_EFFECTS)
+    return {};
+
+  // The I512 pack intrinsics map to the dmw opcodes, the I1024 ones to dmx.
+  bool Is1024BitSrc = false;
+  switch (cast<GIntrinsic>(CombOp).getIntrinsicID()) {
+  case Intrinsic::aie2p_pack_I512_I8_I16:
+  case Intrinsic::aie2p_pack_I512_I4_I8:
+    break;
+  case Intrinsic::aie2p_pack_I1024_I8_I16:
+  case Intrinsic::aie2p_pack_I1024_I4_I8:
+    Is1024BitSrc = true;
+    break;
+  default:
+    return {};
+  }
+
+  assert((getLoadStoreSize(MemOp) == 256 || getLoadStoreSize(MemOp) == 512) &&
+         "Unexpected VST.PACK size");
+
+  const bool AlwaysFitsImmediateRange = true;
+  const bool NoImmediate = false;
+
+  // Choose among the four sign/width flavours of one addressing form.
+  const auto Pick = [&](unsigned Dmw1, unsigned Dmx1, unsigned Dmw0,
+                        unsigned Dmx0) -> unsigned {
+    if (IsSigned)
+      return Is1024BitSrc ? Dmx1 : Dmw1;
+    return Is1024BitSrc ? Dmx0 : Dmw0;
+  };
+
+  // The immediate step differs between the dmw (32) and dmx (64) encodings.
+  const auto FitsImm = [&]() -> bool {
+    return Is1024BitSrc ? checkImmediateRange<4, 64>(Immediate)
+                        : checkImmediateRange<4, 32>(Immediate);
+  };
+
+  // Every case returns explicitly, so no addressing form can fall through
+  // into the next one.
+  switch (MemOp.getOpcode()) {
+  case AIE2P::G_STORE:
+    return LoadStoreOpcodes{
+        Pick(AIE2P::VST_PACK_dmw_sts_pack_idx_imm_packSign1,
+             AIE2P::VST_PACK_dmx_sts_pack_idx_imm_packSign1,
+             AIE2P::VST_PACK_dmw_sts_pack_idx_imm_packSign0,
+             AIE2P::VST_PACK_dmx_sts_pack_idx_imm_packSign0),
+        AlwaysFitsImmediateRange, /*OffsetOpcode=*/{}};
+  case AIE2P::G_AIE_OFFSET_STORE: {
+    const bool FitsImmediateRange = FitsImm();
+    const unsigned ISelOpcode =
+        FitsImmediateRange
+            ? Pick(AIE2P::VST_PACK_dmw_sts_pack_idx_imm_packSign1,
+                   AIE2P::VST_PACK_dmx_sts_pack_idx_imm_packSign1,
+                   AIE2P::VST_PACK_dmw_sts_pack_idx_imm_packSign0,
+                   AIE2P::VST_PACK_dmx_sts_pack_idx_imm_packSign0)
+            : Pick(AIE2P::VST_PACK_dmw_sts_pack_idx_packSign1,
+                   AIE2P::VST_PACK_dmx_sts_pack_idx_packSign1,
+                   AIE2P::VST_PACK_dmw_sts_pack_idx_packSign0,
+                   AIE2P::VST_PACK_dmx_sts_pack_idx_packSign0);
+    return LoadStoreOpcodes{ISelOpcode, FitsImmediateRange,
+                            /*OffsetOpcode=*/{}};
+  }
+  case AIE2P::G_AIE_POSTINC_STORE: {
+    const bool FitsImmediateRange = FitsImm();
+    const unsigned ISelOpcode =
+        FitsImmediateRange
+            ? Pick(AIE2P::VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1,
+                   AIE2P::VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1,
+                   AIE2P::VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0,
+                   AIE2P::VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0)
+            : Pick(AIE2P::VST_PACK_dmw_sts_pack_pstm_nrm_packSign1,
+                   AIE2P::VST_PACK_dmx_sts_pack_pstm_nrm_packSign1,
+                   AIE2P::VST_PACK_dmw_sts_pack_pstm_nrm_packSign0,
+                   AIE2P::VST_PACK_dmx_sts_pack_pstm_nrm_packSign0);
+    return LoadStoreOpcodes{ISelOpcode, FitsImmediateRange,
+                            /*OffsetOpcode=*/{}};
+  }
+  case AIE2P::G_AIE_POSTINC_2D_STORE:
+    return LoadStoreOpcodes{Pick(AIE2P::VST_2D_PACK_dmw_sts_pack_packSign1,
+                                 AIE2P::VST_2D_PACK_dmx_sts_pack_packSign1,
+                                 AIE2P::VST_2D_PACK_dmw_sts_pack_packSign0,
+                                 AIE2P::VST_2D_PACK_dmx_sts_pack_packSign0),
+                            NoImmediate, /*OffsetOpcode=*/{}};
+  case AIE2P::G_AIE_POSTINC_3D_STORE:
+    return LoadStoreOpcodes{Pick(AIE2P::VST_3D_PACK_dmw_sts_pack_packSign1,
+                                 AIE2P::VST_3D_PACK_dmx_sts_pack_packSign1,
+                                 AIE2P::VST_3D_PACK_dmw_sts_pack_packSign0,
+                                 AIE2P::VST_3D_PACK_dmx_sts_pack_packSign0),
+                            NoImmediate, /*OffsetOpcode=*/{}};
+  default:
+    return {};
+  }
+}
+
+/// Return true if \p CombOp is a pack intrinsic that can be folded into the
+/// store \p MemOp: both must live in the same basic block and the register
+/// read by the first use operand of \p MemOp must have exactly one use.
+bool AIE2PInstructionSelector::canCombinePACK(MachineInstr &MemOp,
+                                              MachineInstr &CombOp,
+                                              MachineRegisterInfo &MRI) {
+  const Register PackResult = MemOp.uses().begin()->getReg();
+  if (MemOp.getParent() != CombOp.getParent() || !MRI.hasOneUse(PackResult))
+    return false;
+
+  // Only combinability is queried here: whether an opcode exists does not
+  // depend on the immediate or on the signedness.
+  return getCombinedOpcodePACK(MemOp, CombOp, /*Immediate=*/std::nullopt,
+                               /*IsSigned=*/true)
+      .has_value();
+}
+
+/// Try to fold the pack intrinsic feeding \p StoreI into one VST.PACK
+/// instruction.
+///
+/// Returns false when the stored value is not produced by a combinable pack
+/// intrinsic or when no addressing mode could be obtained. Otherwise the new
+/// instruction is built, crPackSize is written right before it, \p StoreI is
+/// erased and the result of constraining the new instruction's register
+/// operands is returned.
+bool AIE2PInstructionSelector::selectG_AIE_STORE_PACK(
+    MachineInstr &StoreI, MachineRegisterInfo &MRI) {
+  const Register PackResult = StoreI.uses().begin()->getReg();
+  MachineInstr *PackOp = MRI.getVRegDef(PackResult);
+
+  // getVRegDef() returns null when the register has no defining instruction.
+  if (!PackOp || !canCombinePACK(StoreI, *PackOp, MRI))
+    return false;
+
+  std::optional<AddressingModeInfo> AMI =
+      getOrDefineAddressingRegister(StoreI, MRI);
+  if (!AMI)
+    return false;
+
+  // Note: Operand 1 is the ID of the intrinsic
+  const Register SrcReg = PackOp->getOperand(2).getReg();
+  const Register SignReg = PackOp->getOperand(3).getReg();
+
+  // Only read by the assert below; keep NDEBUG builds free of unused-variable
+  // warnings.
+  [[maybe_unused]] const unsigned MemOpLoadStoreSize =
+      getLoadStoreSize(StoreI);
+  [[maybe_unused]] const TypeSize SrcRegSize =
+      MRI.getType(SrcReg).getSizeInBits();
+  assert(((MemOpLoadStoreSize == 256 && SrcRegSize == 512) ||
+          (MemOpLoadStoreSize == 512 && SrcRegSize == 1024)) &&
+         "Unexpected VST.PACK size");
+
+  // A constant sign of 1 selects the packSign1 opcodes. Any other constant,
+  // and any non-constant sign, selects the packSign0 opcodes; for the
+  // non-constant case the packSign0 register is additionally programmed from
+  // SignReg further down.
+  const auto SignVal = getIConstantVRegValWithLookThrough(SignReg, MRI);
+  const bool ConstantSign = SignVal.has_value();
+  const bool IsSigned = ConstantSign && SignVal->Value == 0x1;
+
+  std::optional<LoadStoreOpcodes> LSO =
+      getCombinedOpcodePACK(StoreI, *PackOp, AMI->ImmediateOffset, IsSigned);
+  assert(LSO && "Unexpected VST.PACK combine failure");
+
+  // Note: the output size (I8 or I4) is not encoded as part of the instruction,
+  // but it is read from the crPackSize register.
+  auto NewInstr = MIB.buildInstr(LSO->ISelOpcode);
+
+  for (const MachineOperand &Def : StoreI.defs())
+    NewInstr.addDef(Def.getReg());
+
+  NewInstr.addUse(SrcReg);
+
+  addAddressingMode(NewInstr, *AMI, LSO->FitsImmediateRange, false, MRI);
+
+  NewInstr.cloneMemRefs(StoreI);
+
+  // Set the crPackSize before NewInstr
+  // Selects the size of the Pack instructions
+  // 0 - Destination is 4 bits
+  // 1 - Destination is 8 bits
+  const auto PackID = cast<GIntrinsic>(PackOp)->getIntrinsicID();
+  const bool Is8Bit = PackID == Intrinsic::aie2p_pack_I512_I8_I16 ||
+                      PackID == Intrinsic::aie2p_pack_I1024_I8_I16;
+
+  auto Opcode = TII.getMvSclMultiSlotPseudoOpcode();
+  MIB.setInstr(*NewInstr);
+  MIB.buildInstr(Opcode, {AIE2P::crPackSize}, {}).addImm(Is8Bit ? 1 : 0);
+
+  if (!ConstantSign)
+    setUnsetCtrlRegister(MIB, *NewInstr, MRI, AIE2P::packSign0, SignReg);
+
+  StoreI.eraseFromParent();
+  makeDeadMI(*PackOp, MRI);
+  return constrainSelectedInstRegOperands(*NewInstr.getInstr(), TII, TRI, RBI);
+}
+
 bool
AIE2PInstructionSelector::selectG_AIE_ADD_VECTOR_ELT_HI( MachineInstr &I, MachineRegisterInfo &MRI) { const Register Dst = I.getOperand(0).getReg(); diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-indexed-vst_pack.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-indexed-vst_pack.mir new file mode 100644 index 000000000000..e23acb9261d5 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-indexed-vst_pack.mir @@ -0,0 +1,456 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates +# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: VST_PACK_512_I8_I16_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I8_I16_signed + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_imm_packSign1 [[COPY]], [[COPY1]], 224, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<32 x s8>)) + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 256 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_packSign1 [[COPY]], [[COPY1]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<32 x s8>)) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:edj = COPY $dj0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_packSign1 [[COPY]], [[COPY1]], [[COPY2]], 
implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<32 x s8>)) + %0:vregbank(<32 x s16>) = COPY $x0 + %2:ptrregbank(p0) = COPY $p0 + %6:gprregbank(s32) = G_CONSTANT i32 1 + %5:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32) + %3:modregbank(s20) = G_CONSTANT i20 224 + G_AIE_OFFSET_STORE %5:vregbank(<32 x s8>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<32 x s8>)) + %8:modregbank(s20) = G_CONSTANT i20 256 + %9:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %9:vregbank(<32 x s8>), %2:ptrregbank(p0), %8:modregbank(s20) :: (store (<32 x s8>)) + %10:modregbank(s20) = COPY $dj0 + %11:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %11:vregbank(<32 x s8>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<32 x s8>)) +... 
+ +--- +name: VST_PACK_512_I4_I8_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I4_I8_signed + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_imm_packSign1 [[COPY]], [[COPY1]], 224, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s4>)) + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 256 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_packSign1 [[COPY]], [[COPY1]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s4>)) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:edj = COPY $dj0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_packSign1 [[COPY]], [[COPY1]], [[COPY2]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s4>)) + %0:vregbank(<64 x s8>) = COPY $x0 + %2:ptrregbank(p0) = COPY $p0 + %6:gprregbank(s32) = G_CONSTANT i32 1 + %5:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %0:vregbank(<64 x s8>), %6:gprregbank(s32) + %3:modregbank(s20) = G_CONSTANT i20 224 + G_AIE_OFFSET_STORE %5:vregbank(<64 x s4>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<64 x s4>)) + %8:modregbank(s20) = G_CONSTANT i20 256 + %9:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %0:vregbank(<64 x s8>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %9:vregbank(<64 x s4>), %2:ptrregbank(p0), %8:modregbank(s20) :: (store (<64 x s4>)) + %10:modregbank(s20) = COPY $dj0 + %11:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %0:vregbank(<64 x s8>), 
%6:gprregbank(s32) + G_AIE_OFFSET_STORE %11:vregbank(<64 x s4>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<64 x s4>)) +... + +--- +name: VST_PACK_1024_I8_I16_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I8_I16_signed + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_imm_packSign1 [[COPY]], [[COPY1]], 448, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s8>)) + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_packSign1 [[COPY]], [[COPY1]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s8>)) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:edj = COPY $dj0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_packSign1 [[COPY]], [[COPY1]], [[COPY2]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s8>)) + %0:vregbank(<64 x s16>) = COPY $y0 + %2:ptrregbank(p0) = COPY $p0 + %6:gprregbank(s32) = G_CONSTANT i32 1 + %5:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %0:vregbank(<64 x s16>), %6:gprregbank(s32) + %3:modregbank(s20) = G_CONSTANT i20 448 + G_AIE_OFFSET_STORE %5:vregbank(<64 x s8>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<64 x s8>)) + %8:modregbank(s20) = G_CONSTANT i20 512 + %9:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %0:vregbank(<64 x s16>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %9:vregbank(<64 x s8>), %2:ptrregbank(p0), %8:modregbank(s20) :: (store (<64 x s8>)) + %10:modregbank(s20) = 
COPY $dj0 + %11:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %0:vregbank(<64 x s16>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %11:vregbank(<64 x s8>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<64 x s8>)) +... + +--- +name: VST_PACK_1024_I4_I8_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I4_I8_signed + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_imm_packSign1 [[COPY]], [[COPY1]], 448, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<128 x s4>)) + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_packSign1 [[COPY]], [[COPY1]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<128 x s4>)) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:edj = COPY $dj0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_packSign1 [[COPY]], [[COPY1]], [[COPY2]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<128 x s4>)) + %0:vregbank(<128 x s8>) = COPY $y0 + %2:ptrregbank(p0) = COPY $p0 + %6:gprregbank(s32) = G_CONSTANT i32 1 + %5:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %0:vregbank(<128 x s8>), %6:gprregbank(s32) + %3:modregbank(s20) = G_CONSTANT i20 448 + G_AIE_OFFSET_STORE %5:vregbank(<128 x s4>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<128 x s4>)) + %8:modregbank(s20) = G_CONSTANT i20 512 + %9:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %0:vregbank(<128 x s8>), 
%6:gprregbank(s32) + G_AIE_OFFSET_STORE %9:vregbank(<128 x s4>), %2:ptrregbank(p0), %8:modregbank(s20) :: (store (<128 x s4>)) + %10:modregbank(s20) = COPY $dj0 + %11:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %0:vregbank(<128 x s8>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %11:vregbank(<128 x s4>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<128 x s4>)) +... + +--- +name: VST_PACK_512_I8_I16_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I8_I16_unsigned + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 224, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>)) + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 256 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>)) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:edj = COPY $dj0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[COPY2]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>)) + %0:vregbank(<32 x s16>) = COPY $x0 + %2:ptrregbank(p0) = COPY $p0 + %6:gprregbank(s32) = G_CONSTANT i32 0 + %5:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32) + %3:modregbank(s20) = G_CONSTANT i20 224 + G_AIE_OFFSET_STORE %5:vregbank(<32 x s8>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<32 x s8>)) + %8:modregbank(s20) = 
G_CONSTANT i20 256 + %9:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %9:vregbank(<32 x s8>), %2:ptrregbank(p0), %8:modregbank(s20) :: (store (<32 x s8>)) + %10:modregbank(s20) = COPY $dj0 + %11:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %11:vregbank(<32 x s8>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<32 x s8>)) +... + +--- +name: VST_PACK_512_I4_I8_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I4_I8_unsigned + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 224, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 256 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:edj = COPY $dj0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[COPY2]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + %0:vregbank(<64 x s8>) = COPY $x0 + %2:ptrregbank(p0) = COPY $p0 + %6:gprregbank(s32) = G_CONSTANT i32 0 + %5:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %0:vregbank(<64 x s8>), %6:gprregbank(s32) + %3:modregbank(s20) = G_CONSTANT i20 224 + 
G_AIE_OFFSET_STORE %5:vregbank(<64 x s4>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<64 x s4>)) + %8:modregbank(s20) = G_CONSTANT i20 256 + %9:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %0:vregbank(<64 x s8>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %9:vregbank(<64 x s4>), %2:ptrregbank(p0), %8:modregbank(s20) :: (store (<64 x s4>)) + %10:modregbank(s20) = COPY $dj0 + %11:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %0:vregbank(<64 x s8>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %11:vregbank(<64 x s4>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<64 x s4>)) +... + +--- +name: VST_PACK_1024_I8_I16_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I8_I16_unsigned + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 448, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:edj = COPY $dj0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[COPY2]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + %0:vregbank(<64 x s16>) = COPY $y0 + %2:ptrregbank(p0) = COPY $p0 + %6:gprregbank(s32) = G_CONSTANT i32 0 + %5:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS 
intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %0:vregbank(<64 x s16>), %6:gprregbank(s32) + %3:modregbank(s20) = G_CONSTANT i20 448 + G_AIE_OFFSET_STORE %5:vregbank(<64 x s8>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<64 x s8>)) + %8:modregbank(s20) = G_CONSTANT i20 512 + %9:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %0:vregbank(<64 x s16>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %9:vregbank(<64 x s8>), %2:ptrregbank(p0), %8:modregbank(s20) :: (store (<64 x s8>)) + %10:modregbank(s20) = COPY $dj0 + %11:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %0:vregbank(<64 x s16>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %11:vregbank(<64 x s8>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<64 x s8>)) +... + +--- +name: VST_PACK_1024_I4_I8_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I4_I8_unsigned + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 448, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>)) + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>)) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:edj = COPY $dj0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[COPY2]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>)) + %0:vregbank(<128 x s8>) 
= COPY $y0 + %2:ptrregbank(p0) = COPY $p0 + %6:gprregbank(s32) = G_CONSTANT i32 0 + %5:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %0:vregbank(<128 x s8>), %6:gprregbank(s32) + %3:modregbank(s20) = G_CONSTANT i20 448 + G_AIE_OFFSET_STORE %5:vregbank(<128 x s4>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<128 x s4>)) + %8:modregbank(s20) = G_CONSTANT i20 512 + %9:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %0:vregbank(<128 x s8>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %9:vregbank(<128 x s4>), %2:ptrregbank(p0), %8:modregbank(s20) :: (store (<128 x s4>)) + %10:modregbank(s20) = COPY $dj0 + %11:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %0:vregbank(<128 x s8>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %11:vregbank(<128 x s4>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<128 x s4>)) +... + +--- +name: VST_PACK_512_I8_I16_dynamic +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I8_I16_dynamic + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 224, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 256 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit 
$packsign0 :: (store (<32 x s8>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:edj = COPY $dj0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[COPY3]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + %0:vregbank(<32 x s16>) = COPY $x0 + %2:ptrregbank(p0) = COPY $p0 + %6:gprregbank(s32) = COPY $r0 + %5:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32) + %3:modregbank(s20) = G_CONSTANT i20 224 + G_AIE_OFFSET_STORE %5:vregbank(<32 x s8>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<32 x s8>)) + %8:modregbank(s20) = G_CONSTANT i20 256 + %9:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %9:vregbank(<32 x s8>), %2:ptrregbank(p0), %8:modregbank(s20) :: (store (<32 x s8>)) + %10:modregbank(s20) = COPY $dj0 + %11:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %11:vregbank(<32 x s8>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<32 x s8>)) +... 
+ +--- +name: VST_PACK_512_I4_I8_dynamic +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I4_I8_dynamic + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 224, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 256 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:edj = COPY $dj0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[COPY3]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + %0:vregbank(<64 x s8>) = COPY $x0 + %2:ptrregbank(p0) = COPY $p0 + %6:gprregbank(s32) = COPY $r0 + %5:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %0:vregbank(<64 x s8>), %6:gprregbank(s32) + %3:modregbank(s20) = G_CONSTANT i20 224 + G_AIE_OFFSET_STORE %5:vregbank(<64 x s4>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<64 x s4>)) + %8:modregbank(s20) = G_CONSTANT i20 256 + %9:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS 
intrinsic(@llvm.aie2p.pack.I512.I4.I8), %0:vregbank(<64 x s8>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %9:vregbank(<64 x s4>), %2:ptrregbank(p0), %8:modregbank(s20) :: (store (<64 x s4>)) + %10:modregbank(s20) = COPY $dj0 + %11:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %0:vregbank(<64 x s8>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %11:vregbank(<64 x s4>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<64 x s4>)) +... + +--- +name: VST_PACK_1024_I8_I16_dynamic +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I8_I16_dynamic + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 448, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:edj = COPY $dj0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[COPY3]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + %0:vregbank(<64 x 
s16>) = COPY $y0 + %2:ptrregbank(p0) = COPY $p0 + %6:gprregbank(s32) = COPY $r0 + %5:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %0:vregbank(<64 x s16>), %6:gprregbank(s32) + %3:modregbank(s20) = G_CONSTANT i20 448 + G_AIE_OFFSET_STORE %5:vregbank(<64 x s8>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<64 x s8>)) + %8:modregbank(s20) = G_CONSTANT i20 512 + %9:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %0:vregbank(<64 x s16>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %9:vregbank(<64 x s8>), %2:ptrregbank(p0), %8:modregbank(s20) :: (store (<64 x s8>)) + %10:modregbank(s20) = COPY $dj0 + %11:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %0:vregbank(<64 x s16>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %11:vregbank(<64 x s8>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<64 x s8>)) +... + +--- +name: VST_PACK_1024_I4_I8_dynamic +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I4_I8_dynamic + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 448, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign0 
:: (store (<128 x s4>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:edj = COPY $dj0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_packSign0 [[COPY]], [[COPY1]], [[COPY3]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + %0:vregbank(<128 x s8>) = COPY $y0 + %2:ptrregbank(p0) = COPY $p0 + %6:gprregbank(s32) = COPY $r0 + %5:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %0:vregbank(<128 x s8>), %6:gprregbank(s32) + %3:modregbank(s20) = G_CONSTANT i20 448 + G_AIE_OFFSET_STORE %5:vregbank(<128 x s4>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<128 x s4>)) + %8:modregbank(s20) = G_CONSTANT i20 512 + %9:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %0:vregbank(<128 x s8>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %9:vregbank(<128 x s4>), %2:ptrregbank(p0), %8:modregbank(s20) :: (store (<128 x s4>)) + %10:modregbank(s20) = COPY $dj0 + %11:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %0:vregbank(<128 x s8>), %6:gprregbank(s32) + G_AIE_OFFSET_STORE %11:vregbank(<128 x s4>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<128 x s4>)) +... diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-postinc-2d-vst_pack.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-postinc-2d-vst_pack.mir new file mode 100644 index 000000000000..dad375b44d5c --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-postinc-2d-vst_pack.mir @@ -0,0 +1,416 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates +# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: VST_PACK_512_I8_I16_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I8_I16_signed + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_2D_PACK_dmw_sts_pack_packSign1_:%[0-9]+]]:ep, [[VST_2D_PACK_dmw_sts_pack_packSign1_1:%[0-9]+]]:edc = VST_2D_PACK_dmw_sts_pack_packSign1 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<32 x s8>)) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:modregbank(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %100:vregbank(<32 x s16>) = COPY $x0 + %101:gprregbank(s32) = G_CONSTANT i32 1 + %103:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %100:vregbank(<32 x s16>), %101:gprregbank(s32) + %19:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %103, %0, %1, %2, %3, %4 :: 
(store (<32 x s8>)) + PseudoRET implicit $lr +... + +--- +name: VST_PACK_512_I4_I8_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I4_I8_signed + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_2D_PACK_dmw_sts_pack_packSign1_:%[0-9]+]]:ep, [[VST_2D_PACK_dmw_sts_pack_packSign1_1:%[0-9]+]]:edc = VST_2D_PACK_dmw_sts_pack_packSign1 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s4>)) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %100:vregbank(<64 x s8>) = COPY $x0 + %101:gprregbank(s32) = G_CONSTANT i32 1 + %103:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %101:gprregbank(s32) + %19:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %103, %0, %1, %2, %3, %4 :: (store (<64 x s4>)) + PseudoRET implicit $lr +... 
+ +--- +name: VST_PACK_1024_I8_I16_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I8_I16_signed + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_2D_PACK_dmx_sts_pack_packSign1_:%[0-9]+]]:ep, [[VST_2D_PACK_dmx_sts_pack_packSign1_1:%[0-9]+]]:edc = VST_2D_PACK_dmx_sts_pack_packSign1 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s8>)) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %100:vregbank(<64 x s16>) = COPY $y0 + %101:gprregbank(s32) = G_CONSTANT i32 1 + %103:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %101:gprregbank(s32) + %19:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %103, %0, %1, %2, %3, %4 :: (store (<64 x s8>)) + PseudoRET implicit $lr +... 
+ +--- +name: VST_PACK_1024_I4_I8_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I4_I8_signed + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_2D_PACK_dmx_sts_pack_packSign1_:%[0-9]+]]:ep, [[VST_2D_PACK_dmx_sts_pack_packSign1_1:%[0-9]+]]:edc = VST_2D_PACK_dmx_sts_pack_packSign1 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<128 x s4>)) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %100:vregbank(<128 x s8>) = COPY $y0 + %101:gprregbank(s32) = G_CONSTANT i32 1 + %103:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %101:gprregbank(s32) + %19:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %103, %0, %1, %2, %3, %4 :: (store (<128 x s4>)) + PseudoRET implicit $lr +... 
+ +--- +name: VST_PACK_512_I8_I16_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I8_I16_unsigned + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_2D_PACK_dmw_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_2D_PACK_dmw_sts_pack_packSign0_1:%[0-9]+]]:edc = VST_2D_PACK_dmw_sts_pack_packSign0 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>)) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %100:vregbank(<32 x s16>) = COPY $x0 + %101:gprregbank(s32) = G_CONSTANT i32 0 + %103:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %100:vregbank(<32 x s16>), %101:gprregbank(s32) + %19:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %103, %0, %1, %2, %3, %4 :: (store (<32 x s8>)) + PseudoRET implicit $lr +... 
+ +--- +name: VST_PACK_512_I4_I8_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I4_I8_unsigned + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_2D_PACK_dmw_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_2D_PACK_dmw_sts_pack_packSign0_1:%[0-9]+]]:edc = VST_2D_PACK_dmw_sts_pack_packSign0 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %100:vregbank(<64 x s8>) = COPY $x0 + %101:gprregbank(s32) = G_CONSTANT i32 0 + %103:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %101:gprregbank(s32) + %19:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %103, %0, %1, %2, %3, %4 :: (store (<64 x s4>)) + PseudoRET implicit $lr +... 
+ +--- +name: VST_PACK_1024_I8_I16_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I8_I16_unsigned + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_2D_PACK_dmx_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_2D_PACK_dmx_sts_pack_packSign0_1:%[0-9]+]]:edc = VST_2D_PACK_dmx_sts_pack_packSign0 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %100:vregbank(<64 x s16>) = COPY $y0 + %101:gprregbank(s32) = G_CONSTANT i32 0 + %103:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %101:gprregbank(s32) + %19:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %103, %0, %1, %2, %3, %4 :: (store (<64 x s8>)) + PseudoRET implicit $lr +... 
+ +--- +name: VST_PACK_1024_I4_I8_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I4_I8_unsigned + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_2D_PACK_dmx_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_2D_PACK_dmx_sts_pack_packSign0_1:%[0-9]+]]:edc = VST_2D_PACK_dmx_sts_pack_packSign0 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>)) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %100:vregbank(<128 x s8>) = COPY $y0 + %101:gprregbank(s32) = G_CONSTANT i32 0 + %103:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %101:gprregbank(s32) + %19:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %103, %0, %1, %2, %3, %4 :: (store (<128 x s4>)) + PseudoRET implicit $lr +... 
+ +--- +name: VST_PACK_512_I8_I16_dynamic +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I8_I16_dynamic + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: [[VST_2D_PACK_dmw_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_2D_PACK_dmw_sts_pack_packSign0_1:%[0-9]+]]:edc = VST_2D_PACK_dmw_sts_pack_packSign0 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %100:vregbank(<32 x s16>) = COPY $x0 + %101:gprregbank(s32) = COPY $r0 + %103:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %100:vregbank(<32 x s16>), %101:gprregbank(s32) + %19:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %103, %0, %1, %2, %3, %4 :: (store (<32 x s8>)) + PseudoRET implicit $lr +... 
+ +--- +name: VST_PACK_512_I4_I8_dynamic +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I4_I8_dynamic + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: [[VST_2D_PACK_dmw_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_2D_PACK_dmw_sts_pack_packSign0_1:%[0-9]+]]:edc = VST_2D_PACK_dmw_sts_pack_packSign0 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %100:vregbank(<64 x s8>) = COPY $x0 + %101:gprregbank(s32) = COPY $r0 + %103:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %101:gprregbank(s32) + %19:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %103, %0, %1, %2, %3, %4 :: (store (<64 x s4>)) + PseudoRET implicit $lr +... 
+ +--- +name: VST_PACK_1024_I8_I16_dynamic +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I8_I16_dynamic + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: [[VST_2D_PACK_dmx_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_2D_PACK_dmx_sts_pack_packSign0_1:%[0-9]+]]:edc = VST_2D_PACK_dmx_sts_pack_packSign0 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %100:vregbank(<64 x s16>) = COPY $y0 + %101:gprregbank(s32) = COPY $r0 + %103:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %101:gprregbank(s32) + %19:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %103, %0, %1, %2, %3, %4 :: (store (<64 x s8>)) + PseudoRET implicit $lr +... 
+ +--- +name: VST_PACK_1024_I4_I8_dynamic +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I4_I8_dynamic + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: [[VST_2D_PACK_dmx_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_2D_PACK_dmx_sts_pack_packSign0_1:%[0-9]+]]:edc = VST_2D_PACK_dmx_sts_pack_packSign0 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %100:vregbank(<128 x s8>) = COPY $y0 + %101:gprregbank(s32) = COPY $r0 + %103:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %101:gprregbank(s32) + %19:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %103, %0, %1, %2, %3, %4 :: (store (<128 x s4>)) + PseudoRET implicit $lr +... 
diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-postinc-3d-vst_pack.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-postinc-3d-vst_pack.mir new file mode 100644 index 000000000000..37078f853b60 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-postinc-3d-vst_pack.mir @@ -0,0 +1,465 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates +# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: VST_PACK_512_I8_I16_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I8_I16_signed + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, 
[[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_3D_PACK_dmw_sts_pack_packSign1_:%[0-9]+]]:ep, [[VST_3D_PACK_dmw_sts_pack_packSign1_1:%[0-9]+]]:edcl, [[VST_3D_PACK_dmw_sts_pack_packSign1_2:%[0-9]+]]:edch = VST_3D_PACK_dmw_sts_pack_packSign1 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<32 x s8>)) + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %100:vregbank(<32 x s16>) = COPY $x0 + %102:gprregbank(s32) = G_CONSTANT i32 1 + %104:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %100:vregbank(<32 x s16>), %102:gprregbank(s32) + %19:ptrregbank(p0), %8:modregbank(s20), %9:modregbank(s20) = G_AIE_POSTINC_3D_STORE %104, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<32 x s8>)) +... 
+ +--- +name: VST_PACK_512_I4_I8_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I4_I8_signed + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_3D_PACK_dmw_sts_pack_packSign1_:%[0-9]+]]:ep, [[VST_3D_PACK_dmw_sts_pack_packSign1_1:%[0-9]+]]:edcl, [[VST_3D_PACK_dmw_sts_pack_packSign1_2:%[0-9]+]]:edch = VST_3D_PACK_dmw_sts_pack_packSign1 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s4>)) + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %100:vregbank(<64 
x s8>) = COPY $x0 + %102:gprregbank(s32) = G_CONSTANT i32 1 + %104:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %102:gprregbank(s32) + %19:ptrregbank(p0), %8:modregbank(s20), %9:modregbank(s20) = G_AIE_POSTINC_3D_STORE %104, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<64 x s4>)) +... + +--- +name: VST_PACK_1024_I8_I16_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I8_I16_signed + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_3D_PACK_dmx_sts_pack_packSign1_:%[0-9]+]]:ep, [[VST_3D_PACK_dmx_sts_pack_packSign1_1:%[0-9]+]]:edcl, [[VST_3D_PACK_dmx_sts_pack_packSign1_2:%[0-9]+]]:edch = VST_3D_PACK_dmx_sts_pack_packSign1 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], 
implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s8>)) + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %100:vregbank(<64 x s16>) = COPY $y0 + %102:gprregbank(s32) = G_CONSTANT i32 1 + %104:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %102:gprregbank(s32) + %19:ptrregbank(p0), %8:modregbank(s20), %9:modregbank(s20) = G_AIE_POSTINC_3D_STORE %104, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<64 x s8>)) +... + +--- +name: VST_PACK_1024_I4_I8_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I4_I8_signed + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, 
[[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_3D_PACK_dmx_sts_pack_packSign1_:%[0-9]+]]:ep, [[VST_3D_PACK_dmx_sts_pack_packSign1_1:%[0-9]+]]:edcl, [[VST_3D_PACK_dmx_sts_pack_packSign1_2:%[0-9]+]]:edch = VST_3D_PACK_dmx_sts_pack_packSign1 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<128 x s4>)) + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %100:vregbank(<128 x s8>) = COPY $y0 + %102:gprregbank(s32) = G_CONSTANT i32 1 + %104:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %102:gprregbank(s32) + %19:ptrregbank(p0), %8:modregbank(s20), %9:modregbank(s20) = G_AIE_POSTINC_3D_STORE %104, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<128 x s4>)) +... 
+ +--- +name: VST_PACK_512_I8_I16_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I8_I16_unsigned + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_3D_PACK_dmw_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_3D_PACK_dmw_sts_pack_packSign0_1:%[0-9]+]]:edcl, [[VST_3D_PACK_dmw_sts_pack_packSign0_2:%[0-9]+]]:edch = VST_3D_PACK_dmw_sts_pack_packSign0 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>)) + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + 
%100:vregbank(<32 x s16>) = COPY $x0 + %102:gprregbank(s32) = G_CONSTANT i32 0 + %104:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %100:vregbank(<32 x s16>), %102:gprregbank(s32) + %19:ptrregbank(p0), %8:modregbank(s20), %9:modregbank(s20) = G_AIE_POSTINC_3D_STORE %104, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<32 x s8>)) +... + +--- +name: VST_PACK_512_I4_I8_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I4_I8_unsigned + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_3D_PACK_dmw_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_3D_PACK_dmw_sts_pack_packSign0_1:%[0-9]+]]:edcl, [[VST_3D_PACK_dmw_sts_pack_packSign0_2:%[0-9]+]]:edch = VST_3D_PACK_dmw_sts_pack_packSign0 [[COPY1]], [[COPY]], 
[[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %100:vregbank(<64 x s8>) = COPY $x0 + %102:gprregbank(s32) = G_CONSTANT i32 0 + %104:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %102:gprregbank(s32) + %19:ptrregbank(p0), %8:modregbank(s20), %9:modregbank(s20) = G_AIE_POSTINC_3D_STORE %104, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<64 x s4>)) +... + +--- +name: VST_PACK_1024_I8_I16_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I8_I16_unsigned + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], 
%subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_3D_PACK_dmx_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_3D_PACK_dmx_sts_pack_packSign0_1:%[0-9]+]]:edcl, [[VST_3D_PACK_dmx_sts_pack_packSign0_2:%[0-9]+]]:edch = VST_3D_PACK_dmx_sts_pack_packSign0 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %100:vregbank(<64 x s16>) = COPY $y0 + %102:gprregbank(s32) = G_CONSTANT i32 0 + %104:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %102:gprregbank(s32) + %19:ptrregbank(p0), %8:modregbank(s20), %9:modregbank(s20) = G_AIE_POSTINC_3D_STORE %104, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<64 x s8>)) +... 
+ +--- +name: VST_PACK_1024_I4_I8_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I4_I8_unsigned + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_3D_PACK_dmx_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_3D_PACK_dmx_sts_pack_packSign0_1:%[0-9]+]]:edcl, [[VST_3D_PACK_dmx_sts_pack_packSign0_2:%[0-9]+]]:edch = VST_3D_PACK_dmx_sts_pack_packSign0 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>)) + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + 
%100:vregbank(<128 x s8>) = COPY $y0 + %102:gprregbank(s32) = G_CONSTANT i32 0 + %104:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %102:gprregbank(s32) + %19:ptrregbank(p0), %8:modregbank(s20), %9:modregbank(s20) = G_AIE_POSTINC_3D_STORE %104, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<128 x s4>)) +... + + +--- +name: VST_PACK_512_I8_I16_dynamic +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I8_I16_dynamic + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: [[VST_3D_PACK_dmw_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_3D_PACK_dmw_sts_pack_packSign0_1:%[0-9]+]]:edcl, 
[[VST_3D_PACK_dmw_sts_pack_packSign0_2:%[0-9]+]]:edch = VST_3D_PACK_dmw_sts_pack_packSign0 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %100:vregbank(<32 x s16>) = COPY $x0 + %102:gprregbank(s32) = COPY $r0 + %104:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %100:vregbank(<32 x s16>), %102:gprregbank(s32) + %19:ptrregbank(p0), %8:modregbank(s20), %9:modregbank(s20) = G_AIE_POSTINC_3D_STORE %104, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<32 x s8>)) +... + +--- +name: VST_PACK_512_I4_I8_dynamic +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I4_I8_dynamic + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, 
[[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: [[VST_3D_PACK_dmw_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_3D_PACK_dmw_sts_pack_packSign0_1:%[0-9]+]]:edcl, [[VST_3D_PACK_dmw_sts_pack_packSign0_2:%[0-9]+]]:edch = VST_3D_PACK_dmw_sts_pack_packSign0 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %100:vregbank(<64 x s8>) = COPY $x0 + %102:gprregbank(s32) = COPY $r0 + %104:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %102:gprregbank(s32) + %19:ptrregbank(p0), %8:modregbank(s20), %9:modregbank(s20) = G_AIE_POSTINC_3D_STORE %104, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<64 x s4>)) +... 
+ +--- +name: VST_PACK_1024_I8_I16_dynamic +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I8_I16_dynamic + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; CHECK-NEXT: [[VST_3D_PACK_dmx_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_3D_PACK_dmx_sts_pack_packSign0_1:%[0-9]+]]:edcl, [[VST_3D_PACK_dmx_sts_pack_packSign0_2:%[0-9]+]]:edch = VST_3D_PACK_dmx_sts_pack_packSign0 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 
3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %100:vregbank(<64 x s16>) = COPY $y0 + %102:gprregbank(s32) = COPY $r0 + %104:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %102:gprregbank(s32) + %19:ptrregbank(p0), %8:modregbank(s20), %9:modregbank(s20) = G_AIE_POSTINC_3D_STORE %104, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<64 x s8>)) +... + +--- +name: VST_PACK_1024_I4_I8_dynamic +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I4_I8_dynamic + ; CHECK: liveins: $p0, $r0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: $packsign0 = COPY [[COPY2]] + ; 
CHECK-NEXT: [[VST_3D_PACK_dmx_sts_pack_packSign0_:%[0-9]+]]:ep, [[VST_3D_PACK_dmx_sts_pack_packSign0_1:%[0-9]+]]:edcl, [[VST_3D_PACK_dmx_sts_pack_packSign0_2:%[0-9]+]]:edch = VST_3D_PACK_dmx_sts_pack_packSign0 [[COPY1]], [[COPY]], [[REG_SEQUENCE]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>)) + ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0 + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %100:vregbank(<128 x s8>) = COPY $y0 + %102:gprregbank(s32) = COPY $r0 + %104:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %102:gprregbank(s32) + %19:ptrregbank(p0), %8:modregbank(s20), %9:modregbank(s20) = G_AIE_POSTINC_3D_STORE %104, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<128 x s4>)) +... diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-postinc-vst_pack.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-postinc-vst_pack.mir new file mode 100644 index 000000000000..7d67beb6ba72 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-postinc-vst_pack.mir @@ -0,0 +1,676 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2025 Advanced Micro Devices, Inc. 
or its affiliates +# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: VST_PACK_512_I8_I16_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $m0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I8_I16_signed + ; CHECK: liveins: $p0, $m0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 16 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 256 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign1_:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign1 [[COPY2]], [[COPY]], [[COPY1]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<32 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1_:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign1_]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<32 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign1_1:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign1 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1_]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<32 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1_1:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign1_1]], -256, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<32 x s8>)) + ; CHECK-NEXT: $crpacksize = 
MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1_2:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1_1]], 224, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<32 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign1_2:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign1 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1_2]], [[MOV_PD_imm11_pseudo1]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<32 x s8>)) + %0:ptrregbank(p0) = COPY $p0 + %7:modregbank(s20) = COPY $m0 + %8:modregbank(s20) = G_CONSTANT i20 0 + %9:modregbank(s20) = G_CONSTANT i20 16 + %10:modregbank(s20) = G_CONSTANT i20 -256 + %11:modregbank(s20) = G_CONSTANT i20 224 + %12:modregbank(s20) = G_CONSTANT i20 256 + %100:vregbank(<64 x s8>) = COPY $x0 + %101:vregbank(<32 x s16>) = G_BITCAST %100:vregbank(<64 x s8>) + %200:gprregbank(s32) = G_CONSTANT i32 1 + %103:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32) + %104:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32) + %105:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32) + %106:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32) + %107:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32) + %108:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32) + %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %103, %0, %7 :: (store 
(<32 x s8>)) + %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %19, %8 :: (store (<32 x s8>)) + %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %105, %20, %9 :: (store (<32 x s8>)) + %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %106, %21, %10 :: (store (<32 x s8>)) + %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %107, %22, %11 :: (store (<32 x s8>)) + %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %108, %23, %12 :: (store (<32 x s8>)) +... + +--- +name: VST_PACK_512_I4_I8_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $m0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I4_I8_signed + ; CHECK: liveins: $p0, $m0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 16 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 256 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign1_:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign1 [[COPY2]], [[COPY]], [[COPY1]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1_:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign1_]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign1_1:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign1 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1_]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: 
[[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1_1:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign1_1]], -256, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1_2:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1_1]], 224, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign1_2:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign1 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign1_2]], [[MOV_PD_imm11_pseudo1]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s4>)) + %0:ptrregbank(p0) = COPY $p0 + %7:modregbank(s20) = COPY $m0 + %8:modregbank(s20) = G_CONSTANT i20 0 + %9:modregbank(s20) = G_CONSTANT i20 16 + %10:modregbank(s20) = G_CONSTANT i20 -256 + %11:modregbank(s20) = G_CONSTANT i20 224 + %12:modregbank(s20) = G_CONSTANT i20 256 + %100:vregbank(<64 x s8>) = COPY $x0 + %200:gprregbank(s32) = G_CONSTANT i32 1 + %103:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32) + %104:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32) + %105:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32) + %106:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32) + %107:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32) 
+ %108:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32) + %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %103, %0, %7 :: (store (<64 x s4>)) + %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %19, %8 :: (store (<64 x s4>)) + %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %105, %20, %9 :: (store (<64 x s4>)) + %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %106, %21, %10 :: (store (<64 x s4>)) + %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %107, %22, %11 :: (store (<64 x s4>)) + %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %108, %23, %12 :: (store (<64 x s4>)) +... + +--- +name: VST_PACK_1024_I8_I16_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $m0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I8_I16_signed + ; CHECK: liveins: $p0, $m0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 16 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign1_:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign1 [[COPY2]], [[COPY]], [[COPY1]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1_:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign1_]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign1_1:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign1 [[COPY2]], 
[[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1_]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1_1:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign1_1]], -512, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1_2:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1_1]], 448, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign1_2:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign1 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1_2]], [[MOV_PD_imm11_pseudo1]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s8>)) + %0:ptrregbank(p0) = COPY $p0 + %7:modregbank(s20) = COPY $m0 + %8:modregbank(s20) = G_CONSTANT i20 0 + %9:modregbank(s20) = G_CONSTANT i20 16 + %10:modregbank(s20) = G_CONSTANT i20 -512 + %11:modregbank(s20) = G_CONSTANT i20 448 + %12:modregbank(s20) = G_CONSTANT i20 512 + %100:vregbank(<64 x s16>) = COPY $y0 + %200:gprregbank(s32) = G_CONSTANT i32 1 + %103:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32) + %104:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32) + %105:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32) + %106:vregbank(<64 x s8>) = 
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32) + %107:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32) + %108:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32) + %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %103, %0, %7 :: (store (<64 x s8>)) + %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %19, %8 :: (store (<64 x s8>)) + %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %105, %20, %9 :: (store (<64 x s8>)) + %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %106, %21, %10 :: (store (<64 x s8>)) + %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %107, %22, %11 :: (store (<64 x s8>)) + %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %108, %23, %12 :: (store (<64 x s8>)) +... + +--- +name: VST_PACK_1024_I4_I8_signed +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $m0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I4_I8_signed + ; CHECK: liveins: $p0, $m0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 16 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign1_:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign1 [[COPY2]], [[COPY]], [[COPY1]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<128 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1_:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign1_]], 0, implicit $crpacksize, 
implicit $crsat, implicit $packsign1 :: (store (<128 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign1_1:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign1 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1_]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<128 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1_1:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign1_1]], -512, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<128 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1_2:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1_1]], 448, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<128 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign1_2:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign1 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign1_2]], [[MOV_PD_imm11_pseudo1]], implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<128 x s4>)) + %0:ptrregbank(p0) = COPY $p0 + %7:modregbank(s20) = COPY $m0 + %8:modregbank(s20) = G_CONSTANT i20 0 + %9:modregbank(s20) = G_CONSTANT i20 16 + %10:modregbank(s20) = G_CONSTANT i20 -512 + %11:modregbank(s20) = G_CONSTANT i20 448 + %12:modregbank(s20) = G_CONSTANT i20 512 + %100:vregbank(<128 x s8>) = COPY $y0 + %200:gprregbank(s32) = G_CONSTANT i32 1 + %103:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32) + %104:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS 
intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32) + %105:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32) + %106:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32) + %107:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32) + %108:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32) + %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %103, %0, %7 :: (store (<128 x s4>)) + %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %19, %8 :: (store (<128 x s4>)) + %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %105, %20, %9 :: (store (<128 x s4>)) + %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %106, %21, %10 :: (store (<128 x s4>)) + %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %107, %22, %11 :: (store (<128 x s4>)) + %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %108, %23, %12 :: (store (<128 x s4>)) +... 
+ +--- +name: VST_PACK_512_I8_I16_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $m0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I8_I16_unsigned + ; CHECK: liveins: $p0, $m0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 16 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 256 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[COPY]], [[COPY1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_1:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_1:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_1]], -256, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_2:%[0-9]+]]:ep = 
VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_1]], 224, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_2:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_2]], [[MOV_PD_imm11_pseudo1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>)) + %0:ptrregbank(p0) = COPY $p0 + %7:modregbank(s20) = COPY $m0 + %8:modregbank(s20) = G_CONSTANT i20 0 + %9:modregbank(s20) = G_CONSTANT i20 16 + %10:modregbank(s20) = G_CONSTANT i20 -256 + %11:modregbank(s20) = G_CONSTANT i20 224 + %12:modregbank(s20) = G_CONSTANT i20 256 + %100:vregbank(<64 x s8>) = COPY $x0 + %101:vregbank(<32 x s16>) = G_BITCAST %100:vregbank(<64 x s8>) + %200:gprregbank(s32) = G_CONSTANT i32 0 + %103:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32) + %104:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32) + %105:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32) + %106:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32) + %107:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32) + %108:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32) + %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %103, %0, %7 :: (store (<32 x s8>)) + %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %19, %8 :: (store (<32 x s8>)) + 
%21:ptrregbank(p0) = G_AIE_POSTINC_STORE %105, %20, %9 :: (store (<32 x s8>)) + %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %106, %21, %10 :: (store (<32 x s8>)) + %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %107, %22, %11 :: (store (<32 x s8>)) + %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %108, %23, %12 :: (store (<32 x s8>)) +... + +--- +name: VST_PACK_512_I4_I8_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $m0, $x0 + ; CHECK-LABEL: name: VST_PACK_512_I4_I8_unsigned + ; CHECK: liveins: $p0, $m0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 16 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 256 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[COPY]], [[COPY1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_1:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_1:%[0-9]+]]:ep = 
VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_1]], -256, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_2:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_1]], 224, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_2:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_2]], [[MOV_PD_imm11_pseudo1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>)) + %0:ptrregbank(p0) = COPY $p0 + %7:modregbank(s20) = COPY $m0 + %8:modregbank(s20) = G_CONSTANT i20 0 + %9:modregbank(s20) = G_CONSTANT i20 16 + %10:modregbank(s20) = G_CONSTANT i20 -256 + %11:modregbank(s20) = G_CONSTANT i20 224 + %12:modregbank(s20) = G_CONSTANT i20 256 + %100:vregbank(<64 x s8>) = COPY $x0 + %200:gprregbank(s32) = G_CONSTANT i32 0 + %103:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32) + %104:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32) + %105:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32) + %106:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32) + %107:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32) + %108:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS 
intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32) + %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %103, %0, %7 :: (store (<64 x s4>)) + %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %19, %8 :: (store (<64 x s4>)) + %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %105, %20, %9 :: (store (<64 x s4>)) + %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %106, %21, %10 :: (store (<64 x s4>)) + %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %107, %22, %11 :: (store (<64 x s4>)) + %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %108, %23, %12 :: (store (<64 x s4>)) +... + +--- +name: VST_PACK_1024_I8_I16_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $m0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I8_I16_unsigned + ; CHECK: liveins: $p0, $m0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 16 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[COPY]], [[COPY1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_1:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_]], 
[[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_1:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_1]], -512, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_2:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_1]], 448, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_2:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_2]], [[MOV_PD_imm11_pseudo1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>)) + %0:ptrregbank(p0) = COPY $p0 + %7:modregbank(s20) = COPY $m0 + %8:modregbank(s20) = G_CONSTANT i20 0 + %9:modregbank(s20) = G_CONSTANT i20 16 + %10:modregbank(s20) = G_CONSTANT i20 -512 + %11:modregbank(s20) = G_CONSTANT i20 448 + %12:modregbank(s20) = G_CONSTANT i20 512 + %100:vregbank(<64 x s16>) = COPY $y0 + %200:gprregbank(s32) = G_CONSTANT i32 0 + %103:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32) + %104:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32) + %105:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32) + %106:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS 
intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32) + %107:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32) + %108:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32) + %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %103, %0, %7 :: (store (<64 x s8>)) + %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %19, %8 :: (store (<64 x s8>)) + %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %105, %20, %9 :: (store (<64 x s8>)) + %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %106, %21, %10 :: (store (<64 x s8>)) + %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %107, %22, %11 :: (store (<64 x s8>)) + %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %108, %23, %12 :: (store (<64 x s8>)) +... + +--- +name: VST_PACK_1024_I4_I8_unsigned +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $m0, $y0 + ; CHECK-LABEL: name: VST_PACK_1024_I4_I8_unsigned + ; CHECK: liveins: $p0, $m0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 16 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[COPY]], [[COPY1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_]], 0, implicit $crpacksize, implicit $crsat, 
implicit $packsign0 :: (store (<128 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_1:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_1:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_1]], -512, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_2:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_1]], 448, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>)) + ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0 + ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_2:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_2]], [[MOV_PD_imm11_pseudo1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>)) + %0:ptrregbank(p0) = COPY $p0 + %7:modregbank(s20) = COPY $m0 + %8:modregbank(s20) = G_CONSTANT i20 0 + %9:modregbank(s20) = G_CONSTANT i20 16 + %10:modregbank(s20) = G_CONSTANT i20 -512 + %11:modregbank(s20) = G_CONSTANT i20 448 + %12:modregbank(s20) = G_CONSTANT i20 512 + %100:vregbank(<128 x s8>) = COPY $y0 + %200:gprregbank(s32) = G_CONSTANT i32 0 + %103:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32) + %104:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), 
%100:vregbank(<128 x s8>), %200:gprregbank(s32) + %105:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32) + %106:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32) + %107:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32) + %108:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32) + %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %103, %0, %7 :: (store (<128 x s4>)) + %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %19, %8 :: (store (<128 x s4>)) + %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %105, %20, %9 :: (store (<128 x s4>)) + %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %106, %21, %10 :: (store (<128 x s4>)) + %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %107, %22, %11 :: (store (<128 x s4>)) + %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %108, %23, %12 :: (store (<128 x s4>)) +... 
+
+# 512-bit I16->I8 pack with a run-time sign from $r0, feeding six post-increment stores: the $m0, 16 and 256 steps are expected in the register form, the 0, -256 and 224 steps in the _imm form.
+---
+name: VST_PACK_512_I8_I16_dynamic
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $m0, $x0, $r0
+    ; CHECK-LABEL: name: VST_PACK_512_I8_I16_dynamic
+    ; CHECK: liveins: $p0, $m0, $x0, $r0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m0
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 16
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 256
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY $x0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:er = COPY $r0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[COPY]], [[COPY1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_1:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_1:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_1]], -256, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_2:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_1]], 224, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_2:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_2]], [[MOV_PD_imm11_pseudo1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    %0:ptrregbank(p0) = COPY $p0
+    %7:modregbank(s20) = COPY $m0
+    %8:modregbank(s20) = G_CONSTANT i20 0
+    %9:modregbank(s20) = G_CONSTANT i20 16
+    %10:modregbank(s20) = G_CONSTANT i20 -256
+    %11:modregbank(s20) = G_CONSTANT i20 224
+    %12:modregbank(s20) = G_CONSTANT i20 256
+    %100:vregbank(<64 x s8>) = COPY $x0
+    %101:vregbank(<32 x s16>) = G_BITCAST %100:vregbank(<64 x s8>)
+    %200:gprregbank(s32) = COPY $r0
+    %103:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32)
+    %104:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32)
+    %105:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32)
+    %106:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32)
+    %107:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32)
+    %108:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %101:vregbank(<32 x s16>), %200:gprregbank(s32)
+    %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %103, %0, %7 :: (store (<32 x s8>))
+    %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %19, %8 :: (store (<32 x s8>))
+    %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %105, %20, %9 :: (store (<32 x s8>))
+    %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %106, %21, %10 :: (store (<32 x s8>))
+    %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %107, %22, %11 :: (store (<32 x s8>))
+    %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %108, %23, %12 :: (store (<32 x s8>))
+...
+# Same six-store pattern for the 512-bit I8->I4 pack: $crpacksize is expected to be 0 instead of 1 and the stores carry a (<64 x s4>) memory operand.
+---
+name: VST_PACK_512_I4_I8_dynamic
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $m0, $x0, $r0
+    ; CHECK-LABEL: name: VST_PACK_512_I4_I8_dynamic
+    ; CHECK: liveins: $p0, $m0, $x0, $r0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m0
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 16
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 256
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY $x0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:er = COPY $r0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[COPY]], [[COPY1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_1:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_1:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_1]], -256, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_2:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_1]], 224, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmw_sts_pack_pstm_nrm_packSign0_2:%[0-9]+]]:ep = VST_PACK_dmw_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmw_sts_pack_pstm_nrm_imm_packSign0_2]], [[MOV_PD_imm11_pseudo1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s4>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    %0:ptrregbank(p0) = COPY $p0
+    %7:modregbank(s20) = COPY $m0
+    %8:modregbank(s20) = G_CONSTANT i20 0
+    %9:modregbank(s20) = G_CONSTANT i20 16
+    %10:modregbank(s20) = G_CONSTANT i20 -256
+    %11:modregbank(s20) = G_CONSTANT i20 224
+    %12:modregbank(s20) = G_CONSTANT i20 256
+    %100:vregbank(<64 x s8>) = COPY $x0
+    %200:gprregbank(s32) = COPY $r0
+    %103:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32)
+    %104:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32)
+    %105:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32)
+    %106:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32)
+    %107:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32)
+    %108:vregbank(<64 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %100:vregbank(<64 x s8>), %200:gprregbank(s32)
+    %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %103, %0, %7 :: (store (<64 x s4>))
+    %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %19, %8 :: (store (<64 x s4>))
+    %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %105, %20, %9 :: (store (<64 x s4>))
+    %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %106, %21, %10 :: (store (<64 x s4>))
+    %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %107, %22, %11 :: (store (<64 x s4>))
+    %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %108, %23, %12 :: (store (<64 x s4>))
+...
+# 1024-bit I16->I8 pack ($y0, vec1024) with a run-time sign: expects VST_PACK_dmx_* post-increment stores, register form for the $m0, 16 and 512 steps and _imm form for 0, -512 and 448.
+---
+name: VST_PACK_1024_I8_I16_dynamic
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $m0, $y0, $r0
+    ; CHECK-LABEL: name: VST_PACK_1024_I8_I16_dynamic
+    ; CHECK: liveins: $p0, $m0, $y0, $r0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m0
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 16
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec1024 = COPY $y0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:er = COPY $r0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[COPY]], [[COPY1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_1:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_1:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_1]], -512, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_2:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_1]], 448, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_2:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_2]], [[MOV_PD_imm11_pseudo1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    %0:ptrregbank(p0) = COPY $p0
+    %7:modregbank(s20) = COPY $m0
+    %8:modregbank(s20) = G_CONSTANT i20 0
+    %9:modregbank(s20) = G_CONSTANT i20 16
+    %10:modregbank(s20) = G_CONSTANT i20 -512
+    %11:modregbank(s20) = G_CONSTANT i20 448
+    %12:modregbank(s20) = G_CONSTANT i20 512
+    %100:vregbank(<64 x s16>) = COPY $y0
+    %200:gprregbank(s32) = COPY $r0
+    %103:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32)
+    %104:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32)
+    %105:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32)
+    %106:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32)
+    %107:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32)
+    %108:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %100:vregbank(<64 x s16>), %200:gprregbank(s32)
+    %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %103, %0, %7 :: (store (<64 x s8>))
+    %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %19, %8 :: (store (<64 x s8>))
+    %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %105, %20, %9 :: (store (<64 x s8>))
+    %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %106, %21, %10 :: (store (<64 x s8>))
+    %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %107, %22, %11 :: (store (<64 x s8>))
+    %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %108, %23, %12 :: (store (<64 x s8>))
+...
+# 1024-bit I8->I4 pack: same store sequence as VST_PACK_1024_I8_I16_dynamic, with $crpacksize 0 and a (<128 x s4>) memory operand.
+---
+name: VST_PACK_1024_I4_I8_dynamic
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $m0, $y0, $r0
+    ; CHECK-LABEL: name: VST_PACK_1024_I4_I8_dynamic
+    ; CHECK: liveins: $p0, $m0, $y0, $r0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m0
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 16
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec1024 = COPY $y0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:er = COPY $r0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[COPY]], [[COPY1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_1:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_]], [[MOV_PD_imm11_pseudo]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_1:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_1]], -512, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_2:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_1]], 448, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[VST_PACK_dmx_sts_pack_pstm_nrm_packSign0_2:%[0-9]+]]:ep = VST_PACK_dmx_sts_pack_pstm_nrm_packSign0 [[COPY2]], [[VST_PACK_dmx_sts_pack_pstm_nrm_imm_packSign0_2]], [[MOV_PD_imm11_pseudo1]], implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<128 x s4>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    %0:ptrregbank(p0) = COPY $p0
+    %7:modregbank(s20) = COPY $m0
+    %8:modregbank(s20) = G_CONSTANT i20 0
+    %9:modregbank(s20) = G_CONSTANT i20 16
+    %10:modregbank(s20) = G_CONSTANT i20 -512
+    %11:modregbank(s20) = G_CONSTANT i20 448
+    %12:modregbank(s20) = G_CONSTANT i20 512
+    %100:vregbank(<128 x s8>) = COPY $y0
+    %200:gprregbank(s32) = COPY $r0
+    %103:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32)
+    %104:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32)
+    %105:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32)
+    %106:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32)
+    %107:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32)
+    %108:vregbank(<128 x s4>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %100:vregbank(<128 x s8>), %200:gprregbank(s32)
+    %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %103, %0, %7 :: (store (<128 x s4>))
+    %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %19, %8 :: (store (<128 x s4>))
+    %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %105, %20, %9 :: (store (<128 x s4>))
+    %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %106, %21, %10 :: (store (<128 x s4>))
+    %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %107, %22, %11 :: (store (<128 x s4>))
+    %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %108, %23, %12 :: (store (<128 x s4>))
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vst_pack.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vst_pack.mir
new file mode 100644
index 000000000000..c278193823e7
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vst_pack.mir
@@ -0,0 +1,311 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
+# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s
+# Each test below expects a pack intrinsic plus the G_STORE of its result to be selected as one VST_PACK_* store; this one uses constant sign 1 (packSign1).
+---
+name: VST_PACK_512_I8_I16_signed
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $r0, $x0
+    ; CHECK-LABEL: name: VST_PACK_512_I8_I16_signed
+    ; CHECK: liveins: $p0, $r0, $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_imm_packSign1 [[COPY]], [[COPY1]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<32 x s8>))
+    ; CHECK-NEXT: PseudoRET implicit $lr
+    %0:vregbank(<32 x s16>) = COPY $x0
+    %2:ptrregbank(p0) = COPY $p0
+    %6:gprregbank(s32) = G_CONSTANT i32 1
+    %5:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32)
+    G_STORE %5:vregbank(<32 x s8>), %2:ptrregbank(p0) :: (store (<32 x s8>))
+    PseudoRET implicit $lr
+...
+# Note: The result of intrinsic(@llvm.aie2p.pack.I512.I4.I8) should be of type <64 x s4>,
+# but the front end never emits vectors whose elements are narrower than 8 bits,
+# because the clang vector types do not support element types smaller than a byte.
+# The return type is therefore <32 x s8> instead of <64 x s4> (and <64 x s8> instead of <128 x s4> for I1024).
+---
+name: VST_PACK_512_I4_I8_signed
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $r0, $x0
+    ; CHECK-LABEL: name: VST_PACK_512_I4_I8_signed
+    ; CHECK: liveins: $p0, $r0, $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_imm_packSign1 [[COPY]], [[COPY1]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<32 x s8>))
+    ; CHECK-NEXT: PseudoRET implicit $lr
+    %0:vregbank(<64 x s8>) = COPY $x0
+    %2:ptrregbank(p0) = COPY $p0
+    %6:gprregbank(s32) = G_CONSTANT i32 1
+    %5:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %0:vregbank(<64 x s8>), %6:gprregbank(s32)
+    G_STORE %5:vregbank(<32 x s8>), %2:ptrregbank(p0) :: (store (<32 x s8>))
+    PseudoRET implicit $lr
+...
+# 1024-bit I16->I8 pack with constant sign 1: expects VST_PACK_dmx_sts_pack_idx_imm_packSign1 with $crpacksize 1.
+---
+name: VST_PACK_1024_I8_I16_signed
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $r0, $y0
+    ; CHECK-LABEL: name: VST_PACK_1024_I8_I16_signed
+    ; CHECK: liveins: $p0, $r0, $y0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_imm_packSign1 [[COPY]], [[COPY1]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s8>))
+    ; CHECK-NEXT: PseudoRET implicit $lr
+    %0:vregbank(<64 x s16>) = COPY $y0
+    %2:ptrregbank(p0) = COPY $p0
+    %6:gprregbank(s32) = G_CONSTANT i32 1
+    %5:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %0:vregbank(<64 x s16>), %6:gprregbank(s32)
+    G_STORE %5:vregbank(<64 x s8>), %2:ptrregbank(p0) :: (store (<64 x s8>))
+    PseudoRET implicit $lr
+...
+# 1024-bit I8->I4 pack with constant sign 1: expects VST_PACK_dmx_sts_pack_idx_imm_packSign1 with $crpacksize 0.
+---
+name: VST_PACK_1024_I4_I8_signed
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $r0, $y0
+    ; CHECK-LABEL: name: VST_PACK_1024_I4_I8_signed
+    ; CHECK: liveins: $p0, $r0, $y0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_imm_packSign1 [[COPY]], [[COPY1]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign1 :: (store (<64 x s8>))
+    ; CHECK-NEXT: PseudoRET implicit $lr
+    %0:vregbank(<128 x s8>) = COPY $y0
+    %2:ptrregbank(p0) = COPY $p0
+    %6:gprregbank(s32) = G_CONSTANT i32 1
+    %5:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %0:vregbank(<128 x s8>), %6:gprregbank(s32)
+    G_STORE %5:vregbank(<64 x s8>), %2:ptrregbank(p0) :: (store (<64 x s8>))
+    PseudoRET implicit $lr
+...
+# 512-bit I16->I8 pack with constant sign 0: expects the packSign0 opcode with $crpacksize 1 and no write to $packsign0.
+---
+name: VST_PACK_512_I8_I16_unsigned
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $r0, $x0
+    ; CHECK-LABEL: name: VST_PACK_512_I8_I16_unsigned
+    ; CHECK: liveins: $p0, $r0, $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>))
+    ; CHECK-NEXT: PseudoRET implicit $lr
+    %0:vregbank(<32 x s16>) = COPY $x0
+    %2:ptrregbank(p0) = COPY $p0
+    %6:gprregbank(s32) = G_CONSTANT i32 0
+    %5:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32)
+    G_STORE %5:vregbank(<32 x s8>), %2:ptrregbank(p0) :: (store (<32 x s8>))
+    PseudoRET implicit $lr
+...
+# 512-bit I8->I4 pack with constant sign 0: expects VST_PACK_dmw_sts_pack_idx_imm_packSign0 with $crpacksize 0.
+---
+name: VST_PACK_512_I4_I8_unsigned
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $r0, $x0
+    ; CHECK-LABEL: name: VST_PACK_512_I4_I8_unsigned
+    ; CHECK: liveins: $p0, $r0, $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>))
+    ; CHECK-NEXT: PseudoRET implicit $lr
+    %0:vregbank(<64 x s8>) = COPY $x0
+    %2:ptrregbank(p0) = COPY $p0
+    %6:gprregbank(s32) = G_CONSTANT i32 0
+    %5:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %0:vregbank(<64 x s8>), %6:gprregbank(s32)
+    G_STORE %5:vregbank(<32 x s8>), %2:ptrregbank(p0) :: (store (<32 x s8>))
+    PseudoRET implicit $lr
+...
+# 1024-bit I16->I8 pack with constant sign 0: expects VST_PACK_dmx_sts_pack_idx_imm_packSign0 with $crpacksize 1.
+---
+name: VST_PACK_1024_I8_I16_unsigned
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $r0, $y0
+    ; CHECK-LABEL: name: VST_PACK_1024_I8_I16_unsigned
+    ; CHECK: liveins: $p0, $r0, $y0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>))
+    ; CHECK-NEXT: PseudoRET implicit $lr
+    %0:vregbank(<64 x s16>) = COPY $y0
+    %2:ptrregbank(p0) = COPY $p0
+    %6:gprregbank(s32) = G_CONSTANT i32 0
+    %5:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %0:vregbank(<64 x s16>), %6:gprregbank(s32)
+    G_STORE %5:vregbank(<64 x s8>), %2:ptrregbank(p0) :: (store (<64 x s8>))
+    PseudoRET implicit $lr
+...
+# 1024-bit I8->I4 pack with constant sign 0: expects VST_PACK_dmx_sts_pack_idx_imm_packSign0 with $crpacksize 0.
+---
+name: VST_PACK_1024_I4_I8_unsigned
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $r0, $y0
+    ; CHECK-LABEL: name: VST_PACK_1024_I4_I8_unsigned
+    ; CHECK: liveins: $p0, $r0, $y0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>))
+    ; CHECK-NEXT: PseudoRET implicit $lr
+    %0:vregbank(<128 x s8>) = COPY $y0
+    %2:ptrregbank(p0) = COPY $p0
+    %6:gprregbank(s32) = G_CONSTANT i32 0
+    %5:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %0:vregbank(<128 x s8>), %6:gprregbank(s32)
+    G_STORE %5:vregbank(<64 x s8>), %2:ptrregbank(p0) :: (store (<64 x s8>))
+    PseudoRET implicit $lr
+...
+# 512-bit I16->I8 pack with a run-time sign: $r0 is copied into $packsign0 before the packSign0 store and $packsign0 is set back to 0 after it.
+---
+name: VST_PACK_512_I8_I16_dynamic
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $r0, $x0
+    ; CHECK-LABEL: name: VST_PACK_512_I8_I16_dynamic
+    ; CHECK: liveins: $p0, $r0, $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY2]]
+    ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: PseudoRET implicit $lr
+    %0:vregbank(<32 x s16>) = COPY $x0
+    %2:ptrregbank(p0) = COPY $p0
+    %6:gprregbank(s32) = COPY $r0
+    %5:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32)
+    G_STORE %5:vregbank(<32 x s8>), %2:ptrregbank(p0) :: (store (<32 x s8>))
+    PseudoRET implicit $lr
+...
+# 512-bit I8->I4 pack with a run-time sign: same $packsign0 copy before and reset after the store, with $crpacksize 0.
+---
+name: VST_PACK_512_I4_I8_dynamic
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $r0, $x0
+    ; CHECK-LABEL: name: VST_PACK_512_I4_I8_dynamic
+    ; CHECK: liveins: $p0, $r0, $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY2]]
+    ; CHECK-NEXT: VST_PACK_dmw_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<32 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: PseudoRET implicit $lr
+    %0:vregbank(<64 x s8>) = COPY $x0
+    %2:ptrregbank(p0) = COPY $p0
+    %6:gprregbank(s32) = COPY $r0
+    %5:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I512.I4.I8), %0:vregbank(<64 x s8>), %6:gprregbank(s32)
+    G_STORE %5:vregbank(<32 x s8>), %2:ptrregbank(p0) :: (store (<32 x s8>))
+    PseudoRET implicit $lr
+...
+# 1024-bit I16->I8 pack with a run-time sign: $r0 is copied into $packsign0 before the dmx packSign0 store and $packsign0 is set back to 0 after it.
+---
+name: VST_PACK_1024_I8_I16_dynamic
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $r0, $y0
+    ; CHECK-LABEL: name: VST_PACK_1024_I8_I16_dynamic
+    ; CHECK: liveins: $p0, $r0, $y0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 1
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY2]]
+    ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: PseudoRET implicit $lr
+    %0:vregbank(<64 x s16>) = COPY $y0
+    %2:ptrregbank(p0) = COPY $p0
+    %6:gprregbank(s32) = COPY $r0
+    %5:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I8.I16), %0:vregbank(<64 x s16>), %6:gprregbank(s32)
+    G_STORE %5:vregbank(<64 x s8>), %2:ptrregbank(p0) :: (store (<64 x s8>))
+    PseudoRET implicit $lr
+...
+# 1024-bit I8->I4 pack with a run-time sign: $packsign0 is loaded from $r0 before the dmx packSign0 store and reset to 0 after it; $crpacksize is 0.
+---
+name: VST_PACK_1024_I4_I8_dynamic
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $r0, $y0
+    ; CHECK-LABEL: name: VST_PACK_1024_I4_I8_dynamic
+    ; CHECK: liveins: $p0, $r0, $y0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0
+    ; CHECK-NEXT: $crpacksize = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: $packsign0 = COPY [[COPY2]]
+    ; CHECK-NEXT: VST_PACK_dmx_sts_pack_idx_imm_packSign0 [[COPY]], [[COPY1]], 0, implicit $crpacksize, implicit $crsat, implicit $packsign0 :: (store (<64 x s8>))
+    ; CHECK-NEXT: $packsign0 = MOV_scalar_imm11_pseudo 0
+    ; CHECK-NEXT: PseudoRET implicit $lr
+    %0:vregbank(<128 x s8>) = COPY $y0
+    %2:ptrregbank(p0) = COPY $p0
+    %6:gprregbank(s32) = COPY $r0
+    %5:vregbank(<64 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.pack.I1024.I4.I8), %0:vregbank(<128 x s8>), %6:gprregbank(s32)
+    G_STORE %5:vregbank(<64 x s8>), %2:ptrregbank(p0) :: (store (<64 x s8>))
+    PseudoRET implicit $lr
+...