From 0bdee54f6ec58d1cc191755e92d77127ceb53b2b Mon Sep 17 00:00:00 2001 From: Sai Abhinay Anubola Date: Tue, 11 Feb 2025 16:08:51 +0530 Subject: [PATCH 1/2] [AIE2P] Add VST.PUSH.CONV combine --- .../AIE/aie2p/AIE2PInstructionSelector.cpp | 102 ++++++++++++++++-- .../inst-select-fifo-store-conv.mir | 92 ++++++++++++++++ 2 files changed, 183 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-fifo-store-conv.mir diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp index b6a94094a365..7e762a87d7d1 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp @@ -70,6 +70,7 @@ class AIE2PInstructionSelector : public AIEBaseInstructionSelector { bool isWrite); bool selectVST_FIFO(MachineInstr &I, MachineRegisterInfo &MRI); bool selectG_TRUNC(MachineInstr &I, MachineRegisterInfo &MRI); + bool selectVST_FIFO_CONV(MachineInstr &StoreI, MachineRegisterInfo &MRI); static const char *getName() { return DEBUG_TYPE; } @@ -3630,19 +3631,28 @@ std::optional AIE2PInstructionSelector::getCombinedOpcodeCONV( std::optional Immediate) { const bool AlwaysFitsImmediateRange = true; const bool NoImmediate = false; - if (CombOp.getOpcode() != AIE2P::G_INTRINSIC_W_SIDE_EFFECTS || - (cast(CombOp).getIntrinsicID() != - Intrinsic::aie2p_v16accfloat_to_v16bf16 && - cast(CombOp).getIntrinsicID() != - Intrinsic::aie2p_v32accfloat_to_v32bf16)) - return {}; + if (CombOp.getOpcode() != AIE2P::G_INTRINSIC_W_SIDE_EFFECTS) + return std::nullopt; + + const unsigned CombOpID = cast(CombOp).getIntrinsicID(); + switch (CombOpID) { + case Intrinsic::aie2p_v16accfloat_to_v16bf16: + case Intrinsic::aie2p_v32accfloat_to_v32bf16: + case Intrinsic::aie2p_v64accfloat_to_v64bfp16ebs8: + case Intrinsic::aie2p_v64accfloat_to_v64bfp16ebs16: + case Intrinsic::aie2p_v64bfp16ebs8_to_v64bfp16ebs16: + break; + default: + return std::nullopt; + } - assert(((cast(CombOp).getIntrinsicID() == - Intrinsic::aie2p_v16accfloat_to_v16bf16 && + assert(((CombOpID == Intrinsic::aie2p_v16accfloat_to_v16bf16 && getLoadStoreSize(MemOp) == 256) || - (cast(CombOp).getIntrinsicID() == - Intrinsic::aie2p_v32accfloat_to_v32bf16 && - getLoadStoreSize(MemOp) == 512)) && + (CombOpID == Intrinsic::aie2p_v32accfloat_to_v32bf16 && + getLoadStoreSize(MemOp) == 512) || + (CombOpID == Intrinsic::aie2p_v64accfloat_to_v64bfp16ebs8) || + (CombOpID == Intrinsic::aie2p_v64accfloat_to_v64bfp16ebs16) || + (CombOpID == Intrinsic::aie2p_v64bfp16ebs8_to_v64bfp16ebs16)) && "Unexpected VST.CONV size"); unsigned ISelOpcode; @@ -3697,6 +3707,21 @@ std::optional AIE2PInstructionSelector::getCombinedOpcodeCONV( ISelOpcode = AIE2P::VST_3D_CONV_bf16_fp32_dmx_sts_srs_bf; return LoadStoreOpcodes{ISelOpcode, NoImmediate, /*OffsetOpcode=*/{}}; + case AIE2P::G_INTRINSIC_W_SIDE_EFFECTS: + switch (cast(MemOp).getIntrinsicID()) { + case Intrinsic::aie2p_fifo_st_push_544_bfp16: + if (CombOpID == Intrinsic::aie2p_v64bfp16ebs8_to_v64bfp16ebs16) + ISelOpcode = AIE2P::VST_PUSH_544_CONV_bfp16ebs16_ebs8; + else /* CombOpID == + Intrinsic::aie2p_v64accfloat_to_v64bfp16ebs16 */ + ISelOpcode = AIE2P::VST_PUSH_544_CONV_bfp16ebs16_fp32; + return LoadStoreOpcodes{ISelOpcode, NoImmediate, + /*OffsetOpcode=*/{}}; + case Intrinsic::aie2p_fifo_st_push_576_bfp16: + return LoadStoreOpcodes{AIE2P::VST_PUSH_576_CONV_bfp16ebs8_fp32, + NoImmediate, + /*OffsetOpcode=*/{}}; + } } return {}; } @@ -4905,6 +4930,57 @@ unsigned int getStoreFifoOpcode(MachineInstr &I) { return AIE2P::INSTRUCTION_LIST_END; } +bool AIE2PInstructionSelector::selectVST_FIFO_CONV(MachineInstr &StoreI, + MachineRegisterInfo &MRI) { + Register ConvResult = StoreI.getOperand(5).getReg(); + MachineInstr *ConvOp = getDefIgnoringCopiesAndBitcasts(ConvResult, MRI); + assert(ConvOp && "Expected SSA."); + + if (!canCombineCONV(StoreI, *ConvOp) || + StoreI.getParent() != ConvOp->getParent() || !MRI.hasOneUse(ConvResult)) + return false; + + const std::optional NoImmediate = {}; + std::optional LSO = + getCombinedOpcodeCONV(StoreI, *ConvOp, NoImmediate); + assert(LSO && "Unexpected VST.FIFO.CONV combine failure"); + + Register PtrOut = StoreI.getOperand(0).getReg(); + Register FifoOut = StoreI.getOperand(1).getReg(); + Register AvailOut = StoreI.getOperand(2).getReg(); + + Register PtrIn = StoreI.getOperand(4).getReg(); + Register FifoIn = StoreI.getOperand(7).getReg(); + Register AvailIn = StoreI.getOperand(8).getReg(); + Register SrcReg; + + unsigned CombOpID = cast(*ConvOp).getIntrinsicID(); + if (CombOpID == Intrinsic::aie2p_v64bfp16ebs8_to_v64bfp16ebs16) { + Register MantIn = ConvOp->getOperand(3).getReg(); + Register ExpIn = ConvOp->getOperand(4).getReg(); + SrcReg = MRI.createVirtualRegister(&AIE2P::mEXaRegClass); + MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {SrcReg}, {}) + .addReg(MantIn) + .addImm(AIE2P::sub_bfp16_x) + .addReg(ExpIn) + .addImm(AIE2P::sub_bfp16_e); + } else { + assert((CombOpID == Intrinsic::aie2p_v64accfloat_to_v64bfp16ebs8 || + CombOpID == Intrinsic::aie2p_v64accfloat_to_v64bfp16ebs16) && + "Unexpected IntrinsicID in VST.FIFO.CONV combine"); + SrcReg = ConvOp->getOperand(3).getReg(); + } + + auto NewInstr = MIB.buildInstr(LSO->ISelOpcode, {FifoOut, PtrOut, AvailOut}, + {FifoIn, SrcReg, PtrIn, AvailIn}); + NewInstr.cloneMemRefs(StoreI); + + makeDeadMI(*ConvOp, MRI); + StoreI.eraseFromParent(); + + return constrainSelectedInstRegOperands(*NewInstr.getInstr(), TII, TRI, RBI); +} + bool AIE2PInstructionSelector::selectVST_FIFO(MachineInstr &I, MachineRegisterInfo &MRI) { auto IntrinsicID = cast(I).getIntrinsicID(); @@ -4928,6 +5004,10 @@ bool AIE2PInstructionSelector::selectVST_FIFO(MachineInstr &I, } case Intrinsic::aie2p_fifo_st_push_544_bfp16: case Intrinsic::aie2p_fifo_st_push_576_bfp16: { + // First try to match CONV combine + if (selectVST_FIFO_CONV(I, MRI)) + return true; + Register PtrIn = I.getOperand(4).getReg(); Register FifoIn = I.getOperand(7).getReg(); Register AvailIn = I.getOperand(8).getReg(); diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-fifo-store-conv.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-fifo-store-conv.mir new file mode 100644 index 000000000000..d20b8c7a4ae2 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-fifo-store-conv.mir @@ -0,0 +1,92 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates +# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: test_fifo_st_push_576_conv_accfloat_to_bfp16ebs8 +tracksRegLiveness: true +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $dm0 + ; CHECK-LABEL: name: test_fifo_st_push_576_conv_accfloat_to_bfp16ebs8 + ; CHECK: liveins: $p0, $dm0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:mpfs = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:mstfifo = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:mr26_fifo_st = IMPLICIT_DEF + ; CHECK-NEXT: [[VST_PUSH_576_CONV_bfp16ebs8_fp32_:%[0-9]+]]:mstfifo, [[VST_PUSH_576_CONV_bfp16ebs8_fp32_1:%[0-9]+]]:mpfs, [[VST_PUSH_576_CONV_bfp16ebs8_fp32_2:%[0-9]+]]:mr26_fifo_st = VST_PUSH_576_CONV_bfp16ebs8_fp32 [[DEF1]], [[COPY]], [[DEF]], [[DEF2]], implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:accregbank(<64 x s32>) = COPY $dm0 + %2:ptrregbank(p0) = IMPLICIT_DEF + %3:fiforegbank(<32 x s32>) = IMPLICIT_DEF + %4:gprregbank(s32) = IMPLICIT_DEF + %5:vregbank(<64 x s8>), %6:gprregbank(<8 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.v64accfloat.to.v64bfp16ebs8), %1(<64 x s32>) + %7:ptrregbank(p0), %8:fiforegbank(<32 x s32>), %9:gprregbank(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.fifo.st.push.576.bfp16), %2:ptrregbank(p0), %5:vregbank(<64 x s8>), %6:gprregbank(<8 x s8>), %3:fiforegbank(<32 x s32>), %4:gprregbank(s32) + PseudoRET implicit $lr +... + +--- +name: test_fifo_st_push_544_conv_accfloat_to_bfp16ebs8 +tracksRegLiveness: true +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $dm0 + ; CHECK-LABEL: name: test_fifo_st_push_544_conv_accfloat_to_bfp16ebs8 + ; CHECK: liveins: $p0, $dm0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:mpfs = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:mstfifo = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:mr26_fifo_st = IMPLICIT_DEF + ; CHECK-NEXT: [[VST_PUSH_544_CONV_bfp16ebs16_fp32_:%[0-9]+]]:mstfifo, [[VST_PUSH_544_CONV_bfp16ebs16_fp32_1:%[0-9]+]]:mpfs, [[VST_PUSH_544_CONV_bfp16ebs16_fp32_2:%[0-9]+]]:mr26_fifo_st = VST_PUSH_544_CONV_bfp16ebs16_fp32 [[DEF1]], [[COPY]], [[DEF]], [[DEF2]], implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:accregbank(<64 x s32>) = COPY $dm0 + %2:ptrregbank(p0) = IMPLICIT_DEF + %3:fiforegbank(<32 x s32>) = IMPLICIT_DEF + %4:gprregbank(s32) = IMPLICIT_DEF + %5:vregbank(<64 x s8>), %6:gprregbank(<8 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.v64accfloat.to.v64bfp16ebs8), %1(<64 x s32>) + %7:ptrregbank(p0), %8:fiforegbank(<32 x s32>), %9:gprregbank(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.fifo.st.push.544.bfp16), %2:ptrregbank(p0), %5:vregbank(<64 x s8>), %6:gprregbank(<8 x s8>), %3:fiforegbank(<32 x s32>), %4:gprregbank(s32) + PseudoRET implicit $lr +... + +--- +name: test_fifo_st_push_544_conv_bfp16ebs8_to_bfp16ebs16 +tracksRegLiveness: true +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $e0, $p0, $x0 + ; CHECK-LABEL: name: test_fifo_st_push_544_conv_bfp16ebs8_to_bfp16ebs16 + ; CHECK: liveins: $e0, $p0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:el = COPY $e0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:mpfs = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:mstfifo = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:mr26_fifo_st = IMPLICIT_DEF + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec576 = REG_SEQUENCE [[COPY]], %subreg.sub_bfp16_x, [[COPY1]], %subreg.sub_bfp16_e + ; CHECK-NEXT: [[VST_PUSH_544_CONV_bfp16ebs16_ebs8_:%[0-9]+]]:mstfifo, [[VST_PUSH_544_CONV_bfp16ebs16_ebs8_1:%[0-9]+]]:mpfs, [[VST_PUSH_544_CONV_bfp16ebs16_ebs8_2:%[0-9]+]]:mr26_fifo_st = VST_PUSH_544_CONV_bfp16ebs16_ebs8 [[DEF1]], [[REG_SEQUENCE]], [[DEF]], [[DEF2]], implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:vregbank(<64 x s8>) = COPY $x0 + %2:gprregbank(<8 x s8>) = COPY $e0 + %3:ptrregbank(p0) = IMPLICIT_DEF + %4:fiforegbank(<32 x s32>) = IMPLICIT_DEF + %5:gprregbank(s32) = IMPLICIT_DEF + %6:vregbank(<64 x s8>), %7:gprregbank(<8 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.v64bfp16ebs8.to.v64bfp16ebs16), %1(<64 x s8>), %2(<8 x s8>) + %8:ptrregbank(p0), %9:fiforegbank(<32 x s32>), %10:gprregbank(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.fifo.st.push.544.bfp16), %3:ptrregbank(p0), %6:vregbank(<64 x s8>), %7:gprregbank(<8 x s8>), %4:fiforegbank(<32 x s32>), %5:gprregbank(s32) + PseudoRET implicit $lr +... From 19288c117378e66c620abf12a463517f9d7536f0 Mon Sep 17 00:00:00 2001 From: Sai Abhinay Anubola Date: Thu, 13 Feb 2025 13:37:41 +0530 Subject: [PATCH 2/2] [AIE2P] Transform VST.FLUSH into VST.FLUSH.CONV in PostSelectOptimize --- llvm/lib/Target/AIE/AIEBaseInstrInfo.h | 9 + llvm/lib/Target/AIE/AIEPostSelectOptimize.cpp | 41 +++ llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp | 21 ++ llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h | 3 + .../post-select-optimize-vst.flush.conv.mir | 244 ++++++++++++++++++ 5 files changed, 318 insertions(+) create mode 100644 llvm/test/CodeGen/AIE/aie2p/GlobalIsel/post-select-optimize-vst.flush.conv.mir diff --git a/llvm/lib/Target/AIE/AIEBaseInstrInfo.h b/llvm/lib/Target/AIE/AIEBaseInstrInfo.h index 6a5521f88d42..b5fa40fa7c35 100644 --- a/llvm/lib/Target/AIE/AIEBaseInstrInfo.h +++ b/llvm/lib/Target/AIE/AIEBaseInstrInfo.h @@ -259,6 +259,15 @@ struct AIEBaseInstrInfo : public TargetInstrInfo { TypeSize Size) const { llvm_unreachable("Target didn't implement getCombinedPostIncOpcode"); } + + /// Check whether Opcode is a VST.PUSH.CONV + virtual bool isFifoStoreConvOpcode(unsigned Opcode) const { return false; } + /// \return Corresponding VST.FLUSH.CONV Opcode based on \a VST.FLUSH Opcode + virtual std::optional + getStoreFlushConvOpcode(unsigned StoreFlushOpcode) const { + llvm_unreachable("Target didn't implement getStoreFlushConvOpcode!"); + } + /// \return AIE2p OpCode based on \a IntrinsicID virtual unsigned getOpCode(MachineInstr &MI) const { llvm_unreachable("Target didn't implement getOpCode"); diff --git a/llvm/lib/Target/AIE/AIEPostSelectOptimize.cpp b/llvm/lib/Target/AIE/AIEPostSelectOptimize.cpp index e9dc139dd7ec..3f1c64765123 100644 --- a/llvm/lib/Target/AIE/AIEPostSelectOptimize.cpp +++ b/llvm/lib/Target/AIE/AIEPostSelectOptimize.cpp @@ -584,6 +584,39 @@ bool fixLoadMemOpInfo(MachineFunction &MF, MachineBasicBlock &MBB, return Changed; } +// Replace the VST.FLUSH opcode with VST.FLUSH.CONV if it is chained with +// VST.PUSH.CONV. The CONV variant behaves identically to the normal variant but +// all actions are delayed by one pipeline stage. +bool modifyStoreFlush(MachineBasicBlock &MBB, MachineRegisterInfo &MRI) { + const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo(); + const AIEBaseInstrInfo *AIEII = static_cast(TII); + bool Changed = false; + + // Helper function to recursively update VST.FLUSH to VST.FLUSH.CONV + std::function Impl = [&](const Register UseReg) { + for (MachineInstr &UseMI : MRI.use_instructions(UseReg)) { + std::optional StoreFlushConvOpcode = + AIEII->getStoreFlushConvOpcode(UseMI.getOpcode()); + if (StoreFlushConvOpcode) { + UseMI.setDesc(TII->get(*StoreFlushConvOpcode)); + Changed = true; + // Update the opcode for the next dependent instruction in the chain + const Register UseDstReg = UseMI.getOperand(0).getReg(); + Impl(UseDstReg); + } + } + }; + + for (MachineInstr &MI : MBB) { + if (AIEII->isFifoStoreConvOpcode(MI.getOpcode())) { + const Register DstReg = MI.getOperand(0).getReg(); + Impl(DstReg); + } + } + + return Changed; +} + bool AIEPostSelectOptimize::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "\n******* POST I-SEL OPTIMIZATION PASS *******\n" << "********** Function: " << MF.getName() << '\n'); @@ -624,6 +657,14 @@ bool AIEPostSelectOptimize::runOnMachineFunction(MachineFunction &MF) { Changed |= fixLoadMemOpInfo(MF, MBB, MF.getRegInfo()); } + // 5. Convert store flush instructions only on AIE2P targets: when VST.FLUSH + // and VST.PUSH.CONV are chained, replace VST.FLUSH with VST.FLUSH.CONV + if (MF.getTarget().getTargetTriple().isAIE2P()) { + for (MachineBasicBlock &MBB : MF) { + Changed |= modifyStoreFlush(MBB, MF.getRegInfo()); + } + } + return Changed; } diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp index a7f0374bc638..ca01940489ba 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp @@ -270,6 +270,27 @@ bool AIE2PInstrInfo::isGenericOffsetMemOpcode(unsigned Opcode) const { (Opcode == AIE2P::G_AIE_OFFSET_ZEXTLOAD)); } +bool AIE2PInstrInfo::isFifoStoreConvOpcode(unsigned Opcode) const { + return ((Opcode == AIE2P::VST_PUSH_544_CONV_bfp16ebs16_ebs8) || + (Opcode == AIE2P::VST_PUSH_544_CONV_bfp16ebs16_fp32) || + (Opcode == AIE2P::VST_PUSH_576_CONV_bfp16ebs8_fp32)); +} + +std::optional +AIE2PInstrInfo::getStoreFlushConvOpcode(unsigned StoreFlushOpcode) const { + switch (StoreFlushOpcode) { + case AIE2P::VST_FLUSH_512_normal_flush: + return AIE2P::VST_FLUSH_512_CONV_normal_flush; + case AIE2P::VST_FLUSH_512_fifo_1d_flush: + return AIE2P::VST_FLUSH_512_CONV_fifo_1d_flush; + case AIE2P::VST_FLUSH_512_2D: + return AIE2P::VST_FLUSH_512_CONV_2D; + case AIE2P::VST_FLUSH_512_3D: + return AIE2P::VST_FLUSH_512_CONV_3D; + } + return std::nullopt; +} + std::optional AIE2PInstrInfo::getCombinedPostIncOpcode( MachineInstr &BaseMemI, MachineInstr &PostIncI, TypeSize Size) const { switch (PostIncI.getOpcode()) { diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h index 9f582a3d51e1..d9ee2be3e051 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h @@ -87,6 +87,7 @@ class AIE2PInstrInfo : public AIE2PGenInstrInfo { bool isBooleanNot(unsigned Opc) const override; bool isConstStep(const MachineInstr &MI, int64_t &Step) const override; bool isGenericOffsetMemOpcode(unsigned Opcode) const override; + bool isFifoStoreConvOpcode(unsigned Opcode) const override; bool verifyGenericInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; @@ -97,6 +98,8 @@ class AIE2PInstrInfo : public AIE2PGenInstrInfo { std::optional getCombinedPostIncOpcode(MachineInstr &BaseMemI, MachineInstr &PtrAddI, TypeSize Size) const override; + std::optional + getStoreFlushConvOpcode(unsigned StoreFlushOpcode) const override; unsigned getOpCode(MachineInstr &MI) const override; Register getVaddSignControlRegister() const override; diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/post-select-optimize-vst.flush.conv.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/post-select-optimize-vst.flush.conv.mir new file mode 100644 index 000000000000..f2dc89f99425 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/post-select-optimize-vst.flush.conv.mir @@ -0,0 +1,244 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates +# RUN: llc -mtriple aie2p -run-pass=aie-post-select-optimize %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: test_vst_flush_conv +tracksRegLiveness: true +legalized: true +selected: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $dm0 + ; CHECK-LABEL: name: test_vst_flush_conv + ; CHECK: liveins: $p0, $dm0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:mpfs = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:mstfifo = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:mr26_fifo_st = IMPLICIT_DEF + ; CHECK-NEXT: [[VST_PUSH_576_CONV_bfp16ebs8_fp32_:%[0-9]+]]:mstfifo, [[VST_PUSH_576_CONV_bfp16ebs8_fp32_1:%[0-9]+]]:mpfs, [[VST_PUSH_576_CONV_bfp16ebs8_fp32_2:%[0-9]+]]:mr26_fifo_st = VST_PUSH_576_CONV_bfp16ebs8_fp32 [[DEF1]], [[COPY]], [[DEF]], [[DEF2]], implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd + ; CHECK-NEXT: [[VST_FLUSH_512_CONV_normal_flush:%[0-9]+]]:mstfifo, [[VST_FLUSH_512_CONV_normal_flush1:%[0-9]+]]:mpfs, [[VST_FLUSH_512_CONV_normal_flush2:%[0-9]+]]:mr26_fifo_st = VST_FLUSH_512_CONV_normal_flush [[VST_PUSH_576_CONV_bfp16ebs8_fp32_]], [[VST_PUSH_576_CONV_bfp16ebs8_fp32_1]], [[VST_PUSH_576_CONV_bfp16ebs8_fp32_2]], implicit-def $srfifo_of + ; CHECK-NEXT: PseudoRET implicit $lr + %1:acc2048 = COPY $dm0 + %2:mpfs = IMPLICIT_DEF + %3:mstfifo = IMPLICIT_DEF + %4:mr26_fifo_st = IMPLICIT_DEF + %8:mstfifo, %7:mpfs, %9:mr26_fifo_st = VST_PUSH_576_CONV_bfp16ebs8_fp32 %3, %1, %2, %4, implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd + %10:mstfifo, %11:mpfs, %12:mr26_fifo_st = VST_FLUSH_512_normal_flush %8, %7, %9, implicit-def $srfifo_of + PseudoRET implicit $lr +... + +--- +name: test_vst_flush_1d_conv +tracksRegLiveness: true +legalized: true +selected: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $dm0 + ; CHECK-LABEL: name: test_vst_flush_1d_conv + ; CHECK: liveins: $p0, $dm0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:mpfs = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:mstfifo = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:mr26_fifo_st = IMPLICIT_DEF + ; CHECK-NEXT: [[VST_PUSH_544_CONV_bfp16ebs16_fp32_:%[0-9]+]]:mstfifo, [[VST_PUSH_544_CONV_bfp16ebs16_fp32_1:%[0-9]+]]:mpfs, [[VST_PUSH_544_CONV_bfp16ebs16_fp32_2:%[0-9]+]]:mr26_fifo_st = VST_PUSH_544_CONV_bfp16ebs16_fp32 [[DEF1]], [[COPY]], [[DEF]], [[DEF2]], implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:em = IMPLICIT_DEF + ; CHECK-NEXT: [[VST_FLUSH_512_CONV_fifo_1d_flush:%[0-9]+]]:mstfifo, [[VST_FLUSH_512_CONV_fifo_1d_flush1:%[0-9]+]]:mpfs, [[VST_FLUSH_512_CONV_fifo_1d_flush2:%[0-9]+]]:mr26_fifo_st = VST_FLUSH_512_CONV_fifo_1d_flush [[VST_PUSH_544_CONV_bfp16ebs16_fp32_]], [[VST_PUSH_544_CONV_bfp16ebs16_fp32_1]], [[VST_PUSH_544_CONV_bfp16ebs16_fp32_2]], [[DEF3]], implicit-def $srfifo_of + ; CHECK-NEXT: PseudoRET implicit $lr + %1:acc2048 = COPY $dm0 + %2:mpfs = IMPLICIT_DEF + %3:mstfifo = IMPLICIT_DEF + %4:mr26_fifo_st = IMPLICIT_DEF + %8:mstfifo, %7:mpfs, %9:mr26_fifo_st = VST_PUSH_544_CONV_bfp16ebs16_fp32 %3, %1, %2, %4, implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd + %10:em = IMPLICIT_DEF + %11:mstfifo, %12:mpfs, %13:mr26_fifo_st = VST_FLUSH_512_fifo_1d_flush %8, %7, %9, %10, implicit-def $srfifo_of + PseudoRET implicit $lr +... + +--- +name: test_vst_flush_2d_conv +tracksRegLiveness: true +legalized: true +selected: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $e0, $p0, $x0, $d1 + ; CHECK-LABEL: name: test_vst_flush_2d_conv + ; CHECK: liveins: $e0, $p0, $x0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:el = COPY $e0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:mpfs = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:mstfifo = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:mr26_fifo_st = IMPLICIT_DEF + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec576 = REG_SEQUENCE [[COPY]], %subreg.sub_bfp16_x, [[COPY1]], %subreg.sub_bfp16_e + ; CHECK-NEXT: [[VST_PUSH_544_CONV_bfp16ebs16_ebs8_:%[0-9]+]]:mstfifo, [[VST_PUSH_544_CONV_bfp16ebs16_ebs8_1:%[0-9]+]]:mpfs, [[VST_PUSH_544_CONV_bfp16ebs16_ebs8_2:%[0-9]+]]:mr26_fifo_st = VST_PUSH_544_CONV_bfp16ebs16_ebs8 [[DEF1]], [[REG_SEQUENCE]], [[DEF]], [[DEF2]], implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ed = COPY $d1 + ; CHECK-NEXT: [[VST_FLUSH_512_CONV_2D:%[0-9]+]]:mstfifo, [[VST_FLUSH_512_CONV_2D1:%[0-9]+]]:mpfs, [[VST_FLUSH_512_CONV_2D2:%[0-9]+]]:mr26_fifo_st, [[VST_FLUSH_512_CONV_2D3:%[0-9]+]]:edc = VST_FLUSH_512_CONV_2D [[VST_PUSH_544_CONV_bfp16ebs16_ebs8_]], [[VST_PUSH_544_CONV_bfp16ebs16_ebs8_1]], [[VST_PUSH_544_CONV_bfp16ebs16_ebs8_2]], [[COPY2]], implicit-def $srfifo_of + ; CHECK-NEXT: PseudoRET implicit $lr + %1:vec512 = COPY $x0 + %2:el = COPY $e0 + %3:mpfs = IMPLICIT_DEF + %4:mstfifo = IMPLICIT_DEF + %5:mr26_fifo_st = IMPLICIT_DEF + %11:vec576 = REG_SEQUENCE %1, %subreg.sub_bfp16_x, %2, %subreg.sub_bfp16_e + %9:mstfifo, %8:mpfs, %10:mr26_fifo_st = VST_PUSH_544_CONV_bfp16ebs16_ebs8 %4, %11, %3, %5, implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd + %12:ed = COPY $d1 + %13:mstfifo, %14:mpfs, %15:mr26_fifo_st, %16:edc = VST_FLUSH_512_2D %9, %8, %10, %12, implicit-def $srfifo_of + PseudoRET implicit $lr +... + +--- +name: test_vst_flush_3d_conv_chain +tracksRegLiveness: true +legalized: true +selected: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $dm0, $d1 + ; CHECK-LABEL: name: test_vst_flush_3d_conv_chain + ; CHECK: liveins: $p0, $dm0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:mpfs = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:mstfifo = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:mr26_fifo_st = IMPLICIT_DEF + ; CHECK-NEXT: [[VST_PUSH_576_CONV_bfp16ebs8_fp32_:%[0-9]+]]:mstfifo, [[VST_PUSH_576_CONV_bfp16ebs8_fp32_1:%[0-9]+]]:mpfs, [[VST_PUSH_576_CONV_bfp16ebs8_fp32_2:%[0-9]+]]:mr26_fifo_st = VST_PUSH_576_CONV_bfp16ebs8_fp32 [[DEF1]], [[COPY]], [[DEF]], [[DEF2]], implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd + ; CHECK-NEXT: [[VST_FLUSH_512_CONV_normal_flush:%[0-9]+]]:mstfifo, [[VST_FLUSH_512_CONV_normal_flush1:%[0-9]+]]:mpfs, [[VST_FLUSH_512_CONV_normal_flush2:%[0-9]+]]:mr26_fifo_st = VST_FLUSH_512_CONV_normal_flush [[VST_PUSH_576_CONV_bfp16ebs8_fp32_]], [[VST_PUSH_576_CONV_bfp16ebs8_fp32_1]], [[VST_PUSH_576_CONV_bfp16ebs8_fp32_2]], implicit-def $srfifo_of + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d + ; CHECK-NEXT: [[VST_FLUSH_512_CONV_3D:%[0-9]+]]:mstfifo, [[VST_FLUSH_512_CONV_3D1:%[0-9]+]]:mpfs, [[VST_FLUSH_512_CONV_3D2:%[0-9]+]]:mr26_fifo_st, [[VST_FLUSH_512_CONV_3D3:%[0-9]+]]:edcl, [[VST_FLUSH_512_CONV_3D4:%[0-9]+]]:edch = VST_FLUSH_512_CONV_3D [[VST_FLUSH_512_CONV_normal_flush]], [[VST_FLUSH_512_CONV_normal_flush1]], [[VST_FLUSH_512_CONV_normal_flush2]], [[COPY1]], implicit-def $srfifo_of + ; CHECK-NEXT: PseudoRET implicit $lr + %1:acc2048 = COPY $dm0 + %2:mpfs = IMPLICIT_DEF + %3:mstfifo = IMPLICIT_DEF + %4:mr26_fifo_st = IMPLICIT_DEF + %8:mstfifo, %7:mpfs, %9:mr26_fifo_st = VST_PUSH_576_CONV_bfp16ebs8_fp32 %3, %1, %2, %4, implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd + %10:mstfifo, %11:mpfs, %12:mr26_fifo_st = VST_FLUSH_512_normal_flush %8, %7, %9, implicit-def $srfifo_of + %13:eds = COPY $d1_3d + %14:mstfifo, %15:mpfs, %16:mr26_fifo_st, %17:edcl, %18:edch = VST_FLUSH_512_3D %10, %11, %12, %13, implicit-def $srfifo_of + PseudoRET implicit $lr +... + +--- +name: test_vst_push_multi_use_by_vst_flush +tracksRegLiveness: true +legalized: true +selected: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $dm0, $d1 + ; CHECK-LABEL: name: test_vst_push_multi_use_by_vst_flush + ; CHECK: liveins: $p0, $dm0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:mpfs = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:mstfifo = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:mr26_fifo_st = IMPLICIT_DEF + ; CHECK-NEXT: [[VST_PUSH_544_CONV_bfp16ebs16_fp32_:%[0-9]+]]:mstfifo, [[VST_PUSH_544_CONV_bfp16ebs16_fp32_1:%[0-9]+]]:mpfs, [[VST_PUSH_544_CONV_bfp16ebs16_fp32_2:%[0-9]+]]:mr26_fifo_st = VST_PUSH_544_CONV_bfp16ebs16_fp32 [[DEF1]], [[COPY]], [[DEF]], [[DEF2]], implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:em = IMPLICIT_DEF + ; CHECK-NEXT: [[VST_FLUSH_512_CONV_fifo_1d_flush:%[0-9]+]]:mstfifo, [[VST_FLUSH_512_CONV_fifo_1d_flush1:%[0-9]+]]:mpfs, [[VST_FLUSH_512_CONV_fifo_1d_flush2:%[0-9]+]]:mr26_fifo_st = VST_FLUSH_512_CONV_fifo_1d_flush [[VST_PUSH_544_CONV_bfp16ebs16_fp32_]], [[VST_PUSH_544_CONV_bfp16ebs16_fp32_1]], [[VST_PUSH_544_CONV_bfp16ebs16_fp32_2]], [[DEF3]], implicit-def $srfifo_of + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 + ; CHECK-NEXT: [[VST_FLUSH_512_CONV_2D:%[0-9]+]]:mstfifo, [[VST_FLUSH_512_CONV_2D1:%[0-9]+]]:mpfs, [[VST_FLUSH_512_CONV_2D2:%[0-9]+]]:mr26_fifo_st, [[VST_FLUSH_512_CONV_2D3:%[0-9]+]]:edc = VST_FLUSH_512_CONV_2D [[VST_PUSH_544_CONV_bfp16ebs16_fp32_]], [[VST_PUSH_544_CONV_bfp16ebs16_fp32_1]], [[VST_PUSH_544_CONV_bfp16ebs16_fp32_2]], [[COPY1]], implicit-def $srfifo_of + ; CHECK-NEXT: PseudoRET implicit $lr + %1:acc2048 = COPY $dm0 + %2:mpfs = IMPLICIT_DEF + %3:mstfifo = IMPLICIT_DEF + %4:mr26_fifo_st = IMPLICIT_DEF + %8:mstfifo, %7:mpfs, %9:mr26_fifo_st = VST_PUSH_544_CONV_bfp16ebs16_fp32 %3, %1, %2, %4, implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd + %10:em = IMPLICIT_DEF + %11:mstfifo, %12:mpfs, %13:mr26_fifo_st = VST_FLUSH_512_fifo_1d_flush %8, %7, %9, %10, implicit-def $srfifo_of + %17:ed = COPY $d1 + %14:mstfifo, %15:mpfs, %16:mr26_fifo_st, %18:edc = VST_FLUSH_512_2D %8, %7, %9, %17, implicit-def $srfifo_of + PseudoRET implicit $lr +... + +# Negative test case: VST_FLUSH is not combined into VST_FLUSH_CONV, as it is not a user of VST_PUSH_CONV. +--- +name: test_vst_flush_2d_conv_neg +tracksRegLiveness: true +legalized: true +selected: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $e0, $p0, $x0, $d1 + ; CHECK-LABEL: name: test_vst_flush_2d_conv_neg + ; CHECK: liveins: $e0, $p0, $x0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:el = COPY $e0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:mpfs = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:mstfifo = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:mr26_fifo_st = IMPLICIT_DEF + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec576 = REG_SEQUENCE [[COPY]], %subreg.sub_bfp16_x, [[COPY1]], %subreg.sub_bfp16_e + ; CHECK-NEXT: [[VST_PUSH_544_CONV_bfp16ebs16_ebs8_:%[0-9]+]]:mstfifo, [[VST_PUSH_544_CONV_bfp16ebs16_ebs8_1:%[0-9]+]]:mpfs, [[VST_PUSH_544_CONV_bfp16ebs16_ebs8_2:%[0-9]+]]:mr26_fifo_st = VST_PUSH_544_CONV_bfp16ebs16_ebs8 [[DEF1]], [[REG_SEQUENCE]], [[DEF]], [[DEF2]], implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ed = COPY $d1 + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:mpfs = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:mstfifo = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:mr26_fifo_st = IMPLICIT_DEF + ; CHECK-NEXT: [[VST_FLUSH_512_2D:%[0-9]+]]:mstfifo, [[VST_FLUSH_512_2D1:%[0-9]+]]:mpfs, [[VST_FLUSH_512_2D2:%[0-9]+]]:mr26_fifo_st, [[VST_FLUSH_512_2D3:%[0-9]+]]:edc = VST_FLUSH_512_2D [[DEF4]], [[DEF3]], [[DEF5]], [[COPY2]], implicit-def $srfifo_of + ; CHECK-NEXT: PseudoRET implicit $lr + %1:vec512 = COPY $x0 + %2:el = COPY $e0 + %3:mpfs = IMPLICIT_DEF + %4:mstfifo = IMPLICIT_DEF + %5:mr26_fifo_st = IMPLICIT_DEF + %11:vec576 = REG_SEQUENCE %1, %subreg.sub_bfp16_x, %2, %subreg.sub_bfp16_e + %9:mstfifo, %8:mpfs, %10:mr26_fifo_st = VST_PUSH_544_CONV_bfp16ebs16_ebs8 %4, %11, %3, %5, implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd + %12:ed = COPY $d1 + %13:mpfs = IMPLICIT_DEF + %14:mstfifo = IMPLICIT_DEF + %15:mr26_fifo_st = IMPLICIT_DEF + %16:mstfifo, %17:mpfs, %18:mr26_fifo_st, %19:edc = VST_FLUSH_512_2D %14, %13, %15, %12, implicit-def $srfifo_of + PseudoRET implicit $lr +... + +# Negative test case: VST_FLUSH is not combined into VST_FLUSH_CONV, as its def is VST_PUSH not VST_PUSH_CONV. +--- +name: test_vst_flush_3d_conv_chain_neg +tracksRegLiveness: true +legalized: true +selected: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $d1, $x0, $e0 + ; CHECK-LABEL: name: test_vst_flush_3d_conv_chain_neg + ; CHECK: liveins: $p0, $d1, $x0, $e0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:el = COPY $e0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec576 = REG_SEQUENCE [[COPY]], %subreg.sub_bfp16_x, [[COPY1]], %subreg.sub_bfp16_e + ; CHECK-NEXT: [[DEF:%[0-9]+]]:mpfs = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:mstfifo = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:mr26_fifo_st = IMPLICIT_DEF + ; CHECK-NEXT: [[VST_PUSH_576_:%[0-9]+]]:mstfifo, [[VST_PUSH_576_1:%[0-9]+]]:mpfs, [[VST_PUSH_576_2:%[0-9]+]]:mr26_fifo_st = VST_PUSH_576 [[DEF1]], [[REG_SEQUENCE]], [[DEF]], [[DEF2]], implicit-def $srfifo_of + ; CHECK-NEXT: [[VST_FLUSH_512_normal_flush:%[0-9]+]]:mstfifo, [[VST_FLUSH_512_normal_flush1:%[0-9]+]]:mpfs, [[VST_FLUSH_512_normal_flush2:%[0-9]+]]:mr26_fifo_st = VST_FLUSH_512_normal_flush [[VST_PUSH_576_]], [[VST_PUSH_576_1]], [[VST_PUSH_576_2]], implicit-def $srfifo_of + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:eds = COPY $d1_3d + ; CHECK-NEXT: [[VST_FLUSH_512_3D:%[0-9]+]]:mstfifo, [[VST_FLUSH_512_3D1:%[0-9]+]]:mpfs, [[VST_FLUSH_512_3D2:%[0-9]+]]:mr26_fifo_st, [[VST_FLUSH_512_3D3:%[0-9]+]]:edcl, [[VST_FLUSH_512_3D4:%[0-9]+]]:edch = VST_FLUSH_512_3D [[VST_FLUSH_512_normal_flush]], [[VST_FLUSH_512_normal_flush1]], [[VST_FLUSH_512_normal_flush2]], [[COPY2]], implicit-def $srfifo_of + ; CHECK-NEXT: PseudoRET implicit $lr + %0:vec512 = COPY $x0 + %20:el = COPY $e0 + %1:vec576 = REG_SEQUENCE %0, %subreg.sub_bfp16_x, %20, %subreg.sub_bfp16_e + %2:mpfs = IMPLICIT_DEF + %3:mstfifo = IMPLICIT_DEF + %4:mr26_fifo_st = IMPLICIT_DEF + %8:mstfifo, %7:mpfs, %9:mr26_fifo_st = VST_PUSH_576 %3, %1, %2, %4, implicit-def $srfifo_of + %10:mstfifo, %11:mpfs, %12:mr26_fifo_st = VST_FLUSH_512_normal_flush %8, %7, %9, implicit-def $srfifo_of + %13:eds = COPY $d1_3d + %14:mstfifo, %15:mpfs, %16:mr26_fifo_st, %17:edcl, %18:edch = VST_FLUSH_512_3D %10, %11, %12, %13, implicit-def $srfifo_of + PseudoRET implicit $lr +...