Skip to content

[AIE2P] Combine VST.PUSH.CONV #351

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions llvm/lib/Target/AIE/AIEBaseInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,15 @@ struct AIEBaseInstrInfo : public TargetInstrInfo {
TypeSize Size) const {
llvm_unreachable("Target didn't implement getCombinedPostIncOpcode");
}

/// Check whether \p Opcode is a VST.PUSH.CONV opcode.
/// The base implementation answers false for every opcode; it is virtual so
/// that a target's instruction info can override it.
/// \param Opcode machine opcode to classify.
/// \return true if \p Opcode is a VST.PUSH.CONV, false otherwise.
virtual bool isFifoStoreConvOpcode(unsigned Opcode) const { return false; }
/// Map a VST.FLUSH opcode to its VST.FLUSH.CONV counterpart.
/// The base implementation must never be called: it ends in
/// llvm_unreachable, so it is only meaningful on targets that override it.
/// \param StoreFlushOpcode the VST.FLUSH opcode to translate.
/// \return Corresponding VST.FLUSH.CONV opcode for \p StoreFlushOpcode.
/// NOTE(review): the std::optional return type suggests overrides report
/// "no counterpart" with an empty optional - confirm against the overrides.
virtual std::optional<unsigned>
getStoreFlushConvOpcode(unsigned StoreFlushOpcode) const {
llvm_unreachable("Target didn't implement getStoreFlushConvOpcode!");
}

/// \return AIE2p OpCode based on \a IntrinsicID
virtual unsigned getOpCode(MachineInstr &MI) const {
llvm_unreachable("Target didn't implement getOpCode");
Expand Down
41 changes: 41 additions & 0 deletions llvm/lib/Target/AIE/AIEPostSelectOptimize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,39 @@ bool fixLoadMemOpInfo(MachineFunction &MF, MachineBasicBlock &MBB,
return Changed;
}

// Replace the VST.FLUSH opcode with VST.FLUSH.CONV if it is chained with
// VST.PUSH.CONV. The CONV variant behaves identically to the normal variant but
// all actions are delayed by one pipeline stage.
//
// "Chained" here means: the flush uses the register defined by operand 0 of a
// VST.PUSH.CONV, or operand 0 of a flush that was itself just rewritten.
//
// \param MBB block that is scanned for VST.PUSH.CONV instructions.
// \param MRI register info used to enumerate the users of each chain register.
// \return true if at least one instruction had its opcode replaced.
bool modifyStoreFlush(MachineBasicBlock &MBB, MachineRegisterInfo &MRI) {
const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
// NOTE(review): unchecked downcast - assumes the subtarget's instruction info
// always derives from AIEBaseInstrInfo.
const AIEBaseInstrInfo *AIEII = static_cast<const AIEBaseInstrInfo *>(TII);
bool Changed = false;

// Helper function to recursively update VST.FLUSH to VST.FLUSH.CONV
// It is a std::function (rather than a plain lambda) so that it can call
// itself by name.
std::function<void(const Register)> Impl = [&](const Register UseReg) {
// Users come straight from MRI; nothing here restricts them to MBB.
for (MachineInstr &UseMI : MRI.use_instructions(UseReg)) {
// An empty optional means UseMI is not a flush with a CONV counterpart;
// such users are skipped and the chain is not followed through them.
std::optional<unsigned> StoreFlushConvOpcode =
AIEII->getStoreFlushConvOpcode(UseMI.getOpcode());
if (StoreFlushConvOpcode) {
// Only the instruction descriptor is swapped; operands stay as they are.
UseMI.setDesc(TII->get(*StoreFlushConvOpcode));
Changed = true;
// Update the opcode for the next dependent instruction in the chain
const Register UseDstReg = UseMI.getOperand(0).getReg();
Impl(UseDstReg);
}
}
};

// Every VST.PUSH.CONV in the block is the root of a chain to rewrite.
for (MachineInstr &MI : MBB) {
if (AIEII->isFifoStoreConvOpcode(MI.getOpcode())) {
const Register DstReg = MI.getOperand(0).getReg();
Impl(DstReg);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we also have to check, that there is only one user of DstReg, i.e. nobody is using the intermediate results of the conversion except for VST_PUSH?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes we have to check, Iam checking that in AIE2PInstructionSelector.cpp

if (!canCombineCONV(StoreI, *ConvOp) ||
      StoreI.getParent() != ConvOp->getParent() || !MRI.hasOneUse(ConvResult))
    return false;

}
}

return Changed;
}

bool AIEPostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "\n******* POST I-SEL OPTIMIZATION PASS *******\n"
<< "********** Function: " << MF.getName() << '\n');
Expand Down Expand Up @@ -624,6 +657,14 @@ bool AIEPostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
Changed |= fixLoadMemOpInfo(MF, MBB, MF.getRegInfo());
}

// 5. Convert store flush instructions only on AIE2P targets: when VST.FLUSH
// and VST.PUSH.CONV are chained, replace VST.FLUSH with VST.FLUSH.CONV
if (MF.getTarget().getTargetTriple().isAIE2P()) {
for (MachineBasicBlock &MBB : MF) {
Changed |= modifyStoreFlush(MBB, MF.getRegInfo());
}
}

return Changed;
}

Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,27 @@ bool AIE2PInstrInfo::isGenericOffsetMemOpcode(unsigned Opcode) const {
(Opcode == AIE2P::G_AIE_OFFSET_ZEXTLOAD));
}

/// \return true if \p Opcode is one of the AIE2P VST.PUSH.CONV opcodes
/// (544-bit ebs16 from ebs8, 544-bit ebs16 from fp32, 576-bit ebs8 from fp32).
bool AIE2PInstrInfo::isFifoStoreConvOpcode(unsigned Opcode) const {
  switch (Opcode) {
  case AIE2P::VST_PUSH_544_CONV_bfp16ebs16_ebs8:
  case AIE2P::VST_PUSH_544_CONV_bfp16ebs16_fp32:
  case AIE2P::VST_PUSH_576_CONV_bfp16ebs8_fp32:
    return true;
  default:
    return false;
  }
}

/// Translate a VST.FLUSH opcode into the matching VST.FLUSH.CONV opcode.
/// \param StoreFlushOpcode opcode to translate.
/// \return the CONV variant for the four known 512-bit flush opcodes, or
/// std::nullopt when \p StoreFlushOpcode is anything else.
std::optional<unsigned>
AIE2PInstrInfo::getStoreFlushConvOpcode(unsigned StoreFlushOpcode) const {
  // Stays empty unless one of the known flush opcodes is matched below.
  std::optional<unsigned> ConvOpcode;
  switch (StoreFlushOpcode) {
  case AIE2P::VST_FLUSH_512_normal_flush:
    ConvOpcode = AIE2P::VST_FLUSH_512_CONV_normal_flush;
    break;
  case AIE2P::VST_FLUSH_512_fifo_1d_flush:
    ConvOpcode = AIE2P::VST_FLUSH_512_CONV_fifo_1d_flush;
    break;
  case AIE2P::VST_FLUSH_512_2D:
    ConvOpcode = AIE2P::VST_FLUSH_512_CONV_2D;
    break;
  case AIE2P::VST_FLUSH_512_3D:
    ConvOpcode = AIE2P::VST_FLUSH_512_CONV_3D;
    break;
  default:
    break;
  }
  return ConvOpcode;
}

std::optional<unsigned> AIE2PInstrInfo::getCombinedPostIncOpcode(
MachineInstr &BaseMemI, MachineInstr &PostIncI, TypeSize Size) const {
switch (PostIncI.getOpcode()) {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class AIE2PInstrInfo : public AIE2PGenInstrInfo {
bool isBooleanNot(unsigned Opc) const override;
bool isConstStep(const MachineInstr &MI, int64_t &Step) const override;
bool isGenericOffsetMemOpcode(unsigned Opcode) const override;
bool isFifoStoreConvOpcode(unsigned Opcode) const override;

bool verifyGenericInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const override;
Expand All @@ -97,6 +98,8 @@ class AIE2PInstrInfo : public AIE2PGenInstrInfo {
std::optional<unsigned>
getCombinedPostIncOpcode(MachineInstr &BaseMemI, MachineInstr &PtrAddI,
TypeSize Size) const override;
std::optional<unsigned>
getStoreFlushConvOpcode(unsigned StoreFlushOpcode) const override;
unsigned getOpCode(MachineInstr &MI) const override;
Register getVaddSignControlRegister() const override;

Expand Down
102 changes: 91 additions & 11 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class AIE2PInstructionSelector : public AIEBaseInstructionSelector {
bool isWrite);
bool selectVST_FIFO(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectG_TRUNC(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVST_FIFO_CONV(MachineInstr &StoreI, MachineRegisterInfo &MRI);

static const char *getName() { return DEBUG_TYPE; }

Expand Down Expand Up @@ -3630,19 +3631,28 @@ std::optional<LoadStoreOpcodes> AIE2PInstructionSelector::getCombinedOpcodeCONV(
std::optional<APInt> Immediate) {
const bool AlwaysFitsImmediateRange = true;
const bool NoImmediate = false;
if (CombOp.getOpcode() != AIE2P::G_INTRINSIC_W_SIDE_EFFECTS ||
(cast<GIntrinsic>(CombOp).getIntrinsicID() !=
Intrinsic::aie2p_v16accfloat_to_v16bf16 &&
cast<GIntrinsic>(CombOp).getIntrinsicID() !=
Intrinsic::aie2p_v32accfloat_to_v32bf16))
return {};
if (CombOp.getOpcode() != AIE2P::G_INTRINSIC_W_SIDE_EFFECTS)
return std::nullopt;

const unsigned CombOpID = cast<GIntrinsic>(CombOp).getIntrinsicID();
switch (CombOpID) {
case Intrinsic::aie2p_v16accfloat_to_v16bf16:
case Intrinsic::aie2p_v32accfloat_to_v32bf16:
case Intrinsic::aie2p_v64accfloat_to_v64bfp16ebs8:
case Intrinsic::aie2p_v64accfloat_to_v64bfp16ebs16:
case Intrinsic::aie2p_v64bfp16ebs8_to_v64bfp16ebs16:
break;
default:
return std::nullopt;
}

assert(((cast<GIntrinsic>(CombOp).getIntrinsicID() ==
Intrinsic::aie2p_v16accfloat_to_v16bf16 &&
assert(((CombOpID == Intrinsic::aie2p_v16accfloat_to_v16bf16 &&
getLoadStoreSize(MemOp) == 256) ||
(cast<GIntrinsic>(CombOp).getIntrinsicID() ==
Intrinsic::aie2p_v32accfloat_to_v32bf16 &&
getLoadStoreSize(MemOp) == 512)) &&
(CombOpID == Intrinsic::aie2p_v32accfloat_to_v32bf16 &&
getLoadStoreSize(MemOp) == 512) ||
(CombOpID == Intrinsic::aie2p_v64accfloat_to_v64bfp16ebs8) ||
(CombOpID == Intrinsic::aie2p_v64accfloat_to_v64bfp16ebs16) ||
(CombOpID == Intrinsic::aie2p_v64bfp16ebs8_to_v64bfp16ebs16)) &&
"Unexpected VST.CONV size");

unsigned ISelOpcode;
Expand Down Expand Up @@ -3697,6 +3707,21 @@ std::optional<LoadStoreOpcodes> AIE2PInstructionSelector::getCombinedOpcodeCONV(
ISelOpcode = AIE2P::VST_3D_CONV_bf16_fp32_dmx_sts_srs_bf;
return LoadStoreOpcodes{ISelOpcode, NoImmediate,
/*OffsetOpcode=*/{}};
case AIE2P::G_INTRINSIC_W_SIDE_EFFECTS:
switch (cast<GIntrinsic>(MemOp).getIntrinsicID()) {
case Intrinsic::aie2p_fifo_st_push_544_bfp16:
if (CombOpID == Intrinsic::aie2p_v64bfp16ebs8_to_v64bfp16ebs16)
ISelOpcode = AIE2P::VST_PUSH_544_CONV_bfp16ebs16_ebs8;
else /* CombOpID ==
Intrinsic::aie2p_v64accfloat_to_v64bfp16ebs16 */
ISelOpcode = AIE2P::VST_PUSH_544_CONV_bfp16ebs16_fp32;
return LoadStoreOpcodes{ISelOpcode, NoImmediate,
/*OffsetOpcode=*/{}};
case Intrinsic::aie2p_fifo_st_push_576_bfp16:
return LoadStoreOpcodes{AIE2P::VST_PUSH_576_CONV_bfp16ebs8_fp32,
NoImmediate,
/*OffsetOpcode=*/{}};
}
}
return {};
}
Expand Down Expand Up @@ -4905,6 +4930,57 @@ unsigned int getStoreFifoOpcode(MachineInstr &I) {
return AIE2P::INSTRUCTION_LIST_END;
}

/// Try to fold the conversion intrinsic feeding the FIFO push \p StoreI into
/// the store, emitting a single VST.PUSH.CONV instruction for both.
///
/// The combine is rejected (and nothing is modified) unless:
///  - canCombineCONV accepts the store/conversion pair,
///  - store and conversion are in the same basic block,
///  - both conversion results consumed by the store (operands 5 and 6) are
///    defined by that same conversion and have no other user.
///
/// \param StoreI the fifo_st_push intrinsic being selected. It is erased when
///        the combine succeeds.
/// \param MRI register info of the enclosing function.
/// \return true if the combined instruction was emitted and its register
///         operands were constrained; false if the combine does not apply, or
///         if constraining the new instruction failed.
bool AIE2PInstructionSelector::selectVST_FIFO_CONV(MachineInstr &StoreI,
                                                   MachineRegisterInfo &MRI) {
  // The store consumes two values produced by the conversion: operand 5 and
  // operand 6 (presumably mantissa and exponent, going by MantIn/ExpIn below).
  const Register ConvResult = StoreI.getOperand(5).getReg();
  const Register ConvExpResult = StoreI.getOperand(6).getReg();
  MachineInstr *ConvOp = getDefIgnoringCopiesAndBitcasts(ConvResult, MRI);
  assert(ConvOp && "Expected SSA.");

  if (!canCombineCONV(StoreI, *ConvOp) ||
      StoreI.getParent() != ConvOp->getParent() || !MRI.hasOneUse(ConvResult))
    return false;

  // The second stored value must come from the very same conversion and must
  // not be used elsewhere. Otherwise replacing the store's inputs with the
  // conversion's source would either pair values of two different conversions
  // or drop a result that somebody else still reads.
  if (getDefIgnoringCopiesAndBitcasts(ConvExpResult, MRI) != ConvOp ||
      !MRI.hasOneUse(ConvExpResult))
    return false;

  const std::optional<APInt> NoImmediate = {};
  std::optional<LoadStoreOpcodes> LSO =
      getCombinedOpcodeCONV(StoreI, *ConvOp, NoImmediate);
  // Nothing has been modified yet, so falling back to the non-combined
  // selection is safe. This also avoids dereferencing an empty optional in
  // builds where assertions are compiled out.
  if (!LSO)
    return false;

  Register PtrOut = StoreI.getOperand(0).getReg();
  Register FifoOut = StoreI.getOperand(1).getReg();
  Register AvailOut = StoreI.getOperand(2).getReg();

  Register PtrIn = StoreI.getOperand(4).getReg();
  Register FifoIn = StoreI.getOperand(7).getReg();
  Register AvailIn = StoreI.getOperand(8).getReg();
  Register SrcReg;

  const unsigned CombOpID = cast<GIntrinsic>(*ConvOp).getIntrinsicID();
  if (CombOpID == Intrinsic::aie2p_v64bfp16ebs8_to_v64bfp16ebs16) {
    // This conversion has two source registers; pack them into one register
    // with a REG_SEQUENCE so they can be passed as a single operand.
    Register MantIn = ConvOp->getOperand(3).getReg();
    Register ExpIn = ConvOp->getOperand(4).getReg();
    SrcReg = MRI.createVirtualRegister(&AIE2P::mEXaRegClass);
    MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {SrcReg}, {})
        .addReg(MantIn)
        .addImm(AIE2P::sub_bfp16_x)
        .addReg(ExpIn)
        .addImm(AIE2P::sub_bfp16_e);
  } else {
    assert((CombOpID == Intrinsic::aie2p_v64accfloat_to_v64bfp16ebs8 ||
            CombOpID == Intrinsic::aie2p_v64accfloat_to_v64bfp16ebs16) &&
           "Unexpected IntrinsicID in VST.FIFO.CONV combine");
    // The accfloat conversions have a single source: use it directly.
    SrcReg = ConvOp->getOperand(3).getReg();
  }

  // Note the def order of the selected instruction (fifo, pointer, avail)
  // differs from the intrinsic's (pointer, fifo, avail).
  auto NewInstr = MIB.buildInstr(LSO->ISelOpcode, {FifoOut, PtrOut, AvailOut},
                                 {FifoIn, SrcReg, PtrIn, AvailIn});
  NewInstr.cloneMemRefs(StoreI);

  makeDeadMI(*ConvOp, MRI);
  StoreI.eraseFromParent();

  // NOTE(review): if constraining fails, this returns false after StoreI has
  // already been erased; callers must not touch StoreI on that path - confirm
  // how the caller treats a false return.
  return constrainSelectedInstRegOperands(*NewInstr.getInstr(), TII, TRI, RBI);
}

bool AIE2PInstructionSelector::selectVST_FIFO(MachineInstr &I,
MachineRegisterInfo &MRI) {
auto IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
Expand All @@ -4928,6 +5004,10 @@ bool AIE2PInstructionSelector::selectVST_FIFO(MachineInstr &I,
}
case Intrinsic::aie2p_fifo_st_push_544_bfp16:
case Intrinsic::aie2p_fifo_st_push_576_bfp16: {
// First try to match CONV combine
if (selectVST_FIFO_CONV(I, MRI))
return true;

Register PtrIn = I.getOperand(4).getReg();
Register FifoIn = I.getOperand(7).getReg();
Register AvailIn = I.getOperand(8).getReg();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s

---
name: test_fifo_st_push_576_conv_accfloat_to_bfp16ebs8
tracksRegLiveness: true
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $p0, $dm0
; CHECK-LABEL: name: test_fifo_st_push_576_conv_accfloat_to_bfp16ebs8
; CHECK: liveins: $p0, $dm0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0
; CHECK-NEXT: [[DEF:%[0-9]+]]:mpfs = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:mstfifo = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:mr26_fifo_st = IMPLICIT_DEF
; CHECK-NEXT: [[VST_PUSH_576_CONV_bfp16ebs8_fp32_:%[0-9]+]]:mstfifo, [[VST_PUSH_576_CONV_bfp16ebs8_fp32_1:%[0-9]+]]:mpfs, [[VST_PUSH_576_CONV_bfp16ebs8_fp32_2:%[0-9]+]]:mr26_fifo_st = VST_PUSH_576_CONV_bfp16ebs8_fp32 [[DEF1]], [[COPY]], [[DEF]], [[DEF2]], implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd
; CHECK-NEXT: PseudoRET implicit $lr
%0:ptrregbank(p0) = COPY $p0
%1:accregbank(<64 x s32>) = COPY $dm0
%2:ptrregbank(p0) = IMPLICIT_DEF
%3:fiforegbank(<32 x s32>) = IMPLICIT_DEF
%4:gprregbank(s32) = IMPLICIT_DEF
%5:vregbank(<64 x s8>), %6:gprregbank(<8 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.v64accfloat.to.v64bfp16ebs8), %1(<64 x s32>)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be testing the same intrinsic as the test below. I guess you meant to test aie2p.v64accfloat.to.v64bfp16ebs16 instead? Maybe adjust the test name to reflect the change as well.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The two tests use different stores (aie2p.fifo.st.push.576.bfp16 and aie2p.fifo.st.push.544.bfp16) with the same conversion.

%7:ptrregbank(p0), %8:fiforegbank(<32 x s32>), %9:gprregbank(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.fifo.st.push.576.bfp16), %2:ptrregbank(p0), %5:vregbank(<64 x s8>), %6:gprregbank(<8 x s8>), %3:fiforegbank(<32 x s32>), %4:gprregbank(s32)
PseudoRET implicit $lr
...

---
name: test_fifo_st_push_544_conv_accfloat_to_bfp16ebs8
tracksRegLiveness: true
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $p0, $dm0
; CHECK-LABEL: name: test_fifo_st_push_544_conv_accfloat_to_bfp16ebs8
; CHECK: liveins: $p0, $dm0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0
; CHECK-NEXT: [[DEF:%[0-9]+]]:mpfs = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:mstfifo = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:mr26_fifo_st = IMPLICIT_DEF
; CHECK-NEXT: [[VST_PUSH_544_CONV_bfp16ebs16_fp32_:%[0-9]+]]:mstfifo, [[VST_PUSH_544_CONV_bfp16ebs16_fp32_1:%[0-9]+]]:mpfs, [[VST_PUSH_544_CONV_bfp16ebs16_fp32_2:%[0-9]+]]:mr26_fifo_st = VST_PUSH_544_CONV_bfp16ebs16_fp32 [[DEF1]], [[COPY]], [[DEF]], [[DEF2]], implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd
; CHECK-NEXT: PseudoRET implicit $lr
%0:ptrregbank(p0) = COPY $p0
%1:accregbank(<64 x s32>) = COPY $dm0
%2:ptrregbank(p0) = IMPLICIT_DEF
%3:fiforegbank(<32 x s32>) = IMPLICIT_DEF
%4:gprregbank(s32) = IMPLICIT_DEF
%5:vregbank(<64 x s8>), %6:gprregbank(<8 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.v64accfloat.to.v64bfp16ebs8), %1(<64 x s32>)
%7:ptrregbank(p0), %8:fiforegbank(<32 x s32>), %9:gprregbank(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.fifo.st.push.544.bfp16), %2:ptrregbank(p0), %5:vregbank(<64 x s8>), %6:gprregbank(<8 x s8>), %3:fiforegbank(<32 x s32>), %4:gprregbank(s32)
PseudoRET implicit $lr
...

---
name: test_fifo_st_push_544_conv_bfp16ebs8_to_bfp16ebs16
tracksRegLiveness: true
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $e0, $p0, $x0
; CHECK-LABEL: name: test_fifo_st_push_544_conv_bfp16ebs8_to_bfp16ebs16
; CHECK: liveins: $e0, $p0, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:el = COPY $e0
; CHECK-NEXT: [[DEF:%[0-9]+]]:mpfs = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:mstfifo = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:mr26_fifo_st = IMPLICIT_DEF
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec576 = REG_SEQUENCE [[COPY]], %subreg.sub_bfp16_x, [[COPY1]], %subreg.sub_bfp16_e
; CHECK-NEXT: [[VST_PUSH_544_CONV_bfp16ebs16_ebs8_:%[0-9]+]]:mstfifo, [[VST_PUSH_544_CONV_bfp16ebs16_ebs8_1:%[0-9]+]]:mpfs, [[VST_PUSH_544_CONV_bfp16ebs16_ebs8_2:%[0-9]+]]:mr26_fifo_st = VST_PUSH_544_CONV_bfp16ebs16_ebs8 [[DEF1]], [[REG_SEQUENCE]], [[DEF]], [[DEF2]], implicit-def $srf2bflags, implicit-def $srfifo_of, implicit $crf2bmask, implicit $crrnd
; CHECK-NEXT: PseudoRET implicit $lr
%0:ptrregbank(p0) = COPY $p0
%1:vregbank(<64 x s8>) = COPY $x0
%2:gprregbank(<8 x s8>) = COPY $e0
%3:ptrregbank(p0) = IMPLICIT_DEF
%4:fiforegbank(<32 x s32>) = IMPLICIT_DEF
%5:gprregbank(s32) = IMPLICIT_DEF
%6:vregbank(<64 x s8>), %7:gprregbank(<8 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.v64bfp16ebs8.to.v64bfp16ebs16), %1(<64 x s8>), %2(<8 x s8>)
%8:ptrregbank(p0), %9:fiforegbank(<32 x s32>), %10:gprregbank(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.fifo.st.push.544.bfp16), %3:ptrregbank(p0), %6:vregbank(<64 x s8>), %7:gprregbank(<8 x s8>), %4:fiforegbank(<32 x s32>), %5:gprregbank(s32)
PseudoRET implicit $lr
...
Loading