Skip to content

Stuckmann.multi.slot.pseudos #304

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1021,6 +1021,23 @@ const MCSlotInfo *AIEBaseInstrInfo::getSlotInfo(const MCSlotKind Kind) const {
return FormatInterface->getSlotInfo(Kind);
}

/// \return true when \p MI is a pseudo instruction for which the format
/// interface reports alternate instruction opcodes.
bool AIEBaseInstrInfo::isMultiSlotPseudo(const MachineInstr &MI) const {
  if (!MI.isPseudo())
    return false;

  // The alternate-opcode lookup is only reached for pseudo instructions.
  return static_cast<bool>(
      getFormatInterface()->getAlternateInstsOpcode(MI.getOpcode()));
}

/// Look up the alternate opcode of multi-slot pseudo \p MI that issues in
/// \p Slot .
/// \pre isMultiSlotPseudo(MI)
/// \return the first alternate opcode whose slot kind equals \p Slot , or an
/// empty optional when no alternate matches.
std::optional<unsigned>
AIEBaseInstrInfo::getSlotOpcode(const MCSlotKind Slot,
                                const MachineInstr &MI) const {
  assert(isMultiSlotPseudo(MI));

  const auto &Alternates =
      *getFormatInterface()->getAlternateInstsOpcode(MI.getOpcode());
  for (const auto &Candidate : Alternates)
    if (getSlotKind(Candidate) == Slot)
      return Candidate;

  return std::nullopt;
}

/// \return the packet formats, obtained by forwarding the query to
/// FormatInterface.
const PacketFormats &AIEBaseInstrInfo::getPacketFormats() const {
return FormatInterface->getPacketFormats();
}
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AIE/AIEBaseInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ struct AIEBaseInstrInfo : public TargetInstrInfo {
/// slot will be the default one (unknown).
MCSlotKind getSlotKind(unsigned Opcode) const;
virtual const MCSlotInfo *getSlotInfo(const MCSlotKind Kind) const;
/// \return Opcode of the alternate instruction of multi-slot pseudo \p MI
/// that runs in \p Slot , or an empty optional if no alternate runs in
/// \p Slot .
/// \pre isMultiSlotPseudo(MI)
std::optional<unsigned> getSlotOpcode(const MCSlotKind Slot,
const MachineInstr &MI) const;

/// \return whether \p MI is a multi-slot pseudo instruction, i.e. a pseudo
/// for which the format interface provides alternate instruction opcodes.
bool isMultiSlotPseudo(const MachineInstr &MI) const;

/// Return the Packet formats for this target
virtual const PacketFormats &getPacketFormats() const;
/// Return a nop of the given byte size, or the smallest if zero.
Expand Down
13 changes: 12 additions & 1 deletion llvm/lib/Target/AIE/AIEInterBlockScheduling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "AIELiveRegs.h"
#include "AIEMachineScheduler.h"
#include "AIEMaxLatencyFinder.h"
#include "AIEMultiSlotInstrMaterializer.h"
#include "Utils/AIELoopUtils.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
Expand Down Expand Up @@ -60,6 +61,11 @@ static cl::opt<int> PostPipelinerMaxII(
"aie-postpipeliner-maxii", cl::init(40),
cl::desc("[AIE] Maximum II to be tried in the post-ra pipeliner"));

// Hidden command-line option "aie-multi-slot-pseudo-instr", off by default
// (cl::init(false)). In this file it gates the call to
// staticallyMaterializeMultiSlotInstructions().
static cl::opt<bool> EnableMultiSlotInstrMaterialization(
"aie-multi-slot-pseudo-instr", cl::Hidden, cl::init(false),
cl::desc("Statically materialize Multi-Slot Pseudo Instructions in "
"loops."));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious: Why not enable this by default?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had regressions with conv2d_1 in aie2 because the post-pipeliner was tripping up. I will check the QoR for aie2p to see whether this still occurs.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But maybe #359 might fix the issue. I'll run QoR with it merged


namespace llvm::AIE {

void dumpInterBlock(const InterBlockEdges &Edges) {
Expand Down Expand Up @@ -586,7 +592,7 @@ SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) {
// But first try SWP
if (BS.getRegions().size() == 1) {
auto &PostSWP = BS.getPostSWP();
if (PostSWP.canAccept(*BS.TheBlock)) {
if (PostSWP.isPostPipelineCandidate(*BS.TheBlock)) {
BS.FixPoint.II = PostSWP.getResMII(*BS.TheBlock);
return BS.FixPoint.Stage = SchedulingStage::Pipelining;
}
Expand Down Expand Up @@ -1161,6 +1167,11 @@ void BlockState::initInterBlock(const MachineSchedContext &Context,
// Don't worry, this just constructs a mostly empty container class
auto NumInstrs = getTop().getFreeInstructions().size();
PostSWP = std::make_unique<PostPipeliner>(HR, NumInstrs);

// perform static assignment of multi-slot pseudos
if (EnableMultiSlotInstrMaterialization &&
PostSWP->isPostPipelineCandidate(*TheBlock))
staticallyMaterializeMultiSlotInstructions(*TheBlock, HR);
}

// We are called just after the first round of scheduling a block.
Expand Down
229 changes: 229 additions & 0 deletions llvm/lib/Target/AIE/AIEMultiSlotInstrMaterializer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
//===--- AIEMultiSlotInstrMaterializer.cpp - -Multi Slot Instr materializer===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
//
// \file assigns an issue slot to multi-slot pseudo instructions within a single
// block loop to help loop pipelining.
//
//===----------------------------------------------------------------------===//

#include "AIEMultiSlotInstrMaterializer.h"
#include "AIEHazardRecognizer.h"

using namespace llvm;

#define DEBUG_TYPE "aie-multi-slot-pseudo"

namespace llvm::AIE {

class SlotMapping {
public:
SlotMapping(const AIEBaseInstrInfo *TII) : TII(TII) {}

/// Add \p MemBankBits to the MemoryBanks recorded for \p Slot . A Slot that is
/// not part of the mapping yet is inserted first.
void update(const MCSlotKind &Slot, const MemoryBankBits MemBankBits) {
  // try_emplace value-initializes the banks of a new Slot and leaves an
  // existing entry untouched, so the OR below always accumulates.
  const auto Entry = SlotToBanks.try_emplace(Slot).first;
  Entry->second |= MemBankBits;
}

/// \return first Slot where MemoryBankBits overlap with \p MemBankBits .
std::optional<MCSlotKind>
getAssignedSlot(const MemoryBankBits MemBankBits) const {
auto IT =
find_if(SlotToBanks,
[MemBankBits](
const std::pair<MCSlotKind, MemoryBankBits> &SlotBankPair) {
auto [Slot, Banks] = SlotBankPair;
return (Banks & MemBankBits) != 0;
});

if (IT == SlotToBanks.end())
return {};

const auto Slot = IT->first;
return Slot;
}

/// \return whether no MemoryBank has multiple Slots assigned to it in the
/// current mapping.
bool hasUniqueSlotForBank() const {
MemoryBankBits AccumulatedBanks = {};
for (auto &[Slot, Banks] : SlotToBanks) {
if (Banks & AccumulatedBanks) {
LLVM_DEBUG(dbgs() << "Conflict detected at Slot " << Slot << "\n");
return false;
}
AccumulatedBanks |= Banks;
}
return true;
}

/// Pick a Slot for \p MI and record the MemoryBanks that \p HR reports for
/// \p MI under that Slot.
/// \return false if \p HR reports no MemoryBanks for \p MI ; true once the
/// mapping has been updated.
bool assignSlot(const MachineInstr &MI, const AIEHazardRecognizer &HR) {
  const auto BankBits = HR.getMemoryBanks(&MI);
  LLVM_DEBUG(dbgs() << "Memory Bank: " << BankBits << " " << MI);
  if (!BankBits) {
    LLVM_DEBUG(dbgs() << "Warning: No MemoryBanks assigned to " << MI);
    return false;
  }

  // Preference order: a Slot that already serves one of these banks, then a
  // Slot without any assignment, and finally a Slot that gets reassigned.
  std::optional<MCSlotKind> Chosen = getAssignedSlot(BankBits);
  if (!Chosen) {
    Chosen = getUnusedLoadSlot();
    if (!Chosen) {
      LLVM_DEBUG(dbgs() << "Reassigning existing Slot to MemoryBankBits "
                        << BankBits << "\n");
      Chosen = getLeastRecentlyUsedSlot();
    }
  }

  update(*Chosen, BankBits);
  return true;
}

private:
/// Mapping between a Slot and the MemoryBanks that occupy the Slot.
std::map<MCSlotKind, MemoryBankBits> SlotToBanks;
/// If Slots have to be reassigned (because every Slot has already been
/// assigned to a Memory Bank), use an Index to cycle through already
/// used Slots.
unsigned ReassignIndex = 0;
/// Instruction info handed to the constructor; queried for the load Slot
/// kinds via getFormatInterface().
const AIEBaseInstrInfo *TII;

/// \return an unused Slot from the mapping.
std::optional<MCSlotKind> getUnusedLoadSlot() const {
const SmallVector<MCSlotKind, 2> LoadSlots =
TII->getFormatInterface()->getLoadSlotKinds();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: just LoadSlots?

Nit2: Maybe also rename the function to getUnusedLoadSlot(), because this is only to be used for multi-slot instructions that load.

It might actually just be better to iterate over the slots of the multi-slot instruction directly? I'm not sure why we need getLoadSlotKinds()


for (const auto &Slot : LoadSlots) {
  // A Slot that is a key of SlotToBanks has already been handed out.
  if (SlotToBanks.find(Slot) != SlotToBanks.end())
    continue;

  LLVM_DEBUG(dbgs() << " Found Unused Slot " << Slot << "\n");
  return Slot;
}

// Reaching this point means every load Slot is already part of the mapping
// (or there are no load Slots at all). Report that with an empty optional so
// that assignSlot() can fall back to reassigning an already used Slot,
// instead of silently handing out LoadSlots[0] again.
// FIXME: use a heuristic that takes Slots utilization and
// utilization of MemoryBanks into consideration.
return std::nullopt;
}

/// Hand out the load Slots one after the other, starting over at the first
/// one after the last has been returned, and \return the Slot selected by
/// this call.
/// FIXME: use a heuristic that takes utilization into account, instead of
/// blindly cycling through the Slots.
std::optional<MCSlotKind> getLeastRecentlyUsedSlot() {
  const auto LoadSlots = TII->getFormatInterface()->getLoadSlotKinds();

  // Wrap around once the index has run past the last load Slot.
  const unsigned Current = ReassignIndex < LoadSlots.size() ? ReassignIndex : 0;
  ReassignIndex = Current + 1;

  return LoadSlots[Current];
}
};

/// Collect the Slot to MemoryBank assignments that are already fixed in
/// \p MBB : every instruction that may load and is not a multi-slot pseudo
/// contributes the MemoryBanks reported by \p HR to the Slot of its opcode.
/// \return the resulting mapping.
SlotMapping getAssignedSlots(const MachineBasicBlock &MBB,
                             const AIEBaseInstrInfo *TII,
                             const AIEHazardRecognizer &HR) {
  SlotMapping Mapping(TII);

  LLVM_DEBUG(dbgs() << "Collecting any already materialized Slot to MemoryBank "
                       "assignments\n");
  for (const MachineInstr &MI : MBB) {
    const bool IsMaterializedLoad =
        MI.mayLoad() && !TII->isMultiSlotPseudo(MI);
    if (!IsMaterializedLoad)
      continue;

    const auto OccupiedSlot = TII->getSlotKind(MI.getOpcode());
    const MemoryBankBits BankBits = HR.getMemoryBanks(&MI);
    LLVM_DEBUG(dbgs() << "Slot: " << OccupiedSlot
                      << " MemoryBank: " << BankBits << " on " << MI);

    Mapping.update(OccupiedSlot, BankBits);
  }
  return Mapping;
}

/// Assign a Slot to every multi-slot pseudo load instruction in \p MBB ,
/// based on the MemoryBankBits that \p HR reports for the instruction.
/// Existing mappings in \p SlotToBanks are used and updated.
/// \return false if a Slot could not be assigned to one of the instructions
/// or if the final mapping has a MemoryBank shared between Slots; true
/// otherwise.
bool assignSlots(SlotMapping &SlotToBanks, const MachineBasicBlock &MBB,
                 const AIEBaseInstrInfo *TII, const AIEHazardRecognizer &HR) {
  for (const MachineInstr &MI : MBB) {
    // Short-circuiting keeps assignSlot() restricted to multi-slot loads.
    if (MI.mayLoad() && TII->isMultiSlotPseudo(MI) &&
        !SlotToBanks.assignSlot(MI, HR))
      return false;
  }

  return SlotToBanks.hasUniqueSlotForBank();
}

/// Materialize \p MI with a Slot according to overlapping MemoryBankBits
/// between \p MI and the Slot mapping in \p SlotToBanks .
/// \pre \p HR reports MemoryBanks for \p MI and \p SlotToBanks has a Slot that
/// overlaps with them.
/// If \p MI has no opcode that runs in the selected Slot, \p MI is left
/// untouched as a multi-slot pseudo.
void materializeInstr(MachineInstr &MI, const SlotMapping &SlotToBanks,
                      const AIEBaseInstrInfo *TII,
                      const AIEHazardRecognizer &HR) {
  const auto MemBankBits = HR.getMemoryBanks(&MI);
  assert(MemBankBits && "No MemoryBanks attached to MachineInstr.");

  const auto Slot = SlotToBanks.getAssignedSlot(MemBankBits);
  assert(Slot && "Could not find Slot for MemoryBank!");

  // The mapped Slot may stem from another load instruction or from the
  // generic list of load Slots, so this pseudo is not guaranteed to have an
  // alternative for it. An assert alone would leave release builds
  // dereferencing an empty optional; skip the instruction instead.
  const auto OpCode = TII->getSlotOpcode(*Slot, MI);
  if (!OpCode) {
    LLVM_DEBUG(dbgs() << "No Opcode for Slot " << *Slot
                      << ", keeping multi-slot pseudo " << MI);
    return;
  }

  MI.setDesc(TII->get(*OpCode));
  LLVM_DEBUG(dbgs() << "Assigned " << *Slot << " to " << MI);
}

/// Materialize multi-slot pseudo instructions in \p MBB according to
/// overlapping MemoryBankBits between a MachineInstr and the Slot mapping in
/// \p SlotToBanks .
void materializeSlots(const SlotMapping &SlotToBanks, MachineBasicBlock &MBB,
const AIEBaseInstrInfo *TII,
const AIEHazardRecognizer &HR) {
LLVM_DEBUG(dbgs() << "\nAssigning Slots to MachineInstr\n");

for (auto &MI : MBB) {
if (!MI.mayLoad() || !TII->isMultiSlotPseudo(MI))
continue;

materializeInstr(MI, SlotToBanks, TII, HR);
}
}

/// Entry point: collect the Slots already fixed in \p MBB , try to assign a
/// Slot to each multi-slot pseudo load and, only if that succeeds,
/// materialize those pseudos. \p MBB is left unchanged when no assignment is
/// found.
void staticallyMaterializeMultiSlotInstructions(MachineBasicBlock &MBB,
                                                const AIEHazardRecognizer &HR) {
  LLVM_DEBUG(dbgs() << "Statically Assigning multi slot pseudos for "
                    << MBB.getName() << "\n");

  const auto *TII = static_cast<const AIEBaseInstrInfo *>(
      MBB.getParent()->getSubtarget().getInstrInfo());

  SlotMapping Mapping = getAssignedSlots(MBB, TII, HR);
  if (assignSlots(Mapping, MBB, TII, HR)) {
    materializeSlots(Mapping, MBB, TII, HR);
    return;
  }

  LLVM_DEBUG(
      dbgs()
      << "Could not find Slot Assignments, Skipping materialization\n");
}
} // namespace llvm::AIE
//
29 changes: 29 additions & 0 deletions llvm/lib/Target/AIE/AIEMultiSlotInstrMaterializer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//===--- AIEMultiSlotInstrMaterializer.h -Multi Slot Instr materializer----===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
//
// \file assigns an issue-slot to multi slot pseudo instructions within a single
// block loop to help loop pipelining.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AIE_AIEMULTISLOTINSTRMATERIALIZER_H
#define LLVM_LIB_TARGET_AIE_AIEMULTISLOTINSTRMATERIALIZER_H

#include "AIEBaseInstrInfo.h"

namespace llvm {
class AIEHazardRecognizer;
}

namespace llvm::AIE {

/// Statically assign and materialize Slots to multi-slot pseudo MachineInstr in
/// \p MBB . \p HR is queried for the MemoryBanks of the instructions. If no
/// Slot assignment is found, \p MBB is left unchanged.
/// FIXME: Currently we are only handling multi-slot memory load pseudos.
void staticallyMaterializeMultiSlotInstructions(MachineBasicBlock &MBB,
                                                const AIEHazardRecognizer &HR);

} // namespace llvm::AIE

#endif // LLVM_LIB_TARGET_AIE_AIEMULTISLOTINSTRMATERIALIZER_H
2 changes: 1 addition & 1 deletion llvm/lib/Target/AIE/AIEPostPipeliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class PostPipelineDumper : public PipelineScheduleVisitor {
// Store the hazard recognizer \p HR and the instruction count \p NInstr in the
// members of the same name; no other work is done here.
PostPipeliner::PostPipeliner(const AIEHazardRecognizer &HR, int NInstr)
: HR(HR), NInstr(NInstr) {}

bool PostPipeliner::canAccept(MachineBasicBlock &LoopBlock) {
bool PostPipeliner::isPostPipelineCandidate(MachineBasicBlock &LoopBlock) {
// We leave the single-block loop criterion to our caller. It is fulfilled
// by being a loopaware scheduling candidate.
// First get us some instruments
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AIE/AIEPostPipeliner.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,10 @@ class PostPipeliner {

/// Check whether this is a suitable loop for the PostPipeliner. It also
/// leaves some useful information.
bool canAccept(MachineBasicBlock &LoopBlock);
bool isPostPipelineCandidate(MachineBasicBlock &LoopBlock);

/// Get a lowerbound for the II required to accommodate the slots.
/// \pre canAccept has returned true
/// \pre isPostPipelineCandidate has returned true
int getResMII(MachineBasicBlock &LoopBlock);

// Schedule using the given InitiationInterval. Return true when successful.
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AIE/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ add_llvm_target(AIECodeGen
AIEMaxLatencyFinder.cpp
AIEMCInstLower.cpp
AIEMIRFormatter.cpp
AIEMultiSlotInstrMaterializer.cpp
AIEPostPipeliner.cpp
AIEPostSelectOptimize.cpp
AIEPseudoBranchExpansion.cpp
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AIE/MCTargetDesc/AIE2MCFormats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,8 @@ const MCFormatDesc *AIE2MCFormats::getMCFormats() const {

const PacketFormats &AIE2MCFormats::getPacketFormats() const { return Formats; }

/// \return the AIE2 load slot kinds, AIE2_SLOT_LDB first and AIE2_SLOT_LDA
/// second.
SmallVector<MCSlotKind, 2> AIE2MCFormats::getLoadSlotKinds() const {
  SmallVector<MCSlotKind, 2> LoadSlots;
  LoadSlots.push_back(AIE2SlotKind::AIE2_SLOT_LDB);
  LoadSlots.push_back(AIE2SlotKind::AIE2_SLOT_LDA);
  return LoadSlots;
}

} // end namespace llvm
Loading