-
Notifications
You must be signed in to change notification settings - Fork 25
Stuckmann.multi.slot.pseudos #304
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
#include "AIELiveRegs.h" | ||
#include "AIEMachineScheduler.h" | ||
#include "AIEMaxLatencyFinder.h" | ||
#include "AIEMultiSlotInstrMaterializer.h" | ||
#include "Utils/AIELoopUtils.h" | ||
#include "llvm/ADT/PostOrderIterator.h" | ||
#include "llvm/CodeGen/MachineBasicBlock.h" | ||
|
@@ -60,6 +61,11 @@ static cl::opt<int> PostPipelinerMaxII( | |
"aie-postpipeliner-maxii", cl::init(40), | ||
cl::desc("[AIE] Maximum II to be tried in the post-ra pipeliner")); | ||
|
||
// Hidden, off-by-default switch; when set, multi-slot pseudo instructions in
// post-pipeliner candidate loops are materialized statically.
static cl::opt<bool> EnableMultiSlotInstrMaterialization(
    "aie-multi-slot-pseudo-instr",
    cl::desc("Statically materialize Multi-Slot Pseudo Instructions in "
             "loops."),
    cl::init(false), cl::Hidden);
Review comment: Curious: why not enable this by default?
Reply: I had regressions with conv2d_1 on aie2 because the post-pipeliner was tripping up; I will check the QoR for aie2p to see whether this still remains.
Reply: But maybe #359 might fix the issue. I'll run QoR with it merged. |
||
|
||
namespace llvm::AIE { | ||
|
||
void dumpInterBlock(const InterBlockEdges &Edges) { | ||
|
@@ -586,7 +592,7 @@ SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) { | |
// But first try SWP | ||
if (BS.getRegions().size() == 1) { | ||
auto &PostSWP = BS.getPostSWP(); | ||
if (PostSWP.canAccept(*BS.TheBlock)) { | ||
if (PostSWP.isPostPipelineCandidate(*BS.TheBlock)) { | ||
BS.FixPoint.II = PostSWP.getResMII(*BS.TheBlock); | ||
return BS.FixPoint.Stage = SchedulingStage::Pipelining; | ||
} | ||
|
@@ -1161,6 +1167,11 @@ void BlockState::initInterBlock(const MachineSchedContext &Context, | |
// Don't worry, this just constructs a mostly empty container class | ||
auto NumInstrs = getTop().getFreeInstructions().size(); | ||
PostSWP = std::make_unique<PostPipeliner>(HR, NumInstrs); | ||
|
||
// perform static assignment of multi-slot pseudos | ||
if (EnableMultiSlotInstrMaterialization && | ||
PostSWP->isPostPipelineCandidate(*TheBlock)) | ||
staticallyMaterializeMultiSlotInstructions(*TheBlock, HR); | ||
} | ||
|
||
// We are called just after the first round of scheduling a block. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,229 @@ | ||
//===--- AIEMultiSlotInstrMaterializer.cpp - -Multi Slot Instr materializer===// | ||
// | ||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// \file assigns an issue slot to multi-slot pseudo instructions within a single | ||
// block loop to help loop pipelining. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "AIEMultiSlotInstrMaterializer.h" | ||
#include "AIEHazardRecognizer.h" | ||
|
||
using namespace llvm; | ||
|
||
#define DEBUG_TYPE "aie-multi-slot-pseudo" | ||
|
||
namespace llvm::AIE { | ||
|
||
class SlotMapping { | ||
public: | ||
SlotMapping(const AIEBaseInstrInfo *TII) : TII(TII) {} | ||
|
||
/// update \p MemBankBits assigned to \p Slot . Create the Slot mapping, if | ||
/// necessary. | ||
void update(const MCSlotKind &Slot, const MemoryBankBits MemBankBits) { | ||
SlotToBanks[Slot] |= MemBankBits; | ||
} | ||
|
||
/// \return first Slot where MemoryBankBits overlap with \p MemBankBits . | ||
std::optional<MCSlotKind> | ||
getAssignedSlot(const MemoryBankBits MemBankBits) const { | ||
auto IT = | ||
find_if(SlotToBanks, | ||
[MemBankBits]( | ||
const std::pair<MCSlotKind, MemoryBankBits> &SlotBankPair) { | ||
auto [Slot, Banks] = SlotBankPair; | ||
return (Banks & MemBankBits) != 0; | ||
}); | ||
|
||
if (IT == SlotToBanks.end()) | ||
return {}; | ||
|
||
const auto Slot = IT->first; | ||
return Slot; | ||
} | ||
|
||
/// \return whether no MemoryBank has multiple Slots assigned to it in the | ||
/// current mapping. | ||
bool hasUniqueSlotForBank() const { | ||
MemoryBankBits AccumulatedBanks = {}; | ||
for (auto &[Slot, Banks] : SlotToBanks) { | ||
if (Banks & AccumulatedBanks) { | ||
LLVM_DEBUG(dbgs() << "Conflict detected at Slot " << Slot << "\n"); | ||
return false; | ||
} | ||
AccumulatedBanks |= Banks; | ||
} | ||
return true; | ||
F-Stuckmann marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
/// \return whether a Slot can be assigned to \b MI and assign it in the | ||
/// mapping. | ||
bool assignSlot(const MachineInstr &MI, const AIEHazardRecognizer &HR) { | ||
auto MemBankBits = HR.getMemoryBanks(&MI); | ||
LLVM_DEBUG(dbgs() << "Memory Bank: " << MemBankBits << " " << MI); | ||
if (!MemBankBits) { | ||
LLVM_DEBUG(dbgs() << "Warning: No MemoryBanks assigned to " << MI); | ||
return false; | ||
} | ||
|
||
std::optional<MCSlotKind> SelectedSlot = getAssignedSlot(MemBankBits); | ||
if (!SelectedSlot) | ||
SelectedSlot = getUnusedLoadSlot(); | ||
if (!SelectedSlot) { | ||
LLVM_DEBUG(dbgs() << "Reassigning existing Slot to MemoryBankBits " | ||
<< MemBankBits << "\n"); | ||
SelectedSlot = getLeastRecentlyUsedSlot(); | ||
} | ||
|
||
update(*SelectedSlot, MemBankBits); | ||
|
||
return true; | ||
} | ||
|
||
private: | ||
/// Mapping between a Slot and the MemoryBanks that occupy the Slot. | ||
std::map<MCSlotKind, MemoryBankBits> SlotToBanks; | ||
/// If Slots have to be reassigned (because every Slot has already been | ||
/// assigned to a Memory Bank), use an Index to cycle through already | ||
/// used Slots. | ||
unsigned ReassignIndex = 0; | ||
const AIEBaseInstrInfo *TII; | ||
|
||
/// \return an unused Slot from the mapping. | ||
std::optional<MCSlotKind> getUnusedLoadSlot() const { | ||
const SmallVector<MCSlotKind, 2> LoadSlots = | ||
TII->getFormatInterface()->getLoadSlotKinds(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: just Nit2: Maybe also rename the function to It might actually just be better to iterate over the slots of the multi-slot instruction directly? I'm not sure why we need |
||
|
||
for (const auto &Slot : LoadSlots) { | ||
|
||
// check if Slot is already used in SlotMemBankBitsMap | ||
auto FoundSlot = SlotToBanks.find(Slot); | ||
if (FoundSlot != SlotToBanks.end()) | ||
continue; | ||
|
||
LLVM_DEBUG(dbgs() << " Found Unused Slot " << Slot << "\n"); | ||
return Slot; | ||
} | ||
|
||
// no slots were assigned yet, assign first Slot. | ||
// FIXME: use a heuristic that takes Slots utilization and | ||
// utilization of MemoryBanks into consideration. | ||
return LoadSlots[0]; | ||
} | ||
|
||
/// Cycle through load Slots and \return an already used Slot | ||
/// FIXME: use a heuristic that takes utilization into account, instead of | ||
/// blindly cycling through the Slots. | ||
std::optional<MCSlotKind> getLeastRecentlyUsedSlot() { | ||
const auto AvailableSlots = TII->getFormatInterface()->getLoadSlotKinds(); | ||
|
||
if (ReassignIndex >= AvailableSlots.size()) | ||
ReassignIndex = 0; | ||
|
||
return AvailableSlots[ReassignIndex++]; | ||
} | ||
}; | ||
|
||
/// Record the Slot and MemoryBanks of every load in \p MBB that is not a
/// multi-slot pseudo.
/// \return the resulting Slot to MemoryBank mapping.
SlotMapping getAssignedSlots(const MachineBasicBlock &MBB,
                             const AIEBaseInstrInfo *TII,
                             const AIEHazardRecognizer &HR) {
  LLVM_DEBUG(dbgs() << "Collecting any already materialized Slot to MemoryBank "
                       "assignments\n");

  SlotMapping Mapping(TII);
  for (const auto &Instr : MBB) {
    // Only loads that are not multi-slot pseudos contribute here.
    const bool IsMaterializedLoad =
        Instr.mayLoad() && !TII->isMultiSlotPseudo(Instr);
    if (!IsMaterializedLoad)
      continue;

    const auto FixedSlot = TII->getSlotKind(Instr.getOpcode());
    const MemoryBankBits Banks = HR.getMemoryBanks(&Instr);
    LLVM_DEBUG(dbgs() << "Slot: " << FixedSlot << " MemoryBank: " << Banks
                      << " on " << Instr);

    Mapping.update(FixedSlot, Banks);
  }
  return Mapping;
}
|
||
/// Choose a Slot for every multi-slot pseudo load in \p MBB , based on the
/// MemoryBankBits attached to the MachineInstr. Mappings already present in
/// \p SlotToBanks are reused and extended.
/// \return true if every such instruction got a Slot and the final mapping
/// passes hasUniqueSlotForBank(), false otherwise.
bool assignSlots(SlotMapping &SlotToBanks, const MachineBasicBlock &MBB,
                 const AIEBaseInstrInfo *TII, const AIEHazardRecognizer &HR) {
  for (const auto &Instr : MBB) {
    const bool IsMultiSlotLoad =
        Instr.mayLoad() && TII->isMultiSlotPseudo(Instr);
    // Give up at the first instruction that cannot be assigned.
    if (IsMultiSlotLoad && !SlotToBanks.assignSlot(Instr, HR))
      return false;
  }

  return SlotToBanks.hasUniqueSlotForBank();
}
|
||
/// Rewrite \p MI to the opcode of the Slot that \p SlotToBanks associates
/// with the MemoryBankBits of \p MI . The lookups are expected to succeed and
/// are checked by assertions.
void materializeInstr(MachineInstr &MI, const SlotMapping &SlotToBanks,
                      const AIEBaseInstrInfo *TII,
                      const AIEHazardRecognizer &HR) {
  const auto Banks = HR.getMemoryBanks(&MI);
  assert(Banks && "No MemoryBanks attached to MachineInstr.");

  const auto AssignedSlot = SlotToBanks.getAssignedSlot(Banks);
  assert(AssignedSlot && "Could not find Slot for MemoryBank!");

  const auto SlotOpcode = TII->getSlotOpcode(*AssignedSlot, MI);
  assert(SlotOpcode && "Failed to retrieve a valid Opcode");

  // Swap in the descriptor of the slot-specific opcode.
  const auto &NewDesc = TII->get(*SlotOpcode);
  MI.setDesc(NewDesc);
  LLVM_DEBUG(dbgs() << "Assigned " << *AssignedSlot << " to " << MI);
}
|
||
/// Materialize multi-slot pseudo instructions in \p MBB according to | ||
/// overlapping MemoryBankBits between a MachineInstr and the Slot mapping in | ||
/// \p SlotToBanks . | ||
void materializeSlots(const SlotMapping &SlotToBanks, MachineBasicBlock &MBB, | ||
const AIEBaseInstrInfo *TII, | ||
const AIEHazardRecognizer &HR) { | ||
LLVM_DEBUG(dbgs() << "\nAssigning Slots to MachineInstr\n"); | ||
|
||
for (auto &MI : MBB) { | ||
if (!MI.mayLoad() || !TII->isMultiSlotPseudo(MI)) | ||
continue; | ||
|
||
materializeInstr(MI, SlotToBanks, TII, HR); | ||
} | ||
} | ||
|
||
/// Entry point: collect the Slots of already materialized loads in \p MBB ,
/// try to assign a Slot to every multi-slot pseudo load and, only if that
/// succeeds, rewrite those pseudos. On failure \p MBB is left untouched.
void staticallyMaterializeMultiSlotInstructions(MachineBasicBlock &MBB,
                                                const AIEHazardRecognizer &HR) {
  LLVM_DEBUG(dbgs() << "Statically Assigning multi slot pseudos for "
                    << MBB.getName() << "\n");

  const auto &Subtarget = MBB.getParent()->getSubtarget();
  const auto *TII =
      static_cast<const AIEBaseInstrInfo *>(Subtarget.getInstrInfo());

  auto SlotToBanks = getAssignedSlots(MBB, TII, HR);

  const bool FoundAssignment = assignSlots(SlotToBanks, MBB, TII, HR);
  if (FoundAssignment) {
    materializeSlots(SlotToBanks, MBB, TII, HR);
    return;
  }

  LLVM_DEBUG(
      dbgs()
      << "Could not find Slot Assignments, Skipping materialization\n");
}
} // namespace llvm::AIE | ||
// |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
//===--- AIEMultiSlotInstrMaterializer.h -Multi Slot Instr materializer----===// | ||
// | ||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// \file assigns an issue-slot to multi slot pseudo instructions within a single | ||
// block loop to help loop pipelining. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// Include guard: keeps the declarations below from being seen twice when the
// header is included more than once in a translation unit.
#ifndef LLVM_LIB_TARGET_AIE_AIEMULTISLOTINSTRMATERIALIZER_H
#define LLVM_LIB_TARGET_AIE_AIEMULTISLOTINSTRMATERIALIZER_H

#include "AIEBaseInstrInfo.h"

namespace llvm {
// Only used by reference below, so a forward declaration is sufficient.
class AIEHazardRecognizer;
} // namespace llvm

namespace llvm::AIE {

/// Statically assign and materialize Slots to multi-slot pseudo MachineInstr in
/// \p MBB .
/// \param MBB block whose multi-slot pseudos are rewritten in place.
/// \param HR hazard recognizer, passed by const reference.
/// FIXME: Currently we are only handling multi-slot memory load pseudos.
void staticallyMaterializeMultiSlotInstructions(MachineBasicBlock &MBB,
                                                const AIEHazardRecognizer &HR);

} // namespace llvm::AIE

#endif // LLVM_LIB_TARGET_AIE_AIEMULTISLOTINSTRMATERIALIZER_H
Uh oh!
There was an error while loading. Please reload this page.