Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AIEX] Delay metalizing of multi-slot until iterative scheduling is converged #182

Open
wants to merge 2 commits into
base: aie-public
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions llvm/lib/Target/AIE/AIE2MultiSlotPseudoInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1, Itinerary = II_

// Pseudo VLD
let hasSideEffects = false, mayLoad = true, mayStore = false in {
let Itinerary = II_VLDA_W in {
let Itinerary = II_VLDB in {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you give more details about this change? Why does this impact the postmisched? It should not be impacted by the resources of the multi-slot itinerary, because when checking or adding to the scoreboard, the final itinerary should be used.

def VLD_idx_pseudo : MultiSlot_Pseudo< (outs mWa:$dst),
(ins eP:$ptr, eDJ:$dj),
"vld_idx_pseudo", "$dst, [$ptr, $dj]",
Expand All @@ -83,7 +83,7 @@ let hasSideEffects = false, mayLoad = true, mayStore = false in {
"vld_idx_imm_imm3x32_pseudo", "$dst, [$ptr, $imm]",
[VLDB_dmw_ldb_ag_idx_imm, VLDA_dmw_lda_w_ag_idx_imm]>;
}
let Itinerary = II_VLDA_POSTINC_W in
let Itinerary = II_VLDB_POSTINC in
let Constraints = "$ptr_out = $ptr" in {
def VLD_pstm_pseudo : MultiSlot_Pseudo< (outs mWa:$dst, eP:$ptr_out),
(ins eP:$ptr, eM:$mod),
Expand All @@ -94,14 +94,14 @@ let hasSideEffects = false, mayLoad = true, mayStore = false in {
"vld_pstm_imm_4x32_pseudo", "$dst, [$ptr], $imm",
[VLDB_dmw_ldb_ag_pstm_nrm_imm, VLDA_dmw_lda_w_ag_pstm_nrm_imm]>;
}
let Itinerary = II_VLDA_2D_W in
let Itinerary = II_VLDB_2D in
let Constraints = "$ptr_out = $ptr" in {
def VLD_2D_pseudo : MultiSlot_Pseudo< (outs mWa:$dst, eP:$ptr_out, eDC:$count_out),
(ins eP:$ptr, eD:$mod),
"vld.2d_pseudo", "$dst, [$ptr], $mod",
[VLDB_2D, VLDA_2D_dmw_lda_w]>, AIE_HasTiedSubregister;
}
let Itinerary = II_VLDA_3D_W in
let Itinerary = II_VLDB_3D in
let Constraints = "$ptr_out = $ptr" in {
def VLD_3D_pseudo : MultiSlot_Pseudo< (outs mWa:$dst, eP:$ptr_out, eDC:$count_lo_out, eDC:$count_hi_out),
(ins eP:$ptr, eDS:$mod),
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AIE/AIEAlternateDescriptors.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ class AIEAlternateDescriptors {
AIEAlternateDescriptors() = default;
~AIEAlternateDescriptors() = default;

// Read-only iteration over the recorded alternate descriptors: begin()/end()
// forward to the begin()/end() of the underlying AlternateDescs map.
MIAltDescsMap::const_iterator begin() const { return AlternateDescs.begin(); }
MIAltDescsMap::const_iterator end() const { return AlternateDescs.end(); }

// Construct an alternate descriptor with the given alternate descriptors.
AIEAlternateDescriptors(const MIAltDescsMap &AltDescs)
: AlternateDescs(AltDescs) {}
Expand All @@ -43,6 +46,10 @@ class AIEAlternateDescriptors {
AlternateDescs[MI] = &TII->get(AltInstOpcode);
}

// Record AltDesc as the alternate descriptor for MI, taking the descriptor
// pointer directly. Any descriptor previously stored for MI is replaced.
void setAlternateDescriptor(MachineInstr *MI, const MCInstrDesc *AltDesc) {
AlternateDescs[MI] = AltDesc;
}

// Return the alternate descriptor for the given multi-opcode instruction.
std::optional<const MCInstrDesc *>
getSelectedDescriptor(MachineInstr *MI) const {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AIE/AIEHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ ScheduleHazardRecognizer::HazardType AIEHazardRecognizer::getHazardType(
bool AIEHazardRecognizer::checkConflict(
const ResourceScoreboard<FuncUnitWrapper> &Scoreboard, MachineInstr &MI,
int DeltaCycles) const {
const MCInstrDesc &Desc = MI.getDesc();
const MCInstrDesc &Desc = *SelectedAltDescs.getDesc(&MI);
const unsigned SchedClass =
TII->getSchedClass(Desc, MI.operands(), MI.getMF()->getRegInfo());
const MemoryBankBits MemoryBanks = getMemoryBanks(&MI);
Expand Down
11 changes: 9 additions & 2 deletions llvm/lib/Target/AIE/AIEInterBlockScheduling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ namespace {
/// into the appropriate blockstate region.
/// TimedRegion is built one bundle at the time
class PipelineExtractor : public PipelineScheduleVisitor {
AIEAlternateDescriptors &AlternateDesc;
BlockState &Loop;
BlockState *Prologue = nullptr;
BlockState *Epilogue = nullptr;
Expand Down Expand Up @@ -334,14 +335,20 @@ class PipelineExtractor : public PipelineScheduleVisitor {
// Prologue and epilogue obtain copies.
MachineInstr *ToBeEmitted =
InLoop ? MI : Loop.TheBlock->getParent()->CloneMachineInstr(MI);
gbossu marked this conversation as resolved.
Show resolved Hide resolved
krishnamtibrewala marked this conversation as resolved.
Show resolved Hide resolved
CurrentBundle.add(ToBeEmitted);
// We force the prologue/epilogue copies to have the same descriptor as the
// original instruction in the steady state.
if (auto AltDesc = AlternateDesc.getSelectedDescriptor(MI))
AlternateDesc.setAlternateDescriptor(ToBeEmitted, AltDesc.value());

CurrentBundle.add(ToBeEmitted, AlternateDesc.getOpcode(MI));
}
void endBundle() override { TimedRegion.emplace_back(CurrentBundle); }

public:
PipelineExtractor(InterBlockScheduling &InterBlock, BlockState &BS,
const AIEBaseInstrInfo &TII)
: Loop(BS), CurrentBundle(TII.getFormatInterface()) {
: AlternateDesc(InterBlock.getSelectedAltDescs()), Loop(BS),
CurrentBundle(TII.getFormatInterface()) {
MachineBasicBlock *LoopBlock = Loop.TheBlock;
for (auto *P : LoopBlock->predecessors()) {
if (P == LoopBlock) {
Expand Down
38 changes: 18 additions & 20 deletions llvm/lib/Target/AIE/AIEMachineScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ static cl::opt<bool> PreSchedFollowsSkipPipeliner(
"aie-presched-follows-skip-pipeliner", cl::init(true),
cl::desc("Don't run the prescheduler if the pipeliner is skipped"));

// Selects where multi-slot instructions are materialized (enabled by default):
// when set, commitBlockSchedule() calls materializeMultiSlotInstrs(); when
// cleared, leaveRegion() calls it instead (only in the Scheduling stage).
static cl::opt<bool> ReAssignMultiSlotInstr(
"aie-reassign-multislot-instr", cl::init(true),
cl::desc("Re-assign multi-slot instructions during iterative scheduling"));

namespace {
// A sentinel value to represent an unknown SUnit.
const constexpr unsigned UnknownSUNum = ~0;
Expand Down Expand Up @@ -277,6 +281,9 @@ void AIEPostRASchedStrategy::initializeBotScoreBoard(ScoreboardTrust Trust) {
/// make sure we always have enough lookahead available. We arrange for that
/// by starting in the earliest possible cycle, -Depth
auto InsertInCycle = [=](MachineInstr &MI, int Cycle) {
assert(BotHazardRec->getSelectedAltDescs().getSelectedDescriptor(&MI) ==
std::nullopt &&
"Instructions opcode are already materialized");
BotHazardRec->emitInScoreboard(
MI.getDesc(), BotHazardRec->getMemoryBanks(&MI), MI.operands(),
MI.getMF()->getRegInfo(), Cycle - Depth);
Expand Down Expand Up @@ -607,6 +614,9 @@ void AIEPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
void AIEPostRASchedStrategy::commitBlockSchedule(MachineBasicBlock *BB) {
auto &BS = InterBlock.getBlockState(BB);

if (ReAssignMultiSlotInstr)
materializeMultiSlotInstrs();

// TODO: Update assert when the fixed instructions become part of the
// scheduling region.
assert(BS.getRegions().empty() ||
Expand Down Expand Up @@ -673,8 +683,8 @@ void AIEPostRASchedStrategy::leaveRegion(const SUnit &ExitSU) {
if (BS.FixPoint.Stage != SchedulingStage::Scheduling) {
return;
}
materializeMultiOpcodeInstrs();
InterBlock.getSelectedAltDescs().clear();
if (!ReAssignMultiSlotInstr)
materializeMultiSlotInstrs();
if (IsBottomRegion) {
// This is the earliest point where we can destroy the recorded
// schedule in iterative scheduling. enterMBB and enterRegion are too early,
Expand All @@ -700,25 +710,13 @@ void AIEPostRASchedStrategy::leaveRegion(const SUnit &ExitSU) {
DEBUG_BLOCKS(dbgs() << " << leaveRegion\n");
}

void AIEPostRASchedStrategy::materializeMultiOpcodeInstrs() {
const TargetInstrInfo *TII = getTII(CurMBB);
const AIEHazardRecognizer &TopHazardRec = *getAIEHazardRecognizer(Top);
const AIEHazardRecognizer &BotHazardRec = *getAIEHazardRecognizer(Bot);

auto MaterializePseudo = [&TII](MachineInstr &MI,
const AIEHazardRecognizer &HazardRec) {
// Materialize instructions with multiple opcode options
if (std::optional<unsigned> AltOpcode =
HazardRec.getSelectedAltDescs().getSelectedOpcode(&MI)) {
MI.setDesc(TII->get(*AltOpcode));
}
};
void AIEPostRASchedStrategy::materializeMultiSlotInstrs() {
for (auto &[MI, Desc] : make_range(InterBlock.getSelectedAltDescs().begin(),
InterBlock.getSelectedAltDescs().end())) {
MI->setDesc(*Desc);
}

assert(DAG->top() == DAG->bottom());
for (MachineInstr &MI : make_range(DAG->begin(), DAG->top()))
MaterializePseudo(MI, TopHazardRec);
for (MachineInstr &MI : make_range(DAG->bottom(), DAG->end()))
MaterializePseudo(MI, BotHazardRec);
InterBlock.getSelectedAltDescs().clear();
}

const SUnit &getBundledSUnit(const ScheduleDAGMI *DAG, MachineInstr *MI) {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AIE/AIEMachineScheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ class AIEPostRASchedStrategy : public PostGenericScheduler {

/// Materialize multi-slot ("multi-opcode") instructions into the option that
/// was selected at schedule time. See AIEHazardRecognizer::getSelectedAltOpcode().
void materializeMultiOpcodeInstrs();
void materializeMultiSlotInstrs();

/// Returns true if, when "concatenated", the Top and Bot zone have resource
/// conflicts or timing issues.
Expand Down
16 changes: 9 additions & 7 deletions llvm/lib/Target/AIE/AIEPostPipeliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,16 +115,18 @@ bool PostPipeliner::canAccept(MachineBasicBlock &LoopBlock) {
return true;
}

static SlotCounts getSlotCounts(MachineInstr &MI, const AIEBaseInstrInfo *TII) {
auto *SlotInfo = TII->getSlotInfo(TII->getSlotKind(MI.getOpcode()));
static SlotCounts getSlotCounts(MachineInstr &MI, const AIEBaseInstrInfo *TII,
const AIEHazardRecognizer &HR) {
const unsigned Opcode = HR.getSelectedAltDescs().getOpcode(&MI);
auto *SlotInfo = TII->getSlotInfo(TII->getSlotKind(Opcode));
return SlotInfo ? SlotInfo->getSlotSet() : 0;
}

int PostPipeliner::getResMII(MachineBasicBlock &LoopBlock) {
// Add up all slot requirements and return the maximum slot count
SlotCounts Counts;
for (auto &MI : LoopBlock) {
Counts += getSlotCounts(MI, TII);
Counts += getSlotCounts(MI, TII, HR);
}
int MII = Counts.max();
LLVM_DEBUG(dbgs() << "PostPipeliner: ResMII=" << MII << "\n");
Expand Down Expand Up @@ -221,7 +223,7 @@ void PostPipeliner::computeForward() {
const int NewEarliest = Me.Earliest + Dep.getSignedLatency();
SInfo.Earliest = std::max(SInfo.Earliest, NewEarliest);
}
Me.Slots = getSlotCounts(*SU.getInstr(), TII);
Me.Slots = getSlotCounts(*SU.getInstr(), TII, HR);
}
}

Expand Down Expand Up @@ -460,9 +462,9 @@ bool PostPipeliner::scheduleFirstIteration(PostPipelinerStrategy &Strategy) {
if (N > 0 && HR.checkConflict(Scoreboard, *MI, Cycle)) {
return false;
}

HR.emitInScoreboard(Scoreboard, MI->getDesc(), MemoryBanks,
MI->operands(), MI->getMF()->getRegInfo(), Cycle);
const MCInstrDesc &Desc = *HR.getSelectedAltDescs().getDesc(MI);
HR.emitInScoreboard(Scoreboard, Desc, MemoryBanks, MI->operands(),
MI->getMF()->getRegInfo(), Cycle);
Cycle += II;
}

Expand Down
Loading