Skip to content

Commit 5230bcf

Browse files
committed
[AIE2P] Allow more address chaining and add a profitability check heuristic
This allows chaining when the base pointer is used in other basic blocks, but only when it is considered profitable: - Chaining is not done inside a loop, as the resulting copy would be more costly. - The cost of chaining is incremented for each offset falling outside the load/store immediate ranges. - An experimental threshold is used to decide whether chaining is profitable, based on the compute cost (e.g., half the number of pointer adds to be chained).
1 parent 4d2b854 commit 5230bcf

File tree

7 files changed

+358
-5
lines changed

7 files changed

+358
-5
lines changed

llvm/lib/Target/AIE/AIE2InstrInfo.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1650,3 +1650,15 @@ unsigned AIE2InstrInfo::getBasicVectorBitSize() const { return 512; }
16501650
unsigned AIE2InstrInfo::getMaxVectorBitSize() const { return 1024; }
16511651

16521652
unsigned AIE2InstrInfo::getMaxSupportedLdStIncSize() const { return 512; }
1653+
1654+
/// Return the {max, min} immediate bounds AIE2 reports for a load/store of
/// memory type \p MemType. Only sizes up to 32 bits are handled; anything
/// larger is treated as unreachable.
AIEBaseInstrInfo::ImmediateRangeBounds
AIE2InstrInfo::getLoadStorePostIncImmediateRange(LLT MemType) const {
  const auto SizeInBits = MemType.getSizeInBits();

  // 8-bit and 16-bit accesses report the same bounds.
  // NOTE(review): the AIE2P override uses distinct 16-bit bounds; confirm that
  // sharing the 8-bit bounds here is intended.
  if (SizeInBits == 8 || SizeInBits == 16)
    return {7, -8};

  // Every other size up to and including 32 bits.
  if (SizeInBits <= 32)
    return {252, -256};

  llvm_unreachable("Unsupported");
}

llvm/lib/Target/AIE/AIE2InstrInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ class AIE2InstrInfo : public AIE2GenInstrInfo {
7575
unsigned getMaxVectorBitSize() const override;
7676
unsigned getMaxSupportedLdStIncSize() const override;
7777

78+
ImmediateRangeBounds
79+
getLoadStorePostIncImmediateRange(LLT MemType) const override;
80+
7881
virtual unsigned
7982
getNumReservedDelaySlots(const MachineInstr &MI) const override;
8083

llvm/lib/Target/AIE/AIEBaseInstrInfo.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -638,6 +638,16 @@ struct AIEBaseInstrInfo : public TargetInstrInfo {
638638
llvm_unreachable("Target didn't implement getMaxSupportedLdStIncSize!");
639639
}
640640

641+
/// Upper and lower bound of an immediate range, as returned by
/// getLoadStorePostIncImmediateRange(). Target overrides brace-initialize
/// this as {max, min}, so the member order must not change.
struct ImmediateRangeBounds {
642+
// Largest immediate considered in range.
int64_t ImmediateRangeMax;
643+
// Smallest immediate considered in range.
int64_t ImmediateRangeMin;
644+
};
645+
/// Return the immediate range bounds for a load/store of memory type
/// \p MemType. This base version has no generic answer: it is unreachable,
/// so every target that is queried must override it.
virtual ImmediateRangeBounds
646+
getLoadStorePostIncImmediateRange(LLT MemType) const {
647+
llvm_unreachable(
648+
"Target didn't implement getLoadStorePostIncImmediateRange!");
649+
}
650+
641651
/// Abstract operations to help the decoding of complex operations.
642652
struct AbstractOp {
643653
enum class OperationType : unsigned {

llvm/lib/Target/AIE/AIEClusterBaseAddress.cpp

Lines changed: 128 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
7-
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
7+
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
88
//
99
//===----------------------------------------------------------------------===//
1010
//
@@ -45,12 +45,15 @@
4545
//===----------------------------------------------------------------------===//
4646

4747
#include "AIE.h"
48+
#include "AIEBaseInstrInfo.h"
49+
#include "Utils/AIELoopUtils.h"
4850
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
4951
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
5052
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
5153
#include "llvm/CodeGen/MachineFunction.h"
5254
#include "llvm/CodeGen/MachineFunctionPass.h"
5355
#include "llvm/CodeGen/MachineInstrBuilder.h"
56+
#include "llvm/CodeGen/MachineLoopInfo.h"
5457
#include "llvm/CodeGen/MachineModuleInfo.h"
5558
#include "llvm/CodeGen/TargetPassConfig.h"
5659
#include "llvm/InitializePasses.h"
@@ -72,6 +75,11 @@ static cl::opt<bool> EnableChainsForVectorLdSt(
7275
"aie-chain-addr-vec-ldst", cl::Hidden, cl::init(true),
7376
cl::desc("Enable ptradd chaining for vector loads and stores."));
7477

78+
// Command-line override for the cost limit used by the chaining profitability
// heuristic. The default of -1 keeps the heuristic's own threshold; any value
// greater than -1 replaces it. Chaining is accepted only when the computed
// cost is strictly below the limit.
// File-local (static), like the other options of this pass.
static cl::opt<int> AddressChainCostLimit(
    "aie-chain-cost-limit",
    cl::desc("Maximum allowed cost for pointer add chains"), cl::init(-1),
    cl::Hidden);
82+
7583
namespace {
7684

7785
/// Try and re-order PTR_ADD instructions to maximise the size of constant
@@ -163,6 +171,8 @@ class AIEClusterBaseAddress : public MachineFunctionPass {
163171
/// Declare the analyses this pass requires and preserves.
void getAnalysisUsage(AnalysisUsage &AU) const override {
  AU.addRequired<MachineModuleInfoWrapperPass>();
  AU.addRequired<GISelCSEAnalysisWrapperPass>();
  // Loop information is queried by the chaining profitability heuristic.
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<TargetPassConfig>();
  // setPreservesAll() already covers MachineLoopInfo, so no separate
  // addPreserved<MachineLoopInfo>() is needed.
  AU.setPreservesAll();
}
@@ -223,10 +233,123 @@ class AIEClusterBaseAddress : public MachineFunctionPass {
223233
if (Instrs.size() <= 1)
224234
return true;
225235

226-
// If the base reg is used in any of the successive MBBs, then we don't
227-
// want to chain the corresponding ptr adds, since this would introduce a
228-
// COPY and increase reg pressure.
229-
return isRegUsedInSuccessiveMBBs(&MBB, PtrReg);
236+
// If the base reg is used in any of the successive MBBs, chaining would introduce a
237+
// COPY and increase reg pressure. We only skip chaining in this case if it
238+
// is considered unprofitable.
239+
if (isRegUsedInSuccessiveMBBs(&MBB, PtrReg) &&
240+
!isChainingProfitable(PtrReg, Instrs, MBB))
241+
return true;
242+
243+
return false;
244+
}
245+
246+
// Decide heuristically if chaining will be profitable
247+
bool isChainingProfitable(Register PtrReg,
248+
const SmallVector<MachineInstr *, 8> &Instrs,
249+
MachineBasicBlock &MBB) {
250+
const TargetSubtargetInfo &ST = MBB.getParent()->getSubtarget();
251+
const AIEBaseInstrInfo *TII =
252+
static_cast<const AIEBaseInstrInfo *>(ST.getInstrInfo());
253+
using OffsetType = std::variant<int64_t, std::string>;
254+
assert(Instrs.size() > 1);
255+
256+
bool InLoop = true;
257+
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
258+
MachineLoop *ToLoop = MLI.getLoopFor(&MBB);
259+
if (!ToLoop)
260+
InLoop = false;
261+
262+
unsigned ChainedCost = 0;
263+
unsigned ChainedCostLimit = Instrs.size() / 2; // Experimental threshold
264+
265+
if (AddressChainCostLimit > -1) {
266+
ChainedCostLimit = AddressChainCostLimit;
267+
}
268+
269+
if (isRegUsedInSuccessiveMBBs(&MBB, PtrReg)) {
270+
if (InLoop)
271+
return false; // A copy in a loop is costly
272+
ChainedCost += 1; // Add cost of resulting copy
273+
}
274+
275+
int64_t ImmediateRangeMax = 0;
276+
int64_t ImmediateRangeMin = 0;
277+
bool ImmediateRangeSet = false;
278+
int64_t AccumulatedOffset = 0;
279+
int64_t NewOffset;
280+
SmallVector<OffsetType, 8> Offsets;
281+
282+
for (unsigned I = 0; I < Instrs.size() - 1; I++) {
283+
MachineInstr *MI = Instrs[I];
284+
MachineInstr *MINext = Instrs[I + 1];
285+
286+
const Register PtrReg = MI->getOperand(0).getReg();
287+
for (const MachineInstr &UseMI : MRI->use_instructions(PtrReg)) {
288+
if (ImmediateRangeSet)
289+
continue; // Check first use only
290+
if (!UseMI.mayLoadOrStore())
291+
continue;
292+
const LLT MemType = getLoadStoreType(UseMI);
293+
// Immediate ranges for vectors are sufficient so we
294+
// assume chaining is always profitable.
295+
if (MemType.isVector()) {
296+
return true;
297+
} else {
298+
if (MemType.getSizeInBits() <= 32) {
299+
ImmediateRangeMax = TII->getLoadStorePostIncImmediateRange(MemType)
300+
.ImmediateRangeMax;
301+
ImmediateRangeMin = TII->getLoadStorePostIncImmediateRange(MemType)
302+
.ImmediateRangeMin;
303+
ImmediateRangeSet = true;
304+
} else {
305+
llvm_unreachable(
306+
"unreachable: Unsupported immediate range of scalar size ");
307+
}
308+
}
309+
}
310+
311+
// If the immediate range is not set, the pointers aren't used by any
312+
// loads and stores, so we return.
313+
if (!ImmediateRangeSet) {
314+
assert(ImmediateRangeMin == 0 && ImmediateRangeMax == 0);
315+
return false;
316+
}
317+
318+
auto OffsetMI =
319+
getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), *MRI);
320+
auto OffsetMINext = getIConstantVRegValWithLookThrough(
321+
MINext->getOperand(2).getReg(), *MRI);
322+
323+
if (shouldBreakChain(MI, MINext, OffsetMI, OffsetMINext)) {
324+
ChainedCost++;
325+
AccumulatedOffset = 0;
326+
Offsets.push_back("Break");
327+
continue;
328+
}
329+
330+
const int64_t CurrOffset = OffsetMI->Value.getSExtValue();
331+
const int64_t NextOffset = OffsetMINext->Value.getSExtValue();
332+
333+
assert(I == 0 || !Offsets.empty());
334+
AccumulatedOffset +=
335+
(I == 0 || (std::holds_alternative<std::string>(Offsets.back()) &&
336+
std::get<std::string>(Offsets.back()) == "Break"))
337+
? CurrOffset
338+
: NewOffset;
339+
Offsets.push_back(
340+
(I == 0 || (std::holds_alternative<std::string>(Offsets.back()) &&
341+
std::get<std::string>(Offsets.back()) == "Break"))
342+
? CurrOffset
343+
: OffsetType(NewOffset));
344+
345+
NewOffset = NextOffset - AccumulatedOffset;
346+
347+
if (NewOffset < ImmediateRangeMin || NewOffset > ImmediateRangeMax) {
348+
ChainedCost += 1; // Immediate materialization cost
349+
}
350+
}
351+
352+
return ChainedCostLimit > ChainedCost;
230353
}
231354

232355
// Build a chain (or set of chains) of G_PTR_ADDs. We consider as

llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1812,6 +1812,18 @@ unsigned AIE2PInstrInfo::getMaxVectorBitSize() const { return 2048; }
18121812

18131813
unsigned AIE2PInstrInfo::getMaxSupportedLdStIncSize() const { return 2048; }
18141814

1815+
/// Return the {max, min} immediate bounds AIE2P reports for a load/store of
/// memory type \p MemType. Only sizes up to 32 bits are handled; anything
/// larger is treated as unreachable.
AIEBaseInstrInfo::ImmediateRangeBounds
AIE2PInstrInfo::getLoadStorePostIncImmediateRange(LLT MemType) const {
  const auto SizeInBits = MemType.getSizeInBits();

  if (SizeInBits == 8)
    return {7, -8};

  if (SizeInBits == 16)
    return {14, -16};

  // Every other size up to and including 32 bits.
  if (SizeInBits <= 32)
    return {28, -32};

  llvm_unreachable("Unsupported");
}
1826+
18151827
using AbstractOp = AIEBaseInstrInfo::AbstractOp;
18161828

18171829
std::optional<const AbstractOp>

llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ class AIE2PInstrInfo : public AIE2PGenInstrInfo {
7777
unsigned getMaxVectorBitSize() const override;
7878
unsigned getMaxSupportedLdStIncSize() const override;
7979

80+
ImmediateRangeBounds
81+
getLoadStorePostIncImmediateRange(LLT MemType) const override;
82+
8083
virtual unsigned
8184
getNumReservedDelaySlots(const MachineInstr &MI) const override;
8285

0 commit comments

Comments
 (0)