44//  See https://llvm.org/LICENSE.txt for license information.
55//  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66// 
7- //  (c) Copyright 2023-2024  Advanced Micro Devices, Inc. or its affiliates
7+ //  (c) Copyright 2023-2025  Advanced Micro Devices, Inc. or its affiliates
88// 
99// ===----------------------------------------------------------------------===//
1010// 
4545// ===----------------------------------------------------------------------===//
4646
4747#include  " AIE.h" 
48+ #include  " AIEBaseInstrInfo.h" 
49+ #include  " Utils/AIELoopUtils.h" 
4850#include  " llvm/CodeGen/GlobalISel/CSEInfo.h" 
4951#include  " llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" 
5052#include  " llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 
5153#include  " llvm/CodeGen/MachineFunction.h" 
5254#include  " llvm/CodeGen/MachineFunctionPass.h" 
5355#include  " llvm/CodeGen/MachineInstrBuilder.h" 
56+ #include  " llvm/CodeGen/MachineLoopInfo.h" 
5457#include  " llvm/CodeGen/MachineModuleInfo.h" 
5558#include  " llvm/CodeGen/TargetPassConfig.h" 
5659#include  " llvm/InitializePasses.h" 
@@ -72,6 +75,11 @@ static cl::opt<bool> EnableChainsForVectorLdSt(
7275    " aie-chain-addr-vec-ldst" true ),
7376    cl::desc(" Enable ptradd chaining for vector loads and stores." 
7477
78+ cl::opt<int > AddressChainCostLimit (
79+     " aie-chain-cost-limit" 
80+     cl::desc (" Maximum allowed cost for pointer add chains" 1 ),
81+     cl::Hidden);
82+ 
7583namespace  {
7684
7785// / Try and re-order PTR_ADD instructions to maximise the size of constant
@@ -163,6 +171,8 @@ class AIEClusterBaseAddress : public MachineFunctionPass {
163171  void  getAnalysisUsage (AnalysisUsage &AU) const  override  {
164172    AU.addRequired <MachineModuleInfoWrapperPass>();
165173    AU.addRequired <GISelCSEAnalysisWrapperPass>();
174+     AU.addRequired <MachineLoopInfo>();
175+     AU.addPreserved <MachineLoopInfo>();
166176    AU.addRequired <TargetPassConfig>();
167177    AU.setPreservesAll ();
168178  }
@@ -223,10 +233,123 @@ class AIEClusterBaseAddress : public MachineFunctionPass {
223233    if  (Instrs.size () <= 1 )
224234      return  true ;
225235
226-     //  If the base reg is used in any of the successive MBBs, then we don't
227-     //  want to chain the corresponding ptr adds, since this would introduce a
228-     //  COPY and increase reg pressure.
229-     return  isRegUsedInSuccessiveMBBs (&MBB, PtrReg);
236+     //  If the base reg is used in any of the successive MBBs, chaining would
237+     //  introduce a COPY and increase reg pressure. We only skip chaining in
238+     //  this case if it is considered unprofitable.
239+     if  (isRegUsedInSuccessiveMBBs (&MBB, PtrReg) &&
240+         !isChainingProfitable (PtrReg, Instrs, MBB))
241+       return  true ;
242+ 
243+     return  false ;
244+   }
245+ 
246+   //  Decide heuristically if chaining will be profitable
247+   bool  isChainingProfitable (Register PtrReg,
248+                             const  SmallVector<MachineInstr *, 8 > &Instrs,
249+                             MachineBasicBlock &MBB) {
250+     const  TargetSubtargetInfo &ST = MBB.getParent ()->getSubtarget ();
251+     const  AIEBaseInstrInfo *TII =
252+         static_cast <const  AIEBaseInstrInfo *>(ST.getInstrInfo ());
253+     using  OffsetType = std::variant<int64_t , std::string>;
254+     assert (Instrs.size () > 1 );
255+ 
256+     bool  InLoop = true ;
257+     MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
258+     MachineLoop *ToLoop = MLI.getLoopFor (&MBB);
259+     if  (!ToLoop)
260+       InLoop = false ;
261+ 
262+     unsigned  ChainedCost = 0 ;
263+     unsigned  ChainedCostLimit = Instrs.size () / 2 ; //  Experimental threshold
264+ 
265+     if  (AddressChainCostLimit > -1 ) {
266+       ChainedCostLimit = AddressChainCostLimit;
267+     }
268+ 
269+     if  (isRegUsedInSuccessiveMBBs (&MBB, PtrReg)) {
270+       if  (InLoop)
271+         return  false ;   //  A copy in a loop is costly
272+       ChainedCost += 1 ; //  Add cost of resulting copy
273+     }
274+ 
275+     int64_t  ImmediateRangeMax = 0 ;
276+     int64_t  ImmediateRangeMin = 0 ;
277+     bool  ImmediateRangeSet = false ;
278+     int64_t  AccumulatedOffset = 0 ;
279+     int64_t  NewOffset;
280+     SmallVector<OffsetType, 8 > Offsets;
281+ 
282+     for  (unsigned  I = 0 ; I < Instrs.size () - 1 ; I++) {
283+       MachineInstr *MI = Instrs[I];
284+       MachineInstr *MINext = Instrs[I + 1 ];
285+ 
286+       const  Register PtrReg = MI->getOperand (0 ).getReg ();
287+       for  (const  MachineInstr &UseMI : MRI->use_instructions (PtrReg)) {
288+         if  (ImmediateRangeSet)
289+           continue ; //  Check first use only
290+         if  (!UseMI.mayLoadOrStore ())
291+           continue ;
292+         const  LLT MemType = getLoadStoreType (UseMI);
293+         //  Immediate ranges for vectors are sufficient so we
294+         //  assume chaining is always profitable.
295+         if  (MemType.isVector ()) {
296+           return  true ;
297+         } else  {
298+           if  (MemType.getSizeInBits () <= 32 ) {
299+             ImmediateRangeMax = TII->getLoadStorePostIncImmediateRange (MemType)
300+                                     .ImmediateRangeMax ;
301+             ImmediateRangeMin = TII->getLoadStorePostIncImmediateRange (MemType)
302+                                     .ImmediateRangeMin ;
303+             ImmediateRangeSet = true ;
304+           } else  {
305+             llvm_unreachable (
306+                 " unreachable: Unsupported immediate range of scalar size " 
307+           }
308+         }
309+       }
310+ 
311+       //  If the immediate range is not set, the pointers aren't used by any
312+       //  loads and stores, so we return.
313+       if  (!ImmediateRangeSet) {
314+         assert (ImmediateRangeMin == 0  && ImmediateRangeMax == 0 );
315+         return  false ;
316+       }
317+ 
318+       auto  OffsetMI =
319+           getIConstantVRegValWithLookThrough (MI->getOperand (2 ).getReg (), *MRI);
320+       auto  OffsetMINext = getIConstantVRegValWithLookThrough (
321+           MINext->getOperand (2 ).getReg (), *MRI);
322+ 
323+       if  (shouldBreakChain (MI, MINext, OffsetMI, OffsetMINext)) {
324+         ChainedCost++;
325+         AccumulatedOffset = 0 ;
326+         Offsets.push_back (" Break" 
327+         continue ;
328+       }
329+ 
330+       const  int64_t  CurrOffset = OffsetMI->Value .getSExtValue ();
331+       const  int64_t  NextOffset = OffsetMINext->Value .getSExtValue ();
332+ 
333+       assert (I == 0  || !Offsets.empty ());
334+       AccumulatedOffset +=
335+           (I == 0  || (std::holds_alternative<std::string>(Offsets.back ()) &&
336+                       std::get<std::string>(Offsets.back ()) == " Break" 
337+               ? CurrOffset
338+               : NewOffset;
339+       Offsets.push_back (
340+           (I == 0  || (std::holds_alternative<std::string>(Offsets.back ()) &&
341+                       std::get<std::string>(Offsets.back ()) == " Break" 
342+               ? CurrOffset
343+               : OffsetType (NewOffset));
344+ 
345+       NewOffset = NextOffset - AccumulatedOffset;
346+ 
347+       if  (NewOffset < ImmediateRangeMin || NewOffset > ImmediateRangeMax) {
348+         ChainedCost += 1 ; //  Immediate materialization cost
349+       }
350+     }
351+ 
352+     return  ChainedCostLimit > ChainedCost;
230353  }
231354
232355  //  Build a chain (or set of chains) of G_PTR_ADDs. We consider as
0 commit comments