Skip to content

Commit c886438

Browse files
committed
[SelectionDAG] Optimize unaligned load stores to realign after offset
Summary: For loads/stores whose address is a GEP with a constant offset:
- Replace MPI(gep, 0) with MPI(base_ptr, const_offset).
- Preserve the base pointer's stronger alignment.
- Optimize expandUnalignedLoad/expandUnalignedStore.

Issue: #143215
1 parent 8c18de4 commit c886438

File tree

8 files changed

+215
-92
lines changed

8 files changed

+215
-92
lines changed

llvm/include/llvm/CodeGen/MachineMemOperand.h

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,27 +50,28 @@ struct MachinePointerInfo {
5050

5151
uint8_t StackID;
5252

53+
const Value *OrgV;
54+
5355
explicit MachinePointerInfo(const Value *v, int64_t offset = 0,
54-
uint8_t ID = 0)
55-
: V(v), Offset(offset), StackID(ID) {
56+
uint8_t ID = 0, const Value *orgv = nullptr)
57+
: V(v), Offset(offset), StackID(ID), OrgV(orgv) {
5658
AddrSpace = v ? v->getType()->getPointerAddressSpace() : 0;
5759
}
5860

5961
explicit MachinePointerInfo(const PseudoSourceValue *v, int64_t offset = 0,
6062
uint8_t ID = 0)
61-
: V(v), Offset(offset), StackID(ID) {
63+
: V(v), Offset(offset), StackID(ID), OrgV((const Value *)nullptr) {
6264
AddrSpace = v ? v->getAddressSpace() : 0;
6365
}
6466

6567
explicit MachinePointerInfo(unsigned AddressSpace = 0, int64_t offset = 0)
6668
: V((const Value *)nullptr), Offset(offset), AddrSpace(AddressSpace),
67-
StackID(0) {}
69+
StackID(0), OrgV((const Value *)nullptr) {}
6870

6971
explicit MachinePointerInfo(
70-
PointerUnion<const Value *, const PseudoSourceValue *> v,
71-
int64_t offset = 0,
72-
uint8_t ID = 0)
73-
: V(v), Offset(offset), StackID(ID) {
72+
PointerUnion<const Value *, const PseudoSourceValue *> v,
73+
int64_t offset = 0, uint8_t ID = 0)
74+
: V(v), Offset(offset), StackID(ID), OrgV((const Value *)nullptr) {
7475
if (V) {
7576
if (const auto *ValPtr = dyn_cast_if_present<const Value *>(V))
7677
AddrSpace = ValPtr->getType()->getPointerAddressSpace();
@@ -83,7 +84,8 @@ struct MachinePointerInfo {
8384
if (V.isNull())
8485
return MachinePointerInfo(AddrSpace, Offset + O);
8586
if (isa<const Value *>(V))
86-
return MachinePointerInfo(cast<const Value *>(V), Offset + O, StackID);
87+
return MachinePointerInfo(cast<const Value *>(V), Offset + O, StackID,
88+
OrgV);
8789
return MachinePointerInfo(cast<const PseudoSourceValue *>(V), Offset + O,
8890
StackID);
8991
}

llvm/lib/CodeGen/MachineOperand.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1050,7 +1050,7 @@ bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
10501050

10511051
return isDereferenceableAndAlignedPointer(
10521052
BasePtr, Align(1), APInt(DL.getPointerSizeInBits(), Offset + Size), DL,
1053-
dyn_cast<Instruction>(BasePtr));
1053+
dyn_cast<Instruction>(OrgV ? OrgV : BasePtr));
10541054
}
10551055

10561056
/// getConstantPool - Return a MachinePointerInfo record that refers to the

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 58 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4562,10 +4562,41 @@ static std::optional<ConstantRange> getRange(const Instruction &I) {
45624562
return std::nullopt;
45634563
}
45644564

4565+
static void tryToImproveAlign(const DataLayout &DL, Type *Ty, Align &Alignment,
4566+
const Value *&PtrV, const Value *&CxtI,
4567+
int64_t &Offset) {
4568+
Align PrefAlign = DL.getPrefTypeAlign(Ty);
4569+
if (auto *GEP = dyn_cast<GetElementPtrInst>(PtrV);
4570+
GEP && PrefAlign > Alignment && PrefAlign.previous() > Alignment) {
4571+
const Value *BasePtrV = GEP->getPointerOperand();
4572+
APInt OffsetAccumulated =
4573+
APInt(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
4574+
if (GEP->accumulateConstantOffset(DL, OffsetAccumulated)) {
4575+
KnownBits Known = computeKnownBits(PtrV, DL);
4576+
KnownBits SplitKnown =
4577+
KnownBits::add(Known, KnownBits::makeConstant(APInt(
4578+
Known.getBitWidth(), Alignment.value())));
4579+
unsigned TrailZ = std::min(SplitKnown.countMinTrailingZeros(),
4580+
+Value::MaxAlignmentExponent);
4581+
Align ExpandAlign =
4582+
Align(1ull << std::min(Known.getBitWidth() - 1, TrailZ));
4583+
Align BaseAlignment =
4584+
getKnownAlignment(const_cast<Value *>(BasePtrV), DL, GEP);
4585+
if (ExpandAlign > Alignment) {
4586+
CxtI = PtrV;
4587+
PtrV = BasePtrV;
4588+
Alignment = BaseAlignment;
4589+
Offset = OffsetAccumulated.getSExtValue();
4590+
}
4591+
}
4592+
}
4593+
}
4594+
45654595
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
45664596
if (I.isAtomic())
45674597
return visitAtomicLoad(I);
45684598

4599+
const DataLayout &DL = DAG.getDataLayout();
45694600
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
45704601
const Value *SV = I.getOperand(0);
45714602
if (TLI.supportSwiftError()) {
@@ -4587,7 +4618,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
45874618
Type *Ty = I.getType();
45884619
SmallVector<EVT, 4> ValueVTs, MemVTs;
45894620
SmallVector<TypeSize, 4> Offsets;
4590-
ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
4621+
ComputeValueVTs(TLI, DL, Ty, ValueVTs, &MemVTs, &Offsets);
45914622
unsigned NumValues = ValueVTs.size();
45924623
if (NumValues == 0)
45934624
return;
@@ -4597,7 +4628,12 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
45974628
const MDNode *Ranges = getRangeMetadata(I);
45984629
bool isVolatile = I.isVolatile();
45994630
MachineMemOperand::Flags MMOFlags =
4600-
TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
4631+
TLI.getLoadMemOperandFlags(I, DL, AC, LibInfo);
4632+
4633+
// See the comment in visitStore for how the MachinePointerInfo is refined to base pointer + constant offset.
4634+
int64_t Offset = 0;
4635+
const Value *CxtI = nullptr;
4636+
tryToImproveAlign(DL, Ty, Alignment, SV, CxtI, Offset);
46014637

46024638
SDValue Root;
46034639
bool ConstantMemory = false;
@@ -4647,7 +4683,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
46474683
// TODO: MachinePointerInfo only supports a fixed length offset.
46484684
MachinePointerInfo PtrInfo =
46494685
!Offsets[i].isScalable() || Offsets[i].isZero()
4650-
? MachinePointerInfo(SV, Offsets[i].getKnownMinValue())
4686+
? MachinePointerInfo(SV, Offsets[i].getKnownMinValue() + Offset, 0,
4687+
CxtI)
46514688
: MachinePointerInfo();
46524689

46534690
SDValue A = DAG.getObjectPtrOffset(dl, Ptr, Offsets[i]);
@@ -4734,6 +4771,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
47344771
if (I.isAtomic())
47354772
return visitAtomicStore(I);
47364773

4774+
const DataLayout &DL = DAG.getDataLayout();
47374775
const Value *SrcV = I.getOperand(0);
47384776
const Value *PtrV = I.getOperand(1);
47394777

@@ -4754,8 +4792,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
47544792

47554793
SmallVector<EVT, 4> ValueVTs, MemVTs;
47564794
SmallVector<TypeSize, 4> Offsets;
4757-
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
4758-
SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
4795+
ComputeValueVTs(DAG.getTargetLoweringInfo(), DL, SrcV->getType(), ValueVTs,
4796+
&MemVTs, &Offsets);
47594797
unsigned NumValues = ValueVTs.size();
47604798
if (NumValues == 0)
47614799
return;
@@ -4772,7 +4810,19 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
47724810
Align Alignment = I.getAlign();
47734811
AAMDNodes AAInfo = I.getAAMetadata();
47744812

4775-
auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
4813+
// refine MPI: V + Offset
4814+
// Example:
4815+
// align 4 %p
4816+
// %gep = getelementptr i8, ptr %p, i32 1
4817+
// store i32 %v, ptr %gep, align 1
4818+
// ->
4819+
// MPI: V = %p, Offset = 1
4820+
// SDNode: store<(store (s32) into %p + 1, align 1, basealign 4)>
4821+
int64_t Offset = 0;
4822+
const Value *CxtI = nullptr;
4823+
tryToImproveAlign(DL, SrcV->getType(), Alignment, PtrV, CxtI, Offset);
4824+
4825+
auto MMOFlags = TLI.getStoreMemOperandFlags(I, DL);
47764826

47774827
unsigned ChainI = 0;
47784828
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
@@ -4787,7 +4837,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
47874837
// TODO: MachinePointerInfo only supports a fixed length offset.
47884838
MachinePointerInfo PtrInfo =
47894839
!Offsets[i].isScalable() || Offsets[i].isZero()
4790-
? MachinePointerInfo(PtrV, Offsets[i].getKnownMinValue())
4840+
? MachinePointerInfo(PtrV, Offsets[i].getKnownMinValue() + Offset,
4841+
0, CxtI)
47914842
: MachinePointerInfo();
47924843

47934844
SDValue Add = DAG.getObjectPtrOffset(dl, Ptr, Offsets[i]);

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 110 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10370,14 +10370,66 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
1037010370
assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
1037110371
"Unaligned load of unsupported type.");
1037210372

10373+
Align BaseAlignment = LD->getBaseAlign();
10374+
Align Alignment = LD->getAlign();
10375+
10376+
// Split the load into pieces sized by the alignment known at each offset from the base pointer.
10377+
if (commonAlignment(BaseAlignment,
10378+
Alignment.value() + LD->getPointerInfo().Offset) >
10379+
Alignment) {
10380+
ISD::LoadExtType HiExtType = LD->getExtensionType();
10381+
10382+
// If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10383+
if (HiExtType == ISD::NON_EXTLOAD)
10384+
HiExtType = ISD::ZEXTLOAD;
10385+
10386+
bool IsLE = DAG.getDataLayout().isLittleEndian();
10387+
unsigned NumBytes = LoadedVT.getSizeInBits() / 8;
10388+
// LE/BE use the same initial Alignment
10389+
unsigned PtrOffset = IsLE ? 0 : (NumBytes - Alignment.value());
10390+
unsigned RemainderBytes = NumBytes;
10391+
SDValue Result = DAG.getConstant(0, dl, VT);
10392+
SmallVector<SDValue, 4> Chains;
10393+
while (RemainderBytes) {
10394+
unsigned CurrBytes =
10395+
std::min(1ul << Log2_32(RemainderBytes), Alignment.value());
10396+
ISD::LoadExtType ExtType = ISD::ZEXTLOAD;
10397+
if (RemainderBytes + CurrBytes == NumBytes)
10398+
ExtType = HiExtType;
10399+
10400+
SDValue CurrLD = DAG.getExtLoad(
10401+
ExtType, dl, VT, Chain,
10402+
DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(PtrOffset)),
10403+
LD->getPointerInfo().getWithOffset(PtrOffset),
10404+
EVT::getIntegerVT(*DAG.getContext(), CurrBytes * 8), BaseAlignment,
10405+
LD->getMemOperand()->getFlags(), LD->getAAInfo());
10406+
if (IsLE)
10407+
Chains.push_back(CurrLD.getValue(1));
10408+
else
10409+
Chains.insert(Chains.begin(), CurrLD.getValue(1));
10410+
SDValue CurrV = DAG.getNode(
10411+
ISD::SHL, dl, VT, CurrLD,
10412+
DAG.getShiftAmountConstant((NumBytes - RemainderBytes) * 8, VT, dl));
10413+
Result = DAG.getNode(ISD::OR, dl, VT, CurrV, Result);
10414+
RemainderBytes -= CurrBytes;
10415+
if (RemainderBytes == 0)
10416+
break;
10417+
Alignment = commonAlignment(BaseAlignment,
10418+
LD->getPointerInfo().Offset + PtrOffset +
10419+
(IsLE ? CurrBytes : -CurrBytes));
10420+
PtrOffset =
10421+
IsLE ? NumBytes - RemainderBytes : RemainderBytes - Alignment.value();
10422+
}
10423+
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
10424+
return std::make_pair(Result, TF);
10425+
}
1037310426
// Compute the new VT that is half the size of the old one. This is an
1037410427
// integer MVT.
1037510428
unsigned NumBits = LoadedVT.getSizeInBits();
1037610429
EVT NewLoadedVT;
1037710430
NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
1037810431
NumBits >>= 1;
1037910432

10380-
Align Alignment = LD->getBaseAlign();
1038110433
unsigned IncrementSize = NumBits / 8;
1038210434
ISD::LoadExtType HiExtType = LD->getExtensionType();
1038310435

@@ -10389,24 +10441,24 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
1038910441
SDValue Lo, Hi;
1039010442
if (DAG.getDataLayout().isLittleEndian()) {
1039110443
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10392-
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10393-
LD->getAAInfo());
10444+
NewLoadedVT, BaseAlignment,
10445+
LD->getMemOperand()->getFlags(), LD->getAAInfo());
1039410446

1039510447
Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
1039610448
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
1039710449
LD->getPointerInfo().getWithOffset(IncrementSize),
10398-
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10399-
LD->getAAInfo());
10450+
NewLoadedVT, BaseAlignment,
10451+
LD->getMemOperand()->getFlags(), LD->getAAInfo());
1040010452
} else {
1040110453
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10402-
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10403-
LD->getAAInfo());
10454+
NewLoadedVT, BaseAlignment,
10455+
LD->getMemOperand()->getFlags(), LD->getAAInfo());
1040410456

1040510457
Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
1040610458
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
1040710459
LD->getPointerInfo().getWithOffset(IncrementSize),
10408-
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10409-
LD->getAAInfo());
10460+
NewLoadedVT, BaseAlignment,
10461+
LD->getMemOperand()->getFlags(), LD->getAAInfo());
1041010462
}
1041110463

1041210464
// aggregate the two parts
@@ -10428,7 +10480,8 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
1042810480
SDValue Ptr = ST->getBasePtr();
1042910481
SDValue Val = ST->getValue();
1043010482
EVT VT = Val.getValueType();
10431-
Align Alignment = ST->getBaseAlign();
10483+
Align BaseAlignment = ST->getBaseAlign();
10484+
Align Alignment = ST->getAlign();
1043210485
auto &MF = DAG.getMachineFunction();
1043310486
EVT StoreMemVT = ST->getMemoryVT();
1043410487

@@ -10447,7 +10500,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
1044710500
// FIXME: Does not handle truncating floating point stores!
1044810501
SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
1044910502
Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
10450-
Alignment, ST->getMemOperand()->getFlags());
10503+
BaseAlignment, ST->getMemOperand()->getFlags());
1045110504
return Result;
1045210505
}
1045310506
// Do a (aligned) store to a stack slot, then copy from the stack slot
@@ -10515,6 +10568,47 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
1051510568

1051610569
assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
1051710570
"Unaligned store of unknown type.");
10571+
10572+
// Split the stored value into pieces sized by the alignment known at each offset from the base pointer.
10573+
if (commonAlignment(BaseAlignment,
10574+
Alignment.value() + ST->getPointerInfo().Offset) >
10575+
Alignment) {
10576+
bool IsLE = DAG.getDataLayout().isLittleEndian();
10577+
unsigned NumBytes = StoreMemVT.getFixedSizeInBits() / 8;
10578+
SmallVector<SDValue, 8> Stores;
10579+
// LE/BE use the same initial Alignment
10580+
unsigned PtrOffset = IsLE ? 0 : (NumBytes - Alignment.value());
10581+
unsigned RemainderBytes = NumBytes;
10582+
while (RemainderBytes) {
10583+
unsigned CurrBytes =
10584+
std::min(1ul << Log2_32(RemainderBytes), Alignment.value());
10585+
SDValue CurrST = DAG.getTruncStore(
10586+
Chain, dl, Val,
10587+
DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(PtrOffset)),
10588+
ST->getPointerInfo().getWithOffset(PtrOffset),
10589+
EVT::getIntegerVT(*DAG.getContext(), CurrBytes * 8), BaseAlignment,
10590+
ST->getMemOperand()->getFlags(), ST->getAAInfo());
10591+
if (IsLE)
10592+
Stores.push_back(CurrST);
10593+
else
10594+
Stores.insert(Stores.begin(), CurrST);
10595+
RemainderBytes -= CurrBytes;
10596+
if (RemainderBytes == 0)
10597+
break;
10598+
10599+
Val = DAG.getNode(ISD::SRL, dl, VT, Val,
10600+
DAG.getShiftAmountConstant(CurrBytes * 8, VT, dl));
10601+
Alignment = commonAlignment(BaseAlignment,
10602+
ST->getPointerInfo().Offset + PtrOffset +
10603+
(IsLE ? CurrBytes : -CurrBytes));
10604+
PtrOffset =
10605+
IsLE ? NumBytes - RemainderBytes : RemainderBytes - Alignment.value();
10606+
}
10607+
10608+
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10609+
return Result;
10610+
}
10611+
1051810612
// Get the half-size VT
1051910613
EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
1052010614
unsigned NumBits = NewStoredVT.getFixedSizeInBits();
@@ -10538,17 +10632,18 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
1053810632
SDValue Store1, Store2;
1053910633
Store1 = DAG.getTruncStore(Chain, dl,
1054010634
DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10541-
Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10542-
ST->getMemOperand()->getFlags());
10635+
Ptr, ST->getPointerInfo(), NewStoredVT,
10636+
BaseAlignment, ST->getMemOperand()->getFlags());
1054310637

1054410638
Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
1054510639
Store2 = DAG.getTruncStore(
1054610640
Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10547-
ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10548-
ST->getMemOperand()->getFlags(), ST->getAAInfo());
10641+
ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT,
10642+
BaseAlignment, ST->getMemOperand()->getFlags(), ST->getAAInfo());
1054910643

1055010644
SDValue Result =
1055110645
DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
10646+
1055210647
return Result;
1055310648
}
1055410649

llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
1414
; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
1515
; GFX90A-NEXT: renamable $vgpr31 = COPY $vgpr0, implicit $exec
1616
; GFX90A-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s32) from %ir.arg4.kernarg.offset.align.down, align 8, addrspace 4)
17-
; GFX90A-NEXT: early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg6.kernarg.offset.align.down, align 8, addrspace 4)
18-
; GFX90A-NEXT: renamable $sgpr33 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg6.kernarg.offset.align.down + 16, align 8, addrspace 4)
17+
; GFX90A-NEXT: early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s128) from %ir.f1.kernarg.segment + 24, align 8, addrspace 4)
18+
; GFX90A-NEXT: renamable $sgpr33 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.f1.kernarg.segment + 40, align 8, addrspace 4)
1919
; GFX90A-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
2020
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4)
2121
; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr17, 0, implicit-def $scc

llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ define amdgpu_kernel void @uniform_trunc_i64_to_i1(ptr addrspace(1) %out, i64 %x
113113
; GCN-NEXT: liveins: $sgpr4_sgpr5
114114
; GCN-NEXT: {{ $}}
115115
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
116-
; GCN-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s128) from %ir.out.kernarg.offset, align 4, addrspace 4)
116+
; GCN-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s128) from %ir.uniform_trunc_i64_to_i1.kernarg.segment + 36, align 4, basealign 16, addrspace 4)
117117
; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 13, 0 :: (dereferenceable invariant load (s32) from %ir.z.kernarg.offset.align.down, addrspace 4)
118118
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub1
119119
; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub0

0 commit comments

Comments
 (0)