Skip to content

Commit 9300f4e

Browse files
committed
[AMDGPU] Fold into uses of splat REG_SEQUENCEs through COPYs.
1 parent 2c90c0b commit 9300f4e

File tree

2 files changed

+10
-9
lines changed

2 files changed

+10
-9
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1164,11 +1164,18 @@ void SIFoldOperandsImpl::foldOperand(
11641164
// Grab the use operands first
11651165
SmallVector<MachineOperand *, 4> UsesToProcess(
11661166
llvm::make_pointer_range(MRI->use_nodbg_operands(RegSeqDstReg)));
1167-
for (auto *RSUse : UsesToProcess) {
1167+
for (unsigned I = 0; I != UsesToProcess.size(); ++I) {
1168+
MachineOperand *RSUse = UsesToProcess[I];
11681169
MachineInstr *RSUseMI = RSUse->getParent();
11691170
unsigned OpNo = RSUseMI->getOperandNo(RSUse);
11701171

11711172
if (SplatRC) {
1173+
if (RSUseMI->isCopy()) {
1174+
Register DstReg = RSUseMI->getOperand(0).getReg();
1175+
append_range(UsesToProcess,
1176+
make_pointer_range(MRI->use_nodbg_operands(DstReg)));
1177+
continue;
1178+
}
11721179
if (tryFoldRegSeqSplat(RSUseMI, OpNo, SplatVal, SplatRC)) {
11731180
FoldableDef SplatDef(SplatVal, SplatRC);
11741181
appendFoldCandidate(FoldList, RSUseMI, OpNo, SplatDef);

llvm/test/CodeGen/AMDGPU/packed-fp32.ll

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2155,11 +2155,8 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
21552155
; GFX90A-GISEL-LABEL: fadd_fadd_fsub_0:
21562156
; GFX90A-GISEL: ; %bb.0: ; %bb
21572157
; GFX90A-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
2158-
; GFX90A-GISEL-NEXT: s_mov_b32 s2, 0
2159-
; GFX90A-GISEL-NEXT: s_mov_b32 s3, s2
2160-
; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
21612158
; GFX90A-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2162-
; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], v[0:1]
2159+
; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], 0
21632160
; GFX90A-GISEL-NEXT: v_mov_b32_e32 v0, v1
21642161
; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], 0
21652162
; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -2170,11 +2167,8 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
21702167
; GFX942-GISEL-LABEL: fadd_fadd_fsub_0:
21712168
; GFX942-GISEL: ; %bb.0: ; %bb
21722169
; GFX942-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
2173-
; GFX942-GISEL-NEXT: s_mov_b32 s2, 0
2174-
; GFX942-GISEL-NEXT: s_mov_b32 s3, s2
2175-
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
21762170
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2177-
; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], v[0:1]
2171+
; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], 0
21782172
; GFX942-GISEL-NEXT: s_nop 0
21792173
; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, v1
21802174
; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], 0

0 commit comments

Comments (0)