Skip to content

Commit c8c8455

Browse files
committed
[AMDGPU] Fold into uses of splat REG_SEQUENCEs through COPYs.
1 parent 14e280e commit c8c8455

File tree

2 files changed

+10
-9
lines changed

2 files changed

+10
-9
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1036,11 +1036,18 @@ void SIFoldOperandsImpl::foldOperand(
10361036
// Grab the use operands first
10371037
SmallVector<MachineOperand *, 4> UsesToProcess(
10381038
llvm::make_pointer_range(MRI->use_nodbg_operands(RegSeqDstReg)));
1039-
for (auto *RSUse : UsesToProcess) {
1039+
for (unsigned I = 0; I != UsesToProcess.size(); ++I) {
1040+
MachineOperand *RSUse = UsesToProcess[I];
10401041
MachineInstr *RSUseMI = RSUse->getParent();
10411042
unsigned OpNo = RSUseMI->getOperandNo(RSUse);
10421043

10431044
if (SplatVal) {
1045+
if (RSUseMI->isCopy()) {
1046+
Register DstReg = RSUseMI->getOperand(0).getReg();
1047+
append_range(UsesToProcess,
1048+
make_pointer_range(MRI->use_nodbg_operands(DstReg)));
1049+
continue;
1050+
}
10441051
if (MachineOperand *Foldable =
10451052
tryFoldRegSeqSplat(RSUseMI, OpNo, SplatVal, SplatRC)) {
10461053
appendFoldCandidate(FoldList, RSUseMI, OpNo, Foldable);

llvm/test/CodeGen/AMDGPU/packed-fp32.ll

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2155,11 +2155,8 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
21552155
; GFX90A-GISEL-LABEL: fadd_fadd_fsub_0:
21562156
; GFX90A-GISEL: ; %bb.0: ; %bb
21572157
; GFX90A-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
2158-
; GFX90A-GISEL-NEXT: s_mov_b32 s2, 0
2159-
; GFX90A-GISEL-NEXT: s_mov_b32 s3, s2
2160-
; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
21612158
; GFX90A-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2162-
; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], v[0:1]
2159+
; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], 0
21632160
; GFX90A-GISEL-NEXT: v_mov_b32_e32 v0, v1
21642161
; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], 0
21652162
; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -2170,11 +2167,8 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
21702167
; GFX942-GISEL-LABEL: fadd_fadd_fsub_0:
21712168
; GFX942-GISEL: ; %bb.0: ; %bb
21722169
; GFX942-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
2173-
; GFX942-GISEL-NEXT: s_mov_b32 s2, 0
2174-
; GFX942-GISEL-NEXT: s_mov_b32 s3, s2
2175-
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
21762170
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2177-
; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], v[0:1]
2171+
; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], 0
21782172
; GFX942-GISEL-NEXT: s_nop 0
21792173
; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, v1
21802174
; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], 0

0 commit comments

Comments (0)