diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 0ed06c37507af..b4a596bd12f06 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1164,11 +1164,18 @@ void SIFoldOperandsImpl::foldOperand( // Grab the use operands first SmallVector UsesToProcess( llvm::make_pointer_range(MRI->use_nodbg_operands(RegSeqDstReg))); - for (auto *RSUse : UsesToProcess) { + for (unsigned I = 0; I != UsesToProcess.size(); ++I) { + MachineOperand *RSUse = UsesToProcess[I]; MachineInstr *RSUseMI = RSUse->getParent(); unsigned OpNo = RSUseMI->getOperandNo(RSUse); if (SplatRC) { + if (RSUseMI->isCopy()) { + Register DstReg = RSUseMI->getOperand(0).getReg(); + append_range(UsesToProcess, + make_pointer_range(MRI->use_nodbg_operands(DstReg))); + continue; + } if (tryFoldRegSeqSplat(RSUseMI, OpNo, SplatVal, SplatRC)) { FoldableDef SplatDef(SplatVal, SplatRC); appendFoldCandidate(FoldList, RSUseMI, OpNo, SplatDef); diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll index bef38c1a65ef8..4db3f2189bfc3 100644 --- a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll +++ b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll @@ -2155,11 +2155,8 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) { ; GFX90A-GISEL-LABEL: fadd_fadd_fsub_0: ; GFX90A-GISEL: ; %bb.0: ; %bb ; GFX90A-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX90A-GISEL-NEXT: s_mov_b32 s2, 0 -; GFX90A-GISEL-NEXT: s_mov_b32 s3, s2 -; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], v[0:1] +; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], 0 ; GFX90A-GISEL-NEXT: v_mov_b32_e32 v0, v1 ; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], 0 ; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -2170,11 +2167,8 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) { ; GFX942-GISEL-LABEL: fadd_fadd_fsub_0: ; GFX942-GISEL: ; %bb.0: ; %bb ; GFX942-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX942-GISEL-NEXT: s_mov_b32 s2, 0 -; GFX942-GISEL-NEXT: s_mov_b32 s3, s2 -; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], v[0:1] +; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], 0 ; GFX942-GISEL-NEXT: s_nop 0 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, v1 ; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], 0