From 7449171d0e7cf27dd7666ab99250085294bb423d Mon Sep 17 00:00:00 2001
From: cdevadas
Date: Tue, 11 Jun 2019 16:33:29 +0530
Subject: [PATCH] Create a subregclass from SGPR for the call clobbered
 register pairs.

Use this register class in the return instruction for the operand that
holds the return address.
---
 lib/Target/AMDGPU/SIFrameLowering.cpp |  6 ++++++
 lib/Target/AMDGPU/SIISelLowering.cpp  | 10 +++++-----
 lib/Target/AMDGPU/SIRegisterInfo.td   |  6 ++++++
 lib/Target/AMDGPU/SOPInstructions.td  |  7 ++++++-
 4 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index ca2fcfa036f3..d7ed345d69aa 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -683,6 +683,12 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
     LivePhysRegs LiveRegs(*ST.getRegisterInfo());
     LiveRegs.addLiveIns(MBB);
 
+    // Avoid clobbering the registers used in the return instruction.
+    if (MBBI->getOpcode() == AMDGPU::S_SETPC_B64_return &&
+        MBBI->getOperand(0).isReg()) {
+      LiveRegs.addReg(MBBI->getOperand(0).getReg());
+    }
+
     ScratchExecCopy =
         findScratchNonCalleeSaveRegister(MF, LiveRegs,
                                          *TRI.getWaveMaskRegClass());

diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 30fde3babb7d..a9deca77b644 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2180,13 +2180,13 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
 
     // FIXME: Should be able to use a vreg here, but need a way to prevent it
     // from being allcoated to a CSR.
-    SDValue PhysReturnAddrReg = DAG.getRegister(TRI->getReturnAddressReg(MF),
-                                                MVT::i64);
-
-    Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, Flag);
+    SDValue ReturnAddrVirtualReg = DAG.getRegister(MF.getRegInfo().createVirtualRegister(
+                                       &AMDGPU::CCR_SGPR_64RegClass),
+                                   MVT::i64);
+    Chain = DAG.getCopyToReg(Chain, DL, ReturnAddrVirtualReg, ReturnAddrReg, Flag);
     Flag = Chain.getValue(1);
 
-    RetOps.push_back(PhysReturnAddrReg);
+    RetOps.push_back(ReturnAddrVirtualReg);
   }
 
   // Copy the result values into the output registers.

diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td
index 79e3fef6c8c6..b682421c2097 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -464,6 +464,12 @@ def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32
   let AllocationPriority = 8;
 }
 
+// CCR (call clobbered registers) SGPR 64-bit registers
+def CCR_SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add (trunc SGPR_64, 16))> {
+  let CopyCost = 1;
+  let AllocationPriority = 8;
+}
+
 def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add TTMP_64Regs)> {
   let isAllocatable = 0;
 }

diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td
index d7bb869377a5..86e2442701e9 100644
--- a/lib/Target/AMDGPU/SOPInstructions.td
+++ b/lib/Target/AMDGPU/SOPInstructions.td
@@ -130,6 +130,11 @@ class SOP1_1 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
   opName, (outs), (ins SReg_64:$src0), "$src0", pattern> {
   let has_sdst = 0;
 }
 
+// 64-bit input (a subset of SGPR_64), no output
+class SOP1_64_2 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+  opName, (outs), (ins CCR_SGPR_64:$src0), "$src0", pattern> {
+  let has_sdst = 0;
+}
 let isMoveImm = 1 in {
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
@@ -224,7 +229,7 @@ def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">;
 
 let isReturn = 1 in {
 // Define variant marked as return rather than branch.
-def S_SETPC_B64_return : SOP1_1<"", [(AMDGPUret_flag i64:$src0)]>;
+def S_SETPC_B64_return : SOP1_64_2<"", [(AMDGPUret_flag i64:$src0)]>;
 }
 } // End isTerminator = 1, isBarrier = 1
 