diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2310d511b1df8..0e27b0f764795 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7977,6 +7977,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } case Intrinsic::amdgcn_call_whole_wave: { TargetLowering::ArgListTy Args; + bool isTailCall = I.isTailCall(); // The first argument is the callee. Skip it when assembling the call args. TargetLowering::ArgListEntry Arg; @@ -7984,6 +7985,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Arg.Node = getValue(I.getArgOperand(Idx)); Arg.Ty = I.getArgOperand(Idx)->getType(); Arg.setAttributes(&I, Idx); + + if (Arg.IsSRet && isa<Instruction>(I.getArgOperand(Idx))) + isTailCall = false; + Args.push_back(Arg); } @@ -7998,7 +8003,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, .setChain(getRoot()) .setCallee(CallingConv::AMDGPU_Gfx_WholeWave, I.getType(), getValue(I.getArgOperand(0)), std::move(Args)) - .setTailCall(false) + .setTailCall(isTailCall && canTailCall(I)) .setIsPreallocated( I.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0) .setConvergent(I.isConvergent()) @@ -8879,6 +8884,29 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, return Result; } +bool SelectionDAGBuilder::canTailCall(const CallBase &CB) const { + bool isMustTailCall = CB.isMustTailCall(); + + // Avoid emitting tail calls in functions with the disable-tail-calls + // attribute. + auto *Caller = CB.getParent()->getParent(); + if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() == + "true" && + !isMustTailCall) + return false; + + // We can't tail call inside a function with a swifterror argument. Lowering + // does not support this yet. It would have to move into the swifterror + // register before the call. 
+ if (DAG.getTargetLoweringInfo().supportSwiftError() && + Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + return false; + + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within TLI->LowerCallTo. + return isInTailCallPosition(CB, DAG.getTarget()); +} + void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, bool isTailCall, bool isMustTailCall, const BasicBlock *EHPadBB, @@ -8893,21 +8921,8 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, const Value *SwiftErrorVal = nullptr; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (isTailCall) { - // Avoid emitting tail calls in functions with the disable-tail-calls - // attribute. - auto *Caller = CB.getParent()->getParent(); - if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() == - "true" && !isMustTailCall) - isTailCall = false; - - // We can't tail call inside a function with a swifterror argument. Lowering - // does not support this yet. It would have to move into the swifterror - // register before the call. - if (TLI.supportSwiftError() && - Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) - isTailCall = false; - } + if (isTailCall) + isTailCall = canTailCall(CB); for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) { TargetLowering::ArgListEntry Entry; @@ -8952,11 +8967,6 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, Args.push_back(Entry); } - // Check if target-independent constraints permit a tail call here. - // Target-dependent constraints are checked within TLI->LowerCallTo. - if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget())) - isTailCall = false; - // Disable tail calls if there is an swifterror argument. Targets have not // been updated to support tail calls. 
if (TLI.supportSwiftError() && SwiftErrorVal) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 1c278076a219d..58d9f04c61156 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -408,6 +408,10 @@ class SelectionDAGBuilder { bool IsMustTailCall, const BasicBlock *EHPadBB = nullptr, const TargetLowering::PtrAuthInfo *PAI = nullptr); + // Check some of the target-independent constraints for tail calls. This does + // not iterate over the call arguments. + bool canTailCall(const CallBase &CB) const; + // Lower range metadata from 0 to N to assert zext to an integer of nearest // floor power of two. SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 9488cccf8fe5c..1df6eb79763b4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -993,8 +993,14 @@ static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, return IsWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32 : AMDGPU::SI_CS_CHAIN_TC_W64; } - return CC == CallingConv::AMDGPU_Gfx ? AMDGPU::SI_TCRETURN_GFX : - AMDGPU::SI_TCRETURN; + if (CallerF.getFunction().getCallingConv() == + CallingConv::AMDGPU_Gfx_WholeWave) + return AMDGPU::SI_TCRETURN_GFX_WholeWave; + + if (CC == CallingConv::AMDGPU_Gfx || CC == CallingConv::AMDGPU_Gfx_WholeWave) + return AMDGPU::SI_TCRETURN_GFX; + + return AMDGPU::SI_TCRETURN; } // Add operands to call instruction to track the callee. 
@@ -1273,6 +1279,13 @@ bool AMDGPUCallLowering::lowerTailCall( unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), /*IsTailCall*/ true, ST.isWave32(), CalleeCC, IsDynamicVGPRChainCall); auto MIB = MIRBuilder.buildInstrNoInsert(Opc); + + if (FuncInfo->isWholeWaveFunction()) + addOriginalExecToReturn(MF, MIB); + + // Keep track of the index of the next operand to be added to the call + unsigned CalleeIdx = MIB->getNumOperands(); + if (!addCallTargetOperands(MIB, MIRBuilder, Info, IsDynamicVGPRChainCall)) return false; @@ -1390,7 +1403,7 @@ bool AMDGPUCallLowering::lowerTailCall( // If we have -tailcallopt, we need to adjust the stack. We'll do the call // sequence start and end here. if (!IsSibCall) { - MIB->getOperand(1).setImm(FPDiff); + MIB->getOperand(CalleeIdx + 1).setImm(FPDiff); CallSeqStart.addImm(NumBytes).addImm(0); // End the call sequence *before* emitting the call. Normally, we would // tidy the frame up after the call. However, here, we've laid out the @@ -1402,16 +1415,24 @@ bool AMDGPUCallLowering::lowerTailCall( // Now we can add the actual call instruction to the correct basic block. MIRBuilder.insertInstr(MIB); + // If this is a whole wave tail call, we need to constrain the register for + // the original EXEC. + if (MIB->getOpcode() == AMDGPU::SI_TCRETURN_GFX_WholeWave) { + MIB->getOperand(0).setReg( + constrainOperandRegClass(MF, *TRI, MRI, *TII, *ST.getRegBankInfo(), + *MIB, MIB->getDesc(), MIB->getOperand(0), 0)); + } + // If Callee is a reg, since it is used by a target specific // instruction, it must have a register class matching the // constraint of that instruction. // FIXME: We should define regbankselectable call instructions to handle // divergent call targets. 
- if (MIB->getOperand(0).isReg()) { - MIB->getOperand(0).setReg( - constrainOperandRegClass(MF, *TRI, MRI, *TII, *ST.getRegBankInfo(), - *MIB, MIB->getDesc(), MIB->getOperand(0), 0)); + if (MIB->getOperand(CalleeIdx).isReg()) { + MIB->getOperand(CalleeIdx).setReg(constrainOperandRegClass( + MF, *TRI, MRI, *TII, *ST.getRegBankInfo(), *MIB, MIB->getDesc(), + MIB->getOperand(CalleeIdx), CalleeIdx)); } MF.getFrameInfo().setHasTailCall(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 0421ed87e61f4..389c7c1ea95a0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -5639,6 +5639,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CALL) NODE_NAME_CASE(TC_RETURN) NODE_NAME_CASE(TC_RETURN_GFX) + NODE_NAME_CASE(TC_RETURN_GFX_WholeWave) NODE_NAME_CASE(TC_RETURN_CHAIN) NODE_NAME_CASE(TC_RETURN_CHAIN_DVGPR) NODE_NAME_CASE(TRAP) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 5716711de3402..120fa819e8a55 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -402,6 +402,7 @@ enum NodeType : unsigned { CALL, TC_RETURN, TC_RETURN_GFX, + TC_RETURN_GFX_WholeWave, TC_RETURN_CHAIN, TC_RETURN_CHAIN_DVGPR, TRAP, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index e305f08925cc6..b8fa6f3fc6867 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -94,6 +94,10 @@ def AMDGPUtc_return_gfx: SDNode<"AMDGPUISD::TC_RETURN_GFX", AMDGPUTCReturnTP, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic] >; +def AMDGPUtc_return_gfx_ww: SDNode<"AMDGPUISD::TC_RETURN_GFX_WholeWave", AMDGPUTCReturnTP, +[SDNPHasChain, SDNPOptInGlue, SDNPVariadic] +>; + def AMDGPUtc_return_chain: SDNode<"AMDGPUISD::TC_RETURN_CHAIN", 
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic] diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index b88df50c6c999..3caeda651f96b 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1125,9 +1125,18 @@ void SIFrameLowering::emitCSRSpillRestores( RestoreWWMRegisters(WWMCalleeSavedRegs); // The original EXEC is the first operand of the return instruction. - const MachineInstr &Return = MBB.instr_back(); - assert(Return.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN && - "Unexpected return inst"); + MachineInstr &Return = MBB.instr_back(); + unsigned Opcode = Return.getOpcode(); + switch (Opcode) { + case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN: + Opcode = AMDGPU::SI_RETURN; + break; + case AMDGPU::SI_TCRETURN_GFX_WholeWave: + Opcode = AMDGPU::SI_TCRETURN_GFX; + break; + default: + llvm_unreachable("Unexpected return inst"); + } Register OrigExec = Return.getOperand(0).getReg(); if (!WWMScratchRegs.empty()) { @@ -1141,6 +1150,11 @@ void SIFrameLowering::emitCSRSpillRestores( // Restore original EXEC. unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addReg(OrigExec); + + // Drop the first operand and update the opcode. + Return.removeOperand(0); + Return.setDesc(TII->get(Opcode)); + return; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index eec2dbcd2dd4a..cff04e38f3cd3 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4131,6 +4131,11 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, break; } + // If the caller is a whole wave function, we need to use a special opcode + // so we can patch up EXEC. 
+ if (Info->isWholeWaveFunction()) + OPC = AMDGPUISD::TC_RETURN_GFX_WholeWave; + return DAG.getNode(OPC, DL, MVT::Other, Ops); } @@ -5872,6 +5877,7 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MI.eraseFromParent(); return SplitBB; } + case AMDGPU::SI_TCRETURN_GFX_WholeWave: case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN: { assert(MFI->isWholeWaveFunction()); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index fc469f19c7808..96ad3168c2b41 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2515,7 +2515,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64)); break; } - case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN: case AMDGPU::SI_RETURN: { const MachineFunction *MF = MBB.getParent(); const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 225a073db33d1..ca77573311ffa 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -670,6 +670,33 @@ def SI_WHOLE_WAVE_FUNC_RETURN : SPseudoInstSI < def : GCNPat< (AMDGPUwhole_wave_return), (SI_WHOLE_WAVE_FUNC_RETURN (i1 (IMPLICIT_DEF)))>; +// Restores the previous EXEC and otherwise behaves entirely like a SI_TCRETURN. +// This is used for tail calls *from* a whole wave function. Tail calls to +// a whole wave function may use the usual opcodes, depending on the calling +// convention of the caller. +def SI_TCRETURN_GFX_WholeWave : SPseudoInstSI < + (outs), + (ins SReg_1:$orig_exec, Gfx_CCR_SGPR_64:$src0, unknown:$callee, i32imm:$fpdiff)> { + let isCall = 1; + let isTerminator = 1; + let isReturn = 1; + let isBarrier = 1; + let UseNamedOperandTable = 1; + let SchedRW = [WriteBranch]; + let isConvergent = 1; + + // We're going to use custom handling to set the $orig_exec to the correct value. 
+ let usesCustomInserter = 1; +} + +// Generate a SI_TCRETURN_GFX_WholeWave pseudo with a placeholder for its +// argument. It will be filled in by the custom inserter. +def : GCNPat< + (AMDGPUtc_return_gfx_ww i64:$src0, tglobaladdr:$callee, i32:$fpdiff), + (SI_TCRETURN_GFX_WholeWave (i1 (IMPLICIT_DEF)), Gfx_CCR_SGPR_64:$src0, + tglobaladdr:$callee, i32:$fpdiff)>; + + // Return for returning shaders to a shader variant epilog. def SI_RETURN_TO_EPILOG : SPseudoInstSI < (outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index e6af1ecc8db77..39a1d24a2a42c 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1417,6 +1417,7 @@ constexpr bool mayTailCallThisCC(CallingConv::ID CC) { switch (CC) { case CallingConv::C: case CallingConv::AMDGPU_Gfx: + case CallingConv::AMDGPU_Gfx_WholeWave: return true; default: return canGuaranteeTCO(CC); diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll index eac0767c88d80..356bf4b3cac28 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll @@ -96,6 +96,672 @@ define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr) ret void } +define amdgpu_gfx i32 @tail_call_from_gfx(i32 %x, i32 inreg %c) { +; DAGISEL-LABEL: tail_call_from_gfx: +; DAGISEL: ; %bb.0: +; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; DAGISEL-NEXT: s_wait_expcnt 0x0 +; DAGISEL-NEXT: s_wait_samplecnt 0x0 +; DAGISEL-NEXT: s_wait_bvhcnt 0x0 +; DAGISEL-NEXT: s_wait_kmcnt 0x0 +; DAGISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 +; DAGISEL-NEXT: s_mov_b32 s1, good_callee@abs32@hi +; DAGISEL-NEXT: s_mov_b32 s0, good_callee@abs32@lo +; DAGISEL-NEXT: s_wait_alu 0xfffe +; DAGISEL-NEXT: s_setpc_b64 s[0:1] +; +; GISEL-LABEL: tail_call_from_gfx: +; GISEL: ; %bb.0: +; 
GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GISEL-NEXT: s_wait_expcnt 0x0 +; GISEL-NEXT: s_wait_samplecnt 0x0 +; GISEL-NEXT: s_wait_bvhcnt 0x0 +; GISEL-NEXT: s_wait_kmcnt 0x0 +; GISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 +; GISEL-NEXT: s_mov_b32 s36, good_callee@abs32@lo +; GISEL-NEXT: s_mov_b32 s37, good_callee@abs32@hi +; GISEL-NEXT: s_wait_alu 0xfffe +; GISEL-NEXT: s_setpc_b64 s[36:37] + %y = add i32 %x, 13 + %ret = tail call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @good_callee, i32 %x, i32 %y, i32 inreg %c) + ret i32 %ret +} + +define amdgpu_gfx_whole_wave i32 @tail_call_from_whole_wave(i1 %active, i32 %x, i32 inreg %c) { +; DAGISEL-LABEL: tail_call_from_whole_wave: +; DAGISEL: ; %bb.0: +; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; DAGISEL-NEXT: s_wait_expcnt 0x0 +; DAGISEL-NEXT: s_wait_samplecnt 0x0 +; DAGISEL-NEXT: s_wait_bvhcnt 0x0 +; DAGISEL-NEXT: s_wait_kmcnt 0x0 +; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; DAGISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; DAGISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; DAGISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; DAGISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; DAGISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; DAGISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; DAGISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; DAGISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; DAGISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; DAGISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; DAGISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; DAGISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68 
+; DAGISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; DAGISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; DAGISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; DAGISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; DAGISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; DAGISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; DAGISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; DAGISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; DAGISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; DAGISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; DAGISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; DAGISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; DAGISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; DAGISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; DAGISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; DAGISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; DAGISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; DAGISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; DAGISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; DAGISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; DAGISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; DAGISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; DAGISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; DAGISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; DAGISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; DAGISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; DAGISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; DAGISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; DAGISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; DAGISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; DAGISEL-NEXT: scratch_store_b32 off, v66, s32 
offset:200 +; DAGISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; DAGISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; DAGISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; DAGISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; DAGISEL-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; DAGISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; DAGISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; DAGISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; DAGISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; DAGISEL-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; DAGISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; DAGISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; DAGISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; DAGISEL-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; DAGISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; DAGISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; DAGISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; DAGISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; DAGISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; DAGISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; DAGISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; DAGISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; DAGISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; DAGISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; DAGISEL-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; DAGISEL-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; DAGISEL-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; DAGISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; DAGISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; DAGISEL-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; DAGISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; DAGISEL-NEXT: 
scratch_store_b32 off, v131, s32 offset:332 +; DAGISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; DAGISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; DAGISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; DAGISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; DAGISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; DAGISEL-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; DAGISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; DAGISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; DAGISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; DAGISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; DAGISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; DAGISEL-NEXT: scratch_store_b32 off, v151, s32 offset:380 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; DAGISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; DAGISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; DAGISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; DAGISEL-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; DAGISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; DAGISEL-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; DAGISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; DAGISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; DAGISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; DAGISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; DAGISEL-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; DAGISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; DAGISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; DAGISEL-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; DAGISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; DAGISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; DAGISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; DAGISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; DAGISEL-NEXT: 
scratch_store_b32 off, v195, s32 offset:460 +; DAGISEL-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; DAGISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; DAGISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; DAGISEL-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; DAGISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; DAGISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; DAGISEL-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; DAGISEL-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; DAGISEL-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; DAGISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; DAGISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; DAGISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508 +; DAGISEL-NEXT: s_clause 0xf +; DAGISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; DAGISEL-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; DAGISEL-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; DAGISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; DAGISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; DAGISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; DAGISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; DAGISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; DAGISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; DAGISEL-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; DAGISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; DAGISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; DAGISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; DAGISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; DAGISEL-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; DAGISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572 +; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 +; DAGISEL-NEXT: s_mov_b32 s37, good_callee@abs32@hi +; DAGISEL-NEXT: s_mov_b32 s36, good_callee@abs32@lo +; DAGISEL-NEXT: s_wait_alu 
0xfffe +; DAGISEL-NEXT: s_xor_b32 exec_lo, s0, -1 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_load_b32 v0, off, s32 +; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4 +; DAGISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8 +; DAGISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12 +; DAGISEL-NEXT: scratch_load_b32 v4, off, s32 offset:16 +; DAGISEL-NEXT: scratch_load_b32 v5, off, s32 offset:20 +; DAGISEL-NEXT: scratch_load_b32 v6, off, s32 offset:24 +; DAGISEL-NEXT: scratch_load_b32 v7, off, s32 offset:28 +; DAGISEL-NEXT: scratch_load_b32 v8, off, s32 offset:32 +; DAGISEL-NEXT: scratch_load_b32 v9, off, s32 offset:36 +; DAGISEL-NEXT: scratch_load_b32 v10, off, s32 offset:40 +; DAGISEL-NEXT: scratch_load_b32 v11, off, s32 offset:44 +; DAGISEL-NEXT: scratch_load_b32 v12, off, s32 offset:48 +; DAGISEL-NEXT: scratch_load_b32 v13, off, s32 offset:52 +; DAGISEL-NEXT: scratch_load_b32 v14, off, s32 offset:56 +; DAGISEL-NEXT: scratch_load_b32 v15, off, s32 offset:60 +; DAGISEL-NEXT: scratch_load_b32 v16, off, s32 offset:64 +; DAGISEL-NEXT: scratch_load_b32 v17, off, s32 offset:68 +; DAGISEL-NEXT: scratch_load_b32 v18, off, s32 offset:72 +; DAGISEL-NEXT: scratch_load_b32 v19, off, s32 offset:76 +; DAGISEL-NEXT: scratch_load_b32 v20, off, s32 offset:80 +; DAGISEL-NEXT: scratch_load_b32 v21, off, s32 offset:84 +; DAGISEL-NEXT: scratch_load_b32 v22, off, s32 offset:88 +; DAGISEL-NEXT: scratch_load_b32 v23, off, s32 offset:92 +; DAGISEL-NEXT: scratch_load_b32 v24, off, s32 offset:96 +; DAGISEL-NEXT: scratch_load_b32 v25, off, s32 offset:100 +; DAGISEL-NEXT: scratch_load_b32 v26, off, s32 offset:104 +; DAGISEL-NEXT: scratch_load_b32 v27, off, s32 offset:108 +; DAGISEL-NEXT: scratch_load_b32 v28, off, s32 offset:112 +; DAGISEL-NEXT: scratch_load_b32 v29, off, s32 offset:116 +; DAGISEL-NEXT: scratch_load_b32 v30, off, s32 offset:120 +; DAGISEL-NEXT: scratch_load_b32 v31, off, s32 offset:124 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_load_b32 v32, off, 
s32 offset:128 +; DAGISEL-NEXT: scratch_load_b32 v33, off, s32 offset:132 +; DAGISEL-NEXT: scratch_load_b32 v34, off, s32 offset:136 +; DAGISEL-NEXT: scratch_load_b32 v35, off, s32 offset:140 +; DAGISEL-NEXT: scratch_load_b32 v36, off, s32 offset:144 +; DAGISEL-NEXT: scratch_load_b32 v37, off, s32 offset:148 +; DAGISEL-NEXT: scratch_load_b32 v38, off, s32 offset:152 +; DAGISEL-NEXT: scratch_load_b32 v39, off, s32 offset:156 +; DAGISEL-NEXT: scratch_load_b32 v48, off, s32 offset:160 +; DAGISEL-NEXT: scratch_load_b32 v49, off, s32 offset:164 +; DAGISEL-NEXT: scratch_load_b32 v50, off, s32 offset:168 +; DAGISEL-NEXT: scratch_load_b32 v51, off, s32 offset:172 +; DAGISEL-NEXT: scratch_load_b32 v52, off, s32 offset:176 +; DAGISEL-NEXT: scratch_load_b32 v53, off, s32 offset:180 +; DAGISEL-NEXT: scratch_load_b32 v54, off, s32 offset:184 +; DAGISEL-NEXT: scratch_load_b32 v55, off, s32 offset:188 +; DAGISEL-NEXT: scratch_load_b32 v64, off, s32 offset:192 +; DAGISEL-NEXT: scratch_load_b32 v65, off, s32 offset:196 +; DAGISEL-NEXT: scratch_load_b32 v66, off, s32 offset:200 +; DAGISEL-NEXT: scratch_load_b32 v67, off, s32 offset:204 +; DAGISEL-NEXT: scratch_load_b32 v68, off, s32 offset:208 +; DAGISEL-NEXT: scratch_load_b32 v69, off, s32 offset:212 +; DAGISEL-NEXT: scratch_load_b32 v70, off, s32 offset:216 +; DAGISEL-NEXT: scratch_load_b32 v71, off, s32 offset:220 +; DAGISEL-NEXT: scratch_load_b32 v80, off, s32 offset:224 +; DAGISEL-NEXT: scratch_load_b32 v81, off, s32 offset:228 +; DAGISEL-NEXT: scratch_load_b32 v82, off, s32 offset:232 +; DAGISEL-NEXT: scratch_load_b32 v83, off, s32 offset:236 +; DAGISEL-NEXT: scratch_load_b32 v84, off, s32 offset:240 +; DAGISEL-NEXT: scratch_load_b32 v85, off, s32 offset:244 +; DAGISEL-NEXT: scratch_load_b32 v86, off, s32 offset:248 +; DAGISEL-NEXT: scratch_load_b32 v87, off, s32 offset:252 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_load_b32 v96, off, s32 offset:256 +; DAGISEL-NEXT: scratch_load_b32 v97, off, s32 offset:260 +; 
DAGISEL-NEXT: scratch_load_b32 v98, off, s32 offset:264 +; DAGISEL-NEXT: scratch_load_b32 v99, off, s32 offset:268 +; DAGISEL-NEXT: scratch_load_b32 v100, off, s32 offset:272 +; DAGISEL-NEXT: scratch_load_b32 v101, off, s32 offset:276 +; DAGISEL-NEXT: scratch_load_b32 v102, off, s32 offset:280 +; DAGISEL-NEXT: scratch_load_b32 v103, off, s32 offset:284 +; DAGISEL-NEXT: scratch_load_b32 v112, off, s32 offset:288 +; DAGISEL-NEXT: scratch_load_b32 v113, off, s32 offset:292 +; DAGISEL-NEXT: scratch_load_b32 v114, off, s32 offset:296 +; DAGISEL-NEXT: scratch_load_b32 v115, off, s32 offset:300 +; DAGISEL-NEXT: scratch_load_b32 v116, off, s32 offset:304 +; DAGISEL-NEXT: scratch_load_b32 v117, off, s32 offset:308 +; DAGISEL-NEXT: scratch_load_b32 v118, off, s32 offset:312 +; DAGISEL-NEXT: scratch_load_b32 v119, off, s32 offset:316 +; DAGISEL-NEXT: scratch_load_b32 v128, off, s32 offset:320 +; DAGISEL-NEXT: scratch_load_b32 v129, off, s32 offset:324 +; DAGISEL-NEXT: scratch_load_b32 v130, off, s32 offset:328 +; DAGISEL-NEXT: scratch_load_b32 v131, off, s32 offset:332 +; DAGISEL-NEXT: scratch_load_b32 v132, off, s32 offset:336 +; DAGISEL-NEXT: scratch_load_b32 v133, off, s32 offset:340 +; DAGISEL-NEXT: scratch_load_b32 v134, off, s32 offset:344 +; DAGISEL-NEXT: scratch_load_b32 v135, off, s32 offset:348 +; DAGISEL-NEXT: scratch_load_b32 v144, off, s32 offset:352 +; DAGISEL-NEXT: scratch_load_b32 v145, off, s32 offset:356 +; DAGISEL-NEXT: scratch_load_b32 v146, off, s32 offset:360 +; DAGISEL-NEXT: scratch_load_b32 v147, off, s32 offset:364 +; DAGISEL-NEXT: scratch_load_b32 v148, off, s32 offset:368 +; DAGISEL-NEXT: scratch_load_b32 v149, off, s32 offset:372 +; DAGISEL-NEXT: scratch_load_b32 v150, off, s32 offset:376 +; DAGISEL-NEXT: scratch_load_b32 v151, off, s32 offset:380 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_load_b32 v160, off, s32 offset:384 +; DAGISEL-NEXT: scratch_load_b32 v161, off, s32 offset:388 +; DAGISEL-NEXT: scratch_load_b32 v162, off, s32 
offset:392 +; DAGISEL-NEXT: scratch_load_b32 v163, off, s32 offset:396 +; DAGISEL-NEXT: scratch_load_b32 v164, off, s32 offset:400 +; DAGISEL-NEXT: scratch_load_b32 v165, off, s32 offset:404 +; DAGISEL-NEXT: scratch_load_b32 v166, off, s32 offset:408 +; DAGISEL-NEXT: scratch_load_b32 v167, off, s32 offset:412 +; DAGISEL-NEXT: scratch_load_b32 v176, off, s32 offset:416 +; DAGISEL-NEXT: scratch_load_b32 v177, off, s32 offset:420 +; DAGISEL-NEXT: scratch_load_b32 v178, off, s32 offset:424 +; DAGISEL-NEXT: scratch_load_b32 v179, off, s32 offset:428 +; DAGISEL-NEXT: scratch_load_b32 v180, off, s32 offset:432 +; DAGISEL-NEXT: scratch_load_b32 v181, off, s32 offset:436 +; DAGISEL-NEXT: scratch_load_b32 v182, off, s32 offset:440 +; DAGISEL-NEXT: scratch_load_b32 v183, off, s32 offset:444 +; DAGISEL-NEXT: scratch_load_b32 v192, off, s32 offset:448 +; DAGISEL-NEXT: scratch_load_b32 v193, off, s32 offset:452 +; DAGISEL-NEXT: scratch_load_b32 v194, off, s32 offset:456 +; DAGISEL-NEXT: scratch_load_b32 v195, off, s32 offset:460 +; DAGISEL-NEXT: scratch_load_b32 v196, off, s32 offset:464 +; DAGISEL-NEXT: scratch_load_b32 v197, off, s32 offset:468 +; DAGISEL-NEXT: scratch_load_b32 v198, off, s32 offset:472 +; DAGISEL-NEXT: scratch_load_b32 v199, off, s32 offset:476 +; DAGISEL-NEXT: scratch_load_b32 v208, off, s32 offset:480 +; DAGISEL-NEXT: scratch_load_b32 v209, off, s32 offset:484 +; DAGISEL-NEXT: scratch_load_b32 v210, off, s32 offset:488 +; DAGISEL-NEXT: scratch_load_b32 v211, off, s32 offset:492 +; DAGISEL-NEXT: scratch_load_b32 v212, off, s32 offset:496 +; DAGISEL-NEXT: scratch_load_b32 v213, off, s32 offset:500 +; DAGISEL-NEXT: scratch_load_b32 v214, off, s32 offset:504 +; DAGISEL-NEXT: scratch_load_b32 v215, off, s32 offset:508 +; DAGISEL-NEXT: s_clause 0xf +; DAGISEL-NEXT: scratch_load_b32 v224, off, s32 offset:512 +; DAGISEL-NEXT: scratch_load_b32 v225, off, s32 offset:516 +; DAGISEL-NEXT: scratch_load_b32 v226, off, s32 offset:520 +; DAGISEL-NEXT: scratch_load_b32 
v227, off, s32 offset:524 +; DAGISEL-NEXT: scratch_load_b32 v228, off, s32 offset:528 +; DAGISEL-NEXT: scratch_load_b32 v229, off, s32 offset:532 +; DAGISEL-NEXT: scratch_load_b32 v230, off, s32 offset:536 +; DAGISEL-NEXT: scratch_load_b32 v231, off, s32 offset:540 +; DAGISEL-NEXT: scratch_load_b32 v240, off, s32 offset:544 +; DAGISEL-NEXT: scratch_load_b32 v241, off, s32 offset:548 +; DAGISEL-NEXT: scratch_load_b32 v242, off, s32 offset:552 +; DAGISEL-NEXT: scratch_load_b32 v243, off, s32 offset:556 +; DAGISEL-NEXT: scratch_load_b32 v244, off, s32 offset:560 +; DAGISEL-NEXT: scratch_load_b32 v245, off, s32 offset:564 +; DAGISEL-NEXT: scratch_load_b32 v246, off, s32 offset:568 +; DAGISEL-NEXT: scratch_load_b32 v247, off, s32 offset:572 +; DAGISEL-NEXT: s_mov_b32 exec_lo, s0 +; DAGISEL-NEXT: s_setpc_b64 s[36:37] +; +; GISEL-LABEL: tail_call_from_whole_wave: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GISEL-NEXT: s_wait_expcnt 0x0 +; GISEL-NEXT: s_wait_samplecnt 0x0 +; GISEL-NEXT: s_wait_bvhcnt 0x0 +; GISEL-NEXT: s_wait_kmcnt 0x0 +; GISEL-NEXT: s_xor_saveexec_b32 s0, -1 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; GISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; GISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; GISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; GISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; GISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; GISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; GISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; GISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; GISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; GISEL-NEXT: scratch_store_b32 
off, v15, s32 offset:60 +; GISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; GISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; GISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; GISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; GISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; GISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; GISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; GISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; GISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; GISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; GISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; GISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; GISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; GISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; GISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; GISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; GISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; GISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; GISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; GISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; GISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; GISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; GISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; GISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; GISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; GISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; GISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; GISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; GISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; GISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; GISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; GISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; GISEL-NEXT: scratch_store_b32 off, 
v65, s32 offset:196 +; GISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; GISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; GISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; GISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; GISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; GISEL-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; GISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; GISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; GISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; GISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; GISEL-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; GISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; GISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; GISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; GISEL-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; GISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; GISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; GISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; GISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; GISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; GISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; GISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; GISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; GISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; GISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; GISEL-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; GISEL-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; GISEL-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; GISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; GISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; GISEL-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; GISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; GISEL-NEXT: 
scratch_store_b32 off, v131, s32 offset:332 +; GISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; GISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; GISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; GISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; GISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; GISEL-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; GISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; GISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; GISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; GISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; GISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; GISEL-NEXT: scratch_store_b32 off, v151, s32 offset:380 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; GISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; GISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; GISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; GISEL-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; GISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; GISEL-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; GISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; GISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; GISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; GISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; GISEL-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; GISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; GISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; GISEL-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; GISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; GISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; GISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; GISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; GISEL-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; GISEL-NEXT: scratch_store_b32 off, 
v196, s32 offset:464 +; GISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; GISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; GISEL-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; GISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; GISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; GISEL-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; GISEL-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; GISEL-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; GISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; GISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; GISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508 +; GISEL-NEXT: s_clause 0xf +; GISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; GISEL-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; GISEL-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; GISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; GISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; GISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; GISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; GISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; GISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; GISEL-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; GISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; GISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; GISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; GISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; GISEL-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; GISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572 +; GISEL-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 +; GISEL-NEXT: s_mov_b32 s36, good_callee@abs32@lo +; GISEL-NEXT: s_mov_b32 s37, good_callee@abs32@hi +; GISEL-NEXT: s_wait_alu 0xfffe +; GISEL-NEXT: s_xor_b32 exec_lo, s0, -1 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_load_b32 v0, off, s32 +; GISEL-NEXT: 
scratch_load_b32 v1, off, s32 offset:4 +; GISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8 +; GISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12 +; GISEL-NEXT: scratch_load_b32 v4, off, s32 offset:16 +; GISEL-NEXT: scratch_load_b32 v5, off, s32 offset:20 +; GISEL-NEXT: scratch_load_b32 v6, off, s32 offset:24 +; GISEL-NEXT: scratch_load_b32 v7, off, s32 offset:28 +; GISEL-NEXT: scratch_load_b32 v8, off, s32 offset:32 +; GISEL-NEXT: scratch_load_b32 v9, off, s32 offset:36 +; GISEL-NEXT: scratch_load_b32 v10, off, s32 offset:40 +; GISEL-NEXT: scratch_load_b32 v11, off, s32 offset:44 +; GISEL-NEXT: scratch_load_b32 v12, off, s32 offset:48 +; GISEL-NEXT: scratch_load_b32 v13, off, s32 offset:52 +; GISEL-NEXT: scratch_load_b32 v14, off, s32 offset:56 +; GISEL-NEXT: scratch_load_b32 v15, off, s32 offset:60 +; GISEL-NEXT: scratch_load_b32 v16, off, s32 offset:64 +; GISEL-NEXT: scratch_load_b32 v17, off, s32 offset:68 +; GISEL-NEXT: scratch_load_b32 v18, off, s32 offset:72 +; GISEL-NEXT: scratch_load_b32 v19, off, s32 offset:76 +; GISEL-NEXT: scratch_load_b32 v20, off, s32 offset:80 +; GISEL-NEXT: scratch_load_b32 v21, off, s32 offset:84 +; GISEL-NEXT: scratch_load_b32 v22, off, s32 offset:88 +; GISEL-NEXT: scratch_load_b32 v23, off, s32 offset:92 +; GISEL-NEXT: scratch_load_b32 v24, off, s32 offset:96 +; GISEL-NEXT: scratch_load_b32 v25, off, s32 offset:100 +; GISEL-NEXT: scratch_load_b32 v26, off, s32 offset:104 +; GISEL-NEXT: scratch_load_b32 v27, off, s32 offset:108 +; GISEL-NEXT: scratch_load_b32 v28, off, s32 offset:112 +; GISEL-NEXT: scratch_load_b32 v29, off, s32 offset:116 +; GISEL-NEXT: scratch_load_b32 v30, off, s32 offset:120 +; GISEL-NEXT: scratch_load_b32 v31, off, s32 offset:124 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_load_b32 v32, off, s32 offset:128 +; GISEL-NEXT: scratch_load_b32 v33, off, s32 offset:132 +; GISEL-NEXT: scratch_load_b32 v34, off, s32 offset:136 +; GISEL-NEXT: scratch_load_b32 v35, off, s32 offset:140 +; GISEL-NEXT: 
scratch_load_b32 v36, off, s32 offset:144 +; GISEL-NEXT: scratch_load_b32 v37, off, s32 offset:148 +; GISEL-NEXT: scratch_load_b32 v38, off, s32 offset:152 +; GISEL-NEXT: scratch_load_b32 v39, off, s32 offset:156 +; GISEL-NEXT: scratch_load_b32 v48, off, s32 offset:160 +; GISEL-NEXT: scratch_load_b32 v49, off, s32 offset:164 +; GISEL-NEXT: scratch_load_b32 v50, off, s32 offset:168 +; GISEL-NEXT: scratch_load_b32 v51, off, s32 offset:172 +; GISEL-NEXT: scratch_load_b32 v52, off, s32 offset:176 +; GISEL-NEXT: scratch_load_b32 v53, off, s32 offset:180 +; GISEL-NEXT: scratch_load_b32 v54, off, s32 offset:184 +; GISEL-NEXT: scratch_load_b32 v55, off, s32 offset:188 +; GISEL-NEXT: scratch_load_b32 v64, off, s32 offset:192 +; GISEL-NEXT: scratch_load_b32 v65, off, s32 offset:196 +; GISEL-NEXT: scratch_load_b32 v66, off, s32 offset:200 +; GISEL-NEXT: scratch_load_b32 v67, off, s32 offset:204 +; GISEL-NEXT: scratch_load_b32 v68, off, s32 offset:208 +; GISEL-NEXT: scratch_load_b32 v69, off, s32 offset:212 +; GISEL-NEXT: scratch_load_b32 v70, off, s32 offset:216 +; GISEL-NEXT: scratch_load_b32 v71, off, s32 offset:220 +; GISEL-NEXT: scratch_load_b32 v80, off, s32 offset:224 +; GISEL-NEXT: scratch_load_b32 v81, off, s32 offset:228 +; GISEL-NEXT: scratch_load_b32 v82, off, s32 offset:232 +; GISEL-NEXT: scratch_load_b32 v83, off, s32 offset:236 +; GISEL-NEXT: scratch_load_b32 v84, off, s32 offset:240 +; GISEL-NEXT: scratch_load_b32 v85, off, s32 offset:244 +; GISEL-NEXT: scratch_load_b32 v86, off, s32 offset:248 +; GISEL-NEXT: scratch_load_b32 v87, off, s32 offset:252 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_load_b32 v96, off, s32 offset:256 +; GISEL-NEXT: scratch_load_b32 v97, off, s32 offset:260 +; GISEL-NEXT: scratch_load_b32 v98, off, s32 offset:264 +; GISEL-NEXT: scratch_load_b32 v99, off, s32 offset:268 +; GISEL-NEXT: scratch_load_b32 v100, off, s32 offset:272 +; GISEL-NEXT: scratch_load_b32 v101, off, s32 offset:276 +; GISEL-NEXT: scratch_load_b32 v102, off, 
s32 offset:280 +; GISEL-NEXT: scratch_load_b32 v103, off, s32 offset:284 +; GISEL-NEXT: scratch_load_b32 v112, off, s32 offset:288 +; GISEL-NEXT: scratch_load_b32 v113, off, s32 offset:292 +; GISEL-NEXT: scratch_load_b32 v114, off, s32 offset:296 +; GISEL-NEXT: scratch_load_b32 v115, off, s32 offset:300 +; GISEL-NEXT: scratch_load_b32 v116, off, s32 offset:304 +; GISEL-NEXT: scratch_load_b32 v117, off, s32 offset:308 +; GISEL-NEXT: scratch_load_b32 v118, off, s32 offset:312 +; GISEL-NEXT: scratch_load_b32 v119, off, s32 offset:316 +; GISEL-NEXT: scratch_load_b32 v128, off, s32 offset:320 +; GISEL-NEXT: scratch_load_b32 v129, off, s32 offset:324 +; GISEL-NEXT: scratch_load_b32 v130, off, s32 offset:328 +; GISEL-NEXT: scratch_load_b32 v131, off, s32 offset:332 +; GISEL-NEXT: scratch_load_b32 v132, off, s32 offset:336 +; GISEL-NEXT: scratch_load_b32 v133, off, s32 offset:340 +; GISEL-NEXT: scratch_load_b32 v134, off, s32 offset:344 +; GISEL-NEXT: scratch_load_b32 v135, off, s32 offset:348 +; GISEL-NEXT: scratch_load_b32 v144, off, s32 offset:352 +; GISEL-NEXT: scratch_load_b32 v145, off, s32 offset:356 +; GISEL-NEXT: scratch_load_b32 v146, off, s32 offset:360 +; GISEL-NEXT: scratch_load_b32 v147, off, s32 offset:364 +; GISEL-NEXT: scratch_load_b32 v148, off, s32 offset:368 +; GISEL-NEXT: scratch_load_b32 v149, off, s32 offset:372 +; GISEL-NEXT: scratch_load_b32 v150, off, s32 offset:376 +; GISEL-NEXT: scratch_load_b32 v151, off, s32 offset:380 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_load_b32 v160, off, s32 offset:384 +; GISEL-NEXT: scratch_load_b32 v161, off, s32 offset:388 +; GISEL-NEXT: scratch_load_b32 v162, off, s32 offset:392 +; GISEL-NEXT: scratch_load_b32 v163, off, s32 offset:396 +; GISEL-NEXT: scratch_load_b32 v164, off, s32 offset:400 +; GISEL-NEXT: scratch_load_b32 v165, off, s32 offset:404 +; GISEL-NEXT: scratch_load_b32 v166, off, s32 offset:408 +; GISEL-NEXT: scratch_load_b32 v167, off, s32 offset:412 +; GISEL-NEXT: scratch_load_b32 v176, 
off, s32 offset:416 +; GISEL-NEXT: scratch_load_b32 v177, off, s32 offset:420 +; GISEL-NEXT: scratch_load_b32 v178, off, s32 offset:424 +; GISEL-NEXT: scratch_load_b32 v179, off, s32 offset:428 +; GISEL-NEXT: scratch_load_b32 v180, off, s32 offset:432 +; GISEL-NEXT: scratch_load_b32 v181, off, s32 offset:436 +; GISEL-NEXT: scratch_load_b32 v182, off, s32 offset:440 +; GISEL-NEXT: scratch_load_b32 v183, off, s32 offset:444 +; GISEL-NEXT: scratch_load_b32 v192, off, s32 offset:448 +; GISEL-NEXT: scratch_load_b32 v193, off, s32 offset:452 +; GISEL-NEXT: scratch_load_b32 v194, off, s32 offset:456 +; GISEL-NEXT: scratch_load_b32 v195, off, s32 offset:460 +; GISEL-NEXT: scratch_load_b32 v196, off, s32 offset:464 +; GISEL-NEXT: scratch_load_b32 v197, off, s32 offset:468 +; GISEL-NEXT: scratch_load_b32 v198, off, s32 offset:472 +; GISEL-NEXT: scratch_load_b32 v199, off, s32 offset:476 +; GISEL-NEXT: scratch_load_b32 v208, off, s32 offset:480 +; GISEL-NEXT: scratch_load_b32 v209, off, s32 offset:484 +; GISEL-NEXT: scratch_load_b32 v210, off, s32 offset:488 +; GISEL-NEXT: scratch_load_b32 v211, off, s32 offset:492 +; GISEL-NEXT: scratch_load_b32 v212, off, s32 offset:496 +; GISEL-NEXT: scratch_load_b32 v213, off, s32 offset:500 +; GISEL-NEXT: scratch_load_b32 v214, off, s32 offset:504 +; GISEL-NEXT: scratch_load_b32 v215, off, s32 offset:508 +; GISEL-NEXT: s_clause 0xf +; GISEL-NEXT: scratch_load_b32 v224, off, s32 offset:512 +; GISEL-NEXT: scratch_load_b32 v225, off, s32 offset:516 +; GISEL-NEXT: scratch_load_b32 v226, off, s32 offset:520 +; GISEL-NEXT: scratch_load_b32 v227, off, s32 offset:524 +; GISEL-NEXT: scratch_load_b32 v228, off, s32 offset:528 +; GISEL-NEXT: scratch_load_b32 v229, off, s32 offset:532 +; GISEL-NEXT: scratch_load_b32 v230, off, s32 offset:536 +; GISEL-NEXT: scratch_load_b32 v231, off, s32 offset:540 +; GISEL-NEXT: scratch_load_b32 v240, off, s32 offset:544 +; GISEL-NEXT: scratch_load_b32 v241, off, s32 offset:548 +; GISEL-NEXT: scratch_load_b32 v242, 
off, s32 offset:552 +; GISEL-NEXT: scratch_load_b32 v243, off, s32 offset:556 +; GISEL-NEXT: scratch_load_b32 v244, off, s32 offset:560 +; GISEL-NEXT: scratch_load_b32 v245, off, s32 offset:564 +; GISEL-NEXT: scratch_load_b32 v246, off, s32 offset:568 +; GISEL-NEXT: scratch_load_b32 v247, off, s32 offset:572 +; GISEL-NEXT: s_mov_b32 exec_lo, s0 +; GISEL-NEXT: s_setpc_b64 s[36:37] + %y = add i32 %x, 13 + %ret = tail call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @good_callee, i32 %x, i32 %y, i32 inreg %c) + ret i32 %ret +} + declare amdgpu_gfx_whole_wave void @void_callee(i1 %active, i32 %x) define amdgpu_gfx void @ret_void(i32 %x) { diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir b/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir index 93f489170cea0..adba762235d8c 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir @@ -33,7 +33,7 @@ body: | ; CHECK-NEXT: $exec_lo = S_XOR_B32 $sgpr0, -1, implicit-def $scc ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5) ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0 - ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit killed $vgpr0 renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 14, implicit $exec SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0 @@ -69,7 +69,7 @@ body: | ; CHECK-NEXT: $vgpr40 = V_MOV_B32_e32 14, implicit $exec ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0 - ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0 + ; CHECK-NEXT: SI_RETURN renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead 
$exec, implicit $exec $vgpr40 = V_MOV_B32_e32 14, implicit $exec SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0 @@ -110,7 +110,7 @@ body: | ; CHECK-NEXT: $exec_lo = S_XOR_B32 $vcc_lo, -1, implicit-def $scc ; CHECK-NEXT: $vgpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo - ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $vcc_lo + ; CHECK-NEXT: SI_RETURN $vgpr192 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr192 renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec $sgpr20 = S_MOV_B32 14, implicit $exec @@ -151,7 +151,7 @@ body: | ; CHECK-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr191, 0 ; CHECK-NEXT: $vgpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo - ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $vcc_lo + ; CHECK-NEXT: SI_RETURN $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191 renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec $sgpr20 = S_MOV_B32 14, implicit $exec @@ -207,7 +207,7 @@ body: | ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5) ; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo - ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $vcc_lo + ; CHECK-NEXT: SI_RETURN $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191 renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20 @@ -265,7 +265,7 @@ body: | ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: 
(load (s32) from %stack.0, addrspace 5) ; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5) ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr3 - ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr3 + ; CHECK-NEXT: SI_RETURN $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191 renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20 @@ -322,7 +322,7 @@ body: | ; CHECK-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) ; CHECK-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5) ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0 - ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit killed $vgpr0 renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 14, implicit $exec S_NOP 0, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5, implicit-def $vgpr40_vgpr41_vgpr42 @@ -363,7 +363,7 @@ body: | ; CHECK-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit $vgpr0, implicit $vgpr20, implicit $vgpr40 ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0 - ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit killed $vgpr0 renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec S_NOP 0, implicit $vgpr0, implicit $vgpr20, implicit $vgpr40 SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0 @@ -422,7 +422,7 @@ body: | ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: (load (s32) from 
%stack.0, addrspace 5) ; CHECK-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) ; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo - ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $vcc_lo, implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll index 3c8478c5a885b..9ca57c6a2a246 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll @@ -2413,6 +2413,1275 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ret <2 x half> %ret } +define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %active, <2 x half> %x, <2 x half> %y) { + ; This should not be turned into a tail call. +; DAGISEL-LABEL: tail_call_gfx_from_whole_wave: +; DAGISEL: ; %bb.0: +; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; DAGISEL-NEXT: s_wait_expcnt 0x0 +; DAGISEL-NEXT: s_wait_samplecnt 0x0 +; DAGISEL-NEXT: s_wait_bvhcnt 0x0 +; DAGISEL-NEXT: s_wait_kmcnt 0x0 +; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; DAGISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; DAGISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; DAGISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; DAGISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; DAGISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; DAGISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; DAGISEL-NEXT: scratch_store_b32 off, 
v12, s32 offset:48 +; DAGISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; DAGISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; DAGISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; DAGISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; DAGISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; DAGISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; DAGISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; DAGISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; DAGISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; DAGISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; DAGISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; DAGISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; DAGISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; DAGISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; DAGISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; DAGISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; DAGISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; DAGISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; DAGISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; DAGISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; DAGISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; DAGISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; DAGISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; DAGISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; DAGISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; DAGISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; DAGISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; DAGISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; DAGISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; DAGISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; DAGISEL-NEXT: scratch_store_b32 off, 
v53, s32 offset:180 +; DAGISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; DAGISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; DAGISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; DAGISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; DAGISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; DAGISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; DAGISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; DAGISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; DAGISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; DAGISEL-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; DAGISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; DAGISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; DAGISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; DAGISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; DAGISEL-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; DAGISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; DAGISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; DAGISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; DAGISEL-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; DAGISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; DAGISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; DAGISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; DAGISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; DAGISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; DAGISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; DAGISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; DAGISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; DAGISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; DAGISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; DAGISEL-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; DAGISEL-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; DAGISEL-NEXT: 
scratch_store_b32 off, v118, s32 offset:312 +; DAGISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; DAGISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; DAGISEL-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; DAGISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; DAGISEL-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; DAGISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; DAGISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; DAGISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; DAGISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; DAGISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; DAGISEL-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; DAGISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; DAGISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; DAGISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; DAGISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; DAGISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; DAGISEL-NEXT: scratch_store_b32 off, v151, s32 offset:380 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; DAGISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; DAGISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; DAGISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; DAGISEL-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; DAGISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; DAGISEL-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; DAGISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; DAGISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; DAGISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; DAGISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; DAGISEL-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; DAGISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; DAGISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; DAGISEL-NEXT: 
scratch_store_b32 off, v182, s32 offset:440 +; DAGISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; DAGISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; DAGISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; DAGISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; DAGISEL-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; DAGISEL-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; DAGISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; DAGISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; DAGISEL-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; DAGISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; DAGISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; DAGISEL-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; DAGISEL-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; DAGISEL-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; DAGISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; DAGISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; DAGISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508 +; DAGISEL-NEXT: s_clause 0xf +; DAGISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; DAGISEL-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; DAGISEL-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; DAGISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; DAGISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; DAGISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; DAGISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; DAGISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; DAGISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; DAGISEL-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; DAGISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; DAGISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; DAGISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; DAGISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; DAGISEL-NEXT: scratch_store_b32 
off, v246, s32 offset:568 +; DAGISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572 +; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-NEXT: v_mov_b32_e32 v2, v0 +; DAGISEL-NEXT: s_mov_b32 s37, gfx_callee@abs32@hi +; DAGISEL-NEXT: s_mov_b32 s36, gfx_callee@abs32@lo +; DAGISEL-NEXT: v_swap_b32 v0, v1 +; DAGISEL-NEXT: s_wait_alu 0xfffe +; DAGISEL-NEXT: s_xor_b32 exec_lo, s0, -1 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_load_b32 v0, off, s32 +; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4 +; DAGISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8 +; DAGISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12 +; DAGISEL-NEXT: scratch_load_b32 v4, off, s32 offset:16 +; DAGISEL-NEXT: scratch_load_b32 v5, off, s32 offset:20 +; DAGISEL-NEXT: scratch_load_b32 v6, off, s32 offset:24 +; DAGISEL-NEXT: scratch_load_b32 v7, off, s32 offset:28 +; DAGISEL-NEXT: scratch_load_b32 v8, off, s32 offset:32 +; DAGISEL-NEXT: scratch_load_b32 v9, off, s32 offset:36 +; DAGISEL-NEXT: scratch_load_b32 v10, off, s32 offset:40 +; DAGISEL-NEXT: scratch_load_b32 v11, off, s32 offset:44 +; DAGISEL-NEXT: scratch_load_b32 v12, off, s32 offset:48 +; DAGISEL-NEXT: scratch_load_b32 v13, off, s32 offset:52 +; DAGISEL-NEXT: scratch_load_b32 v14, off, s32 offset:56 +; DAGISEL-NEXT: scratch_load_b32 v15, off, s32 offset:60 +; DAGISEL-NEXT: scratch_load_b32 v16, off, s32 offset:64 +; DAGISEL-NEXT: scratch_load_b32 v17, off, s32 offset:68 +; DAGISEL-NEXT: scratch_load_b32 v18, off, s32 offset:72 +; DAGISEL-NEXT: scratch_load_b32 v19, off, s32 offset:76 +; DAGISEL-NEXT: scratch_load_b32 v20, off, s32 offset:80 +; DAGISEL-NEXT: scratch_load_b32 v21, off, s32 offset:84 +; DAGISEL-NEXT: scratch_load_b32 v22, off, s32 offset:88 +; DAGISEL-NEXT: scratch_load_b32 v23, off, s32 offset:92 +; DAGISEL-NEXT: scratch_load_b32 v24, off, s32 offset:96 +; DAGISEL-NEXT: scratch_load_b32 v25, off, s32 offset:100 +; DAGISEL-NEXT: scratch_load_b32 v26, off, s32 offset:104 +; DAGISEL-NEXT: scratch_load_b32 v27, 
off, s32 offset:108 +; DAGISEL-NEXT: scratch_load_b32 v28, off, s32 offset:112 +; DAGISEL-NEXT: scratch_load_b32 v29, off, s32 offset:116 +; DAGISEL-NEXT: scratch_load_b32 v30, off, s32 offset:120 +; DAGISEL-NEXT: scratch_load_b32 v31, off, s32 offset:124 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_load_b32 v32, off, s32 offset:128 +; DAGISEL-NEXT: scratch_load_b32 v33, off, s32 offset:132 +; DAGISEL-NEXT: scratch_load_b32 v34, off, s32 offset:136 +; DAGISEL-NEXT: scratch_load_b32 v35, off, s32 offset:140 +; DAGISEL-NEXT: scratch_load_b32 v36, off, s32 offset:144 +; DAGISEL-NEXT: scratch_load_b32 v37, off, s32 offset:148 +; DAGISEL-NEXT: scratch_load_b32 v38, off, s32 offset:152 +; DAGISEL-NEXT: scratch_load_b32 v39, off, s32 offset:156 +; DAGISEL-NEXT: scratch_load_b32 v48, off, s32 offset:160 +; DAGISEL-NEXT: scratch_load_b32 v49, off, s32 offset:164 +; DAGISEL-NEXT: scratch_load_b32 v50, off, s32 offset:168 +; DAGISEL-NEXT: scratch_load_b32 v51, off, s32 offset:172 +; DAGISEL-NEXT: scratch_load_b32 v52, off, s32 offset:176 +; DAGISEL-NEXT: scratch_load_b32 v53, off, s32 offset:180 +; DAGISEL-NEXT: scratch_load_b32 v54, off, s32 offset:184 +; DAGISEL-NEXT: scratch_load_b32 v55, off, s32 offset:188 +; DAGISEL-NEXT: scratch_load_b32 v64, off, s32 offset:192 +; DAGISEL-NEXT: scratch_load_b32 v65, off, s32 offset:196 +; DAGISEL-NEXT: scratch_load_b32 v66, off, s32 offset:200 +; DAGISEL-NEXT: scratch_load_b32 v67, off, s32 offset:204 +; DAGISEL-NEXT: scratch_load_b32 v68, off, s32 offset:208 +; DAGISEL-NEXT: scratch_load_b32 v69, off, s32 offset:212 +; DAGISEL-NEXT: scratch_load_b32 v70, off, s32 offset:216 +; DAGISEL-NEXT: scratch_load_b32 v71, off, s32 offset:220 +; DAGISEL-NEXT: scratch_load_b32 v80, off, s32 offset:224 +; DAGISEL-NEXT: scratch_load_b32 v81, off, s32 offset:228 +; DAGISEL-NEXT: scratch_load_b32 v82, off, s32 offset:232 +; DAGISEL-NEXT: scratch_load_b32 v83, off, s32 offset:236 +; DAGISEL-NEXT: scratch_load_b32 v84, off, s32 offset:240 
+; DAGISEL-NEXT: scratch_load_b32 v85, off, s32 offset:244 +; DAGISEL-NEXT: scratch_load_b32 v86, off, s32 offset:248 +; DAGISEL-NEXT: scratch_load_b32 v87, off, s32 offset:252 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_load_b32 v96, off, s32 offset:256 +; DAGISEL-NEXT: scratch_load_b32 v97, off, s32 offset:260 +; DAGISEL-NEXT: scratch_load_b32 v98, off, s32 offset:264 +; DAGISEL-NEXT: scratch_load_b32 v99, off, s32 offset:268 +; DAGISEL-NEXT: scratch_load_b32 v100, off, s32 offset:272 +; DAGISEL-NEXT: scratch_load_b32 v101, off, s32 offset:276 +; DAGISEL-NEXT: scratch_load_b32 v102, off, s32 offset:280 +; DAGISEL-NEXT: scratch_load_b32 v103, off, s32 offset:284 +; DAGISEL-NEXT: scratch_load_b32 v112, off, s32 offset:288 +; DAGISEL-NEXT: scratch_load_b32 v113, off, s32 offset:292 +; DAGISEL-NEXT: scratch_load_b32 v114, off, s32 offset:296 +; DAGISEL-NEXT: scratch_load_b32 v115, off, s32 offset:300 +; DAGISEL-NEXT: scratch_load_b32 v116, off, s32 offset:304 +; DAGISEL-NEXT: scratch_load_b32 v117, off, s32 offset:308 +; DAGISEL-NEXT: scratch_load_b32 v118, off, s32 offset:312 +; DAGISEL-NEXT: scratch_load_b32 v119, off, s32 offset:316 +; DAGISEL-NEXT: scratch_load_b32 v128, off, s32 offset:320 +; DAGISEL-NEXT: scratch_load_b32 v129, off, s32 offset:324 +; DAGISEL-NEXT: scratch_load_b32 v130, off, s32 offset:328 +; DAGISEL-NEXT: scratch_load_b32 v131, off, s32 offset:332 +; DAGISEL-NEXT: scratch_load_b32 v132, off, s32 offset:336 +; DAGISEL-NEXT: scratch_load_b32 v133, off, s32 offset:340 +; DAGISEL-NEXT: scratch_load_b32 v134, off, s32 offset:344 +; DAGISEL-NEXT: scratch_load_b32 v135, off, s32 offset:348 +; DAGISEL-NEXT: scratch_load_b32 v144, off, s32 offset:352 +; DAGISEL-NEXT: scratch_load_b32 v145, off, s32 offset:356 +; DAGISEL-NEXT: scratch_load_b32 v146, off, s32 offset:360 +; DAGISEL-NEXT: scratch_load_b32 v147, off, s32 offset:364 +; DAGISEL-NEXT: scratch_load_b32 v148, off, s32 offset:368 +; DAGISEL-NEXT: scratch_load_b32 v149, off, s32 
offset:372 +; DAGISEL-NEXT: scratch_load_b32 v150, off, s32 offset:376 +; DAGISEL-NEXT: scratch_load_b32 v151, off, s32 offset:380 +; DAGISEL-NEXT: s_clause 0x1f +; DAGISEL-NEXT: scratch_load_b32 v160, off, s32 offset:384 +; DAGISEL-NEXT: scratch_load_b32 v161, off, s32 offset:388 +; DAGISEL-NEXT: scratch_load_b32 v162, off, s32 offset:392 +; DAGISEL-NEXT: scratch_load_b32 v163, off, s32 offset:396 +; DAGISEL-NEXT: scratch_load_b32 v164, off, s32 offset:400 +; DAGISEL-NEXT: scratch_load_b32 v165, off, s32 offset:404 +; DAGISEL-NEXT: scratch_load_b32 v166, off, s32 offset:408 +; DAGISEL-NEXT: scratch_load_b32 v167, off, s32 offset:412 +; DAGISEL-NEXT: scratch_load_b32 v176, off, s32 offset:416 +; DAGISEL-NEXT: scratch_load_b32 v177, off, s32 offset:420 +; DAGISEL-NEXT: scratch_load_b32 v178, off, s32 offset:424 +; DAGISEL-NEXT: scratch_load_b32 v179, off, s32 offset:428 +; DAGISEL-NEXT: scratch_load_b32 v180, off, s32 offset:432 +; DAGISEL-NEXT: scratch_load_b32 v181, off, s32 offset:436 +; DAGISEL-NEXT: scratch_load_b32 v182, off, s32 offset:440 +; DAGISEL-NEXT: scratch_load_b32 v183, off, s32 offset:444 +; DAGISEL-NEXT: scratch_load_b32 v192, off, s32 offset:448 +; DAGISEL-NEXT: scratch_load_b32 v193, off, s32 offset:452 +; DAGISEL-NEXT: scratch_load_b32 v194, off, s32 offset:456 +; DAGISEL-NEXT: scratch_load_b32 v195, off, s32 offset:460 +; DAGISEL-NEXT: scratch_load_b32 v196, off, s32 offset:464 +; DAGISEL-NEXT: scratch_load_b32 v197, off, s32 offset:468 +; DAGISEL-NEXT: scratch_load_b32 v198, off, s32 offset:472 +; DAGISEL-NEXT: scratch_load_b32 v199, off, s32 offset:476 +; DAGISEL-NEXT: scratch_load_b32 v208, off, s32 offset:480 +; DAGISEL-NEXT: scratch_load_b32 v209, off, s32 offset:484 +; DAGISEL-NEXT: scratch_load_b32 v210, off, s32 offset:488 +; DAGISEL-NEXT: scratch_load_b32 v211, off, s32 offset:492 +; DAGISEL-NEXT: scratch_load_b32 v212, off, s32 offset:496 +; DAGISEL-NEXT: scratch_load_b32 v213, off, s32 offset:500 +; DAGISEL-NEXT: scratch_load_b32 
v214, off, s32 offset:504 +; DAGISEL-NEXT: scratch_load_b32 v215, off, s32 offset:508 +; DAGISEL-NEXT: s_clause 0xf +; DAGISEL-NEXT: scratch_load_b32 v224, off, s32 offset:512 +; DAGISEL-NEXT: scratch_load_b32 v225, off, s32 offset:516 +; DAGISEL-NEXT: scratch_load_b32 v226, off, s32 offset:520 +; DAGISEL-NEXT: scratch_load_b32 v227, off, s32 offset:524 +; DAGISEL-NEXT: scratch_load_b32 v228, off, s32 offset:528 +; DAGISEL-NEXT: scratch_load_b32 v229, off, s32 offset:532 +; DAGISEL-NEXT: scratch_load_b32 v230, off, s32 offset:536 +; DAGISEL-NEXT: scratch_load_b32 v231, off, s32 offset:540 +; DAGISEL-NEXT: scratch_load_b32 v240, off, s32 offset:544 +; DAGISEL-NEXT: scratch_load_b32 v241, off, s32 offset:548 +; DAGISEL-NEXT: scratch_load_b32 v242, off, s32 offset:552 +; DAGISEL-NEXT: scratch_load_b32 v243, off, s32 offset:556 +; DAGISEL-NEXT: scratch_load_b32 v244, off, s32 offset:560 +; DAGISEL-NEXT: scratch_load_b32 v245, off, s32 offset:564 +; DAGISEL-NEXT: scratch_load_b32 v246, off, s32 offset:568 +; DAGISEL-NEXT: scratch_load_b32 v247, off, s32 offset:572 +; DAGISEL-NEXT: s_mov_b32 exec_lo, s0 +; DAGISEL-NEXT: s_setpc_b64 s[36:37] +; +; GISEL-LABEL: tail_call_gfx_from_whole_wave: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GISEL-NEXT: s_wait_expcnt 0x0 +; GISEL-NEXT: s_wait_samplecnt 0x0 +; GISEL-NEXT: s_wait_bvhcnt 0x0 +; GISEL-NEXT: s_wait_kmcnt 0x0 +; GISEL-NEXT: s_xor_saveexec_b32 s0, -1 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; GISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; GISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; GISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; GISEL-NEXT: scratch_store_b32 off, v9, 
s32 offset:36 +; GISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; GISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; GISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; GISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; GISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; GISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; GISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; GISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; GISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; GISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; GISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; GISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; GISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; GISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; GISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; GISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; GISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; GISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; GISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; GISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; GISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; GISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; GISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; GISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; GISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; GISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; GISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; GISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; GISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; GISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; GISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; GISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; GISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172 
+; GISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; GISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; GISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; GISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; GISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; GISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; GISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; GISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; GISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; GISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; GISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; GISEL-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; GISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; GISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; GISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; GISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; GISEL-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; GISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; GISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; GISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; GISEL-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; GISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; GISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; GISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; GISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; GISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; GISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; GISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; GISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; GISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; GISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; GISEL-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; GISEL-NEXT: scratch_store_b32 off, v117, s32 
offset:308 +; GISEL-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; GISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; GISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; GISEL-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; GISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; GISEL-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; GISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; GISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; GISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; GISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; GISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; GISEL-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; GISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; GISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; GISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; GISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; GISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; GISEL-NEXT: scratch_store_b32 off, v151, s32 offset:380 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; GISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; GISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; GISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; GISEL-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; GISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; GISEL-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; GISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; GISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; GISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; GISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; GISEL-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; GISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; GISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; GISEL-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; 
GISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; GISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; GISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; GISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; GISEL-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; GISEL-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; GISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; GISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; GISEL-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; GISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; GISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; GISEL-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; GISEL-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; GISEL-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; GISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; GISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; GISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508 +; GISEL-NEXT: s_clause 0xf +; GISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; GISEL-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; GISEL-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; GISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; GISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; GISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; GISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; GISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; GISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; GISEL-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; GISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; GISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; GISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; GISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; GISEL-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; GISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572 +; GISEL-NEXT: s_mov_b32 
exec_lo, -1 +; GISEL-NEXT: v_mov_b32_e32 v2, v0 +; GISEL-NEXT: v_swap_b32 v0, v1 +; GISEL-NEXT: s_mov_b32 s36, gfx_callee@abs32@lo +; GISEL-NEXT: s_mov_b32 s37, gfx_callee@abs32@hi +; GISEL-NEXT: s_wait_alu 0xfffe +; GISEL-NEXT: s_xor_b32 exec_lo, s0, -1 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_load_b32 v0, off, s32 +; GISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4 +; GISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8 +; GISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12 +; GISEL-NEXT: scratch_load_b32 v4, off, s32 offset:16 +; GISEL-NEXT: scratch_load_b32 v5, off, s32 offset:20 +; GISEL-NEXT: scratch_load_b32 v6, off, s32 offset:24 +; GISEL-NEXT: scratch_load_b32 v7, off, s32 offset:28 +; GISEL-NEXT: scratch_load_b32 v8, off, s32 offset:32 +; GISEL-NEXT: scratch_load_b32 v9, off, s32 offset:36 +; GISEL-NEXT: scratch_load_b32 v10, off, s32 offset:40 +; GISEL-NEXT: scratch_load_b32 v11, off, s32 offset:44 +; GISEL-NEXT: scratch_load_b32 v12, off, s32 offset:48 +; GISEL-NEXT: scratch_load_b32 v13, off, s32 offset:52 +; GISEL-NEXT: scratch_load_b32 v14, off, s32 offset:56 +; GISEL-NEXT: scratch_load_b32 v15, off, s32 offset:60 +; GISEL-NEXT: scratch_load_b32 v16, off, s32 offset:64 +; GISEL-NEXT: scratch_load_b32 v17, off, s32 offset:68 +; GISEL-NEXT: scratch_load_b32 v18, off, s32 offset:72 +; GISEL-NEXT: scratch_load_b32 v19, off, s32 offset:76 +; GISEL-NEXT: scratch_load_b32 v20, off, s32 offset:80 +; GISEL-NEXT: scratch_load_b32 v21, off, s32 offset:84 +; GISEL-NEXT: scratch_load_b32 v22, off, s32 offset:88 +; GISEL-NEXT: scratch_load_b32 v23, off, s32 offset:92 +; GISEL-NEXT: scratch_load_b32 v24, off, s32 offset:96 +; GISEL-NEXT: scratch_load_b32 v25, off, s32 offset:100 +; GISEL-NEXT: scratch_load_b32 v26, off, s32 offset:104 +; GISEL-NEXT: scratch_load_b32 v27, off, s32 offset:108 +; GISEL-NEXT: scratch_load_b32 v28, off, s32 offset:112 +; GISEL-NEXT: scratch_load_b32 v29, off, s32 offset:116 +; GISEL-NEXT: scratch_load_b32 v30, off, s32 
offset:120 +; GISEL-NEXT: scratch_load_b32 v31, off, s32 offset:124 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_load_b32 v32, off, s32 offset:128 +; GISEL-NEXT: scratch_load_b32 v33, off, s32 offset:132 +; GISEL-NEXT: scratch_load_b32 v34, off, s32 offset:136 +; GISEL-NEXT: scratch_load_b32 v35, off, s32 offset:140 +; GISEL-NEXT: scratch_load_b32 v36, off, s32 offset:144 +; GISEL-NEXT: scratch_load_b32 v37, off, s32 offset:148 +; GISEL-NEXT: scratch_load_b32 v38, off, s32 offset:152 +; GISEL-NEXT: scratch_load_b32 v39, off, s32 offset:156 +; GISEL-NEXT: scratch_load_b32 v48, off, s32 offset:160 +; GISEL-NEXT: scratch_load_b32 v49, off, s32 offset:164 +; GISEL-NEXT: scratch_load_b32 v50, off, s32 offset:168 +; GISEL-NEXT: scratch_load_b32 v51, off, s32 offset:172 +; GISEL-NEXT: scratch_load_b32 v52, off, s32 offset:176 +; GISEL-NEXT: scratch_load_b32 v53, off, s32 offset:180 +; GISEL-NEXT: scratch_load_b32 v54, off, s32 offset:184 +; GISEL-NEXT: scratch_load_b32 v55, off, s32 offset:188 +; GISEL-NEXT: scratch_load_b32 v64, off, s32 offset:192 +; GISEL-NEXT: scratch_load_b32 v65, off, s32 offset:196 +; GISEL-NEXT: scratch_load_b32 v66, off, s32 offset:200 +; GISEL-NEXT: scratch_load_b32 v67, off, s32 offset:204 +; GISEL-NEXT: scratch_load_b32 v68, off, s32 offset:208 +; GISEL-NEXT: scratch_load_b32 v69, off, s32 offset:212 +; GISEL-NEXT: scratch_load_b32 v70, off, s32 offset:216 +; GISEL-NEXT: scratch_load_b32 v71, off, s32 offset:220 +; GISEL-NEXT: scratch_load_b32 v80, off, s32 offset:224 +; GISEL-NEXT: scratch_load_b32 v81, off, s32 offset:228 +; GISEL-NEXT: scratch_load_b32 v82, off, s32 offset:232 +; GISEL-NEXT: scratch_load_b32 v83, off, s32 offset:236 +; GISEL-NEXT: scratch_load_b32 v84, off, s32 offset:240 +; GISEL-NEXT: scratch_load_b32 v85, off, s32 offset:244 +; GISEL-NEXT: scratch_load_b32 v86, off, s32 offset:248 +; GISEL-NEXT: scratch_load_b32 v87, off, s32 offset:252 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_load_b32 v96, off, s32 
offset:256 +; GISEL-NEXT: scratch_load_b32 v97, off, s32 offset:260 +; GISEL-NEXT: scratch_load_b32 v98, off, s32 offset:264 +; GISEL-NEXT: scratch_load_b32 v99, off, s32 offset:268 +; GISEL-NEXT: scratch_load_b32 v100, off, s32 offset:272 +; GISEL-NEXT: scratch_load_b32 v101, off, s32 offset:276 +; GISEL-NEXT: scratch_load_b32 v102, off, s32 offset:280 +; GISEL-NEXT: scratch_load_b32 v103, off, s32 offset:284 +; GISEL-NEXT: scratch_load_b32 v112, off, s32 offset:288 +; GISEL-NEXT: scratch_load_b32 v113, off, s32 offset:292 +; GISEL-NEXT: scratch_load_b32 v114, off, s32 offset:296 +; GISEL-NEXT: scratch_load_b32 v115, off, s32 offset:300 +; GISEL-NEXT: scratch_load_b32 v116, off, s32 offset:304 +; GISEL-NEXT: scratch_load_b32 v117, off, s32 offset:308 +; GISEL-NEXT: scratch_load_b32 v118, off, s32 offset:312 +; GISEL-NEXT: scratch_load_b32 v119, off, s32 offset:316 +; GISEL-NEXT: scratch_load_b32 v128, off, s32 offset:320 +; GISEL-NEXT: scratch_load_b32 v129, off, s32 offset:324 +; GISEL-NEXT: scratch_load_b32 v130, off, s32 offset:328 +; GISEL-NEXT: scratch_load_b32 v131, off, s32 offset:332 +; GISEL-NEXT: scratch_load_b32 v132, off, s32 offset:336 +; GISEL-NEXT: scratch_load_b32 v133, off, s32 offset:340 +; GISEL-NEXT: scratch_load_b32 v134, off, s32 offset:344 +; GISEL-NEXT: scratch_load_b32 v135, off, s32 offset:348 +; GISEL-NEXT: scratch_load_b32 v144, off, s32 offset:352 +; GISEL-NEXT: scratch_load_b32 v145, off, s32 offset:356 +; GISEL-NEXT: scratch_load_b32 v146, off, s32 offset:360 +; GISEL-NEXT: scratch_load_b32 v147, off, s32 offset:364 +; GISEL-NEXT: scratch_load_b32 v148, off, s32 offset:368 +; GISEL-NEXT: scratch_load_b32 v149, off, s32 offset:372 +; GISEL-NEXT: scratch_load_b32 v150, off, s32 offset:376 +; GISEL-NEXT: scratch_load_b32 v151, off, s32 offset:380 +; GISEL-NEXT: s_clause 0x1f +; GISEL-NEXT: scratch_load_b32 v160, off, s32 offset:384 +; GISEL-NEXT: scratch_load_b32 v161, off, s32 offset:388 +; GISEL-NEXT: scratch_load_b32 v162, off, s32 
offset:392 +; GISEL-NEXT: scratch_load_b32 v163, off, s32 offset:396 +; GISEL-NEXT: scratch_load_b32 v164, off, s32 offset:400 +; GISEL-NEXT: scratch_load_b32 v165, off, s32 offset:404 +; GISEL-NEXT: scratch_load_b32 v166, off, s32 offset:408 +; GISEL-NEXT: scratch_load_b32 v167, off, s32 offset:412 +; GISEL-NEXT: scratch_load_b32 v176, off, s32 offset:416 +; GISEL-NEXT: scratch_load_b32 v177, off, s32 offset:420 +; GISEL-NEXT: scratch_load_b32 v178, off, s32 offset:424 +; GISEL-NEXT: scratch_load_b32 v179, off, s32 offset:428 +; GISEL-NEXT: scratch_load_b32 v180, off, s32 offset:432 +; GISEL-NEXT: scratch_load_b32 v181, off, s32 offset:436 +; GISEL-NEXT: scratch_load_b32 v182, off, s32 offset:440 +; GISEL-NEXT: scratch_load_b32 v183, off, s32 offset:444 +; GISEL-NEXT: scratch_load_b32 v192, off, s32 offset:448 +; GISEL-NEXT: scratch_load_b32 v193, off, s32 offset:452 +; GISEL-NEXT: scratch_load_b32 v194, off, s32 offset:456 +; GISEL-NEXT: scratch_load_b32 v195, off, s32 offset:460 +; GISEL-NEXT: scratch_load_b32 v196, off, s32 offset:464 +; GISEL-NEXT: scratch_load_b32 v197, off, s32 offset:468 +; GISEL-NEXT: scratch_load_b32 v198, off, s32 offset:472 +; GISEL-NEXT: scratch_load_b32 v199, off, s32 offset:476 +; GISEL-NEXT: scratch_load_b32 v208, off, s32 offset:480 +; GISEL-NEXT: scratch_load_b32 v209, off, s32 offset:484 +; GISEL-NEXT: scratch_load_b32 v210, off, s32 offset:488 +; GISEL-NEXT: scratch_load_b32 v211, off, s32 offset:492 +; GISEL-NEXT: scratch_load_b32 v212, off, s32 offset:496 +; GISEL-NEXT: scratch_load_b32 v213, off, s32 offset:500 +; GISEL-NEXT: scratch_load_b32 v214, off, s32 offset:504 +; GISEL-NEXT: scratch_load_b32 v215, off, s32 offset:508 +; GISEL-NEXT: s_clause 0xf +; GISEL-NEXT: scratch_load_b32 v224, off, s32 offset:512 +; GISEL-NEXT: scratch_load_b32 v225, off, s32 offset:516 +; GISEL-NEXT: scratch_load_b32 v226, off, s32 offset:520 +; GISEL-NEXT: scratch_load_b32 v227, off, s32 offset:524 +; GISEL-NEXT: scratch_load_b32 v228, off, s32 
offset:528 +; GISEL-NEXT: scratch_load_b32 v229, off, s32 offset:532 +; GISEL-NEXT: scratch_load_b32 v230, off, s32 offset:536 +; GISEL-NEXT: scratch_load_b32 v231, off, s32 offset:540 +; GISEL-NEXT: scratch_load_b32 v240, off, s32 offset:544 +; GISEL-NEXT: scratch_load_b32 v241, off, s32 offset:548 +; GISEL-NEXT: scratch_load_b32 v242, off, s32 offset:552 +; GISEL-NEXT: scratch_load_b32 v243, off, s32 offset:556 +; GISEL-NEXT: scratch_load_b32 v244, off, s32 offset:560 +; GISEL-NEXT: scratch_load_b32 v245, off, s32 offset:564 +; GISEL-NEXT: scratch_load_b32 v246, off, s32 offset:568 +; GISEL-NEXT: scratch_load_b32 v247, off, s32 offset:572 +; GISEL-NEXT: s_mov_b32 exec_lo, s0 +; GISEL-NEXT: s_setpc_b64 s[36:37] +; +; DAGISEL64-LABEL: tail_call_gfx_from_whole_wave: +; DAGISEL64: ; %bb.0: +; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 +; DAGISEL64-NEXT: s_wait_expcnt 0x0 +; DAGISEL64-NEXT: s_wait_samplecnt 0x0 +; DAGISEL64-NEXT: s_wait_bvhcnt 0x0 +; DAGISEL64-NEXT: s_wait_kmcnt 0x0 +; DAGISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; DAGISEL64-NEXT: s_clause 0x1f +; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; DAGISEL64-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; DAGISEL64-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; DAGISEL64-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; DAGISEL64-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; DAGISEL64-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; DAGISEL64-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; DAGISEL64-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; DAGISEL64-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; DAGISEL64-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; DAGISEL64-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; DAGISEL64-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; DAGISEL64-NEXT: 
scratch_store_b32 off, v15, s32 offset:60 +; DAGISEL64-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; DAGISEL64-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; DAGISEL64-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; DAGISEL64-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; DAGISEL64-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; DAGISEL64-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; DAGISEL64-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; DAGISEL64-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; DAGISEL64-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; DAGISEL64-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; DAGISEL64-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; DAGISEL64-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; DAGISEL64-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; DAGISEL64-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; DAGISEL64-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; DAGISEL64-NEXT: scratch_store_b32 off, v31, s32 offset:124 +; DAGISEL64-NEXT: s_clause 0x1f +; DAGISEL64-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; DAGISEL64-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; DAGISEL64-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; DAGISEL64-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; DAGISEL64-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; DAGISEL64-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; DAGISEL64-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; DAGISEL64-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; DAGISEL64-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; DAGISEL64-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; DAGISEL64-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; DAGISEL64-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; DAGISEL64-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; DAGISEL64-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; DAGISEL64-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; 
DAGISEL64-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; DAGISEL64-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; DAGISEL64-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; DAGISEL64-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; DAGISEL64-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; DAGISEL64-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; DAGISEL64-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; DAGISEL64-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; DAGISEL64-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; DAGISEL64-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; DAGISEL64-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; DAGISEL64-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; DAGISEL64-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; DAGISEL64-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; DAGISEL64-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; DAGISEL64-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; DAGISEL64-NEXT: scratch_store_b32 off, v87, s32 offset:252 +; DAGISEL64-NEXT: s_clause 0x1f +; DAGISEL64-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; DAGISEL64-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; DAGISEL64-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; DAGISEL64-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; DAGISEL64-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; DAGISEL64-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; DAGISEL64-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; DAGISEL64-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; DAGISEL64-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; DAGISEL64-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; DAGISEL64-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; DAGISEL64-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; DAGISEL64-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; DAGISEL64-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; DAGISEL64-NEXT: scratch_store_b32 
off, v118, s32 offset:312 +; DAGISEL64-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; DAGISEL64-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; DAGISEL64-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; DAGISEL64-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; DAGISEL64-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; DAGISEL64-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; DAGISEL64-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; DAGISEL64-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; DAGISEL64-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; DAGISEL64-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; DAGISEL64-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; DAGISEL64-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; DAGISEL64-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; DAGISEL64-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; DAGISEL64-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; DAGISEL64-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; DAGISEL64-NEXT: scratch_store_b32 off, v151, s32 offset:380 +; DAGISEL64-NEXT: s_clause 0x1f +; DAGISEL64-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; DAGISEL64-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; DAGISEL64-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; DAGISEL64-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; DAGISEL64-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; DAGISEL64-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; DAGISEL64-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; DAGISEL64-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; DAGISEL64-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; DAGISEL64-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; DAGISEL64-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; DAGISEL64-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; DAGISEL64-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; DAGISEL64-NEXT: scratch_store_b32 off, v181, 
s32 offset:436 +; DAGISEL64-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; DAGISEL64-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; DAGISEL64-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; DAGISEL64-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; DAGISEL64-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; DAGISEL64-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; DAGISEL64-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; DAGISEL64-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; DAGISEL64-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; DAGISEL64-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; DAGISEL64-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; DAGISEL64-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; DAGISEL64-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; DAGISEL64-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; DAGISEL64-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; DAGISEL64-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; DAGISEL64-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; DAGISEL64-NEXT: scratch_store_b32 off, v215, s32 offset:508 +; DAGISEL64-NEXT: s_clause 0xf +; DAGISEL64-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; DAGISEL64-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; DAGISEL64-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; DAGISEL64-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; DAGISEL64-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; DAGISEL64-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; DAGISEL64-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; DAGISEL64-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; DAGISEL64-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; DAGISEL64-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; DAGISEL64-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; DAGISEL64-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; DAGISEL64-NEXT: scratch_store_b32 off, v244, s32 offset:560 
+; DAGISEL64-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; DAGISEL64-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; DAGISEL64-NEXT: scratch_store_b32 off, v247, s32 offset:572 +; DAGISEL64-NEXT: s_mov_b64 exec, -1 +; DAGISEL64-NEXT: v_mov_b32_e32 v2, v0 +; DAGISEL64-NEXT: s_mov_b32 s37, gfx_callee@abs32@hi +; DAGISEL64-NEXT: s_mov_b32 s36, gfx_callee@abs32@lo +; DAGISEL64-NEXT: v_swap_b32 v0, v1 +; DAGISEL64-NEXT: s_wait_alu 0xfffe +; DAGISEL64-NEXT: s_xor_b64 exec, s[0:1], -1 +; DAGISEL64-NEXT: s_clause 0x1f +; DAGISEL64-NEXT: scratch_load_b32 v0, off, s32 +; DAGISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4 +; DAGISEL64-NEXT: scratch_load_b32 v2, off, s32 offset:8 +; DAGISEL64-NEXT: scratch_load_b32 v3, off, s32 offset:12 +; DAGISEL64-NEXT: scratch_load_b32 v4, off, s32 offset:16 +; DAGISEL64-NEXT: scratch_load_b32 v5, off, s32 offset:20 +; DAGISEL64-NEXT: scratch_load_b32 v6, off, s32 offset:24 +; DAGISEL64-NEXT: scratch_load_b32 v7, off, s32 offset:28 +; DAGISEL64-NEXT: scratch_load_b32 v8, off, s32 offset:32 +; DAGISEL64-NEXT: scratch_load_b32 v9, off, s32 offset:36 +; DAGISEL64-NEXT: scratch_load_b32 v10, off, s32 offset:40 +; DAGISEL64-NEXT: scratch_load_b32 v11, off, s32 offset:44 +; DAGISEL64-NEXT: scratch_load_b32 v12, off, s32 offset:48 +; DAGISEL64-NEXT: scratch_load_b32 v13, off, s32 offset:52 +; DAGISEL64-NEXT: scratch_load_b32 v14, off, s32 offset:56 +; DAGISEL64-NEXT: scratch_load_b32 v15, off, s32 offset:60 +; DAGISEL64-NEXT: scratch_load_b32 v16, off, s32 offset:64 +; DAGISEL64-NEXT: scratch_load_b32 v17, off, s32 offset:68 +; DAGISEL64-NEXT: scratch_load_b32 v18, off, s32 offset:72 +; DAGISEL64-NEXT: scratch_load_b32 v19, off, s32 offset:76 +; DAGISEL64-NEXT: scratch_load_b32 v20, off, s32 offset:80 +; DAGISEL64-NEXT: scratch_load_b32 v21, off, s32 offset:84 +; DAGISEL64-NEXT: scratch_load_b32 v22, off, s32 offset:88 +; DAGISEL64-NEXT: scratch_load_b32 v23, off, s32 offset:92 +; DAGISEL64-NEXT: scratch_load_b32 v24, off, s32 
offset:96 +; DAGISEL64-NEXT: scratch_load_b32 v25, off, s32 offset:100 +; DAGISEL64-NEXT: scratch_load_b32 v26, off, s32 offset:104 +; DAGISEL64-NEXT: scratch_load_b32 v27, off, s32 offset:108 +; DAGISEL64-NEXT: scratch_load_b32 v28, off, s32 offset:112 +; DAGISEL64-NEXT: scratch_load_b32 v29, off, s32 offset:116 +; DAGISEL64-NEXT: scratch_load_b32 v30, off, s32 offset:120 +; DAGISEL64-NEXT: scratch_load_b32 v31, off, s32 offset:124 +; DAGISEL64-NEXT: s_clause 0x1f +; DAGISEL64-NEXT: scratch_load_b32 v32, off, s32 offset:128 +; DAGISEL64-NEXT: scratch_load_b32 v33, off, s32 offset:132 +; DAGISEL64-NEXT: scratch_load_b32 v34, off, s32 offset:136 +; DAGISEL64-NEXT: scratch_load_b32 v35, off, s32 offset:140 +; DAGISEL64-NEXT: scratch_load_b32 v36, off, s32 offset:144 +; DAGISEL64-NEXT: scratch_load_b32 v37, off, s32 offset:148 +; DAGISEL64-NEXT: scratch_load_b32 v38, off, s32 offset:152 +; DAGISEL64-NEXT: scratch_load_b32 v39, off, s32 offset:156 +; DAGISEL64-NEXT: scratch_load_b32 v48, off, s32 offset:160 +; DAGISEL64-NEXT: scratch_load_b32 v49, off, s32 offset:164 +; DAGISEL64-NEXT: scratch_load_b32 v50, off, s32 offset:168 +; DAGISEL64-NEXT: scratch_load_b32 v51, off, s32 offset:172 +; DAGISEL64-NEXT: scratch_load_b32 v52, off, s32 offset:176 +; DAGISEL64-NEXT: scratch_load_b32 v53, off, s32 offset:180 +; DAGISEL64-NEXT: scratch_load_b32 v54, off, s32 offset:184 +; DAGISEL64-NEXT: scratch_load_b32 v55, off, s32 offset:188 +; DAGISEL64-NEXT: scratch_load_b32 v64, off, s32 offset:192 +; DAGISEL64-NEXT: scratch_load_b32 v65, off, s32 offset:196 +; DAGISEL64-NEXT: scratch_load_b32 v66, off, s32 offset:200 +; DAGISEL64-NEXT: scratch_load_b32 v67, off, s32 offset:204 +; DAGISEL64-NEXT: scratch_load_b32 v68, off, s32 offset:208 +; DAGISEL64-NEXT: scratch_load_b32 v69, off, s32 offset:212 +; DAGISEL64-NEXT: scratch_load_b32 v70, off, s32 offset:216 +; DAGISEL64-NEXT: scratch_load_b32 v71, off, s32 offset:220 +; DAGISEL64-NEXT: scratch_load_b32 v80, off, s32 offset:224 +; 
DAGISEL64-NEXT: scratch_load_b32 v81, off, s32 offset:228 +; DAGISEL64-NEXT: scratch_load_b32 v82, off, s32 offset:232 +; DAGISEL64-NEXT: scratch_load_b32 v83, off, s32 offset:236 +; DAGISEL64-NEXT: scratch_load_b32 v84, off, s32 offset:240 +; DAGISEL64-NEXT: scratch_load_b32 v85, off, s32 offset:244 +; DAGISEL64-NEXT: scratch_load_b32 v86, off, s32 offset:248 +; DAGISEL64-NEXT: scratch_load_b32 v87, off, s32 offset:252 +; DAGISEL64-NEXT: s_clause 0x1f +; DAGISEL64-NEXT: scratch_load_b32 v96, off, s32 offset:256 +; DAGISEL64-NEXT: scratch_load_b32 v97, off, s32 offset:260 +; DAGISEL64-NEXT: scratch_load_b32 v98, off, s32 offset:264 +; DAGISEL64-NEXT: scratch_load_b32 v99, off, s32 offset:268 +; DAGISEL64-NEXT: scratch_load_b32 v100, off, s32 offset:272 +; DAGISEL64-NEXT: scratch_load_b32 v101, off, s32 offset:276 +; DAGISEL64-NEXT: scratch_load_b32 v102, off, s32 offset:280 +; DAGISEL64-NEXT: scratch_load_b32 v103, off, s32 offset:284 +; DAGISEL64-NEXT: scratch_load_b32 v112, off, s32 offset:288 +; DAGISEL64-NEXT: scratch_load_b32 v113, off, s32 offset:292 +; DAGISEL64-NEXT: scratch_load_b32 v114, off, s32 offset:296 +; DAGISEL64-NEXT: scratch_load_b32 v115, off, s32 offset:300 +; DAGISEL64-NEXT: scratch_load_b32 v116, off, s32 offset:304 +; DAGISEL64-NEXT: scratch_load_b32 v117, off, s32 offset:308 +; DAGISEL64-NEXT: scratch_load_b32 v118, off, s32 offset:312 +; DAGISEL64-NEXT: scratch_load_b32 v119, off, s32 offset:316 +; DAGISEL64-NEXT: scratch_load_b32 v128, off, s32 offset:320 +; DAGISEL64-NEXT: scratch_load_b32 v129, off, s32 offset:324 +; DAGISEL64-NEXT: scratch_load_b32 v130, off, s32 offset:328 +; DAGISEL64-NEXT: scratch_load_b32 v131, off, s32 offset:332 +; DAGISEL64-NEXT: scratch_load_b32 v132, off, s32 offset:336 +; DAGISEL64-NEXT: scratch_load_b32 v133, off, s32 offset:340 +; DAGISEL64-NEXT: scratch_load_b32 v134, off, s32 offset:344 +; DAGISEL64-NEXT: scratch_load_b32 v135, off, s32 offset:348 +; DAGISEL64-NEXT: scratch_load_b32 v144, off, s32 
offset:352 +; DAGISEL64-NEXT: scratch_load_b32 v145, off, s32 offset:356 +; DAGISEL64-NEXT: scratch_load_b32 v146, off, s32 offset:360 +; DAGISEL64-NEXT: scratch_load_b32 v147, off, s32 offset:364 +; DAGISEL64-NEXT: scratch_load_b32 v148, off, s32 offset:368 +; DAGISEL64-NEXT: scratch_load_b32 v149, off, s32 offset:372 +; DAGISEL64-NEXT: scratch_load_b32 v150, off, s32 offset:376 +; DAGISEL64-NEXT: scratch_load_b32 v151, off, s32 offset:380 +; DAGISEL64-NEXT: s_clause 0x1f +; DAGISEL64-NEXT: scratch_load_b32 v160, off, s32 offset:384 +; DAGISEL64-NEXT: scratch_load_b32 v161, off, s32 offset:388 +; DAGISEL64-NEXT: scratch_load_b32 v162, off, s32 offset:392 +; DAGISEL64-NEXT: scratch_load_b32 v163, off, s32 offset:396 +; DAGISEL64-NEXT: scratch_load_b32 v164, off, s32 offset:400 +; DAGISEL64-NEXT: scratch_load_b32 v165, off, s32 offset:404 +; DAGISEL64-NEXT: scratch_load_b32 v166, off, s32 offset:408 +; DAGISEL64-NEXT: scratch_load_b32 v167, off, s32 offset:412 +; DAGISEL64-NEXT: scratch_load_b32 v176, off, s32 offset:416 +; DAGISEL64-NEXT: scratch_load_b32 v177, off, s32 offset:420 +; DAGISEL64-NEXT: scratch_load_b32 v178, off, s32 offset:424 +; DAGISEL64-NEXT: scratch_load_b32 v179, off, s32 offset:428 +; DAGISEL64-NEXT: scratch_load_b32 v180, off, s32 offset:432 +; DAGISEL64-NEXT: scratch_load_b32 v181, off, s32 offset:436 +; DAGISEL64-NEXT: scratch_load_b32 v182, off, s32 offset:440 +; DAGISEL64-NEXT: scratch_load_b32 v183, off, s32 offset:444 +; DAGISEL64-NEXT: scratch_load_b32 v192, off, s32 offset:448 +; DAGISEL64-NEXT: scratch_load_b32 v193, off, s32 offset:452 +; DAGISEL64-NEXT: scratch_load_b32 v194, off, s32 offset:456 +; DAGISEL64-NEXT: scratch_load_b32 v195, off, s32 offset:460 +; DAGISEL64-NEXT: scratch_load_b32 v196, off, s32 offset:464 +; DAGISEL64-NEXT: scratch_load_b32 v197, off, s32 offset:468 +; DAGISEL64-NEXT: scratch_load_b32 v198, off, s32 offset:472 +; DAGISEL64-NEXT: scratch_load_b32 v199, off, s32 offset:476 +; DAGISEL64-NEXT: 
scratch_load_b32 v208, off, s32 offset:480 +; DAGISEL64-NEXT: scratch_load_b32 v209, off, s32 offset:484 +; DAGISEL64-NEXT: scratch_load_b32 v210, off, s32 offset:488 +; DAGISEL64-NEXT: scratch_load_b32 v211, off, s32 offset:492 +; DAGISEL64-NEXT: scratch_load_b32 v212, off, s32 offset:496 +; DAGISEL64-NEXT: scratch_load_b32 v213, off, s32 offset:500 +; DAGISEL64-NEXT: scratch_load_b32 v214, off, s32 offset:504 +; DAGISEL64-NEXT: scratch_load_b32 v215, off, s32 offset:508 +; DAGISEL64-NEXT: s_clause 0xf +; DAGISEL64-NEXT: scratch_load_b32 v224, off, s32 offset:512 +; DAGISEL64-NEXT: scratch_load_b32 v225, off, s32 offset:516 +; DAGISEL64-NEXT: scratch_load_b32 v226, off, s32 offset:520 +; DAGISEL64-NEXT: scratch_load_b32 v227, off, s32 offset:524 +; DAGISEL64-NEXT: scratch_load_b32 v228, off, s32 offset:528 +; DAGISEL64-NEXT: scratch_load_b32 v229, off, s32 offset:532 +; DAGISEL64-NEXT: scratch_load_b32 v230, off, s32 offset:536 +; DAGISEL64-NEXT: scratch_load_b32 v231, off, s32 offset:540 +; DAGISEL64-NEXT: scratch_load_b32 v240, off, s32 offset:544 +; DAGISEL64-NEXT: scratch_load_b32 v241, off, s32 offset:548 +; DAGISEL64-NEXT: scratch_load_b32 v242, off, s32 offset:552 +; DAGISEL64-NEXT: scratch_load_b32 v243, off, s32 offset:556 +; DAGISEL64-NEXT: scratch_load_b32 v244, off, s32 offset:560 +; DAGISEL64-NEXT: scratch_load_b32 v245, off, s32 offset:564 +; DAGISEL64-NEXT: scratch_load_b32 v246, off, s32 offset:568 +; DAGISEL64-NEXT: scratch_load_b32 v247, off, s32 offset:572 +; DAGISEL64-NEXT: s_mov_b64 exec, s[0:1] +; DAGISEL64-NEXT: s_setpc_b64 s[36:37] +; +; GISEL64-LABEL: tail_call_gfx_from_whole_wave: +; GISEL64: ; %bb.0: +; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 +; GISEL64-NEXT: s_wait_expcnt 0x0 +; GISEL64-NEXT: s_wait_samplecnt 0x0 +; GISEL64-NEXT: s_wait_bvhcnt 0x0 +; GISEL64-NEXT: s_wait_kmcnt 0x0 +; GISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GISEL64-NEXT: s_clause 0x1f +; GISEL64-NEXT: scratch_store_b32 off, v0, s32 +; GISEL64-NEXT: 
scratch_store_b32 off, v1, s32 offset:4 +; GISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GISEL64-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GISEL64-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; GISEL64-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; GISEL64-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; GISEL64-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; GISEL64-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; GISEL64-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; GISEL64-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; GISEL64-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; GISEL64-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; GISEL64-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; GISEL64-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; GISEL64-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; GISEL64-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; GISEL64-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; GISEL64-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; GISEL64-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; GISEL64-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; GISEL64-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; GISEL64-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; GISEL64-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; GISEL64-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; GISEL64-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; GISEL64-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; GISEL64-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; GISEL64-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; GISEL64-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; GISEL64-NEXT: scratch_store_b32 off, v31, s32 offset:124 +; GISEL64-NEXT: s_clause 0x1f +; GISEL64-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; GISEL64-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; GISEL64-NEXT: scratch_store_b32 off, 
v34, s32 offset:136 +; GISEL64-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; GISEL64-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; GISEL64-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; GISEL64-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; GISEL64-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; GISEL64-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; GISEL64-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; GISEL64-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; GISEL64-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; GISEL64-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; GISEL64-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; GISEL64-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; GISEL64-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; GISEL64-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; GISEL64-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; GISEL64-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; GISEL64-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; GISEL64-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; GISEL64-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; GISEL64-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; GISEL64-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; GISEL64-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; GISEL64-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; GISEL64-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; GISEL64-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; GISEL64-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; GISEL64-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; GISEL64-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; GISEL64-NEXT: scratch_store_b32 off, v87, s32 offset:252 +; GISEL64-NEXT: s_clause 0x1f +; GISEL64-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; GISEL64-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; GISEL64-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; GISEL64-NEXT: 
scratch_store_b32 off, v99, s32 offset:268 +; GISEL64-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; GISEL64-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; GISEL64-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; GISEL64-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; GISEL64-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; GISEL64-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; GISEL64-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; GISEL64-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; GISEL64-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; GISEL64-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; GISEL64-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; GISEL64-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; GISEL64-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; GISEL64-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; GISEL64-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; GISEL64-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; GISEL64-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; GISEL64-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; GISEL64-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; GISEL64-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; GISEL64-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; GISEL64-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; GISEL64-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; GISEL64-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; GISEL64-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; GISEL64-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; GISEL64-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; GISEL64-NEXT: scratch_store_b32 off, v151, s32 offset:380 +; GISEL64-NEXT: s_clause 0x1f +; GISEL64-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; GISEL64-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; GISEL64-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; GISEL64-NEXT: scratch_store_b32 
off, v163, s32 offset:396 +; GISEL64-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; GISEL64-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; GISEL64-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; GISEL64-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; GISEL64-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; GISEL64-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; GISEL64-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; GISEL64-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; GISEL64-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; GISEL64-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; GISEL64-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; GISEL64-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; GISEL64-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; GISEL64-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; GISEL64-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; GISEL64-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; GISEL64-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; GISEL64-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; GISEL64-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; GISEL64-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; GISEL64-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; GISEL64-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; GISEL64-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; GISEL64-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; GISEL64-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; GISEL64-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; GISEL64-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; GISEL64-NEXT: scratch_store_b32 off, v215, s32 offset:508 +; GISEL64-NEXT: s_clause 0xf +; GISEL64-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; GISEL64-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; GISEL64-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; GISEL64-NEXT: scratch_store_b32 off, v227, s32 
offset:524 +; GISEL64-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; GISEL64-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; GISEL64-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; GISEL64-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; GISEL64-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; GISEL64-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; GISEL64-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; GISEL64-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; GISEL64-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; GISEL64-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; GISEL64-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; GISEL64-NEXT: scratch_store_b32 off, v247, s32 offset:572 +; GISEL64-NEXT: s_mov_b64 exec, -1 +; GISEL64-NEXT: v_mov_b32_e32 v2, v0 +; GISEL64-NEXT: v_swap_b32 v0, v1 +; GISEL64-NEXT: s_mov_b32 s36, gfx_callee@abs32@lo +; GISEL64-NEXT: s_mov_b32 s37, gfx_callee@abs32@hi +; GISEL64-NEXT: s_wait_alu 0xfffe +; GISEL64-NEXT: s_xor_b64 exec, s[0:1], -1 +; GISEL64-NEXT: s_clause 0x1f +; GISEL64-NEXT: scratch_load_b32 v0, off, s32 +; GISEL64-NEXT: scratch_load_b32 v1, off, s32 offset:4 +; GISEL64-NEXT: scratch_load_b32 v2, off, s32 offset:8 +; GISEL64-NEXT: scratch_load_b32 v3, off, s32 offset:12 +; GISEL64-NEXT: scratch_load_b32 v4, off, s32 offset:16 +; GISEL64-NEXT: scratch_load_b32 v5, off, s32 offset:20 +; GISEL64-NEXT: scratch_load_b32 v6, off, s32 offset:24 +; GISEL64-NEXT: scratch_load_b32 v7, off, s32 offset:28 +; GISEL64-NEXT: scratch_load_b32 v8, off, s32 offset:32 +; GISEL64-NEXT: scratch_load_b32 v9, off, s32 offset:36 +; GISEL64-NEXT: scratch_load_b32 v10, off, s32 offset:40 +; GISEL64-NEXT: scratch_load_b32 v11, off, s32 offset:44 +; GISEL64-NEXT: scratch_load_b32 v12, off, s32 offset:48 +; GISEL64-NEXT: scratch_load_b32 v13, off, s32 offset:52 +; GISEL64-NEXT: scratch_load_b32 v14, off, s32 offset:56 +; GISEL64-NEXT: scratch_load_b32 v15, off, s32 offset:60 +; GISEL64-NEXT: 
scratch_load_b32 v16, off, s32 offset:64 +; GISEL64-NEXT: scratch_load_b32 v17, off, s32 offset:68 +; GISEL64-NEXT: scratch_load_b32 v18, off, s32 offset:72 +; GISEL64-NEXT: scratch_load_b32 v19, off, s32 offset:76 +; GISEL64-NEXT: scratch_load_b32 v20, off, s32 offset:80 +; GISEL64-NEXT: scratch_load_b32 v21, off, s32 offset:84 +; GISEL64-NEXT: scratch_load_b32 v22, off, s32 offset:88 +; GISEL64-NEXT: scratch_load_b32 v23, off, s32 offset:92 +; GISEL64-NEXT: scratch_load_b32 v24, off, s32 offset:96 +; GISEL64-NEXT: scratch_load_b32 v25, off, s32 offset:100 +; GISEL64-NEXT: scratch_load_b32 v26, off, s32 offset:104 +; GISEL64-NEXT: scratch_load_b32 v27, off, s32 offset:108 +; GISEL64-NEXT: scratch_load_b32 v28, off, s32 offset:112 +; GISEL64-NEXT: scratch_load_b32 v29, off, s32 offset:116 +; GISEL64-NEXT: scratch_load_b32 v30, off, s32 offset:120 +; GISEL64-NEXT: scratch_load_b32 v31, off, s32 offset:124 +; GISEL64-NEXT: s_clause 0x1f +; GISEL64-NEXT: scratch_load_b32 v32, off, s32 offset:128 +; GISEL64-NEXT: scratch_load_b32 v33, off, s32 offset:132 +; GISEL64-NEXT: scratch_load_b32 v34, off, s32 offset:136 +; GISEL64-NEXT: scratch_load_b32 v35, off, s32 offset:140 +; GISEL64-NEXT: scratch_load_b32 v36, off, s32 offset:144 +; GISEL64-NEXT: scratch_load_b32 v37, off, s32 offset:148 +; GISEL64-NEXT: scratch_load_b32 v38, off, s32 offset:152 +; GISEL64-NEXT: scratch_load_b32 v39, off, s32 offset:156 +; GISEL64-NEXT: scratch_load_b32 v48, off, s32 offset:160 +; GISEL64-NEXT: scratch_load_b32 v49, off, s32 offset:164 +; GISEL64-NEXT: scratch_load_b32 v50, off, s32 offset:168 +; GISEL64-NEXT: scratch_load_b32 v51, off, s32 offset:172 +; GISEL64-NEXT: scratch_load_b32 v52, off, s32 offset:176 +; GISEL64-NEXT: scratch_load_b32 v53, off, s32 offset:180 +; GISEL64-NEXT: scratch_load_b32 v54, off, s32 offset:184 +; GISEL64-NEXT: scratch_load_b32 v55, off, s32 offset:188 +; GISEL64-NEXT: scratch_load_b32 v64, off, s32 offset:192 +; GISEL64-NEXT: scratch_load_b32 v65, off, s32 
offset:196 +; GISEL64-NEXT: scratch_load_b32 v66, off, s32 offset:200 +; GISEL64-NEXT: scratch_load_b32 v67, off, s32 offset:204 +; GISEL64-NEXT: scratch_load_b32 v68, off, s32 offset:208 +; GISEL64-NEXT: scratch_load_b32 v69, off, s32 offset:212 +; GISEL64-NEXT: scratch_load_b32 v70, off, s32 offset:216 +; GISEL64-NEXT: scratch_load_b32 v71, off, s32 offset:220 +; GISEL64-NEXT: scratch_load_b32 v80, off, s32 offset:224 +; GISEL64-NEXT: scratch_load_b32 v81, off, s32 offset:228 +; GISEL64-NEXT: scratch_load_b32 v82, off, s32 offset:232 +; GISEL64-NEXT: scratch_load_b32 v83, off, s32 offset:236 +; GISEL64-NEXT: scratch_load_b32 v84, off, s32 offset:240 +; GISEL64-NEXT: scratch_load_b32 v85, off, s32 offset:244 +; GISEL64-NEXT: scratch_load_b32 v86, off, s32 offset:248 +; GISEL64-NEXT: scratch_load_b32 v87, off, s32 offset:252 +; GISEL64-NEXT: s_clause 0x1f +; GISEL64-NEXT: scratch_load_b32 v96, off, s32 offset:256 +; GISEL64-NEXT: scratch_load_b32 v97, off, s32 offset:260 +; GISEL64-NEXT: scratch_load_b32 v98, off, s32 offset:264 +; GISEL64-NEXT: scratch_load_b32 v99, off, s32 offset:268 +; GISEL64-NEXT: scratch_load_b32 v100, off, s32 offset:272 +; GISEL64-NEXT: scratch_load_b32 v101, off, s32 offset:276 +; GISEL64-NEXT: scratch_load_b32 v102, off, s32 offset:280 +; GISEL64-NEXT: scratch_load_b32 v103, off, s32 offset:284 +; GISEL64-NEXT: scratch_load_b32 v112, off, s32 offset:288 +; GISEL64-NEXT: scratch_load_b32 v113, off, s32 offset:292 +; GISEL64-NEXT: scratch_load_b32 v114, off, s32 offset:296 +; GISEL64-NEXT: scratch_load_b32 v115, off, s32 offset:300 +; GISEL64-NEXT: scratch_load_b32 v116, off, s32 offset:304 +; GISEL64-NEXT: scratch_load_b32 v117, off, s32 offset:308 +; GISEL64-NEXT: scratch_load_b32 v118, off, s32 offset:312 +; GISEL64-NEXT: scratch_load_b32 v119, off, s32 offset:316 +; GISEL64-NEXT: scratch_load_b32 v128, off, s32 offset:320 +; GISEL64-NEXT: scratch_load_b32 v129, off, s32 offset:324 +; GISEL64-NEXT: scratch_load_b32 v130, off, s32 
offset:328 +; GISEL64-NEXT: scratch_load_b32 v131, off, s32 offset:332 +; GISEL64-NEXT: scratch_load_b32 v132, off, s32 offset:336 +; GISEL64-NEXT: scratch_load_b32 v133, off, s32 offset:340 +; GISEL64-NEXT: scratch_load_b32 v134, off, s32 offset:344 +; GISEL64-NEXT: scratch_load_b32 v135, off, s32 offset:348 +; GISEL64-NEXT: scratch_load_b32 v144, off, s32 offset:352 +; GISEL64-NEXT: scratch_load_b32 v145, off, s32 offset:356 +; GISEL64-NEXT: scratch_load_b32 v146, off, s32 offset:360 +; GISEL64-NEXT: scratch_load_b32 v147, off, s32 offset:364 +; GISEL64-NEXT: scratch_load_b32 v148, off, s32 offset:368 +; GISEL64-NEXT: scratch_load_b32 v149, off, s32 offset:372 +; GISEL64-NEXT: scratch_load_b32 v150, off, s32 offset:376 +; GISEL64-NEXT: scratch_load_b32 v151, off, s32 offset:380 +; GISEL64-NEXT: s_clause 0x1f +; GISEL64-NEXT: scratch_load_b32 v160, off, s32 offset:384 +; GISEL64-NEXT: scratch_load_b32 v161, off, s32 offset:388 +; GISEL64-NEXT: scratch_load_b32 v162, off, s32 offset:392 +; GISEL64-NEXT: scratch_load_b32 v163, off, s32 offset:396 +; GISEL64-NEXT: scratch_load_b32 v164, off, s32 offset:400 +; GISEL64-NEXT: scratch_load_b32 v165, off, s32 offset:404 +; GISEL64-NEXT: scratch_load_b32 v166, off, s32 offset:408 +; GISEL64-NEXT: scratch_load_b32 v167, off, s32 offset:412 +; GISEL64-NEXT: scratch_load_b32 v176, off, s32 offset:416 +; GISEL64-NEXT: scratch_load_b32 v177, off, s32 offset:420 +; GISEL64-NEXT: scratch_load_b32 v178, off, s32 offset:424 +; GISEL64-NEXT: scratch_load_b32 v179, off, s32 offset:428 +; GISEL64-NEXT: scratch_load_b32 v180, off, s32 offset:432 +; GISEL64-NEXT: scratch_load_b32 v181, off, s32 offset:436 +; GISEL64-NEXT: scratch_load_b32 v182, off, s32 offset:440 +; GISEL64-NEXT: scratch_load_b32 v183, off, s32 offset:444 +; GISEL64-NEXT: scratch_load_b32 v192, off, s32 offset:448 +; GISEL64-NEXT: scratch_load_b32 v193, off, s32 offset:452 +; GISEL64-NEXT: scratch_load_b32 v194, off, s32 offset:456 +; GISEL64-NEXT: scratch_load_b32 
v195, off, s32 offset:460 +; GISEL64-NEXT: scratch_load_b32 v196, off, s32 offset:464 +; GISEL64-NEXT: scratch_load_b32 v197, off, s32 offset:468 +; GISEL64-NEXT: scratch_load_b32 v198, off, s32 offset:472 +; GISEL64-NEXT: scratch_load_b32 v199, off, s32 offset:476 +; GISEL64-NEXT: scratch_load_b32 v208, off, s32 offset:480 +; GISEL64-NEXT: scratch_load_b32 v209, off, s32 offset:484 +; GISEL64-NEXT: scratch_load_b32 v210, off, s32 offset:488 +; GISEL64-NEXT: scratch_load_b32 v211, off, s32 offset:492 +; GISEL64-NEXT: scratch_load_b32 v212, off, s32 offset:496 +; GISEL64-NEXT: scratch_load_b32 v213, off, s32 offset:500 +; GISEL64-NEXT: scratch_load_b32 v214, off, s32 offset:504 +; GISEL64-NEXT: scratch_load_b32 v215, off, s32 offset:508 +; GISEL64-NEXT: s_clause 0xf +; GISEL64-NEXT: scratch_load_b32 v224, off, s32 offset:512 +; GISEL64-NEXT: scratch_load_b32 v225, off, s32 offset:516 +; GISEL64-NEXT: scratch_load_b32 v226, off, s32 offset:520 +; GISEL64-NEXT: scratch_load_b32 v227, off, s32 offset:524 +; GISEL64-NEXT: scratch_load_b32 v228, off, s32 offset:528 +; GISEL64-NEXT: scratch_load_b32 v229, off, s32 offset:532 +; GISEL64-NEXT: scratch_load_b32 v230, off, s32 offset:536 +; GISEL64-NEXT: scratch_load_b32 v231, off, s32 offset:540 +; GISEL64-NEXT: scratch_load_b32 v240, off, s32 offset:544 +; GISEL64-NEXT: scratch_load_b32 v241, off, s32 offset:548 +; GISEL64-NEXT: scratch_load_b32 v242, off, s32 offset:552 +; GISEL64-NEXT: scratch_load_b32 v243, off, s32 offset:556 +; GISEL64-NEXT: scratch_load_b32 v244, off, s32 offset:560 +; GISEL64-NEXT: scratch_load_b32 v245, off, s32 offset:564 +; GISEL64-NEXT: scratch_load_b32 v246, off, s32 offset:568 +; GISEL64-NEXT: scratch_load_b32 v247, off, s32 offset:572 +; GISEL64-NEXT: s_mov_b64 exec, s[0:1] +; GISEL64-NEXT: s_setpc_b64 s[36:37] + %ret = tail call amdgpu_gfx <2 x half>(<2 x half>, <2 x half>) @gfx_callee(<2 x half> %y, <2 x half> %x) convergent + ret <2 x half> %ret +} + declare amdgpu_gfx_whole_wave float 
@callee(i1 %active, <8 x float> %x) define amdgpu_cs void @call_from_entry(<8 x float> %x, ptr %p) {