diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index a71668e71c235..6f1ce5bdbe286 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -3934,6 +3934,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, CSStackSize += SpillSize; } + // Save number of saved regs, so we can easily update CSStackSize later to + // account for any additional 64-bit GPR saves. Note: After this point + // only 64-bit GPRs can be added to SavedRegs. + unsigned NumSavedRegs = SavedRegs.count(); + // Increase the callee-saved stack size if the function has streaming mode // changes, as we will need to spill the value of the VG register. // For locally streaming functions, we spill both the streaming and @@ -3952,8 +3957,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, if (AFI->hasStackHazardSlotIndex()) CSStackSize += getStackHazardSize(MF); - // Save number of saved regs, so we can easily update CSStackSize later. - unsigned NumSavedRegs = SavedRegs.count(); + // If we must call __arm_get_current_vg in the prologue, preserve the LR. + if (requiresSaveVG(MF) && !Subtarget.hasSVE()) + SavedRegs.set(AArch64::LR); // The frame record needs to be created by saving the appropriate registers uint64_t EstimatedStackSize = MFI.estimateStackSize(MF); diff --git a/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll new file mode 100644 index 0000000000000..69f603458670c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -O0 < %s | FileCheck %s + +; Example of a locally streaming function that (at -O0) must preserve the LR (X30) +; before calling __arm_get_current_vg.
+define void @foo() "aarch64_pstate_sm_body" { +; CHECK-LABEL: foo: +; CHECK: // %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: lsr x9, x9, #3 +; CHECK-NEXT: str x9, [sp, #72] // 8-byte Folded Spill +; CHECK-NEXT: bl __arm_get_current_vg +; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset vg, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: .cfi_offset b8, -40 +; CHECK-NEXT: .cfi_offset b9, -48 +; CHECK-NEXT: .cfi_offset b10, -56 +; CHECK-NEXT: .cfi_offset b11, -64 +; CHECK-NEXT: .cfi_offset b12, -72 +; CHECK-NEXT: .cfi_offset b13, -80 +; CHECK-NEXT: .cfi_offset b14, -88 +; CHECK-NEXT: .cfi_offset b15, -96 +; CHECK-NEXT: smstart sm +; CHECK-NEXT: smstop sm +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 +; CHECK-NEXT: .cfi_restore b11 +; CHECK-NEXT: .cfi_restore b12 +; CHECK-NEXT: .cfi_restore b13 +; CHECK-NEXT: .cfi_restore b14 +; CHECK-NEXT: .cfi_restore b15 +; CHECK-NEXT: ret + ret void +}