diff options
author | Francis Visoiu Mistrih <francisvm@yahoo.com> | 2018-04-10 11:29:40 +0000 |
---|---|---|
committer | Francis Visoiu Mistrih <francisvm@yahoo.com> | 2018-04-10 11:29:40 +0000 |
commit | f2c22050e8211f2c768e9ff2bb50c902ec8ac078 (patch) | |
tree | 65d2acf0c967f2a991afb2d3de911eccb18e5e40 | |
parent | 7190a4692ac65c9a5decaf62d403d68ac094dd0c (diff) | |
download | bcm5719-llvm-f2c22050e8211f2c768e9ff2bb50c902ec8ac078.tar.gz bcm5719-llvm-f2c22050e8211f2c768e9ff2bb50c902ec8ac078.zip |
[AArch64] Use FP to access the emergency spill slot
In the presence of variable-sized stack objects, we always picked the
base pointer, if one was available, when resolving frame indices.
This makes us hit an assert where we can't reach the emergency spill
slot if it's too far away from the base pointer. Since on AArch64 we
decide to place the emergency spill slot at the top of the frame, it
makes more sense to use FP to access it.
The changes here affect not only emergency spill slots but all frame
indices. The goal is to choose between FP, BP and SP so that we
minimize the offset and avoid scavenging or, worse, asserting when
trying to access a slot allocated by the scavenger.
Previously discussed here: https://reviews.llvm.org/D40876.
Differential Revision: https://reviews.llvm.org/D45358
llvm-svn: 329691
3 files changed, 51 insertions, 12 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 48c0916c74c..d7399e7f4f4 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1032,20 +1032,36 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, // Argument access should always use the FP. if (isFixed) { UseFP = hasFP(MF); - } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) && - !RegInfo->needsStackRealignment(MF)) { - // Use SP or FP, whichever gives us the best chance of the offset - // being in range for direct access. If the FPOffset is positive, - // that'll always be best, as the SP will be even further away. + } else if (hasFP(MF) && !RegInfo->needsStackRealignment(MF)) { // If the FPOffset is negative, we have to keep in mind that the // available offset range for negative offsets is smaller than for - // positive ones. If we have variable sized objects, we're stuck with - // using the FP regardless, though, as the SP offset is unknown - // and we don't have a base pointer available. If an offset is + // positive ones. If an offset is // available via the FP and the SP, use whichever is closest. - if (PreferFP || MFI.hasVarSizedObjects() || FPOffset >= 0 || - (FPOffset >= -256 && Offset > -FPOffset)) + bool FPOffsetFits = FPOffset >= -256; + PreferFP |= Offset > -FPOffset; + + if (MFI.hasVarSizedObjects()) { + // If we have variable sized objects, we can use either FP or BP, as the + // SP offset is unknown. We can use the base pointer if we have one and + // FP is not preferred. If not, we're stuck with using FP. + bool CanUseBP = RegInfo->hasBasePointer(MF); + if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best. + UseFP = PreferFP; + else if (!CanUseBP) // Can't use BP. Forced to use FP. + UseFP = true; + // else we can use BP and FP, but the offset from FP won't fit. 
+ // That will make us scavenge registers which we can probably avoid by + // using BP. If it won't fit for BP either, we'll scavenge anyway. + } else if (PreferFP || FPOffset >= 0) { + // Use SP or FP, whichever gives us the best chance of the offset + // being in range for direct access. If the FPOffset is positive, + // that'll always be best, as the SP will be even further away. UseFP = true; + } else { + // We have the choice between FP and (SP or BP). + if (FPOffsetFits && PreferFP) // If FP is the best fit, use it. + UseFP = true; + } } } @@ -1062,6 +1078,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, if (RegInfo->hasBasePointer(MF)) FrameReg = RegInfo->getBaseRegister(); else { + assert(!MFI.hasVarSizedObjects() && + "Can't use SP when we have var sized objects."); FrameReg = AArch64::SP; // If we're using the red zone for this function, the SP won't actually // be adjusted, so the offsets will be negative. They're also all diff --git a/llvm/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll b/llvm/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll index fdb37987104..45e32c8aa63 100644 --- a/llvm/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll +++ b/llvm/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll @@ -1,8 +1,8 @@ ; RUN: llc -mtriple=arm64-eabi -mcpu=cyclone < %s | FileCheck %s ; CHECK: foo -; CHECK-DAG: str w[[REG0:[0-9]+]], [x19, #132] -; CHECK-DAG: str w[[REG0]], [x19, #264] +; CHECK-DAG: stur w[[REG0:[0-9]+]], [x29, #-24] +; CHECK-DAG: stur w[[REG0]], [x29, #-20] define i32 @foo(i32 %a) nounwind { %retval = alloca i32, align 4 %a.addr = alloca i32, align 4 diff --git a/llvm/test/CodeGen/AArch64/unreachable-emergency-spill-slot.mir b/llvm/test/CodeGen/AArch64/unreachable-emergency-spill-slot.mir new file mode 100644 index 00000000000..686fbe88911 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/unreachable-emergency-spill-slot.mir @@ -0,0 +1,21 @@ +#RUN: llc -mtriple=aarch64-- 
-run-pass prologepilog %s -o - | FileCheck %s +# Check that we use the frame pointer to address the emergency spill slot. +# Using the base pointer will result in an assert with "Emergency spill slot is +# out of reach". +--- +name: hugeStack +# CHECK-LABEL: name: hugeStack +tracksRegLiveness: true +frameInfo: + localFrameSize: 256 +stack: + - { id: 0, type: variable-sized, alignment: 1 } + - { id: 1, name: '', size: 32761, alignment: 8 } +body: | + bb.0: + STRXui undef $x8, %stack.0, 0 + ; CHECK: STURXi undef $x8, $fp, -24 + B %bb.1 + bb.1: + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp, $lr + RET_ReallyLR implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28 |