3 files changed, 114 insertions, 7 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 85a354d0c12..32ed896bc98 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 
 using namespace llvm;
 
@@ -224,3 +225,21 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
   SavedRegs.set(RISCV::X1);
   SavedRegs.set(RISCV::X8);
 }
+
+void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
+    MachineFunction &MF, RegScavenger *RS) const {
+  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  const TargetRegisterClass *RC = &RISCV::GPRRegClass;
+  // Reserve an emergency spill slot for the register scavenger when the
+  // stack may not be addressable with a 12-bit signed offset. As
+  // estimateStackSize has been observed to under-estimate the final stack
+  // size, leave wiggle-room by testing against an 11-bit signed field.
+  // FIXME: A function with a small stack may still need an emergency spill
+  // slot for branch relaxation. This case would currently be missed.
+  if (!isInt<11>(MFI.estimateStackSize(MF))) {
+    int RegScavFI = MFI.CreateStackObject(
+        RegInfo->getSpillSize(*RC), RegInfo->getSpillAlignment(*RC), false);
+    RS->addScavengingFrameIndex(RegScavFI);
+  }
+}
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index d92bb70c76d..ccf7e247b55 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -36,6 +36,9 @@ public:
   void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
                             RegScavenger *RS) const override;
 
+  void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+                                           RegScavenger *RS) const override;
+
   bool hasFP(const MachineFunction &MF) const override;
 
   MachineBasicBlock::iterator
diff --git a/llvm/test/CodeGen/RISCV/large-stack.ll b/llvm/test/CodeGen/RISCV/large-stack.ll
index 9aa517f2f0f..8ee818bed91 100644
--- a/llvm/test/CodeGen/RISCV/large-stack.ll
+++ b/llvm/test/CodeGen/RISCV/large-stack.ll
@@ -8,31 +8,116 @@ define void @test() nounwind {
 ; RV32I-LABEL: test:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lui a0, 74565
-; RV32I-NEXT:    addi a0, a0, 1664
+; RV32I-NEXT:    addi a0, a0, 1680
 ; RV32I-NEXT:    sub sp, sp, a0
 ; RV32I-NEXT:    lui a0, 74565
-; RV32I-NEXT:    addi a0, a0, 1660
+; RV32I-NEXT:    addi a0, a0, 1676
 ; RV32I-NEXT:    add a0, sp, a0
 ; RV32I-NEXT:    sw ra, 0(a0)
 ; RV32I-NEXT:    lui a0, 74565
-; RV32I-NEXT:    addi a0, a0, 1656
+; RV32I-NEXT:    addi a0, a0, 1672
 ; RV32I-NEXT:    add a0, sp, a0
 ; RV32I-NEXT:    sw s0, 0(a0)
 ; RV32I-NEXT:    lui a0, 74565
-; RV32I-NEXT:    addi a0, a0, 1664
+; RV32I-NEXT:    addi a0, a0, 1680
 ; RV32I-NEXT:    add s0, sp, a0
 ; RV32I-NEXT:    lui a0, 74565
-; RV32I-NEXT:    addi a0, a0, 1656
+; RV32I-NEXT:    addi a0, a0, 1672
 ; RV32I-NEXT:    add a0, sp, a0
 ; RV32I-NEXT:    lw s0, 0(a0)
 ; RV32I-NEXT:    lui a0, 74565
-; RV32I-NEXT:    addi a0, a0, 1660
+; RV32I-NEXT:    addi a0, a0, 1676
 ; RV32I-NEXT:    add a0, sp, a0
 ; RV32I-NEXT:    lw ra, 0(a0)
 ; RV32I-NEXT:    lui a0, 74565
-; RV32I-NEXT:    addi a0, a0, 1664
+; RV32I-NEXT:    addi a0, a0, 1680
 ; RV32I-NEXT:    add sp, sp, a0
 ; RV32I-NEXT:    ret
   %tmp = alloca [ 305419896 x i8 ] , align 4
   ret void
 }
+
+; This test case artificially produces register pressure which should force
+; use of the emergency spill slot.
+
+define void @test_emergency_spill_slot(i32 %a) nounwind {
+; RV32I-LABEL: test_emergency_spill_slot:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 98
+; RV32I-NEXT:    addi a1, a1, -1376
+; RV32I-NEXT:    sub sp, sp, a1
+; RV32I-NEXT:    lui a1, 98
+; RV32I-NEXT:    addi a1, a1, -1380
+; RV32I-NEXT:    add a1, sp, a1
+; RV32I-NEXT:    sw ra, 0(a1)
+; RV32I-NEXT:    lui a1, 98
+; RV32I-NEXT:    addi a1, a1, -1384
+; RV32I-NEXT:    add a1, sp, a1
+; RV32I-NEXT:    sw s0, 0(a1)
+; RV32I-NEXT:    lui a1, 98
+; RV32I-NEXT:    addi a1, a1, -1388
+; RV32I-NEXT:    add a1, sp, a1
+; RV32I-NEXT:    sw s1, 0(a1)
+; RV32I-NEXT:    lui a1, 98
+; RV32I-NEXT:    addi a1, a1, -1392
+; RV32I-NEXT:    add a1, sp, a1
+; RV32I-NEXT:    sw s2, 0(a1)
+; RV32I-NEXT:    lui a1, 98
+; RV32I-NEXT:    addi a1, a1, -1376
+; RV32I-NEXT:    add s0, sp, a1
+; RV32I-NEXT:    lui a1, 78
+; RV32I-NEXT:    addi a1, a1, 512
+; RV32I-NEXT:    lui a2, 1048478
+; RV32I-NEXT:    addi a2, a2, 1388
+; RV32I-NEXT:    add a2, s0, a2
+; RV32I-NEXT:    mv a2, a2
+; RV32I-NEXT:    add a1, a2, a1
+; RV32I-NEXT:    #APP
+; RV32I-NEXT:    nop
+; RV32I-NEXT:    #NO_APP
+; RV32I-NEXT:    sw a0, 0(a1)
+; RV32I-NEXT:    #APP
+; RV32I-NEXT:    nop
+; RV32I-NEXT:    #NO_APP
+; RV32I-NEXT:    lui a0, 98
+; RV32I-NEXT:    addi a0, a0, -1392
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    lw s2, 0(a0)
+; RV32I-NEXT:    lui a0, 98
+; RV32I-NEXT:    addi a0, a0, -1388
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    lw s1, 0(a0)
+; RV32I-NEXT:    lui a0, 98
+; RV32I-NEXT:    addi a0, a0, -1384
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    lw s0, 0(a0)
+; RV32I-NEXT:    lui a0, 98
+; RV32I-NEXT:    addi a0, a0, -1380
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    lw ra, 0(a0)
+; RV32I-NEXT:    lui a0, 98
+; RV32I-NEXT:    addi a0, a0, -1376
+; RV32I-NEXT:    add sp, sp, a0
+; RV32I-NEXT:    ret
+  %data = alloca [ 100000 x i32 ] , align 4
+  %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %data, i32 0, i32 80000
+  %1 = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "nop", "=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r"()
+  %asmresult0 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 0
+  %asmresult1 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 1
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 2
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 3
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 4
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 5
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 6
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 7
+  %asmresult8 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 8
+  %asmresult9 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 9
+  %asmresult10 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 10
+  %asmresult11 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 11
+  %asmresult12 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 12
+  %asmresult13 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 13
+  %asmresult14 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 14
+  store volatile i32 %a, i32* %ptr
+  tail call void asm sideeffect "nop", "r,r,r,r,r,r,r,r,r,r,r,r,r,r,r"(i32 %asmresult0, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8, i32 %asmresult9, i32 %asmresult10, i32 %asmresult11, i32 %asmresult12, i32 %asmresult13, i32 %asmresult14)
+  ret void
+}