diff options
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 19 | ||||
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVFrameLowering.h | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/RISCV/large-stack.ll | 99 |
3 files changed, 114 insertions, 7 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 85a354d0c12..32ed896bc98 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" using namespace llvm; @@ -224,3 +225,21 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(RISCV::X1); SavedRegs.set(RISCV::X8); } + +void RISCVFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterClass *RC = &RISCV::GPRRegClass; + // estimateStackSize has been observed to under-estimate the final stack + // size, so give ourselves wiggle-room by checking for stack size + // representable an 11-bit signed field rather than 12-bits. + // FIXME: It may be possible to craft a function with a small stack that + // still needs an emergency spill slot for branch relaxation. This case + // would currently be missed. + if (!isInt<11>(MFI.estimateStackSize(MF))) { + int RegScavFI = MFI.CreateStackObject( + RegInfo->getSpillSize(*RC), RegInfo->getSpillAlignment(*RC), false); + RS->addScavengingFrameIndex(RegScavFI); + } +} diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h index d92bb70c76d..ccf7e247b55 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -36,6 +36,9 @@ public: void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const override; + bool hasFP(const MachineFunction &MF) const override; MachineBasicBlock::iterator diff --git a/llvm/test/CodeGen/RISCV/large-stack.ll b/llvm/test/CodeGen/RISCV/large-stack.ll index 9aa517f2f0f..8ee818bed91 100644 --- a/llvm/test/CodeGen/RISCV/large-stack.ll +++ b/llvm/test/CodeGen/RISCV/large-stack.ll @@ -8,31 +8,116 @@ define void @test() nounwind { ; RV32I-LABEL: test: ; RV32I: # %bb.0: ; RV32I-NEXT: lui a0, 74565 -; RV32I-NEXT: addi a0, a0, 1664 +; RV32I-NEXT: addi a0, a0, 1680 ; RV32I-NEXT: sub sp, sp, a0 ; RV32I-NEXT: lui a0, 74565 -; RV32I-NEXT: addi a0, a0, 1660 +; RV32I-NEXT: addi a0, a0, 1676 ; RV32I-NEXT: add a0, sp, a0 ; RV32I-NEXT: sw ra, 0(a0) ; RV32I-NEXT: lui a0, 74565 -; RV32I-NEXT: addi a0, a0, 1656 +; RV32I-NEXT: addi a0, a0, 1672 ; RV32I-NEXT: add a0, sp, a0 ; RV32I-NEXT: sw s0, 0(a0) ; RV32I-NEXT: lui a0, 74565 -; RV32I-NEXT: addi a0, a0, 1664 +; RV32I-NEXT: addi a0, a0, 1680 ; RV32I-NEXT: add s0, sp, a0 ; RV32I-NEXT: lui a0, 74565 -; RV32I-NEXT: addi a0, a0, 1656 +; RV32I-NEXT: addi a0, a0, 1672 ; RV32I-NEXT: add a0, sp, a0 ; RV32I-NEXT: lw s0, 0(a0) ; RV32I-NEXT: lui a0, 74565 -; RV32I-NEXT: addi a0, a0, 1660 +; RV32I-NEXT: addi a0, a0, 1676 ; RV32I-NEXT: add a0, sp, a0 ; RV32I-NEXT: lw ra, 0(a0) ; RV32I-NEXT: lui a0, 74565 -; RV32I-NEXT: addi a0, a0, 1664 +; RV32I-NEXT: addi a0, a0, 1680 ; RV32I-NEXT: add sp, sp, a0 ; RV32I-NEXT: ret %tmp = alloca [ 305419896 x i8 ] , align 4 ret void } + +; This test case artificially produces register pressure which should force +; use of the emergency spill slot. + +define void @test_emergency_spill_slot(i32 %a) nounwind { +; RV32I-LABEL: test_emergency_spill_slot: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 98 +; RV32I-NEXT: addi a1, a1, -1376 +; RV32I-NEXT: sub sp, sp, a1 +; RV32I-NEXT: lui a1, 98 +; RV32I-NEXT: addi a1, a1, -1380 +; RV32I-NEXT: add a1, sp, a1 +; RV32I-NEXT: sw ra, 0(a1) +; RV32I-NEXT: lui a1, 98 +; RV32I-NEXT: addi a1, a1, -1384 +; RV32I-NEXT: add a1, sp, a1 +; RV32I-NEXT: sw s0, 0(a1) +; RV32I-NEXT: lui a1, 98 +; RV32I-NEXT: addi a1, a1, -1388 +; RV32I-NEXT: add a1, sp, a1 +; RV32I-NEXT: sw s1, 0(a1) +; RV32I-NEXT: lui a1, 98 +; RV32I-NEXT: addi a1, a1, -1392 +; RV32I-NEXT: add a1, sp, a1 +; RV32I-NEXT: sw s2, 0(a1) +; RV32I-NEXT: lui a1, 98 +; RV32I-NEXT: addi a1, a1, -1376 +; RV32I-NEXT: add s0, sp, a1 +; RV32I-NEXT: lui a1, 78 +; RV32I-NEXT: addi a1, a1, 512 +; RV32I-NEXT: lui a2, 1048478 +; RV32I-NEXT: addi a2, a2, 1388 +; RV32I-NEXT: add a2, s0, a2 +; RV32I-NEXT: mv a2, a2 +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: #APP +; RV32I-NEXT: nop +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: sw a0, 0(a1) +; RV32I-NEXT: #APP +; RV32I-NEXT: nop +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: lui a0, 98 +; RV32I-NEXT: addi a0, a0, -1392 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw s2, 0(a0) +; RV32I-NEXT: lui a0, 98 +; RV32I-NEXT: addi a0, a0, -1388 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw s1, 0(a0) +; RV32I-NEXT: lui a0, 98 +; RV32I-NEXT: addi a0, a0, -1384 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw s0, 0(a0) +; RV32I-NEXT: lui a0, 98 +; RV32I-NEXT: addi a0, a0, -1380 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw ra, 0(a0) +; RV32I-NEXT: lui a0, 98 +; RV32I-NEXT: addi a0, a0, -1376 +; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: ret + %data = alloca [ 100000 x i32 ] , align 4 + %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %data, i32 0, i32 80000 + %1 = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "nop", "=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r"() + %asmresult0 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 0 + %asmresult1 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 1 + %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 2 + %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 3 + %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 4 + %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 5 + %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 6 + %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 7 + %asmresult8 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 8 + %asmresult9 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 9 + %asmresult10 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 10 + %asmresult11 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 11 + %asmresult12 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 12 + %asmresult13 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 13 + %asmresult14 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 14 + store volatile i32 %a, i32* %ptr + tail call void asm sideeffect "nop", "r,r,r,r,r,r,r,r,r,r,r,r,r,r,r"(i32 %asmresult0, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8, i32 %asmresult9, i32 %asmresult10, i32 %asmresult11, i32 %asmresult12, i32 %asmresult13, i32 %asmresult14) + ret void +} |