diff options
| author | David Green <david.green@arm.com> | 2019-09-17 15:23:09 +0000 |
|---|---|---|
| committer | David Green <david.green@arm.com> | 2019-09-17 15:23:09 +0000 |
| commit | 22a2209433a40508c2866ce8f547fcf319f83186 (patch) | |
| tree | e248129722c58931288740ea0c72fd0051180681 | |
| parent | d0cc0a39be47cabd1325395197a3b7276f7b9fd9 (diff) | |
| download | bcm5719-llvm-22a2209433a40508c2866ce8f547fcf319f83186.tar.gz bcm5719-llvm-22a2209433a40508c2866ce8f547fcf319f83186.zip | |
[ARM] Reserve an emergency spill slot for fp16 addressing modes that need it
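Similar to D67327, but this time for the FP16 VLDR and VSTR instructions that
use the AddrMode5FP16 addressing mode. We need to reserve an emergency spill
slot for any such instruction whose stack offset is too large to be encoded
relative to sp directly, because a scratch register is then needed to compute
the address. The AddrMode5FP16 immediate is 8 bits with a scale of 2, so the
largest directly encodable offset is (2^8 - 1) * 2 = 510 bytes.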
Differential Revision: https://reviews.llvm.org/D67483
llvm-svn: 372132
| -rw-r--r-- | llvm/lib/Target/ARM/ARMFrameLowering.cpp | 15 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir | 95 |
2 files changed, 109 insertions, 1 deletion
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index b3413ec0202..03681d5634c 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -1512,6 +1512,8 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, unsigned Limit = (1 << 12) - 1; for (auto &MBB : MF) { for (auto &MI : MBB) { + if (MI.isDebugInstr()) + continue; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { if (!MI.getOperand(i).isFI()) continue; @@ -1522,6 +1524,10 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, Limit = std::min(Limit, (1U << 8) - 1); break; } + // t2ADDri will not require an extra register, it can reuse the + // destination. + if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12) + break; const MCInstrDesc &MCID = MI.getDesc(); const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF); @@ -1530,10 +1536,17 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, // Otherwise check the addressing mode. switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) { + case ARMII::AddrMode_i12: + case ARMII::AddrMode2: + // Default 12 bit limit. 
+ break; case ARMII::AddrMode3: case ARMII::AddrModeT2_i8: Limit = std::min(Limit, (1U << 8) - 1); break; + case ARMII::AddrMode5FP16: + Limit = std::min(Limit, ((1U << 8) - 1) * 2); + break; case ARMII::AddrMode5: case ARMII::AddrModeT2_i8s4: case ARMII::AddrModeT2_ldrex: @@ -1560,7 +1573,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, Limit = std::min(Limit, ((1U << 7) - 1) * 4); break; default: - break; + llvm_unreachable("Unhandled addressing mode in stack size limit calculation"); } break; // At most one FI per instruction } diff --git a/llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir b/llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir new file mode 100644 index 00000000000..856f307488c --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir @@ -0,0 +1,95 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - %s -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+fullfp16 -run-pass=stack-protector -run-pass=prologepilog | FileCheck %s +--- +name: func0 +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: default, offset: 0, size: 2, alignment: 2, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -1200, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 1, name: '', type: default, offset: 0, size: 1200, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -2, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } +body: | + bb.0: + ; CHECK-LABEL: name: func0 + ; CHECK: liveins: $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr + ; CHECK: $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11, killed $lr + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 36 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION 
offset $r11, -8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r10, -12 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r9, -16 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r8, -20 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -24 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r6, -28 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -32 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -36 + ; CHECK: $sp = frame-setup t2SUBri killed $sp, 1208, 14, $noreg, $noreg + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 1244 + ; CHECK: $r0 = IMPLICIT_DEF + ; CHECK: $r1 = IMPLICIT_DEF + ; CHECK: $r2 = IMPLICIT_DEF + ; CHECK: $r3 = IMPLICIT_DEF + ; CHECK: $r4 = IMPLICIT_DEF + ; CHECK: $r5 = IMPLICIT_DEF + ; CHECK: $r6 = IMPLICIT_DEF + ; CHECK: $r7 = IMPLICIT_DEF + ; CHECK: $r8 = IMPLICIT_DEF + ; CHECK: $r9 = IMPLICIT_DEF + ; CHECK: $r10 = IMPLICIT_DEF + ; CHECK: $r11 = IMPLICIT_DEF + ; CHECK: $r12 = IMPLICIT_DEF + ; CHECK: $lr = IMPLICIT_DEF + ; CHECK: t2STRi12 killed $r0, $sp, 0, 14, $noreg :: (store 4 into %stack.2) + ; CHECK: $r0 = t2ADDri killed $sp, 1024, 14, $noreg, $noreg + ; CHECK: renamable $s4 = VLDRH killed $r0, 91, 14, $noreg :: (dereferenceable load 2 from %stack.0) + ; CHECK: $r0 = t2LDRi12 $sp, 0, 14, $noreg :: (load 4 from %stack.2) + ; CHECK: KILL $r0 + ; CHECK: KILL $r1 + ; CHECK: KILL $r2 + ; CHECK: KILL $r3 + ; CHECK: KILL $r4 + ; CHECK: KILL $r5 + ; CHECK: KILL $r6 + ; CHECK: KILL $r7 + ; CHECK: KILL $r8 + ; CHECK: KILL $r9 + ; CHECK: KILL $r10 + ; CHECK: KILL $r11 + ; CHECK: KILL $r12 + ; CHECK: KILL $lr + $r0 = IMPLICIT_DEF + $r1 = IMPLICIT_DEF + $r2 = IMPLICIT_DEF + $r3 = IMPLICIT_DEF + $r4 = IMPLICIT_DEF + $r5 = IMPLICIT_DEF + $r6 = IMPLICIT_DEF + $r7 = IMPLICIT_DEF + $r8 = IMPLICIT_DEF + $r9 = IMPLICIT_DEF + $r10 = IMPLICIT_DEF + $r11 = IMPLICIT_DEF + $r12 = IMPLICIT_DEF + $lr = IMPLICIT_DEF + + renamable $s4 = VLDRH %stack.0, 0, 14, $noreg :: (dereferenceable load 2 from %stack.0) + + KILL $r0 + KILL $r1 + KILL $r2 + KILL $r3 + KILL $r4 
+ KILL $r5 + KILL $r6 + KILL $r7 + KILL $r8 + KILL $r9 + KILL $r10 + KILL $r11 + KILL $r12 + KILL $lr +... |

