[ARM] Don't reserve R12 on Thumb1 as an emergency spill slot.

The current implementation of ThumbRegisterInfo::saveScavengerRegister is bad for two reasons: one, it's buggy, and two, it blocks using R12 for other optimizations. So this patch gets rid of it, and adds the necessary support for using an ordinary emergency spill slot on Thumb1.

(Specifically, I think saveScavengerRegister was broken by r305625, and nobody noticed for two years because the codepath is almost never used. The new code will also probably not be used much, but it now has better tests, and if we fail to emit a necessary emergency spill slot we get a reasonable error message instead of a miscompile.)

A rough outline of the changes in the patch:

1. Gets rid of ThumbRegisterInfo::saveScavengerRegister.
2. Modifies ARMFrameLowering::determineCalleeSaves to allocate an emergency spill slot for Thumb1.
3. Implements useFPForScavengingIndex, so the emergency spill slot isn't placed at a negative offset from FP on Thumb1.
4. Modifies the heuristics for allocating an emergency spill slot to support Thumb1. This includes fixing ExtraCSSpill so we don't try to use "lr" as a substitute for allocating an emergency spill slot.
5. Allocates a base pointer in more cases, so the emergency spill slot is always accessible.
6. Modifies ARMFrameLowering::ResolveFrameIndexReference to compute the right offset in the new cases where we're forcing a base pointer.
7. Ensures we never generate a load or store with an offset outside of its frame object. This makes the heuristics more straightforward.
8. Changes Thumb1 prologue and epilogue emission so it never uses register scavenging.

Some of the changes to the emergency spill slot heuristics in determineCalleeSaves affect ARM/Thumb2; hopefully, they should allow the compiler to avoid allocating an emergency spill slot in cases where it isn't necessary. The rest of the changes should only affect Thumb1.

Differential Revision: https://reviews.llvm.org/D63677

llvm-svn: 364490
author: Eli Friedman <efriedma@quicinc.com> 2019-06-26 23:46:51 +0000
committer: Eli Friedman <efriedma@quicinc.com> 2019-06-26 23:46:51 +0000
commit: ab1d73ee32481e3033ebbe5a68afafb24da3df2e (patch)
tree: 2b16e264cc26eac95a6bc63cebfdee9db999c34d /llvm/test/CodeGen
parent: d7999cbc6eb5e83e712904c1739fba799caf1c6c (diff)
download: bcm5719-llvm-ab1d73ee32481e3033ebbe5a68afafb24da3df2e.tar.gz
bcm5719-llvm-ab1d73ee32481e3033ebbe5a68afafb24da3df2e.zip
6 files changed, 409 insertions, 91 deletions
diff --git a/llvm/test/CodeGen/ARM/ldrex-frame-size.ll b/llvm/test/CodeGen/ARM/ldrex-frame-size.ll
index 595540578a0..f34fb8f4d22 100644
--- a/llvm/test/CodeGen/ARM/ldrex-frame-size.ll
+++ b/llvm/test/CodeGen/ARM/ldrex-frame-size.ll
@@ -11,9 +11,9 @@
 define void @test_large_frame() {
 ; CHECK-LABEL: test_large_frame:
 ; CHECK: push
-; CHECK: sub.w sp, sp, #1004
+; CHECK: sub.w sp, sp, #1008
 
-  %ptr = alloca i32, i32 251
+  %ptr = alloca i32, i32 252
 
   %addr = getelementptr i32, i32* %ptr, i32 1
   call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
@@ -24,9 +24,9 @@ define void @test_large_frame() {
 define void @test_small_frame() {
 ; CHECK-LABEL: test_small_frame:
 ; CHECK-NOT: push
-; CHECK: sub.w sp, sp, #1000
+; CHECK: sub.w sp, sp, #1004
 
-  %ptr = alloca i32, i32 250
+  %ptr = alloca i32, i32 251
 
   %addr = getelementptr i32, i32* %ptr, i32 1
   call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
diff --git a/llvm/test/CodeGen/ARM/scavenging.mir b/llvm/test/CodeGen/ARM/scavenging.mir
deleted file mode 100644
index 5e0cbfb4abb..00000000000
--- a/llvm/test/CodeGen/ARM/scavenging.mir
+++ /dev/null
@@ -1,66 +0,0 @@
-# RUN: llc -o - %s -mtriple=thumb-arm-none-eabi -mcpu=cortex-m0 -run-pass scavenger-test | FileCheck %s
----
-# CHECK-LABEL: name: scavengebug0
-# Make sure we are not spilling/using a physreg used in the very last
-# instruction of the scavenging range.
-# CHECK-NOT: tSTRi {{.*}}$r0,{{.*}}$r0
-# CHECK-NOT: tSTRi {{.*}}$r1,{{.*}}$r1
-# CHECK-NOT: tSTRi {{.*}}$r2,{{.*}}$r2
-# CHECK-NOT: tSTRi {{.*}}$r3,{{.*}}$r3
-# CHECK-NOT: tSTRi {{.*}}$r4,{{.*}}$r4
-# CHECK-NOT: tSTRi {{.*}}$r5,{{.*}}$r5
-# CHECK-NOT: tSTRi {{.*}}$r6,{{.*}}$r6
-# CHECK-NOT: tSTRi {{.*}}$r7,{{.*}}$r7
-name: scavengebug0
-body: |
-  bb.0:
-    ; Bring up register pressure to force emergency spilling
-    $r0 = IMPLICIT_DEF
-    $r1 = IMPLICIT_DEF
-    $r2 = IMPLICIT_DEF
-    $r3 = IMPLICIT_DEF
-    $r4 = IMPLICIT_DEF
-    $r5 = IMPLICIT_DEF
-    $r6 = IMPLICIT_DEF
-    $r7 = IMPLICIT_DEF
-
-    %0 : tgpr = IMPLICIT_DEF
-    %0 = tADDhirr %0, $sp, 14, $noreg
-    tSTRi $r0, %0, 0, 14, $noreg
-
-    %1 : tgpr = IMPLICIT_DEF
-    %1 = tADDhirr %1, $sp, 14, $noreg
-    tSTRi $r1, %1, 0, 14, $noreg
-
-    %2 : tgpr = IMPLICIT_DEF
-    %2 = tADDhirr %2, $sp, 14, $noreg
-    tSTRi $r2, %2, 0, 14, $noreg
-
-    %3 : tgpr = IMPLICIT_DEF
-    %3 = tADDhirr %3, $sp, 14, $noreg
-    tSTRi $r3, %3, 0, 14, $noreg
-
-    %4 : tgpr = IMPLICIT_DEF
-    %4 = tADDhirr %4, $sp, 14, $noreg
-    tSTRi $r4, %4, 0, 14, $noreg
-
-    %5 : tgpr = IMPLICIT_DEF
-    %5 = tADDhirr %5, $sp, 14, $noreg
-    tSTRi $r5, %5, 0, 14, $noreg
-
-    %6 : tgpr = IMPLICIT_DEF
-    %6 = tADDhirr %6, $sp, 14, $noreg
-    tSTRi $r6, %6, 0, 14, $noreg
-
-    %7 : tgpr = IMPLICIT_DEF
-    %7 = tADDhirr %7, $sp, 14, $noreg
-    tSTRi $r7, %7, 0, 14, $noreg
-
-    KILL $r0
-    KILL $r1
-    KILL $r2
-    KILL $r3
-    KILL $r4
-    KILL $r5
-    KILL $r6
-    KILL $r7
diff --git a/llvm/test/CodeGen/ARM/thumb1-varalloc.ll b/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
index 3787c4282b2..0e8b6c09896 100644
--- a/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
+++ b/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -34,9 +34,10 @@ bb2:
 	
 bb3:
 	%.0 = phi i8* [ %0, %entry ], [ %6, %bb2 ], [ %3, %bb1 ]
-; CHECK: subs    r4, #5
+; CHECK:      subs    r4, r7, #7
+; CHECK-NEXT: subs    r4, #1
 ; CHECK-NEXT: mov     sp, r4
-; CHECK-NEXT: pop     {r4, r5, r6, r7, pc}
+; CHECK-NEXT: pop     {r4, r6, r7, pc}
 	ret i8* %.0
 }
 
diff --git a/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
new file mode 100644
index 00000000000..13d28514a71
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
@@ -0,0 +1,380 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+target triple = "thumbv6m-unknown-unknown-eabi"
+
+define void @vla_emergency_spill(i32 %n) {
+; CHECK-LABEL: vla_emergency_spill:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .setfp r7, sp, #12
+; CHECK-NEXT:    add r7, sp, #12
+; CHECK-NEXT:    .pad #4100
+; CHECK-NEXT:    ldr r6, .LCPI0_0
+; CHECK-NEXT:    add sp, r6
+; CHECK-NEXT:    mov r6, sp
+; CHECK-NEXT:    adds r0, r0, #7
+; CHECK-NEXT:    movs r1, #7
+; CHECK-NEXT:    bics r0, r1
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    subs r0, r1, r0
+; CHECK-NEXT:    mov sp, r0
+; CHECK-NEXT:    adds r1, r6, #4
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r0, [r6]
+; CHECK-NEXT:    ldr r0, .LCPI0_1
+; CHECK-NEXT:    str r5, [r0, r6]
+; CHECK-NEXT:    ldr r0, [r6]
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    subs r4, r7, #7
+; CHECK-NEXT:    subs r4, #5
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI0_0:
+; CHECK-NEXT:    .long 4294963196 @ 0xffffeffc
+; CHECK-NEXT:  .LCPI0_1:
+; CHECK-NEXT:    .long 1024 @ 0x400
+entry:
+  %x = alloca [1024 x i32], align 4
+  %vla = alloca i8, i32 %n, align 1
+  %asm1 = call { i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},0,1,2,3,4,5"(i8* %vla, [1024 x i32]* %x, i32 undef, i32 undef, i32 undef, i32 undef)
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult1 = extractvalue { i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* %x, i32 0, i32 255
+  store i32 %asmresult5, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5}"(i32 %asmresult, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5) #2
+  ret void
+}
+
+define void @simple_emergency_spill(i32 %n) {
+; CHECK-LABEL: simple_emergency_spill:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .pad #8196
+; CHECK-NEXT:    ldr r7, .LCPI1_0
+; CHECK-NEXT:    add sp, r7
+; CHECK-NEXT:    add r0, sp, #4
+; CHECK-NEXT:    ldr r1, .LCPI1_2
+; CHECK-NEXT:    add r1, sp
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r0, [sp]
+; CHECK-NEXT:    ldr r0, .LCPI1_3
+; CHECK-NEXT:    add r0, sp
+; CHECK-NEXT:    str r5, [r0]
+; CHECK-NEXT:    ldr r0, [sp]
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    ldr r7, .LCPI1_1
+; CHECK-NEXT:    add sp, r7
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI1_0:
+; CHECK-NEXT:    .long 4294959100 @ 0xffffdffc
+; CHECK-NEXT:  .LCPI1_1:
+; CHECK-NEXT:    .long 8196 @ 0x2004
+; CHECK-NEXT:  .LCPI1_2:
+; CHECK-NEXT:    .long 4100 @ 0x1004
+; CHECK-NEXT:  .LCPI1_3:
+; CHECK-NEXT:    .long 5120 @ 0x1400
+entry:
+  %x = alloca [1024 x i32], align 4
+  %y = alloca [1024 x i32], align 4
+  %asm1 = call { i32, i32, i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},={r6},={r7},0,1,2,3,4,5,6,7"([1024 x i32]* %y, [1024 x i32]* %x, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef)
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 6
+  %asmresult8 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 7
+  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* %x, i32 0, i32 255
+  store i32 %asmresult6, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6},{r7}"(i32 %asmresult, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8)
+  ret void
+}
+
+; We have some logic to try to spill registers instead of allocating an
+; emergency spill slot, but for targets where the stack alignment is 8,
+; it only triggers when there are two available registers.  (This may
+; be worth looking into to improve the generated code quality.)
+;
+; The scavenger itself only cares whether a register is allocatable, not
+; whether it was actually spilled in the prologue, and r7 is first on
+; the priority list, so we use it anyway.  This is likely to confuse
+; debuggers, so it may be worth changing at some point.
+define void @simple_emergency_spill_nor7(i32 %n) {
+; CHECK-LABEL: simple_emergency_spill_nor7:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .pad #8196
+; CHECK-NEXT:    ldr r6, .LCPI2_0
+; CHECK-NEXT:    add sp, r6
+; CHECK-NEXT:    add r0, sp, #4
+; CHECK-NEXT:    ldr r1, .LCPI2_2
+; CHECK-NEXT:    add r1, sp
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r7, [sp]
+; CHECK-NEXT:    ldr r7, .LCPI2_3
+; CHECK-NEXT:    add r7, sp
+; CHECK-NEXT:    str r5, [r7]
+; CHECK-NEXT:    ldr r7, [sp]
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    ldr r6, .LCPI2_1
+; CHECK-NEXT:    add sp, r6
+; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI2_0:
+; CHECK-NEXT:    .long 4294959100 @ 0xffffdffc
+; CHECK-NEXT:  .LCPI2_1:
+; CHECK-NEXT:    .long 8196 @ 0x2004
+; CHECK-NEXT:  .LCPI2_2:
+; CHECK-NEXT:    .long 4100 @ 0x1004
+; CHECK-NEXT:  .LCPI2_3:
+; CHECK-NEXT:    .long 5120 @ 0x1400
+entry:
+  %x = alloca [1024 x i32], align 4
+  %y = alloca [1024 x i32], align 4
+  %asm1 = call { i32, i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},={r6},0,1,2,3,4,5,6"([1024 x i32]* %y, [1024 x i32]* %x, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef)
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 6
+  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* %x, i32 0, i32 255
+  store i32 %asmresult6, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6}"(i32 %asmresult, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7)
+  ret void
+}
+
+define void @arg_emergency_spill(i32 %n, i32 %n2, i32 %n3, i32 %n4, [252 x i32]* byval %p) {
+; CHECK-LABEL: arg_emergency_spill:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    add r0, sp, #24
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r0, [sp]
+; CHECK-NEXT:    ldr r0, .LCPI3_0
+; CHECK-NEXT:    add r0, sp
+; CHECK-NEXT:    str r5, [r0]
+; CHECK-NEXT:    ldr r0, [sp]
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI3_0:
+; CHECK-NEXT:    .long 1028 @ 0x404
+entry:
+  %pp = getelementptr inbounds [252 x i32], [252 x i32]* %p, i32 0, i32 0
+  %asm1 = call { i32, i32, i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},={r6},={r7},0,1,2,3,4,5,6,7"(i32* %pp, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef)
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 6
+  %asmresult8 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 7
+  %arrayidx = getelementptr inbounds i32, i32* %pp, i32 251
+  store i32 %asmresult6, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6},{r7}"(i32 %asmresult, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8)
+  ret void
+}
+
+; We currently overestimate the amount of required stack space by 16 bytes,
+; so this is the largest stack that doesn't require an emergency spill slot.
+define void @arg_no_emergency_spill(i32 %n, i32 %n2, i32 %n3, i32 %n4, [248 x i32]* byval %p) {
+; CHECK-LABEL: arg_no_emergency_spill:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    add r0, sp, #20
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r5, [sp, #1008]
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+entry:
+  %pp = getelementptr inbounds [248 x i32], [248 x i32]* %p, i32 0, i32 0
+  %asm1 = call { i32, i32, i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},={r6},={r7},0,1,2,3,4,5,6,7"(i32* %pp, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef)
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 6
+  %asmresult8 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 7
+  %arrayidx = getelementptr inbounds i32, i32* %pp, i32 247
+  store i32 %asmresult6, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6},{r7}"(i32 %asmresult, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8)
+  ret void
+}
+
+define void @aligned_emergency_spill(i32 %n, i32 %n2, i32 %n3, i32 %n4, [31 x i32]* byval %p) {
+; CHECK-LABEL: aligned_emergency_spill:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .setfp r7, sp, #12
+; CHECK-NEXT:    add r7, sp, #12
+; CHECK-NEXT:    .pad #44
+; CHECK-NEXT:    sub sp, #44
+; CHECK-NEXT:    mov r4, sp
+; CHECK-NEXT:    lsrs r4, r4, #4
+; CHECK-NEXT:    lsls r4, r4, #4
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    add r0, sp, #16
+; CHECK-NEXT:    adds r1, r7, #7
+; CHECK-NEXT:    adds r1, #1
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r0, [sp, #12]
+; CHECK-NEXT:    ldr r0, .LCPI5_0
+; CHECK-NEXT:    str r5, [r0, r7]
+; CHECK-NEXT:    ldr r0, [sp, #12]
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    subs r4, r7, #7
+; CHECK-NEXT:    subs r4, #5
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI5_0:
+; CHECK-NEXT:    .long 128 @ 0x80
+entry:
+  %y = alloca [4 x i32], align 16
+  %pp = getelementptr inbounds [31 x i32], [31 x i32]* %p, i32 0, i32 0
+  %asm1 = call { i32, i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},={r6},0,1,2,3,4,5,6"([4 x i32]* %y, i32* %pp, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) #3
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 6
+  %arrayidx = getelementptr inbounds i32, i32* %pp, i32 30
+  store i32 %asmresult6, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6}"(i32 %asmresult, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7)
+  ret void
+}
+
+; This function should have no emergency spill slot, so its stack should be
+; smaller than @aligned_emergency_spill.
+define void @aligned_no_emergency_spill(i32 %n, i32 %n2, i32 %n3, i32 %n4, [30 x i32]* byval %p) {
+; CHECK-LABEL: aligned_no_emergency_spill:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .setfp r7, sp, #12
+; CHECK-NEXT:    add r7, sp, #12
+; CHECK-NEXT:    .pad #28
+; CHECK-NEXT:    sub sp, #28
+; CHECK-NEXT:    mov r4, sp
+; CHECK-NEXT:    lsrs r4, r4, #4
+; CHECK-NEXT:    lsls r4, r4, #4
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    adds r1, r7, #7
+; CHECK-NEXT:    adds r1, #1
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r5, [r7, #124]
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    subs r4, r7, #7
+; CHECK-NEXT:    subs r4, #5
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+entry:
+  %y = alloca [4 x i32], align 16
+  %pp = getelementptr inbounds [30 x i32], [30 x i32]* %p, i32 0, i32 0
+  %asm1 = call { i32, i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},={r6},0,1,2,3,4,5,6"([4 x i32]* %y, i32* %pp, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) #3
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 6
+  %arrayidx = getelementptr inbounds i32, i32* %pp, i32 29
+  store i32 %asmresult6, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6}"(i32 %asmresult, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7)
+  ret void
+}
+
+; This function shouldn't fail to compile.  (It's UB, so it doesn't really
+; matter what it compiles to, exactly, but we need to check at some point
+; so we don't generate code that requires an emergency spill slot we never
+; allocated.  If the store gets eliminated, this testcase probably needs
+; to be rewritten.)
+define void @aligned_out_of_range_access(i32 %n, i32 %n2, i32 %n3, i32 %n4, [30 x i32]* byval %p) {
+; CHECK-LABEL: aligned_out_of_range_access:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .setfp r7, sp, #12
+; CHECK-NEXT:    add r7, sp, #12
+; CHECK-NEXT:    .pad #44
+; CHECK-NEXT:    sub sp, #44
+; CHECK-NEXT:    mov r4, sp
+; CHECK-NEXT:    lsrs r4, r4, #4
+; CHECK-NEXT:    lsls r4, r4, #4
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    add r0, sp, #16
+; CHECK-NEXT:    adds r1, r7, #7
+; CHECK-NEXT:    adds r1, #1
+; CHECK-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    str r5, [r0, #120]
+; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    subs r4, r7, #7
+; CHECK-NEXT:    subs r4, #5
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+entry:
+  %y = alloca [4 x i32], align 16
+  %pp = getelementptr inbounds [30 x i32], [30 x i32]* %p, i32 0, i32 0
+  %asm1 = call { i32, i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},={r6},0,1,2,3,4,5,6"([4 x i32]* %y, i32* %pp, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) #3
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 6
+  %arrayidx = getelementptr inbounds i32, i32* %pp, i32 30
+  store i32 %asmresult6, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6}"(i32 %asmresult, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7)
+  ret void
+}
diff --git a/llvm/test/CodeGen/Thumb/frame-access.ll b/llvm/test/CodeGen/Thumb/frame-access.ll
index b472d235d39..ff1d57db944 100644
--- a/llvm/test/CodeGen/Thumb/frame-access.ll
+++ b/llvm/test/CodeGen/Thumb/frame-access.ll
@@ -124,7 +124,7 @@ entry:
 ; CHECK-NEXT:  lsls r4, r4, #4
 ; CHECK-NEXT:  mov  sp, r4
 ; Incoming register varargs stored via FP
-; CHECK: mov	r0, r7
+; CHECK:      mov r0, r7
 ; CHECK-NEXT: adds r0, #8
 ; CHECK-NEXT: stm r0!, {r1, r2, r3}
 ; VLAs present, access via FP
@@ -199,11 +199,13 @@ entry:
 ; CHECK:       push {r4, r5, r6, r7, lr}
 ; 20 bytes locals
 ; CHECK:       sub sp, #20
+; Set up base pointer
+; CHECK:       mov r6, sp
 ; Allocate outgoing arguments space
 ; CHECK:       sub sp, #508
 ; CHECK:       sub sp, #4
-; Load `e` via SP, 552 = 512 + 20 + 20
-; CHECK:       ldr r3, [sp, #552]
+; Load `e` via BP, 40 = 20 + 20
+; CHECK:       ldr r3, [r6, #40]
 ; CHECK:       bl  f
 ; Stack restored before next call
 ; CHECK-NEXT:  add sp, #508
@@ -235,11 +237,12 @@ entry:
 ; Three incoming register varargs
 ; CHECK:       sub sp, #12
 ; 16 bytes callee-saves
-; CHECK:       push {r4, r5, r7, lr}
+; CHECK:       push {r4, r5, r6, lr}
 ; 20 bytes locals
 ; CHECK:       sub sp, #20
-; Incoming varargs stored via SP, 36 = 20 + 16
-; CHECK:       add r0, sp, #36
+; Incoming varargs stored via BP, 36 = 20 + 16
+; CHECK:       mov r0, r6
+; CHECK-NEXT:  adds r0, #36
 ; CHECK-NEXT:  stm r0!, {r1, r2, r3}
 
 ;
@@ -394,17 +397,19 @@ entry:
 ; CHECK-LABEL: test_local_moving_sp
 ; Locals area
 ; CHECK:      sub sp, #36
+; Set up BP
+; CHECK:      mov r6, sp
 ; Outoging arguments
 ; CHECK:      sub sp, #508
 ; CHECK-NEXT: sub sp, #508
 ; CHECK-NEXT: sub sp, #8
-; Argument addresses computed relative to SP
-; CHECK:      add  r4, sp, #1020
-; CHECK-NEXT: adds r4, #24
-; CHECK:      add  r1, sp, #1020
-; CHECK-NEXT: adds r1, #20
-; CHECK:      add  r5, sp, #1020
-; CHECK-NEXT: adds r5, #16
+; Argument addresses computed relative to BP
+; CHECK:      adds r0, r6, #7
+; CHECK-NEXT: adds r0, #13
+; CHECK:      adds r1, r6, #7
+; CHECK-NEXT: adds r1, #9
+; CHECK:      adds r5, r6, #7
+; CHECK-NEXT: adds r5, #5
 ; CHECK:      bl   u
 ; Stack restored before next call
 ; CHECK:      add  sp, #508
diff --git a/llvm/test/CodeGen/Thumb/large-stack.ll b/llvm/test/CodeGen/Thumb/large-stack.ll
index f35bffba5ca..923d2c06d23 100644
--- a/llvm/test/CodeGen/Thumb/large-stack.ll
+++ b/llvm/test/CodeGen/Thumb/large-stack.ll
@@ -33,9 +33,8 @@ define void @test100_nofpelim() "no-frame-pointer-elim"="true" {
 ; CHECK: sub sp, #508
 ; CHECK: sub sp, #508
 ; CHECK: sub sp, #508
-; ALIGN4: subs r4, r7, #4
-; ALIGN8: subs r4, r7, #7
-; ALIGN8: subs r4, #1
+; CHECK: subs r4, r7, #7
+; CHECK: subs r4, #1
 ; CHECK: mov sp, r4
     %tmp = alloca [ 1524 x i8 ] , align 4
     ret void
@@ -57,9 +56,8 @@ define void @test2_nofpelim() "no-frame-pointer-elim"="true" {
 ; CHECK-LABEL: test2_nofpelim:
 ; CHECK: ldr [[TEMP:r[0-7]]],
 ; CHECK: add sp, [[TEMP]]
-; ALIGN4: subs r4, r7, #4
-; ALIGN8: subs r4, r7, #7
-; ALIGN8: subs r4, #1
+; CHECK: subs r4, r7, #7
+; CHECK: subs r4, #1
 ; CHECK: mov sp, r4
     %tmp = alloca [ 1528 x i8 ] , align 4
     ret void
author	Eli Friedman <efriedma@quicinc.com>	2019-06-26 23:46:51 +0000
committer	Eli Friedman <efriedma@quicinc.com>	2019-06-26 23:46:51 +0000
commit	ab1d73ee32481e3033ebbe5a68afafb24da3df2e (patch)
tree	2b16e264cc26eac95a6bc63cebfdee9db999c34d /llvm/test/CodeGen
parent	d7999cbc6eb5e83e712904c1739fba799caf1c6c (diff)
download	bcm5719-llvm-ab1d73ee32481e3033ebbe5a68afafb24da3df2e.tar.gz bcm5719-llvm-ab1d73ee32481e3033ebbe5a68afafb24da3df2e.zip