6 files changed, 409 insertions, 91 deletions
diff --git a/llvm/test/CodeGen/ARM/ldrex-frame-size.ll b/llvm/test/CodeGen/ARM/ldrex-frame-size.ll
index 595540578a0..f34fb8f4d22 100644
--- a/llvm/test/CodeGen/ARM/ldrex-frame-size.ll
+++ b/llvm/test/CodeGen/ARM/ldrex-frame-size.ll
@@ -11,9 +11,9 @@
 define void @test_large_frame() {
 ; CHECK-LABEL: test_large_frame:
 ; CHECK: push
-; CHECK: sub.w sp, sp, #1004
+; CHECK: sub.w sp, sp, #1008
 
-  %ptr = alloca i32, i32 251
+  %ptr = alloca i32, i32 252
 
   %addr = getelementptr i32, i32* %ptr, i32 1
   call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
@@ -24,9 +24,9 @@ define void @test_large_frame() {
 define void @test_small_frame() {
 ; CHECK-LABEL: test_small_frame:
 ; CHECK-NOT: push
-; CHECK: sub.w sp, sp, #1000
+; CHECK: sub.w sp, sp, #1004
 
-  %ptr = alloca i32, i32 250
+  %ptr = alloca i32, i32 251
 
   %addr = getelementptr i32, i32* %ptr, i32 1
   call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
diff --git a/llvm/test/CodeGen/ARM/scavenging.mir b/llvm/test/CodeGen/ARM/scavenging.mir
deleted file mode 100644
index 5e0cbfb4abb..00000000000
--- a/llvm/test/CodeGen/ARM/scavenging.mir
+++ /dev/null
@@ -1,66 +0,0 @@
-# RUN: llc -o - %s -mtriple=thumb-arm-none-eabi -mcpu=cortex-m0 -run-pass scavenger-test | FileCheck %s
----
-# CHECK-LABEL: name: scavengebug0
-# Make sure we are not spilling/using a physreg used in the very last
-# instruction of the scavenging range.
-# CHECK-NOT: tSTRi {{.*}}$r0,{{.*}}$r0
-# CHECK-NOT: tSTRi {{.*}}$r1,{{.*}}$r1
-# CHECK-NOT: tSTRi {{.*}}$r2,{{.*}}$r2
-# CHECK-NOT: tSTRi {{.*}}$r3,{{.*}}$r3
-# CHECK-NOT: tSTRi {{.*}}$r4,{{.*}}$r4
-# CHECK-NOT: tSTRi {{.*}}$r5,{{.*}}$r5
-# CHECK-NOT: tSTRi {{.*}}$r6,{{.*}}$r6
-# CHECK-NOT: tSTRi {{.*}}$r7,{{.*}}$r7
-name: scavengebug0
-body: |
-  bb.0:
-    ; Bring up register pressure to force emergency spilling
-    $r0 = IMPLICIT_DEF
-    $r1 = IMPLICIT_DEF
-    $r2 = IMPLICIT_DEF
-    $r3 = IMPLICIT_DEF
-    $r4 = IMPLICIT_DEF
-    $r5 = IMPLICIT_DEF
-    $r6 = IMPLICIT_DEF
-    $r7 = IMPLICIT_DEF
-
-    %0 : tgpr = IMPLICIT_DEF
-    %0 = tADDhirr %0, $sp, 14, $noreg
-    tSTRi $r0, %0, 0, 14, $noreg
-
-    %1 : tgpr = IMPLICIT_DEF
-    %1 = tADDhirr %1, $sp, 14, $noreg
-    tSTRi $r1, %1, 0, 14, $noreg
-
-    %2 : tgpr = IMPLICIT_DEF
-    %2 = tADDhirr %2, $sp, 14, $noreg
-    tSTRi $r2, %2, 0, 14, $noreg
-
-    %3 : tgpr = IMPLICIT_DEF
-    %3 = tADDhirr %3, $sp, 14, $noreg
-    tSTRi $r3, %3, 0, 14, $noreg
-
-    %4 : tgpr = IMPLICIT_DEF
-    %4 = tADDhirr %4, $sp, 14, $noreg
-    tSTRi $r4, %4, 0, 14, $noreg
-
-    %5 : tgpr = IMPLICIT_DEF
-    %5 = tADDhirr %5, $sp, 14, $noreg
-    tSTRi $r5, %5, 0, 14, $noreg
-
-    %6 : tgpr = IMPLICIT_DEF
-    %6 = tADDhirr %6, $sp, 14, $noreg
-    tSTRi $r6, %6, 0, 14, $noreg
-
-    %7 : tgpr = IMPLICIT_DEF
-    %7 = tADDhirr %7, $sp, 14, $noreg
-    tSTRi $r7, %7, 0, 14, $noreg
-
-    KILL $r0
-    KILL $r1
-    KILL $r2
-    KILL $r3
-    KILL $r4
-    KILL $r5
-    KILL $r6
-    KILL $r7
diff --git a/llvm/test/CodeGen/ARM/thumb1-varalloc.ll b/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
index 3787c4282b2..0e8b6c09896 100644
--- a/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
+++ b/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -34,9 +34,10 @@ bb2:
 	
 bb3:
 	%.0 = phi i8* [ %0, %entry ], [ %6, %bb2 ], [ %3, %bb1 ]
-; CHECK: subs    r4, #5
+; CHECK:      subs    r4, r7, #7
+; CHECK-NEXT: subs    r4, #1
 ; CHECK-NEXT: mov     sp, r4
-; CHECK-NEXT: pop     {r4, r5, r6, r7, pc}
+; CHECK-NEXT: pop     {r4, r6, r7, pc}
 	ret i8* %.0
 }
 
diff --git a/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
new file mode 100644
index 00000000000..13d28514a71
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
@@ -0,0 +1,380 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+target triple = "thumbv6m-unknown-unknown-eabi"
+
+define void @vla_emergency_spill(i32 %n) {
+; CHECK-LABEL: vla_emergency_spill:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .setfp r7, sp, #12
+; CHECK-NEXT:    add r7, sp, #12
+; CHECK-NEXT:    .pad #4100
+; CHECK-NEXT:    ldr r6, .LCPI0_0
+; CHECK-NEXT:    add sp, r6
+; CHECK-NEXT:    mov r6, sp
+; CHECK-NEXT:    adds r0, r0, #7
+; CHECK-NEXT:    movs r1, #7
+; CHECK-NEXT:    bics r0, r1
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    subs r0, r1, r0
+; CHECK-NEXT:    mov sp, r0
+; CHECK-NEXT:    adds r1, r6, #4
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r0, [r6]
+; CHECK-NEXT:    ldr r0, .LCPI0_1
+; CHECK-NEXT:    str r5, [r0, r6]
+; CHECK-NEXT:    ldr r0, [r6]
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    subs r4, r7, #7
+; CHECK-NEXT:    subs r4, #5
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI0_0:
+; CHECK-NEXT:    .long 4294963196 @ 0xffffeffc
+; CHECK-NEXT:  .LCPI0_1:
+; CHECK-NEXT:    .long 1024 @ 0x400
+entry:
+  %x = alloca [1024 x i32], align 4
+  %vla = alloca i8, i32 %n, align 1
+  %asm1 = call { i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},0,1,2,3,4,5"(i8* %vla, [1024 x i32]* %x, i32 undef, i32 undef, i32 undef, i32 undef)
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult1 = extractvalue { i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* %x, i32 0, i32 255
+  store i32 %asmresult5, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5}"(i32 %asmresult, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5) #2
+  ret void
+}
+
+define void @simple_emergency_spill(i32 %n) {
+; CHECK-LABEL: simple_emergency_spill:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .pad #8196
+; CHECK-NEXT:    ldr r7, .LCPI1_0
+; CHECK-NEXT:    add sp, r7
+; CHECK-NEXT:    add r0, sp, #4
+; CHECK-NEXT:    ldr r1, .LCPI1_2
+; CHECK-NEXT:    add r1, sp
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r0, [sp]
+; CHECK-NEXT:    ldr r0, .LCPI1_3
+; CHECK-NEXT:    add r0, sp
+; CHECK-NEXT:    str r5, [r0]
+; CHECK-NEXT:    ldr r0, [sp]
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    ldr r7, .LCPI1_1
+; CHECK-NEXT:    add sp, r7
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI1_0:
+; CHECK-NEXT:    .long 4294959100 @ 0xffffdffc
+; CHECK-NEXT:  .LCPI1_1:
+; CHECK-NEXT:    .long 8196 @ 0x2004
+; CHECK-NEXT:  .LCPI1_2:
+; CHECK-NEXT:    .long 4100 @ 0x1004
+; CHECK-NEXT:  .LCPI1_3:
+; CHECK-NEXT:    .long 5120 @ 0x1400
+entry:
+  %x = alloca [1024 x i32], align 4
+  %y = alloca [1024 x i32], align 4
+  %asm1 = call { i32, i32, i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},={r6},={r7},0,1,2,3,4,5,6,7"([1024 x i32]* %y, [1024 x i32]* %x, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef)
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 6
+  %asmresult8 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 7
+  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* %x, i32 0, i32 255
+  store i32 %asmresult6, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6},{r7}"(i32 %asmresult, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8)
+  ret void
+}
+
+; We have some logic that tries to spill registers instead of allocating
+; an emergency spill slot, but on targets where the stack alignment is 8
+; it only triggers when there are two available registers.  (This may be
+; worth looking into, to improve the quality of the generated code.)
+;
+; The scavenger itself only cares whether a register is allocatable, not
+; whether it was actually spilled in the prologue, and r7 is first on
+; the priority list, so r7 gets used anyway.  This is likely to confuse
+; debuggers, so it may be worth changing at some point.
+define void @simple_emergency_spill_nor7(i32 %n) {
+; CHECK-LABEL: simple_emergency_spill_nor7:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .pad #8196
+; CHECK-NEXT:    ldr r6, .LCPI2_0
+; CHECK-NEXT:    add sp, r6
+; CHECK-NEXT:    add r0, sp, #4
+; CHECK-NEXT:    ldr r1, .LCPI2_2
+; CHECK-NEXT:    add r1, sp
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r7, [sp]
+; CHECK-NEXT:    ldr r7, .LCPI2_3
+; CHECK-NEXT:    add r7, sp
+; CHECK-NEXT:    str r5, [r7]
+; CHECK-NEXT:    ldr r7, [sp]
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    ldr r6, .LCPI2_1
+; CHECK-NEXT:    add sp, r6
+; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI2_0:
+; CHECK-NEXT:    .long 4294959100 @ 0xffffdffc
+; CHECK-NEXT:  .LCPI2_1:
+; CHECK-NEXT:    .long 8196 @ 0x2004
+; CHECK-NEXT:  .LCPI2_2:
+; CHECK-NEXT:    .long 4100 @ 0x1004
+; CHECK-NEXT:  .LCPI2_3:
+; CHECK-NEXT:    .long 5120 @ 0x1400
+entry:
+  %x = alloca [1024 x i32], align 4
+  %y = alloca [1024 x i32], align 4
+  %asm1 = call { i32, i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},={r6},0,1,2,3,4,5,6"([1024 x i32]* %y, [1024 x i32]* %x, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef)
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 6
+  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* %x, i32 0, i32 255
+  store i32 %asmresult6, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6}"(i32 %asmresult, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7)
+  ret void
+}
+
+define void @arg_emergency_spill(i32 %n, i32 %n2, i32 %n3, i32 %n4, [252 x i32]* byval %p) {
+; CHECK-LABEL: arg_emergency_spill:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    add r0, sp, #24
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r0, [sp]
+; CHECK-NEXT:    ldr r0, .LCPI3_0
+; CHECK-NEXT:    add r0, sp
+; CHECK-NEXT:    str r5, [r0]
+; CHECK-NEXT:    ldr r0, [sp]
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI3_0:
+; CHECK-NEXT:    .long 1028 @ 0x404
+entry:
+  %pp = getelementptr inbounds [252 x i32], [252 x i32]* %p, i32 0, i32 0
+  %asm1 = call { i32, i32, i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},={r6},={r7},0,1,2,3,4,5,6,7"(i32* %pp, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef)
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 6
+  %asmresult8 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 7
+  %arrayidx = getelementptr inbounds i32, i32* %pp, i32 251
+  store i32 %asmresult6, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6},{r7}"(i32 %asmresult, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8)
+  ret void
+}
+
+; We currently overestimate the amount of required stack space by 16 bytes,
+; so this is the largest stack that doesn't require an emergency spill slot.
+define void @arg_no_emergency_spill(i32 %n, i32 %n2, i32 %n3, i32 %n4, [248 x i32]* byval %p) {
+; CHECK-LABEL: arg_no_emergency_spill:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    add r0, sp, #20
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r5, [sp, #1008]
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+entry:
+  %pp = getelementptr inbounds [248 x i32], [248 x i32]* %p, i32 0, i32 0
+  %asm1 = call { i32, i32, i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},={r6},={r7},0,1,2,3,4,5,6,7"(i32* %pp, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef)
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 6
+  %asmresult8 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm1, 7
+  %arrayidx = getelementptr inbounds i32, i32* %pp, i32 247
+  store i32 %asmresult6, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6},{r7}"(i32 %asmresult, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8)
+  ret void
+}
+
+define void @aligned_emergency_spill(i32 %n, i32 %n2, i32 %n3, i32 %n4, [31 x i32]* byval %p) {
+; CHECK-LABEL: aligned_emergency_spill:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .setfp r7, sp, #12
+; CHECK-NEXT:    add r7, sp, #12
+; CHECK-NEXT:    .pad #44
+; CHECK-NEXT:    sub sp, #44
+; CHECK-NEXT:    mov r4, sp
+; CHECK-NEXT:    lsrs r4, r4, #4
+; CHECK-NEXT:    lsls r4, r4, #4
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    add r0, sp, #16
+; CHECK-NEXT:    adds r1, r7, #7
+; CHECK-NEXT:    adds r1, #1
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r0, [sp, #12]
+; CHECK-NEXT:    ldr r0, .LCPI5_0
+; CHECK-NEXT:    str r5, [r0, r7]
+; CHECK-NEXT:    ldr r0, [sp, #12]
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    subs r4, r7, #7
+; CHECK-NEXT:    subs r4, #5
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI5_0:
+; CHECK-NEXT:    .long 128 @ 0x80
+entry:
+  %y = alloca [4 x i32], align 16
+  %pp = getelementptr inbounds [31 x i32], [31 x i32]* %p, i32 0, i32 0
+  %asm1 = call { i32, i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},={r6},0,1,2,3,4,5,6"([4 x i32]* %y, i32* %pp, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) #3
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 6
+  %arrayidx = getelementptr inbounds i32, i32* %pp, i32 30
+  store i32 %asmresult6, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6}"(i32 %asmresult, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7)
+  ret void
+}
+
+; This function should have no emergency spill slot, so its stack should be
+; smaller than that of @aligned_emergency_spill.
+define void @aligned_no_emergency_spill(i32 %n, i32 %n2, i32 %n3, i32 %n4, [30 x i32]* byval %p) {
+; CHECK-LABEL: aligned_no_emergency_spill:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .setfp r7, sp, #12
+; CHECK-NEXT:    add r7, sp, #12
+; CHECK-NEXT:    .pad #28
+; CHECK-NEXT:    sub sp, #28
+; CHECK-NEXT:    mov r4, sp
+; CHECK-NEXT:    lsrs r4, r4, #4
+; CHECK-NEXT:    lsls r4, r4, #4
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    adds r1, r7, #7
+; CHECK-NEXT:    adds r1, #1
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r5, [r7, #124]
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    subs r4, r7, #7
+; CHECK-NEXT:    subs r4, #5
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+entry:
+  %y = alloca [4 x i32], align 16
+  %pp = getelementptr inbounds [30 x i32], [30 x i32]* %p, i32 0, i32 0
+  %asm1 = call { i32, i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},={r6},0,1,2,3,4,5,6"([4 x i32]* %y, i32* %pp, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) #3
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 6
+  %arrayidx = getelementptr inbounds i32, i32* %pp, i32 29
+  store i32 %asmresult6, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6}"(i32 %asmresult, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7)
+  ret void
+}
+
+; This function must compile without errors.  (The out-of-range store is
+; UB, so it doesn't really matter exactly what it compiles to, but the
+; compiler must never generate code that requires an emergency spill slot
+; it did not allocate.  If the store ever gets eliminated, this testcase
+; probably needs to be rewritten.)
+define void @aligned_out_of_range_access(i32 %n, i32 %n2, i32 %n3, i32 %n4, [30 x i32]* byval %p) {
+; CHECK-LABEL: aligned_out_of_range_access:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .setfp r7, sp, #12
+; CHECK-NEXT:    add r7, sp, #12
+; CHECK-NEXT:    .pad #44
+; CHECK-NEXT:    sub sp, #44
+; CHECK-NEXT:    mov r4, sp
+; CHECK-NEXT:    lsrs r4, r4, #4
+; CHECK-NEXT:    lsls r4, r4, #4
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    add r0, sp, #16
+; CHECK-NEXT:    adds r1, r7, #7
+; CHECK-NEXT:    adds r1, #1
+; CHECK-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    str r5, [r0, #120]
+; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    subs r4, r7, #7
+; CHECK-NEXT:    subs r4, #5
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+entry:
+  %y = alloca [4 x i32], align 16
+  %pp = getelementptr inbounds [30 x i32], [30 x i32]* %p, i32 0, i32 0
+  %asm1 = call { i32, i32, i32, i32, i32, i32, i32 } asm "", "={r0},={r1},={r2},={r3},={r4},={r5},={r6},0,1,2,3,4,5,6"([4 x i32]* %y, i32* %pp, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef) #3
+  %asmresult = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 0
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 1
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 2
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 3
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 4
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 5
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32 } %asm1, 6
+  %arrayidx = getelementptr inbounds i32, i32* %pp, i32 30
+  store i32 %asmresult6, i32* %arrayidx, align 4
+  call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6}"(i32 %asmresult, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7)
+  ret void
+}
diff --git a/llvm/test/CodeGen/Thumb/frame-access.ll b/llvm/test/CodeGen/Thumb/frame-access.ll
index b472d235d39..ff1d57db944 100644
--- a/llvm/test/CodeGen/Thumb/frame-access.ll
+++ b/llvm/test/CodeGen/Thumb/frame-access.ll
@@ -124,7 +124,7 @@ entry:
 ; CHECK-NEXT:  lsls r4, r4, #4
 ; CHECK-NEXT:  mov  sp, r4
 ; Incoming register varargs stored via FP
-; CHECK: mov	r0, r7
+; CHECK:      mov r0, r7
 ; CHECK-NEXT: adds r0, #8
 ; CHECK-NEXT: stm r0!, {r1, r2, r3}
 ; VLAs present, access via FP
@@ -199,11 +199,13 @@ entry:
 ; CHECK:       push {r4, r5, r6, r7, lr}
 ; 20 bytes locals
 ; CHECK:       sub sp, #20
+; Set up base pointer
+; CHECK:       mov r6, sp
 ; Allocate outgoing arguments space
 ; CHECK:       sub sp, #508
 ; CHECK:       sub sp, #4
-; Load `e` via SP, 552 = 512 + 20 + 20
-; CHECK:       ldr r3, [sp, #552]
+; Load `e` via BP, 40 = 20 + 20
+; CHECK:       ldr r3, [r6, #40]
 ; CHECK:       bl  f
 ; Stack restored before next call
 ; CHECK-NEXT:  add sp, #508
@@ -235,11 +237,12 @@ entry:
 ; Three incoming register varargs
 ; CHECK:       sub sp, #12
 ; 16 bytes callee-saves
-; CHECK:       push {r4, r5, r7, lr}
+; CHECK:       push {r4, r5, r6, lr}
 ; 20 bytes locals
 ; CHECK:       sub sp, #20
-; Incoming varargs stored via SP, 36 = 20 + 16
-; CHECK:       add r0, sp, #36
+; Incoming varargs stored via BP, 36 = 20 + 16
+; CHECK:       mov r0, r6
+; CHECK-NEXT:  adds r0, #36
 ; CHECK-NEXT:  stm r0!, {r1, r2, r3}
 
 ;
@@ -394,17 +397,19 @@ entry:
 ; CHECK-LABEL: test_local_moving_sp
 ; Locals area
 ; CHECK:      sub sp, #36
+; Set up BP
+; CHECK:      mov r6, sp
 ; Outoging arguments
 ; CHECK:      sub sp, #508
 ; CHECK-NEXT: sub sp, #508
 ; CHECK-NEXT: sub sp, #8
-; Argument addresses computed relative to SP
-; CHECK:      add  r4, sp, #1020
-; CHECK-NEXT: adds r4, #24
-; CHECK:      add  r1, sp, #1020
-; CHECK-NEXT: adds r1, #20
-; CHECK:      add  r5, sp, #1020
-; CHECK-NEXT: adds r5, #16
+; Argument addresses computed relative to BP
+; CHECK:      adds r0, r6, #7
+; CHECK-NEXT: adds r0, #13
+; CHECK:      adds r1, r6, #7
+; CHECK-NEXT: adds r1, #9
+; CHECK:      adds r5, r6, #7
+; CHECK-NEXT: adds r5, #5
 ; CHECK:      bl   u
 ; Stack restored before next call
 ; CHECK:      add  sp, #508
diff --git a/llvm/test/CodeGen/Thumb/large-stack.ll b/llvm/test/CodeGen/Thumb/large-stack.ll
index f35bffba5ca..923d2c06d23 100644
--- a/llvm/test/CodeGen/Thumb/large-stack.ll
+++ b/llvm/test/CodeGen/Thumb/large-stack.ll
@@ -33,9 +33,8 @@ define void @test100_nofpelim() "no-frame-pointer-elim"="true" {
 ; CHECK: sub sp, #508
 ; CHECK: sub sp, #508
 ; CHECK: sub sp, #508
-; ALIGN4: subs r4, r7, #4
-; ALIGN8: subs r4, r7, #7
-; ALIGN8: subs r4, #1
+; CHECK: subs r4, r7, #7
+; CHECK: subs r4, #1
 ; CHECK: mov sp, r4
     %tmp = alloca [ 1524 x i8 ] , align 4
     ret void
@@ -57,9 +56,8 @@ define void @test2_nofpelim() "no-frame-pointer-elim"="true" {
 ; CHECK-LABEL: test2_nofpelim:
 ; CHECK: ldr [[TEMP:r[0-7]]],
 ; CHECK: add sp, [[TEMP]]
-; ALIGN4: subs r4, r7, #4
-; ALIGN8: subs r4, r7, #7
-; ALIGN8: subs r4, #1
+; CHECK: subs r4, r7, #7
+; CHECK: subs r4, #1
 ; CHECK: mov sp, r4
     %tmp = alloca [ 1528 x i8 ] , align 4
     ret void