4 files changed, 272 insertions, 41 deletions
diff --git a/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/llvm/lib/Target/Mips/MipsFrameLowering.cpp
index ef05166503b..27a85970da6 100644
--- a/llvm/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsFrameLowering.cpp
@@ -107,38 +107,31 @@ bool MipsFrameLowering::hasBP(const MachineFunction &MF) const {
   return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF);
 }
 
+// Estimate the size of the stack, including the incoming arguments. We need to
+// account for register spills, local objects, reserved call frame and incoming
+// arguments. This is required to determine the largest possible positive offset
+// from $sp so that it can be determined if an emergency spill slot for stack
+// addresses is required.
 uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
 
-  int64_t Offset = 0;
+  int64_t Size = 0;
 
-  // Iterate over fixed sized objects.
+  // Iterate over fixed sized objects which are incoming arguments.
   for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
-    Offset = std::max(Offset, -MFI.getObjectOffset(I));
+    if (MFI.getObjectOffset(I) > 0)
+      Size += MFI.getObjectSize(I);
 
   // Conservatively assume all callee-saved registers will be saved.
   for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) {
-    unsigned Size = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R));
-    Offset = alignTo(Offset + Size, Size);
+    unsigned RegSize = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R));
+    Size = alignTo(Size + RegSize, RegSize);
   }
 
-  unsigned MaxAlign = MFI.getMaxAlignment();
-
-  // Check that MaxAlign is not zero if there is a stack object that is not a
-  // callee-saved spill.
-  assert(!MFI.getObjectIndexEnd() || MaxAlign);
-
-  // Iterate over other objects.
-  for (unsigned I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I)
-    Offset = alignTo(Offset + MFI.getObjectSize(I), MaxAlign);
-
-  // Call frame.
-  if (MFI.adjustsStack() && hasReservedCallFrame(MF))
-    Offset = alignTo(Offset + MFI.getMaxCallFrameSize(),
-                     std::max(MaxAlign, getStackAlignment()));
-
-  return alignTo(Offset, getStackAlignment());
+  // Get the size of the rest of the frame objects and any possible reserved
+  // call frame, accounting for alignment.
+  return Size + MFI.estimateStackSize(MF);
 }
 
 // Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
index 0b19b18449e..ca19089c912 100644
--- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -893,10 +893,12 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
   }
 
   // Set scavenging frame index if necessary.
-  uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() +
-    estimateStackSize(MF);
+  uint64_t MaxSPOffset = estimateStackSize(MF);
 
-  if (isInt<16>(MaxSPOffset))
+  // MSA has a minimum offset of 10 bits signed. If there is a variable
+  // sized object on the stack, the estimation cannot account for it.
+  if (isIntN(STI.hasMSA() ? 10 : 16, MaxSPOffset) &&
+      !MF.getFrameInfo().hasVarSizedObjects())
     return;
 
   const TargetRegisterClass &RC =
diff --git a/llvm/test/CodeGen/Mips/msa/emergency-spill.mir b/llvm/test/CodeGen/Mips/msa/emergency-spill.mir
new file mode 100644
index 00000000000..502b60f673e
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/msa/emergency-spill.mir
@@ -0,0 +1,221 @@
+# RUN: llc %s -start-after=shrink-wrap -march=mips64 -mcpu=mips64r6 -mattr=+fp64,+msa -o /dev/null
+
+# Test that estimated size of the stack leads to the creation of an emergency
+# spill when MSA is in use. Previously, this test case would fail during
+# register scavenging due to the lack of a spill slot.
+--- |
+  define inreg { i64, i64 } @test(i64 inreg %a.coerce0, i64 inreg %a.coerce1, i64 inreg %b.coerce0, i64 inreg %b.coerce1, i32 signext %c) #0 {
+  entry:
+    %retval = alloca <16 x i8>, align 16
+    %a = alloca <16 x i8>, align 16
+    %b = alloca <16 x i8>, align 16
+    %a.addr = alloca <16 x i8>, align 16
+    %b.addr = alloca <16 x i8>, align 16
+    %c.addr = alloca i32, align 4
+    %g = alloca <16 x i8>*, align 8
+    %d = alloca i8*, align 8
+    %0 = bitcast <16 x i8>* %a to { i64, i64 }*
+    %1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %0, i32 0, i32 0
+    store i64 %a.coerce0, i64* %1, align 16
+    %2 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %0, i32 0, i32 1
+    store i64 %a.coerce1, i64* %2, align 8
+    %a1 = load <16 x i8>, <16 x i8>* %a, align 16
+    %3 = bitcast <16 x i8>* %b to { i64, i64 }*
+    %4 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %3, i32 0, i32 0
+    store i64 %b.coerce0, i64* %4, align 16
+    %5 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %3, i32 0, i32 1
+    store i64 %b.coerce1, i64* %5, align 8
+    %b2 = load <16 x i8>, <16 x i8>* %b, align 16
+    store <16 x i8> %a1, <16 x i8>* %a.addr, align 16
+    store <16 x i8> %b2, <16 x i8>* %b.addr, align 16
+    store i32 %c, i32* %c.addr, align 4
+    %6 = alloca i8, i64 6400, align 16
+    %7 = bitcast i8* %6 to <16 x i8>*
+    store <16 x i8>* %7, <16 x i8>** %g, align 8
+    %8 = load <16 x i8>*, <16 x i8>** %g, align 8
+    call void @h(<16 x i8>* %b.addr, <16 x i8>* %8)
+    %9 = load <16 x i8>*, <16 x i8>** %g, align 8
+    %10 = bitcast <16 x i8>* %9 to i8*
+    store i8* %10, i8** %d, align 8
+    %11 = load <16 x i8>, <16 x i8>* %a.addr, align 16
+    %12 = load i8*, i8** %d, align 8
+    %arrayidx = getelementptr inbounds i8, i8* %12, i64 0
+    %13 = load i8, i8* %arrayidx, align 1
+    %conv = sext i8 %13 to i32
+    %14 = call <16 x i8> @llvm.mips.fill.b(i32 %conv)
+    %add = add <16 x i8> %11, %14
+    %15 = load i8*, i8** %d, align 8
+    %arrayidx3 = getelementptr inbounds i8, i8* %15, i64 1
+    %16 = load i8, i8* %arrayidx3, align 1
+    %conv4 = sext i8 %16 to i32
+    %17 = call <16 x i8> @llvm.mips.fill.b(i32 %conv4)
+    %add5 = add <16 x i8> %add, %17
+    %18 = load <16 x i8>, <16 x i8>* %b.addr, align 16
+    %add6 = add <16 x i8> %18, %add5
+    store <16 x i8> %add6, <16 x i8>* %b.addr, align 16
+    %19 = load <16 x i8>, <16 x i8>* %b.addr, align 16
+    store <16 x i8> %19, <16 x i8>* %retval, align 16
+    %20 = bitcast <16 x i8>* %retval to { i64, i64 }*
+    %21 = load { i64, i64 }, { i64, i64 }* %20, align 16
+    ret { i64, i64 } %21
+  }
+
+  declare void @h(<16 x i8>*, <16 x i8>*)
+
+  declare <16 x i8> @llvm.mips.fill.b(i32)
+
+  declare void @llvm.stackprotector(i8*, i8**)
+
+...
+---
+name:            test
+alignment:       3
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+liveins:
+  - { reg: '%a0_64', virtual-reg: '' }
+  - { reg: '%a1_64', virtual-reg: '' }
+  - { reg: '%a2_64', virtual-reg: '' }
+  - { reg: '%a3_64', virtual-reg: '' }
+  - { reg: '%t0_64', virtual-reg: '' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    16
+  adjustsStack:    false
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:
+stack:
+  - { id: 0, name: retval, type: default, offset: 0, size: 16, alignment: 16,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      di-variable: '', di-expression: '', di-location: '' }
+  - { id: 1, name: a, type: default, offset: 0, size: 16, alignment: 16,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      di-variable: '', di-expression: '', di-location: '' }
+  - { id: 2, name: b, type: default, offset: 0, size: 16, alignment: 16,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      di-variable: '', di-expression: '', di-location: '' }
+  - { id: 3, name: a.addr, type: default, offset: 0, size: 16, alignment: 16,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      di-variable: '', di-expression: '', di-location: '' }
+  - { id: 4, name: b.addr, type: default, offset: 0, size: 16, alignment: 16,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      di-variable: '', di-expression: '', di-location: '' }
+  - { id: 5, name: c.addr, type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      di-variable: '', di-expression: '', di-location: '' }
+  - { id: 6, name: g, type: default, offset: 0, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      di-variable: '', di-expression: '', di-location: '' }
+  - { id: 7, name: d, type: default, offset: 0, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      di-variable: '', di-expression: '', di-location: '' }
+  - { id: 8, name: '', type: default, offset: 0, size: 6400,
+      alignment: 16, stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      di-variable: '', di-expression: '', di-location: '' }
+constants:
+body:             |
+  bb.0.entry:
+    liveins: %a0_64, %a1_64, %a2_64, %a3_64, %t0_64
+
+    SD killed %a0_64, %stack.1.a, 0 :: (store 8 into %ir.1, align 16)
+    SD killed %a1_64, %stack.1.a, 8 :: (store 8 into %ir.2)
+    %w0 = LD_B %stack.1.a, 0 :: (dereferenceable load 16 from %ir.a)
+    SD killed %a2_64, %stack.2.b, 0 :: (store 8 into %ir.4, align 16)
+    SD killed %a3_64, %stack.2.b, 8 :: (store 8 into %ir.5)
+    %w1 = LD_B %stack.2.b, 0 :: (dereferenceable load 16 from %ir.b)
+    ST_B killed %w0, %stack.3.a.addr, 0 :: (store 16 into %ir.a.addr)
+    ST_B killed %w1, %stack.4.b.addr, 0 :: (store 16 into %ir.b.addr)
+    SW %t0, %stack.5.c.addr, 0, implicit killed %t0_64 :: (store 4 into %ir.c.addr)
+    %at_64 = LEA_ADDiu64 %stack.8, 0
+    SD killed %at_64, %stack.6.g, 0 :: (store 8 into %ir.g)
+    %a1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead %sp, implicit %sp
+    %a0_64 = LEA_ADDiu64 %stack.4.b.addr, 0
+    JAL @h, csr_n64, implicit-def dead %ra, implicit %a0_64, implicit %a1_64, implicit-def %sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp
+    %at_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %v0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %v1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %a0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %a1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %a2_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %a3_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %t0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %t1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %t2_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %t3_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %t4_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %t5_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %t6_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %t7_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %s0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %s1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %s2_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %s3_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %s4_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %s5_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %s6_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %s7_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %t8_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %t9_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %ra_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g)
+    %w0 = LD_B %stack.3.a.addr, 0 :: (dereferenceable load 16 from %ir.a.addr)
+    SD %at_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %v0_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %v1_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %a0_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %a1_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %a2_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %a3_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %t0_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %t1_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %t2_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %t3_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %t4_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %t5_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %t6_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %t7_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %s0_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %s1_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %s2_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %s3_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %s4_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %s5_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %s6_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %s7_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %t8_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %t9_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    SD %ra_64, %stack.7.d, 0 :: (store 8 into %ir.d)
+    %at_64 = LD %stack.7.d, 0 :: (dereferenceable load 8 from %ir.d)
+    %v0 = LB %at_64, 0 :: (load 1 from %ir.arrayidx)
+    %w1 = FILL_B killed %v0
+    %w0 = ADDV_B killed %w0, killed %w1
+    %at = LB killed %at_64, 1 :: (load 1 from %ir.arrayidx3)
+    %w1 = FILL_B killed %at
+    %w0 = ADDV_B killed %w0, killed %w1
+    %w1 = LD_B %stack.4.b.addr, 0 :: (dereferenceable load 16 from %ir.b.addr)
+    %w0 = ADDV_B killed %w1, killed %w0
+    ST_B killed %w0, %stack.4.b.addr, 0 :: (store 16 into %ir.b.addr)
+    %w0 = LD_B %stack.4.b.addr, 0 :: (dereferenceable load 16 from %ir.b.addr)
+    ST_B killed %w0, %stack.0.retval, 0 :: (store 16 into %ir.retval)
+    %v0_64 = LD %stack.0.retval, 0 :: (dereferenceable load 8 from %ir.20, align 16)
+    %v1_64 = LD %stack.0.retval, 8 :: (dereferenceable load 8 from %ir.20 + 8, align 16)
+    RetRA implicit %v0_64, implicit %v1_64
+
+...
diff --git a/llvm/test/CodeGen/Mips/msa/frameindex.ll b/llvm/test/CodeGen/Mips/msa/frameindex.ll
index f903381f9ef..9c2228d3bf6 100644
--- a/llvm/test/CodeGen/Mips/msa/frameindex.ll
+++ b/llvm/test/CodeGen/Mips/msa/frameindex.ll
@@ -18,7 +18,8 @@ define void @loadstore_v16i8_just_under_simm10() nounwind {
   ; MIPS32-AE: loadstore_v16i8_just_under_simm10:
 
   %1 = alloca <16 x i8>
-  %2 = alloca [496 x i8] ; Push the frame right up to 512 bytes
+  %2 = alloca [492 x i8] ; Push the frame--accounting for the emergency spill
+                         ; slot--right up to 512 bytes
 
   %3 = load volatile <16 x i8>, <16 x i8>* %1
   ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 496($sp)
@@ -33,7 +34,8 @@ define void @loadstore_v16i8_just_over_simm10() nounwind {
   ; MIPS32-AE: loadstore_v16i8_just_over_simm10:
 
   %1 = alloca <16 x i8>
-  %2 = alloca [497 x i8] ; Push the frame just over 512 bytes
+  %2 = alloca [497 x i8] ; Push the frame--accounting for the emergency spill
+                         ; slot--just over 512 bytes
 
   %3 = load volatile <16 x i8>, <16 x i8>* %1
   ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 512
@@ -50,7 +52,8 @@ define void @loadstore_v16i8_just_under_simm16() nounwind {
   ; MIPS32-AE: loadstore_v16i8_just_under_simm16:
 
   %1 = alloca <16 x i8>
-  %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
+  %2 = alloca [32752 x i8] ; Push the frame--accounting for the emergency spill
+                           ; slot--right up to 32768 bytes
 
   %3 = load volatile <16 x i8>, <16 x i8>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@@ -69,7 +72,8 @@ define void @loadstore_v16i8_just_over_simm16() nounwind {
   ; MIPS32-AE: loadstore_v16i8_just_over_simm16:
 
   %1 = alloca <16 x i8>
-  %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
+  %2 = alloca [32753 x i8] ; Push the frame--accounting for the emergency spill
+                           ; slot--just over 32768 bytes
 
   %3 = load volatile <16 x i8>, <16 x i8>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@@ -121,7 +125,8 @@ define void @loadstore_v8i16_just_under_simm10() nounwind {
   ; MIPS32-AE: loadstore_v8i16_just_under_simm10:
 
   %1 = alloca <8 x i16>
-  %2 = alloca [1008 x i8] ; Push the frame right up to 1024 bytes
+  %2 = alloca [1004 x i8] ; Push the frame--accounting for the emergency spill
+                          ; slot--right up to 1024 bytes
 
   %3 = load volatile <8 x i16>, <8 x i16>* %1
   ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 1008($sp)
@@ -136,7 +141,8 @@ define void @loadstore_v8i16_just_over_simm10() nounwind {
   ; MIPS32-AE: loadstore_v8i16_just_over_simm10:
 
   %1 = alloca <8 x i16>
-  %2 = alloca [1009 x i8] ; Push the frame just over 1024 bytes
+  %2 = alloca [1009 x i8] ; Push the frame--accounting for the emergency spill
+                          ; slot--just over 1024 bytes
 
   %3 = load volatile <8 x i16>, <8 x i16>* %1
   ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1024
@@ -153,7 +159,8 @@ define void @loadstore_v8i16_just_under_simm16() nounwind {
   ; MIPS32-AE: loadstore_v8i16_just_under_simm16:
 
   %1 = alloca <8 x i16>
-  %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
+  %2 = alloca [32752 x i8] ; Push the frame--accounting for the emergency spill
+                           ; slot--right up to 32768 bytes
 
   %3 = load volatile <8 x i16>, <8 x i16>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@@ -172,7 +179,8 @@ define void @loadstore_v8i16_just_over_simm16() nounwind {
   ; MIPS32-AE: loadstore_v8i16_just_over_simm16:
 
   %1 = alloca <8 x i16>
-  %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
+  %2 = alloca [32753 x i8] ; Push the frame--accounting for the emergency spill
+                           ; slot--just over 32768 bytes
 
   %3 = load volatile <8 x i16>, <8 x i16>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@@ -224,7 +232,8 @@ define void @loadstore_v4i32_just_under_simm10() nounwind {
   ; MIPS32-AE: loadstore_v4i32_just_under_simm10:
 
   %1 = alloca <4 x i32>
-  %2 = alloca [2032 x i8] ; Push the frame right up to 2048 bytes
+  %2 = alloca [2028 x i8] ; Push the frame--accounting for the emergency spill
+                          ; slot--right up to 2048 bytes
 
   %3 = load volatile <4 x i32>, <4 x i32>* %1
   ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 2032($sp)
@@ -239,7 +248,8 @@ define void @loadstore_v4i32_just_over_simm10() nounwind {
   ; MIPS32-AE: loadstore_v4i32_just_over_simm10:
 
   %1 = alloca <4 x i32>
-  %2 = alloca [2033 x i8] ; Push the frame just over 2048 bytes
+  %2 = alloca [2033 x i8] ; Push the frame--accounting for the emergency spill
+                          ; slot--just over 2048 bytes
 
   %3 = load volatile <4 x i32>, <4 x i32>* %1
   ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 2048
@@ -256,7 +266,8 @@ define void @loadstore_v4i32_just_under_simm16() nounwind {
   ; MIPS32-AE: loadstore_v4i32_just_under_simm16:
 
   %1 = alloca <4 x i32>
-  %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
+  %2 = alloca [32752 x i8] ; Push the frame--accounting for the emergency spill
+                           ; slot--right up to 32768 bytes
 
   %3 = load volatile <4 x i32>, <4 x i32>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@@ -275,7 +286,8 @@ define void @loadstore_v4i32_just_over_simm16() nounwind {
   ; MIPS32-AE: loadstore_v4i32_just_over_simm16:
 
   %1 = alloca <4 x i32>
-  %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
+  %2 = alloca [32753 x i8] ; Push the frame--accounting for the emergency spill
+                           ; slot--just over 32768 bytes
 
   %3 = load volatile <4 x i32>, <4 x i32>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@@ -327,8 +339,8 @@ define void @loadstore_v2i64_just_under_simm10() nounwind {
   ; MIPS32-AE: loadstore_v2i64_just_under_simm10:
 
   %1 = alloca <2 x i64>
-  %2 = alloca [4080 x i8] ; Push the frame right up to 4096 bytes
-
+  %2 = alloca [4076 x i8] ; Push the frame--accounting for the emergency spill
+                          ; slot--right up to 4096 bytes
   %3 = load volatile <2 x i64>, <2 x i64>* %1
   ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 4080($sp)
   store volatile <2 x i64> %3, <2 x i64>* %1
@@ -342,7 +354,8 @@ define void @loadstore_v2i64_just_over_simm10() nounwind {
   ; MIPS32-AE: loadstore_v2i64_just_over_simm10:
 
   %1 = alloca <2 x i64>
-  %2 = alloca [4081 x i8] ; Push the frame just over 4096 bytes
+  %2 = alloca [4081 x i8] ; Push the frame--accounting for the emergency spill
+                          ; slot--just over 4096 bytes
 
   %3 = load volatile <2 x i64>, <2 x i64>* %1
   ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 4096
@@ -359,7 +372,8 @@ define void @loadstore_v2i64_just_under_simm16() nounwind {
   ; MIPS32-AE: loadstore_v2i64_just_under_simm16:
 
   %1 = alloca <2 x i64>
-  %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
+  %2 = alloca [32752 x i8] ; Push the frame--accounting for the emergency spill
+                           ; slot--right up to 32768 bytes
 
   %3 = load volatile <2 x i64>, <2 x i64>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
@@ -378,7 +392,8 @@ define void @loadstore_v2i64_just_over_simm16() nounwind {
   ; MIPS32-AE: loadstore_v2i64_just_over_simm16:
 
   %1 = alloca <2 x i64>
-  %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
+  %2 = alloca [32753 x i8] ; Push the frame--accounting for the emergency spill
+                           ; slot--just over 32768 bytes
 
   %3 = load volatile <2 x i64>, <2 x i64>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768