-rw-r--r--  llvm/include/llvm/CodeGen/CallingConvLower.h | 17
-rw-r--r--  llvm/lib/CodeGen/CallingConvLower.cpp        |  3
-rw-r--r--  llvm/lib/Target/X86/X86FastISel.cpp          |  2
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp      |  2
-rw-r--r--  llvm/test/CodeGen/X86/win32-spill-xmm.ll     | 40
5 files changed, 60 insertions, 4 deletions
diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h
index 1fd4eeb46b3..9df41dd0257 100644
--- a/llvm/include/llvm/CodeGen/CallingConvLower.h
+++ b/llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -201,6 +201,7 @@ private:
LLVMContext &Context;
unsigned StackOffset;
+ unsigned MaxStackArgAlign;
SmallVector<uint32_t, 16> UsedRegs;
SmallVector<CCValAssign, 4> PendingLocs;
@@ -270,7 +271,18 @@ public:
CallingConv::ID getCallingConv() const { return CallingConv; }
bool isVarArg() const { return IsVarArg; }
- unsigned getNextStackOffset() const { return StackOffset; }
+ /// getNextStackOffset - Return the next stack offset such that all stack
+ /// slots satisfy their alignment requirements.
+ unsigned getNextStackOffset() const {
+ return StackOffset;
+ }
+
+ /// getAlignedCallFrameSize - Return the size of the call frame needed to
+ /// be able to store all arguments and such that the alignment requirement
+ /// of each of the arguments is satisfied.
+ unsigned getAlignedCallFrameSize() const {
+ return RoundUpToAlignment(StackOffset, MaxStackArgAlign);
+ }
/// isAllocated - Return true if the specified register (or an alias) is
/// allocated.
@@ -400,9 +412,10 @@ public:
/// and alignment.
unsigned AllocateStack(unsigned Size, unsigned Align) {
assert(Align && ((Align - 1) & Align) == 0); // Align is power of 2.
- StackOffset = ((StackOffset + Align - 1) & ~(Align - 1));
+ StackOffset = RoundUpToAlignment(StackOffset, Align);
unsigned Result = StackOffset;
StackOffset += Size;
+ MaxStackArgAlign = std::max(Align, MaxStackArgAlign);
MF.getFrameInfo()->ensureMaxAlignment(Align);
return Result;
}
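
[Editor's note] For illustration, here is a minimal standalone sketch of the bookkeeping the hunks above change. FrameModel is a hypothetical struct, not LLVM's CCState, and roundUp mimics RoundUpToAlignment for power-of-two alignments:

  #include <algorithm>
  #include <cassert>
  #include <cstdio>

  // Hypothetical standalone model of the CCState stack bookkeeping shown
  // in the hunks above; for illustration only.
  struct FrameModel {
    unsigned StackOffset = 0;
    unsigned MaxStackArgAlign = 1;

    // Same computation as RoundUpToAlignment for power-of-two Align.
    static unsigned roundUp(unsigned Value, unsigned Align) {
      return (Value + Align - 1) & ~(Align - 1);
    }

    unsigned AllocateStack(unsigned Size, unsigned Align) {
      assert(Align && ((Align - 1) & Align) == 0); // Align is power of 2.
      StackOffset = roundUp(StackOffset, Align);
      unsigned Result = StackOffset;
      StackOffset += Size;
      MaxStackArgAlign = std::max(Align, MaxStackArgAlign);
      return Result;
    }

    unsigned getNextStackOffset() const { return StackOffset; }

    unsigned getAlignedCallFrameSize() const {
      return roundUp(StackOffset, MaxStackArgAlign);
    }
  };

  int main() {
    // Mirrors the spill_ok test added below: one 16-byte, 16-aligned
    // vector slot, then a 4-byte i32.
    FrameModel F;
    F.AllocateStack(16, 16); // vector slot at offset 0
    F.AllocateStack(4, 4);   // i32 at offset 16; StackOffset becomes 20
    std::printf("next=%u aligned=%u\n",   // prints "next=20 aligned=32"
                F.getNextStackOffset(), F.getAlignedCallFrameSize());
  }

The two getters show the point of the patch: the raw offset (20) is where the next argument would go, while the rounded size (32) is what the call frame must reserve so the 16-byte slot stays 16-byte aligned once %esp is dropped.
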
diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp
index fb29b1db7a4..23c0d542560 100644
--- a/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -32,6 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
CallOrPrologue(Unknown) {
// No stack is used.
StackOffset = 0;
+ MaxStackArgAlign = 1;
clearByValRegsInfo();
UsedRegs.resize((TRI.getNumRegs()+31)/32);
@@ -192,6 +193,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) {
void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
MVT VT, CCAssignFn Fn) {
unsigned SavedStackOffset = StackOffset;
+ unsigned SavedMaxStackArgAlign = MaxStackArgAlign;
unsigned NumLocs = Locs.size();
// Set the 'inreg' flag if it is used for this calling convention.
@@ -223,6 +225,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
// as allocated so that future queries don't return the same registers, i.e.
// when i64 and f64 are both passed in GPRs.
StackOffset = SavedStackOffset;
+ MaxStackArgAlign = SavedMaxStackArgAlign;
Locs.resize(NumLocs);
}
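
[Editor's note] The save/restore pair above exists because getRemainingRegParmsForType speculatively allocates locations just to count registers, then rolls the state back. Continuing the hypothetical FrameModel sketch from the previous file, the new MaxStackArgAlign restore matters because the probe would otherwise permanently inflate the aligned frame size:

  // FrameModel as defined in the earlier sketch.
  FrameModel F;
  F.AllocateStack(4, 4);                      // a real i32 argument
  unsigned SavedOffset = F.StackOffset;       // saved before this patch too
  unsigned SavedAlign  = F.MaxStackArgAlign;  // saved as of this patch
  F.AllocateStack(16, 16);                    // speculative probe
  F.StackOffset      = SavedOffset;           // rollback
  F.MaxStackArgAlign = SavedAlign;            // without this line, the frame
                                              // would be rounded up to 16
                                              // even though nothing 16-byte
                                              // aligned was actually placed
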
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index a843a27d1b3..5eabd7a756f 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -2906,7 +2906,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 517295aedbd..e4e32c688ea 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3019,7 +3019,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
if (IsSibcall)
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
diff --git a/llvm/test/CodeGen/X86/win32-spill-xmm.ll b/llvm/test/CodeGen/X86/win32-spill-xmm.ll
new file mode 100644
index 00000000000..0db97cfe20f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/win32-spill-xmm.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mcpu=generic -mtriple=i686-pc-windows-msvc -mattr=+sse < %s | FileCheck %s
+
+; Check proper alignment of spilled vector
+
+; CHECK-LABEL: spill_ok
+; CHECK: subl $32, %esp
+; CHECK: movaps %xmm3, (%esp)
+; CHECK: movl $0, 16(%esp)
+; CHECK: calll _bar
+define void @spill_ok(i32, <16 x float> *) {
+entry:
+ %2 = alloca i32, i32 %0
+ %3 = load <16 x float>, <16 x float> * %1, align 64
+ tail call void @bar(<16 x float> %3, i32 0) nounwind
+ ret void
+}
+
+declare void @bar(<16 x float> %a, i32 %b)
+
+; Check that proper alignment of spilled vector does not affect vargs
+
+; CHECK-LABEL: vargs_not_affected
+; CHECK: leal 28(%ebp), %eax
+define i32 @vargs_not_affected(<4 x float> %v, i8* %f, ...) {
+entry:
+ %ap = alloca i8*, align 4
+ %0 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %0)
+ %argp.cur = load i8*, i8** %ap, align 4
+ %argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 4
+ store i8* %argp.next, i8** %ap, align 4
+ %1 = bitcast i8* %argp.cur to i32*
+ %2 = load i32, i32* %1, align 4
+ call void @llvm.va_end(i8* %0)
+ ret i32 %2
+}
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.va_end(i8*)
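
[Editor's note] The second test pins down an interaction the patch must not change: va_start for incoming varargs is still based on the raw getNextStackOffset over the fixed arguments, not on the rounded call frame size. Reading the checked offset back through the hypothetical FrameModel (and assuming, as the 28 suggests, that the fixed vector argument is passed on the stack here):

  // FrameModel as defined in the earlier sketch.
  FrameModel F;
  F.AllocateStack(16, 16); // fixed <4 x float> %v at offset 0
  F.AllocateStack(4, 4);   // fixed i8* %f at offset 16; raw offset = 20
  // Varargs start at saved ebp (4) + return address (4) + 20 = ebp + 28,
  // matching "leal 28(%ebp)". Using the rounded size (32) instead would
  // point va_start past the first variadic i32.
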