-rw-r--r--  llvm/include/llvm/CodeGen/CallingConvLower.h  17
-rw-r--r--  llvm/lib/CodeGen/CallingConvLower.cpp          3
-rw-r--r--  llvm/lib/Target/X86/X86FastISel.cpp            2
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp        2
-rw-r--r--  llvm/test/CodeGen/X86/win32-spill-xmm.ll      40
5 files changed, 60 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h
index 1fd4eeb46b3..9df41dd0257 100644
--- a/llvm/include/llvm/CodeGen/CallingConvLower.h
+++ b/llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -201,6 +201,7 @@ private:
   LLVMContext &Context;
 
   unsigned StackOffset;
+  unsigned MaxStackArgAlign;
   SmallVector<uint32_t, 16> UsedRegs;
   SmallVector<CCValAssign, 4> PendingLocs;
 
@@ -270,7 +271,18 @@ public:
   CallingConv::ID getCallingConv() const { return CallingConv; }
   bool isVarArg() const { return IsVarArg; }
 
-  unsigned getNextStackOffset() const { return StackOffset; }
+  /// getNextStackOffset - Return the next stack offset such that all stack
+  /// slots satisfy their alignment requirements.
+  unsigned getNextStackOffset() const {
+    return StackOffset;
+  }
+
+  /// getAlignedCallFrameSize - Return the size of the call frame needed to
+  /// be able to store all arguments and such that the alignment requirement
+  /// of each of the arguments is satisfied.
+  unsigned getAlignedCallFrameSize() const {
+    return RoundUpToAlignment(StackOffset, MaxStackArgAlign);
+  }
 
   /// isAllocated - Return true if the specified register (or an alias) is
   /// allocated.
@@ -400,9 +412,10 @@ public:
   /// and alignment.
   unsigned AllocateStack(unsigned Size, unsigned Align) {
     assert(Align && ((Align - 1) & Align) == 0); // Align is power of 2.
-    StackOffset = ((StackOffset + Align - 1) & ~(Align - 1));
+    StackOffset = RoundUpToAlignment(StackOffset, Align);
     unsigned Result = StackOffset;
     StackOffset += Size;
+    MaxStackArgAlign = std::max(Align, MaxStackArgAlign);
     MF.getFrameInfo()->ensureMaxAlignment(Align);
     return Result;
   }
diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp
index fb29b1db7a4..23c0d542560 100644
--- a/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -32,6 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
     CallOrPrologue(Unknown) {
   // No stack is used.
   StackOffset = 0;
+  MaxStackArgAlign = 1;
 
   clearByValRegsInfo();
   UsedRegs.resize((TRI.getNumRegs()+31)/32);
@@ -192,6 +193,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) {
 void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
                                           MVT VT, CCAssignFn Fn) {
   unsigned SavedStackOffset = StackOffset;
+  unsigned SavedMaxStackArgAlign = MaxStackArgAlign;
   unsigned NumLocs = Locs.size();
 
   // Set the 'inreg' flag if it is used for this calling convention.
@@ -223,6 +225,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
   // as allocated so that future queries don't return the same registers, i.e.
   // when i64 and f64 are both passed in GPRs.
   StackOffset = SavedStackOffset;
+  MaxStackArgAlign = SavedMaxStackArgAlign;
   Locs.resize(NumLocs);
 }
 
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index a843a27d1b3..5eabd7a756f 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -2906,7 +2906,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
 
   // Get a count of how many bytes are to be pushed on the stack.
-  unsigned NumBytes = CCInfo.getNextStackOffset();
+  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
 
   // Issue CALLSEQ_START
   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 517295aedbd..e4e32c688ea 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3019,7 +3019,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   CCInfo.AnalyzeCallOperands(Outs, CC_X86);
 
   // Get a count of how many bytes are to be pushed on the stack.
-  unsigned NumBytes = CCInfo.getNextStackOffset();
+  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
   if (IsSibcall)
     // This is a sibcall. The memory operands are available in caller's
     // own caller's stack.
diff --git a/llvm/test/CodeGen/X86/win32-spill-xmm.ll b/llvm/test/CodeGen/X86/win32-spill-xmm.ll
new file mode 100644
index 00000000000..0db97cfe20f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/win32-spill-xmm.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mcpu=generic -mtriple=i686-pc-windows-msvc -mattr=+sse < %s | FileCheck %s
+
+; Check proper alignment of spilled vector
+
+; CHECK-LABEL: spill_ok
+; CHECK: subl $32, %esp
+; CHECK: movaps %xmm3, (%esp)
+; CHECK: movl $0, 16(%esp)
+; CHECK: calll _bar
+define void @spill_ok(i32, <16 x float> *) {
+entry:
+  %2 = alloca i32, i32 %0
+  %3 = load <16 x float>, <16 x float> * %1, align 64
+  tail call void @bar(<16 x float> %3, i32 0) nounwind
+  ret void
+}
+
+declare void @bar(<16 x float> %a, i32 %b)
+
+; Check that proper alignment of spilled vector does not affect vargs
+
+; CHECK-LABEL: vargs_not_affected
+; CHECK: leal 28(%ebp), %eax
+define i32 @vargs_not_affected(<4 x float> %v, i8* %f, ...) {
+entry:
+  %ap = alloca i8*, align 4
+  %0 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %0)
+  %argp.cur = load i8*, i8** %ap, align 4
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 4
+  store i8* %argp.next, i8** %ap, align 4
+  %1 = bitcast i8* %argp.cur to i32*
+  %2 = load i32, i32* %1, align 4
+  call void @llvm.va_end(i8* %0)
+  ret i32 %2
+}
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.va_end(i8*)
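
For illustration, below is a minimal standalone C++ sketch of the bookkeeping this patch introduces; it is not the LLVM sources. The names roundUp and FrameState are hypothetical stand-ins for RoundUpToAlignment and the relevant parts of CCState. With a 16-byte-aligned XMM slot at offset 0 followed by a 4-byte i32 slot at offset 16, getNextStackOffset() returns 20, but the call frame must be padded to 32 bytes so that the %esp adjustment keeps the vector slot 16-byte aligned, matching the "subl $32, %esp" and "movl $0, 16(%esp)" the new test expects.

#include <algorithm>
#include <cassert>
#include <cstdio>

// Stand-in for LLVM's RoundUpToAlignment (Align must be a power of 2).
static unsigned roundUp(unsigned Value, unsigned Align) {
  return (Value + Align - 1) & ~(Align - 1);
}

// Hypothetical reduction of the CCState fields touched by this patch.
struct FrameState {
  unsigned StackOffset = 0;      // next free byte in the outgoing-arg area
  unsigned MaxStackArgAlign = 1; // strictest alignment of any stack slot

  unsigned AllocateStack(unsigned Size, unsigned Align) {
    assert(Align && ((Align - 1) & Align) == 0 && "Align is power of 2");
    StackOffset = roundUp(StackOffset, Align);
    unsigned Result = StackOffset;
    StackOffset += Size;
    MaxStackArgAlign = std::max(Align, MaxStackArgAlign);
    return Result;
  }

  // Before the patch, call lowering sized the frame with the raw offset ...
  unsigned getNextStackOffset() const { return StackOffset; }
  // ... after it, the frame is rounded up to the strictest slot alignment.
  unsigned getAlignedCallFrameSize() const {
    return roundUp(StackOffset, MaxStackArgAlign);
  }
};

int main() {
  FrameState CC;
  CC.AllocateStack(16, 16); // 16-byte-aligned XMM spill slot at offset 0
  CC.AllocateStack(4, 4);   // i32 argument slot at offset 16
  std::printf("next offset: %u\n", CC.getNextStackOffset());      // 20
  std::printf("frame size:  %u\n", CC.getAlignedCallFrameSize()); // 32
  // Reserving only 20 bytes would misalign %esp by a non-multiple of 16,
  // so the movaps spill checked by win32-spill-xmm.ll would fault.
  return 0;
}

Both X86FastISel::fastLowerCall and X86TargetLowering::LowerCall switch to the padded size, so the fast and SelectionDAG paths reserve identical call frames.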