author     Michael Kuperstein <michael.m.kuperstein@intel.com>  2015-02-01 16:15:07 +0000
committer  Michael Kuperstein <michael.m.kuperstein@intel.com>  2015-02-01 16:15:07 +0000
commit     e86aa9a8a455f90bf3d6dc567b9e39b2e4510246 (patch)
tree       e727b02e6da5596b9d3cce6b6f77155337ae2aa3 /llvm/lib/Target/X86/X86FrameLowering.cpp
parent     e6698d5305a29a186e035ab737931a139d70bcd2 (diff)
Revert r227728 due to bad line endings.
llvm-svn: 227746
Diffstat (limited to 'llvm/lib/Target/X86/X86FrameLowering.cpp')
-rw-r--r--  llvm/lib/Target/X86/X86FrameLowering.cpp  4123
1 file changed, 2103 insertions(+), 2020 deletions(-)
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 80b141654c0..930163c3688 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -1,2020 +1,2103 @@
-//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the X86 implementation of TargetFrameLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86FrameLowering.h"
-#include "X86InstrBuilder.h"
-#include "X86InstrInfo.h"
-#include "X86MachineFunctionInfo.h"
-#include "X86Subtarget.h"
-#include "X86TargetMachine.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Debug.h"
-#include <cstdlib>
-
-using namespace llvm;
-
-// FIXME: completely move here.
-extern cl::opt<bool> ForceStackAlign;
-
-bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
- return !MF.getFrameInfo()->hasVarSizedObjects() &&
- !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
-}
-
-/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
-/// call frame pseudos can be simplified. Having a FP, as in the default
-/// implementation, is not sufficient here since we can't always use it.
-/// Use a more nuanced condition.
-bool
-X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
- const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>
- (MF.getSubtarget().getRegisterInfo());
- return hasReservedCallFrame(MF) ||
- (hasFP(MF) && !TRI->needsStackRealignment(MF))
- || TRI->hasBasePointer(MF);
-}
-
-// needsFrameIndexResolution - Do we need to perform FI resolution for
-// this function? Normally, this is required only when the function
-// has stack objects. However, FI resolution actually has another job,
-// not apparent from the name: it also resolves the call frame
-// setup/destroy pseudos that were not simplified earlier.
-// So it is required for x86 functions that have push sequences even
-// when there are no stack objects.
-bool
-X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
- return MF.getFrameInfo()->hasStackObjects() ||
- MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
-}
-
-/// hasFP - Return true if the specified function should have a dedicated frame
-/// pointer register. This is true if the function has variable sized allocas
-/// or if frame pointer elimination is disabled.
-bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const MachineModuleInfo &MMI = MF.getMMI();
- const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
-
- return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
- RegInfo->needsStackRealignment(MF) ||
- MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() ||
- MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
- MMI.callsUnwindInit() || MMI.callsEHReturn() ||
- MFI->hasStackMap() || MFI->hasPatchPoint());
-}
-
-static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
- if (IsLP64) {
- if (isInt<8>(Imm))
- return X86::SUB64ri8;
- return X86::SUB64ri32;
- } else {
- if (isInt<8>(Imm))
- return X86::SUB32ri8;
- return X86::SUB32ri;
- }
-}
-
-static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
- if (IsLP64) {
- if (isInt<8>(Imm))
- return X86::ADD64ri8;
- return X86::ADD64ri32;
- } else {
- if (isInt<8>(Imm))
- return X86::ADD32ri8;
- return X86::ADD32ri;
- }
-}
-
-static unsigned getSUBrrOpcode(unsigned isLP64) {
- return isLP64 ? X86::SUB64rr : X86::SUB32rr;
-}
-
-static unsigned getADDrrOpcode(unsigned isLP64) {
- return isLP64 ? X86::ADD64rr : X86::ADD32rr;
-}
-
-static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
- if (IsLP64) {
- if (isInt<8>(Imm))
- return X86::AND64ri8;
- return X86::AND64ri32;
- }
- if (isInt<8>(Imm))
- return X86::AND32ri8;
- return X86::AND32ri;
-}
-
-static unsigned getLEArOpcode(unsigned IsLP64) {
- return IsLP64 ? X86::LEA64r : X86::LEA32r;
-}
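-
-// Illustrative uses of the helpers above: getSUBriOpcode(/*IsLP64=*/true, 8)
-// yields X86::SUB64ri8, getSUBriOpcode(/*IsLP64=*/false, 4096) yields
-// X86::SUB32ri, and getLEArOpcode(/*IsLP64=*/true) yields X86::LEA64r.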
-
-/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
-/// when it reaches the "return" instruction. We can then pop a stack object
-/// into this register without worrying about clobbering it.
-static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const TargetRegisterInfo &TRI,
- bool Is64Bit) {
- const MachineFunction *MF = MBB.getParent();
- const Function *F = MF->getFunction();
- if (!F || MF->getMMI().callsEHReturn())
- return 0;
-
- static const uint16_t CallerSavedRegs32Bit[] = {
- X86::EAX, X86::EDX, X86::ECX, 0
- };
-
- static const uint16_t CallerSavedRegs64Bit[] = {
- X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
- X86::R8, X86::R9, X86::R10, X86::R11, 0
- };
-
- unsigned Opc = MBBI->getOpcode();
- switch (Opc) {
- default: return 0;
- case X86::RETL:
- case X86::RETQ:
- case X86::RETIL:
- case X86::RETIQ:
- case X86::TCRETURNdi:
- case X86::TCRETURNri:
- case X86::TCRETURNmi:
- case X86::TCRETURNdi64:
- case X86::TCRETURNri64:
- case X86::TCRETURNmi64:
- case X86::EH_RETURN:
- case X86::EH_RETURN64: {
- SmallSet<uint16_t, 8> Uses;
- for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MBBI->getOperand(i);
- if (!MO.isReg() || MO.isDef())
- continue;
- unsigned Reg = MO.getReg();
- if (!Reg)
- continue;
- for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
- Uses.insert(*AI);
- }
-
- const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
- for (; *CS; ++CS)
- if (!Uses.count(*CS))
- return *CS;
- }
- }
-
- return 0;
-}
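-
-// Illustrative case: for a 64-bit RETQ with no register operands (a void
-// return), the scan above collects no uses, so the first entry of
-// CallerSavedRegs64Bit, X86::RAX, is returned.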
-
-static bool isEAXLiveIn(MachineFunction &MF) {
- for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
- EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
- unsigned Reg = II->first;
-
- if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
- Reg == X86::AH || Reg == X86::AL)
- return true;
- }
-
- return false;
-}
-
-/// emitSPUpdate - Emit a series of instructions to increment / decrement the
-/// stack pointer by a constant value.
-static
-void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, int64_t NumBytes,
- bool Is64BitTarget, bool Is64BitStackPtr, bool UseLEA,
- const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
- bool isSub = NumBytes < 0;
- uint64_t Offset = isSub ? -NumBytes : NumBytes;
- unsigned Opc;
- if (UseLEA)
- Opc = getLEArOpcode(Is64BitStackPtr);
- else
- Opc = isSub
- ? getSUBriOpcode(Is64BitStackPtr, Offset)
- : getADDriOpcode(Is64BitStackPtr, Offset);
-
- uint64_t Chunk = (1LL << 31) - 1;
- DebugLoc DL = MBB.findDebugLoc(MBBI);
-
- while (Offset) {
- if (Offset > Chunk) {
- // Rather than emit a long series of instructions for large offsets,
- // load the offset into a register and do one sub/add
- unsigned Reg = 0;
-
- if (isSub && !isEAXLiveIn(*MBB.getParent()))
- Reg = (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX);
- else
- Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
-
- if (Reg) {
- Opc = Is64BitTarget ? X86::MOV64ri : X86::MOV32ri;
- BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg)
- .addImm(Offset);
- Opc = isSub
- ? getSUBrrOpcode(Is64BitTarget)
- : getADDrrOpcode(Is64BitTarget);
- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addReg(Reg);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
- Offset = 0;
- continue;
- }
- }
-
- uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
- if (ThisVal == (Is64BitTarget ? 8 : 4)) {
- // Use push / pop instead.
- unsigned Reg = isSub
- ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX)
- : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
- if (Reg) {
- Opc = isSub
- ? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r)
- : (Is64BitTarget ? X86::POP64r : X86::POP32r);
- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
- .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
- if (isSub)
- MI->setFlag(MachineInstr::FrameSetup);
- Offset -= ThisVal;
- continue;
- }
- }
-
- MachineInstr *MI = nullptr;
-
- if (UseLEA) {
- MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
- StackPtr, false, isSub ? -ThisVal : ThisVal);
- } else {
- MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(ThisVal);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
- }
-
- if (isSub)
- MI->setFlag(MachineInstr::FrameSetup);
-
- Offset -= ThisVal;
- }
-}
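-
-// Illustrative sequences emitSPUpdate can produce on x86-64, assuming RAX is
-// dead and UseLEA is false:
-//   NumBytes = -16          ->  subq $16, %rsp
-//   NumBytes = -8           ->  pushq %rax            (slot-sized special case)
-//   NumBytes = -0x80000000  ->  movabsq $0x80000000, %rax
-//                               subq %rax, %rsp       (offset exceeds Chunk)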
-
-/// mergeSPUpdatesUp - Merge an SP-adjusting instruction found immediately
-/// above the given iterator into the pending stack adjustment.
-static
-void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, uint64_t *NumBytes = nullptr) {
- if (MBBI == MBB.begin()) return;
-
- MachineBasicBlock::iterator PI = std::prev(MBBI);
- unsigned Opc = PI->getOpcode();
- if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
- Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
- PI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes += PI->getOperand(2).getImm();
- MBB.erase(PI);
- } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
- Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
- PI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes -= PI->getOperand(2).getImm();
- MBB.erase(PI);
- }
-}
-
-/// mergeSPUpdatesDown - Merge an SP-adjusting instruction found immediately
-/// below the given iterator into the pending stack adjustment.
-static
-void mergeSPUpdatesDown(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, uint64_t *NumBytes = nullptr) {
- // FIXME: THIS ISN'T RUN!!!
- return;
-
- if (MBBI == MBB.end()) return;
-
- MachineBasicBlock::iterator NI = std::next(MBBI);
- if (NI == MBB.end()) return;
-
- unsigned Opc = NI->getOpcode();
- if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
- NI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes -= NI->getOperand(2).getImm();
- MBB.erase(NI);
- MBBI = NI;
- } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
- Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
- NI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes += NI->getOperand(2).getImm();
- MBB.erase(NI);
- MBBI = NI;
- }
-}
-
-/// mergeSPUpdates - Check the instruction before/after the passed
-/// instruction. If it is an ADD/SUB/LEA of the stack pointer, it is
-/// deleted and the stack adjustment is returned as a positive value
-/// for ADD/LEA and a negative one for SUB.
-static int mergeSPUpdates(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI, unsigned StackPtr,
- bool doMergeWithPrevious) {
- if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
- (!doMergeWithPrevious && MBBI == MBB.end()))
- return 0;
-
- MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
- MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
- : std::next(MBBI);
- unsigned Opc = PI->getOpcode();
- int Offset = 0;
-
- if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
- Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
- PI->getOperand(0).getReg() == StackPtr){
- Offset += PI->getOperand(2).getImm();
- MBB.erase(PI);
- if (!doMergeWithPrevious) MBBI = NI;
- } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
- Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
- PI->getOperand(0).getReg() == StackPtr) {
- Offset -= PI->getOperand(2).getImm();
- MBB.erase(PI);
- if (!doMergeWithPrevious) MBBI = NI;
- }
-
- return Offset;
-}
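-
-// Illustrative case: with doMergeWithPrevious == true and a
-// "SUB64ri8 %rsp, 16" immediately before MBBI, the SUB is erased and -16 is
-// returned, so the caller can fold it into its own SP update.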
-
-void
-X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- DebugLoc DL) const {
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineModuleInfo &MMI = MF.getMMI();
- const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- if (CSI.empty()) return;
-
- // Calculate offsets.
- for (std::vector<CalleeSavedInfo>::const_iterator
- I = CSI.begin(), E = CSI.end(); I != E; ++I) {
- int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
- unsigned Reg = I->getReg();
-
- unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- unsigned CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg,
- Offset));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
-}
-
-/// usesTheStack - Check whether any user of EFLAGS copies it. We know
-/// that the code that lowers a COPY of EFLAGS has to use the stack, and
-/// if we don't adjust the stack we clobber the first frame index.
-/// See X86InstrInfo::copyPhysReg.
-static bool usesTheStack(const MachineFunction &MF) {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
-
- for (MachineRegisterInfo::reg_instr_iterator
- ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end();
- ri != re; ++ri)
- if (ri->isCopy())
- return true;
-
- return false;
-}
-
-void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- DebugLoc DL) {
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
- bool Is64Bit = STI.is64Bit();
- bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
-
- unsigned CallOp;
- if (Is64Bit)
- CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
- else
- CallOp = X86::CALLpcrel32;
-
- const char *Symbol;
- if (Is64Bit) {
- if (STI.isTargetCygMing()) {
- Symbol = "___chkstk_ms";
- } else {
- Symbol = "__chkstk";
- }
- } else if (STI.isTargetCygMing())
- Symbol = "_alloca";
- else
- Symbol = "_chkstk";
-
- MachineInstrBuilder CI;
-
- // All current stack probes take AX and SP as input, clobber flags, and
- // preserve all registers. x86_64 probes leave RSP unmodified.
- if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
- // For the large code model, we have to call through a register. Use R11,
- // as it is scratch in all supported calling conventions.
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
- .addExternalSymbol(Symbol);
- CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
- } else {
- CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol);
- }
-
- unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
- unsigned SP = Is64Bit ? X86::RSP : X86::ESP;
- CI.addReg(AX, RegState::Implicit)
- .addReg(SP, RegState::Implicit)
- .addReg(AX, RegState::Define | RegState::Implicit)
- .addReg(SP, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
-
- if (Is64Bit) {
- // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
- // themselves. They also do not clobber %rax, so we can reuse it when
- // adjusting %rsp.
- BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
- .addReg(X86::RSP)
- .addReg(X86::RAX);
- }
-}
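-
-// Illustrative expansion for a 64-bit MSVC target (small code model), with
-// RAX preloaded by the caller (e.g. emitPrologue) with the allocation size:
-//   call __chkstk     ; probes the stack, preserves %rax
-//   subq %rax, %rsp   ; the explicit adjustment emitted above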
-
-/// emitPrologue - Push callee-saved registers onto the stack, which
-/// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
-/// space for local variables. Also emit labels used by the exception handler to
-/// generate the exception handling frames.
-
-/*
- Here's a gist of what gets emitted:
-
- ; Establish frame pointer, if needed
- [if needs FP]
- push %rbp
- .cfi_def_cfa_offset 16
- .cfi_offset %rbp, -16
- .seh_pushreg %rbp
- mov %rsp, %rbp
- .cfi_def_cfa_register %rbp
-
- ; Spill general-purpose registers
- [for all callee-saved GPRs]
- pushq %<reg>
- [if not needs FP]
- .cfi_def_cfa_offset (offset from RETADDR)
- .seh_pushreg %<reg>
-
- ; If the required stack alignment > default stack alignment
- ; rsp needs to be re-aligned. This creates a "re-alignment gap"
- ; of unknown size in the stack frame.
- [if stack needs re-alignment]
- and $MASK, %rsp
-
- ; Allocate space for locals
- [if target is Windows and allocated space > 4096 bytes]
- ; Windows needs special care for allocations larger
- ; than one page.
- mov $NNN, %rax
- call ___chkstk_ms/___chkstk
- sub %rax, %rsp
- [else]
- sub $NNN, %rsp
-
- [if needs FP]
- .seh_stackalloc (size of XMM spill slots)
- .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
- [else]
- .seh_stackalloc NNN
-
- ; Spill XMMs
- ; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved,
- ; they may get spilled on any platform if the current function
- ; calls @llvm.eh.unwind.init
- [if needs FP]
- [for all callee-saved XMM registers]
- movaps %<xmm reg>, -MMM(%rbp)
- [for all callee-saved XMM registers]
- .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
- ; i.e. the offset relative to (%rbp - SEHFrameOffset)
- [else]
- [for all callee-saved XMM registers]
- movaps %<xmm reg>, KKK(%rsp)
- [for all callee-saved XMM registers]
- .seh_savexmm %<xmm reg>, KKK
-
- .seh_endprologue
-
- [if needs base pointer]
- mov %rsp, %rbx
- [if needs to restore base pointer]
- mov %rsp, -MMM(%rbp)
-
- ; Emit CFI info
- [if needs FP]
- [for all callee-saved registers]
- .cfi_offset %<reg>, (offset from %rbp)
- [else]
- .cfi_def_cfa_offset (offset from RETADDR)
- [for all callee-saved registers]
- .cfi_offset %<reg>, (offset from %rsp)
-
- Notes:
- - .seh directives are emitted only for Windows 64 ABI
- - .cfi directives are emitted for all other ABIs
- - for 32-bit code, substitute %e?? registers for %r??
-*/
-
-void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const Function *Fn = MF.getFunction();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- MachineModuleInfo &MMI = MF.getMMI();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
- uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
- bool HasFP = hasFP(MF);
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
- bool Is64Bit = STI.is64Bit();
- // Standard x86-64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit ones.
- const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
- bool IsWin64 = STI.isTargetWin64();
- // Not necessarily synonymous with IsWin64.
- bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
- bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
- bool NeedsDwarfCFI =
- !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
- bool UseLEA = STI.useLeaForSP();
- unsigned StackAlign = getStackAlignment();
- unsigned SlotSize = RegInfo->getSlotSize();
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
- const unsigned MachineFramePtr = STI.isTarget64BitILP32() ?
- getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr;
- unsigned StackPtr = RegInfo->getStackRegister();
- unsigned BasePtr = RegInfo->getBaseRegister();
- DebugLoc DL;
-
- // If we're forcing a stack realignment we can't rely on just the frame
- // info; we need to know the ABI stack alignment as well in case we
- // have a call out. Otherwise just make sure we have some alignment - we'll
- // go with the minimum SlotSize.
- if (ForceStackAlign) {
- if (MFI->hasCalls())
- MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
- else if (MaxAlign < SlotSize)
- MaxAlign = SlotSize;
- }
-
- // Add RETADDR move area to callee saved frame size.
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta < 0)
- X86FI->setCalleeSavedFrameSize(
- X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
-
- bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO());
-
- // The default stack probe size is 4096 if the function has no stackprobesize
- // attribute.
- unsigned StackProbeSize = 4096;
- if (Fn->hasFnAttribute("stack-probe-size"))
- Fn->getFnAttribute("stack-probe-size")
- .getValueAsString()
- .getAsInteger(0, StackProbeSize);
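-
- // For example (illustrative), a function carrying the IR attribute
- //   "stack-probe-size"="8192"
- // only gets a stack probe call once it allocates at least 8192 bytes.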
-
- // If this is x86-64, the Red Zone is not disabled, we are a leaf
- // function, we use at most 128 bytes of stack space, and we have no
- // frame pointer, calls, or dynamic allocas, then we do not need to
- // adjust the stack pointer (we fit in the Red Zone). We also check
- // that we don't push and pop from the stack.
- if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoRedZone) &&
- !RegInfo->needsStackRealignment(MF) &&
- !MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->adjustsStack() && // No calls.
- !IsWin64 && // Win64 has no Red Zone
- !usesTheStack(MF) && // Don't push and pop.
- !MF.shouldSplitStack()) { // Regular stack
- uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
- if (HasFP) MinSize += SlotSize;
- StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
- MFI->setStackSize(StackSize);
- }
-
- // Insert stack pointer adjustment for later moving of return addr. Only
- // applies to tail call optimized functions where the callee argument stack
- // size is bigger than the caller's.
- if (TailCallReturnAddrDelta < 0) {
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL,
- TII.get(getSUBriOpcode(Uses64BitFramePtr, -TailCallReturnAddrDelta)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(-TailCallReturnAddrDelta)
- .setMIFlag(MachineInstr::FrameSetup);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
- }
-
- // Mapping for machine moves:
- //
- // DST: VirtualFP AND
- // SRC: VirtualFP => DW_CFA_def_cfa_offset
- // ELSE => DW_CFA_def_cfa
- //
- // SRC: VirtualFP AND
- // DST: Register => DW_CFA_def_cfa_register
- //
- // ELSE
- // OFFSET < 0 => DW_CFA_offset_extended_sf
- // REG < 64 => DW_CFA_offset + Reg
- // ELSE => DW_CFA_offset_extended
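- //
- // E.g. the prologue's "push %rbp; mov %rsp, %rbp" (see the gist above) maps
- // to DW_CFA_def_cfa_offset 16, then DW_CFA_offset %rbp -16, then
- // DW_CFA_def_cfa_register %rbp.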
-
- uint64_t NumBytes = 0;
- int stackGrowth = -SlotSize;
-
- if (HasFP) {
- // Calculate required stack adjustment.
- uint64_t FrameSize = StackSize - SlotSize;
- // If required, include space for an extra hidden slot for stashing the base pointer.
- if (X86FI->getRestoreBasePointer())
- FrameSize += SlotSize;
- if (RegInfo->needsStackRealignment(MF)) {
- // Callee-saved registers are pushed on stack before the stack
- // is realigned.
- FrameSize -= X86FI->getCalleeSavedFrameSize();
- NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
- } else {
- NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
- }
-
- // Get the offset of the stack slot for the EBP register, which is
- // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
- // Update the frame offset adjustment.
- MFI->setOffsetAdjustment(-NumBytes);
-
- // Save EBP/RBP into the appropriate stack slot.
- BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
- .addReg(MachineFramePtr, RegState::Kill)
- .setMIFlag(MachineInstr::FrameSetup);
-
- if (NeedsDwarfCFI) {
- // Mark the place where EBP/RBP was saved.
- // Define the current CFA rule to use the provided offset.
- assert(StackSize);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- // Change the rule for the FramePtr to be an "offset" rule.
- unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
- CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(nullptr,
- DwarfFramePtr, 2 * stackGrowth));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
-
- if (NeedsWinEH) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
- .addImm(FramePtr)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- // Update EBP with the new base value.
- BuildMI(MBB, MBBI, DL,
- TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr)
- .addReg(StackPtr)
- .setMIFlag(MachineInstr::FrameSetup);
-
- if (NeedsDwarfCFI) {
- // Mark effective beginning of when frame pointer becomes valid.
- // Define the current CFA to use the EBP/RBP register.
- unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
-
- // Mark the FramePtr as live-in in every block.
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- I->addLiveIn(MachineFramePtr);
- } else {
- NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
- }
-
- // Skip the callee-saved push instructions.
- bool PushedRegs = false;
- int StackOffset = 2 * stackGrowth;
-
- while (MBBI != MBB.end() &&
- (MBBI->getOpcode() == X86::PUSH32r ||
- MBBI->getOpcode() == X86::PUSH64r)) {
- PushedRegs = true;
- unsigned Reg = MBBI->getOperand(0).getReg();
- ++MBBI;
-
- if (!HasFP && NeedsDwarfCFI) {
- // Mark callee-saved push instruction.
- // Define the current CFA rule to use the provided offset.
- assert(StackSize);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- StackOffset += stackGrowth;
- }
-
- if (NeedsWinEH) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
- MachineInstr::FrameSetup);
- }
- }
-
- // Realign stack after we pushed callee-saved registers (so that we'll be
- // able to calculate their offsets from the frame pointer).
- if (RegInfo->needsStackRealignment(MF)) {
- assert(HasFP && "There should be a frame pointer if stack is realigned.");
- uint64_t Val = -MaxAlign;
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL,
- TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), StackPtr)
- .addReg(StackPtr)
- .addImm(Val)
- .setMIFlag(MachineInstr::FrameSetup);
-
- // The EFLAGS implicit def is dead.
- MI->getOperand(3).setIsDead();
- }
-
- // If there is a SUB32ri of ESP immediately before this instruction, merge
- // the two. This can be the case when tail call elimination is enabled and
- // the callee has more arguments than the caller.
- NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
-
- // If there is an ADD32ri or SUB32ri of ESP immediately after this
- // instruction, merge the two instructions.
- mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
-
- // Adjust stack pointer: ESP -= numbytes.
-
- // Windows and cygwin/mingw require a prologue helper routine when allocating
- // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
- // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
- // stack and adjust the stack pointer in one go. The 64-bit version of
- // __chkstk is only responsible for probing the stack. The 64-bit prologue is
- // responsible for adjusting the stack pointer. Touching the stack at 4K
- // increments is necessary to ensure that the guard pages used by the OS
- // virtual memory manager are allocated in correct sequence.
- if (NumBytes >= StackProbeSize && UseStackProbe) {
- // Check whether EAX is livein for this function.
- bool isEAXAlive = isEAXLiveIn(MF);
-
- if (isEAXAlive) {
- // EAX is live-in here; that is only ever expected in 32-bit mode,
- // so assert that we are not on x86-64.
- assert(!Is64Bit && "EAX is livein in x64 case!");
-
- // Save EAX
- BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
- .addReg(X86::EAX, RegState::Kill)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- if (Is64Bit) {
- // Handle the 64-bit Windows ABI case where we need to call __chkstk.
- // Function prologue is responsible for adjusting the stack pointer.
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
- .addImm(NumBytes)
- .setMIFlag(MachineInstr::FrameSetup);
- } else {
- // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
- // We'll also use 4 already allocated bytes for EAX.
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- // Save a pointer to the MI where we set AX.
- MachineBasicBlock::iterator SetRAX = MBBI;
- --SetRAX;
-
- // Call __chkstk, __chkstk_ms, or __alloca.
- emitStackProbeCall(MF, MBB, MBBI, DL);
-
- // Apply the frame setup flag to all inserted instrs.
- for (; SetRAX != MBBI; ++SetRAX)
- SetRAX->setFlag(MachineInstr::FrameSetup);
-
- if (isEAXAlive) {
- // Restore EAX
- MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
- X86::EAX),
- StackPtr, false, NumBytes - 4);
- MI->setFlag(MachineInstr::FrameSetup);
- MBB.insert(MBBI, MI);
- }
- } else if (NumBytes) {
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, Uses64BitFramePtr,
- UseLEA, TII, *RegInfo);
- }
-
- int SEHFrameOffset = 0;
- if (NeedsWinEH) {
- if (HasFP) {
- // We need to set frame base offset low enough such that all saved
- // register offsets would be positive relative to it, but we can't
- // just use NumBytes, because .seh_setframe offset must be <=240.
- // So we pretend to have only allocated enough space to spill the
- // non-volatile registers.
- // We don't care about the rest of the stack allocation, because the
- // unwinder will restore SP to (BP - SEHFrameOffset).
- for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
- int offset = MFI->getObjectOffset(Info.getFrameIdx());
- SEHFrameOffset = std::max(SEHFrameOffset, std::abs(offset));
- }
- SEHFrameOffset += SEHFrameOffset % 16; // ensure alignment
-
- // This only needs to account for XMM spill slots, GPR slots
- // are covered by the .seh_pushreg's emitted above.
- unsigned Size = SEHFrameOffset - X86FI->getCalleeSavedFrameSize();
- if (Size) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
- .addImm(Size)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
- .addImm(FramePtr)
- .addImm(SEHFrameOffset)
- .setMIFlag(MachineInstr::FrameSetup);
- } else {
- // SP will be the base register for restoring XMMs
- if (NumBytes) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
- .addImm(NumBytes)
- .setMIFlag(MachineInstr::FrameSetup);
- }
- }
- }
-
- // Skip the rest of register spilling code
- while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
- ++MBBI;
-
- // Emit SEH info for non-GPRs
- if (NeedsWinEH) {
- for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
- unsigned Reg = Info.getReg();
- if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
- continue;
- assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class");
-
- int Offset = getFrameIndexOffset(MF, Info.getFrameIdx());
- Offset += SEHFrameOffset;
-
- BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
- .addImm(Reg)
- .addImm(Offset)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- // If we need a base pointer, set it up here. It's whatever the value
- // of the stack pointer is at this point. Any variable size objects
- // will be allocated after this, so we can still use the base pointer
- // to reference locals.
- if (RegInfo->hasBasePointer(MF)) {
- // Update the base pointer with the current stack pointer.
- unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
- BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
- .addReg(StackPtr)
- .setMIFlag(MachineInstr::FrameSetup);
- if (X86FI->getRestoreBasePointer()) {
- // Stash value of base pointer. Saving RSP instead of EBP shortens dependence chain.
- unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
- addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
- FramePtr, true, X86FI->getRestoreBasePointerOffset())
- .addReg(StackPtr)
- .setMIFlag(MachineInstr::FrameSetup);
- }
- }
-
- if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
- // Mark end of stack pointer adjustment.
- if (!HasFP && NumBytes) {
- // Define the current CFA rule to use the provided offset.
- assert(StackSize);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr,
- -StackSize + stackGrowth));
-
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
-
- // Emit DWARF info specifying the offsets of the callee-saved registers.
- if (PushedRegs)
- emitCalleeSavedFrameMoves(MBB, MBBI, DL);
- }
-}
-
-void X86FrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI != MBB.end() && "Returning block has no instructions");
- unsigned RetOpcode = MBBI->getOpcode();
- DebugLoc DL = MBBI->getDebugLoc();
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
- bool Is64Bit = STI.is64Bit();
- // Standard x86-64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit ones.
- const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
- const bool Is64BitILP32 = STI.isTarget64BitILP32();
- bool UseLEA = STI.useLeaForSP();
- unsigned StackAlign = getStackAlignment();
- unsigned SlotSize = RegInfo->getSlotSize();
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
- unsigned MachineFramePtr = Is64BitILP32 ?
- getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr;
- unsigned StackPtr = RegInfo->getStackRegister();
-
- bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
- bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry();
-
- switch (RetOpcode) {
- default:
- llvm_unreachable("Can only insert epilog into returning blocks");
- case X86::RETQ:
- case X86::RETL:
- case X86::RETIL:
- case X86::RETIQ:
- case X86::TCRETURNdi:
- case X86::TCRETURNri:
- case X86::TCRETURNmi:
- case X86::TCRETURNdi64:
- case X86::TCRETURNri64:
- case X86::TCRETURNmi64:
- case X86::EH_RETURN:
- case X86::EH_RETURN64:
- break; // These are ok
- }
-
- // Get the number of bytes to allocate from the FrameInfo.
- uint64_t StackSize = MFI->getStackSize();
- uint64_t MaxAlign = MFI->getMaxAlignment();
- unsigned CSSize = X86FI->getCalleeSavedFrameSize();
- uint64_t NumBytes = 0;
-
- // If we're forcing a stack realignment we can't rely on just the frame
- // info; we need to know the ABI stack alignment as well in case we
- // have a call out. Otherwise just make sure we have some alignment - we'll
- // go with the minimum.
- if (ForceStackAlign) {
- if (MFI->hasCalls())
- MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
- else
- MaxAlign = MaxAlign ? MaxAlign : 4;
- }
-
- if (hasFP(MF)) {
- // Calculate required stack adjustment.
- uint64_t FrameSize = StackSize - SlotSize;
- if (RegInfo->needsStackRealignment(MF)) {
- // Callee-saved registers were pushed on stack before the stack
- // was realigned.
- FrameSize -= CSSize;
- NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
- } else {
- NumBytes = FrameSize - CSSize;
- }
-
- // Pop EBP.
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr);
- } else {
- NumBytes = StackSize - CSSize;
- }
-
- // Skip the callee-saved pop instructions.
- while (MBBI != MBB.begin()) {
- MachineBasicBlock::iterator PI = std::prev(MBBI);
- unsigned Opc = PI->getOpcode();
-
- if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
- !PI->isTerminator())
- break;
-
- --MBBI;
- }
- MachineBasicBlock::iterator FirstCSPop = MBBI;
-
- DL = MBBI->getDebugLoc();
-
- // If there is an ADD32ri or SUB32ri of ESP immediately before this
- // instruction, merge the two instructions.
- if (NumBytes || MFI->hasVarSizedObjects())
- mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
-
- // If dynamic alloca is used, then reset esp to point to the last callee-saved
- // slot before popping them off! The same applies when the stack was
- // realigned.
- if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
- if (RegInfo->needsStackRealignment(MF))
- MBBI = FirstCSPop;
- if (CSSize != 0) {
- unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
- addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
- FramePtr, false, -CSSize);
- --MBBI;
- } else {
- unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
- BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(FramePtr);
- --MBBI;
- }
- } else if (NumBytes) {
- // Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr, UseLEA,
- TII, *RegInfo);
- --MBBI;
- }
-
- // The Windows unwinder will not invoke a function's exception handler if
- // the IP is either in the prologue or in the epilogue. This behavior
- // causes a problem when a call immediately precedes an epilogue, because
- // the return address points into the epilogue. To cope with that, we
- // insert an epilogue marker here, then replace it with a 'nop' if it ends
- // up immediately after a CALL in the final emitted code.
- if (NeedsWinEH)
- BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
-
- // We're returning from function via eh_return.
- if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &DestAddr = MBBI->getOperand(0);
- assert(DestAddr.isReg() && "Offset should be in register!");
- BuildMI(MBB, MBBI, DL,
- TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
- StackPtr).addReg(DestAddr.getReg());
- } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
- RetOpcode == X86::TCRETURNmi ||
- RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
- RetOpcode == X86::TCRETURNmi64) {
- bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
- // Tail call return: adjust the stack pointer and jump to callee.
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
- assert(StackAdjust.isImm() && "Expecting immediate value.");
-
- // Adjust stack pointer.
- int StackAdj = StackAdjust.getImm();
- int MaxTCDelta = X86FI->getTCReturnAddrDelta();
- int Offset = 0;
- assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
-
- // Incorporate the retaddr area.
- Offset = StackAdj-MaxTCDelta;
- assert(Offset >= 0 && "Offset should never be negative");
-
- if (Offset) {
- // Check for possible merge with preceding ADD instruction.
- Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr,
- UseLEA, TII, *RegInfo);
- }
-
- // Jump to label or value in register.
- bool IsWin64 = STI.isTargetWin64();
- if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
- unsigned Op = (RetOpcode == X86::TCRETURNdi)
- ? X86::TAILJMPd
- : (IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64);
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(Op));
- if (JumpTarget.isGlobal())
- MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- else {
- assert(JumpTarget.isSymbol());
- MIB.addExternalSymbol(JumpTarget.getSymbolName(),
- JumpTarget.getTargetFlags());
- }
- } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
- unsigned Op = (RetOpcode == X86::TCRETURNmi)
- ? X86::TAILJMPm
- : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(Op));
- for (unsigned i = 0; i != 5; ++i)
- MIB.addOperand(MBBI->getOperand(i));
- } else if (RetOpcode == X86::TCRETURNri64) {
- BuildMI(MBB, MBBI, DL,
- TII.get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
- .addReg(JumpTarget.getReg(), RegState::Kill);
- } else {
- BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
- addReg(JumpTarget.getReg(), RegState::Kill);
- }
-
- MachineInstr *NewMI = std::prev(MBBI);
- NewMI->copyImplicitOps(MF, MBBI);
-
- // Delete the pseudo instruction TCRETURN.
- MBB.erase(MBBI);
- } else if ((RetOpcode == X86::RETQ || RetOpcode == X86::RETL ||
- RetOpcode == X86::RETIQ || RetOpcode == X86::RETIL) &&
- (X86FI->getTCReturnAddrDelta() < 0)) {
- // Add the return addr area delta back since we are not tail calling.
- int delta = -1*X86FI->getTCReturnAddrDelta();
- MBBI = MBB.getLastNonDebugInstr();
-
- // Check for possible merge with preceding ADD instruction.
- delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, Uses64BitFramePtr, UseLEA, TII,
- *RegInfo);
- }
-}
-
-int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
- uint64_t StackSize = MFI->getStackSize();
-
- if (RegInfo->hasBasePointer(MF)) {
- assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
- if (FI < 0) {
- // Skip the saved EBP.
- return Offset + RegInfo->getSlotSize();
- } else {
- assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
- return Offset + StackSize;
- }
- } else if (RegInfo->needsStackRealignment(MF)) {
- if (FI < 0) {
- // Skip the saved EBP.
- return Offset + RegInfo->getSlotSize();
- } else {
- assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
- return Offset + StackSize;
- }
- // FIXME: Support tail calls
- } else {
- if (!hasFP(MF))
- return Offset + StackSize;
-
- // Skip the saved EBP.
- Offset += RegInfo->getSlotSize();
-
- // Skip the RETADDR move area
- const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta < 0)
- Offset -= TailCallReturnAddrDelta;
- }
-
- return Offset;
-}
-
-int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const {
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
- // We can't calculate offset from frame pointer if the stack is realigned,
- // so enforce usage of stack/base pointer. The base pointer is used when we
- // have dynamic allocas in addition to dynamic realignment.
- if (RegInfo->hasBasePointer(MF))
- FrameReg = RegInfo->getBaseRegister();
- else if (RegInfo->needsStackRealignment(MF))
- FrameReg = RegInfo->getStackRegister();
- else
- FrameReg = RegInfo->getFrameRegister(MF);
- return getFrameIndexOffset(MF, FI);
-}
-
-// Simplified from getFrameIndexOffset keeping only StackPointer cases
-int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- // Does not include any dynamic realign.
- const uint64_t StackSize = MFI->getStackSize();
- {
-#ifndef NDEBUG
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(MF.getSubtarget().getRegisterInfo());
- // Note: LLVM arranges the stack as:
- // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP)
- // > "Stack Slots" (<--SP)
- // We can always address StackSlots from RSP. We can usually (unless
- // needsStackRealignment) address CSRs from RSP, but sometimes need to
- // address them from RBP. FixedObjects can be placed anywhere in the stack
- // frame depending on their specific requirements (i.e. we can actually
- // refer to arguments to the function which are stored in the *callers*
- // frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
- // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject.
-
- assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
-
- // We don't handle tail calls, and shouldn't be seeing them
- // either.
- int TailCallReturnAddrDelta =
- MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta();
- assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!");
-#endif
- }
-
- // This is how the math works out:
- //
- // %rsp grows (i.e. gets lower) left to right. Each box below is
- // one word (eight bytes). Obj0 is the stack slot we're trying to
- // get to.
- //
- // ----------------------------------
- // | BP | Obj0 | Obj1 | ... | ObjN |
- // ----------------------------------
- // ^ ^ ^ ^
- // A B C E
- //
- // A is the incoming stack pointer.
- // (B - A) is the local area offset (-8 for x86-64) [1]
- // (C - A) is the Offset returned by MFI->getObjectOffset for Obj0 [2]
- //
- // |(E - B)| is the StackSize (absolute value, positive). For a
- // stack that grows down, this works out to be (B - E). [3]
- //
- // E is also the value of %rsp after stack has been set up, and we
- // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
- // (C - E) == (C - A) - (B - A) + (B - E)
- // { Using [1], [2] and [3] above }
- // == getObjectOffset - LocalAreaOffset + StackSize
- //
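- // Worked example (illustrative): on x86-64 the local area offset is -8,
- // so with StackSize = 40 and getObjectOffset(FI) = -24 we get
- // Offset = -24 - (-8) = -16, and the function returns -16 + 40 = 24;
- // i.e. Obj0 lives at 24(%rsp).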
-
- // Get the Offset from the StackPointer
- int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
-
- return Offset + StackSize;
-}
-// Simplified from getFrameIndexReference keeping only StackPointer cases
-int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const {
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(MF.getSubtarget().getRegisterInfo());
-
- assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
-
- FrameReg = RegInfo->getStackRegister();
- return getFrameIndexOffsetFromSP(MF, FI);
-}
-
-bool X86FrameLowering::assignCalleeSavedSpillSlots(
- MachineFunction &MF, const TargetRegisterInfo *TRI,
- std::vector<CalleeSavedInfo> &CSI) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
- unsigned SlotSize = RegInfo->getSlotSize();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-
- unsigned CalleeSavedFrameSize = 0;
- int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
-
- if (hasFP(MF)) {
- // emitPrologue always spills the frame register first.
- SpillSlotOffset -= SlotSize;
- MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
-
- // Since emitPrologue and emitEpilogue will handle spilling and restoring of
- // the frame register, we can delete it from the CSI list and not have to
- // worry about avoiding it later.
- unsigned FPReg = RegInfo->getFrameRegister(MF);
- for (unsigned i = 0; i < CSI.size(); ++i) {
- if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
- CSI.erase(CSI.begin() + i);
- break;
- }
- }
- }
-
- // Assign slots for GPRs. It increases frame size.
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i - 1].getReg();
-
- if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
- continue;
-
- SpillSlotOffset -= SlotSize;
- CalleeSavedFrameSize += SlotSize;
-
- int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
- CSI[i - 1].setFrameIdx(SlotIndex);
- }
-
- X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
-
- // Assign slots for XMMs.
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i - 1].getReg();
- if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
- continue;
-
- const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
- // ensure alignment
- SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment();
- // spill into slot
- SpillSlotOffset -= RC->getSize();
- int SlotIndex =
- MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
- CSI[i - 1].setFrameIdx(SlotIndex);
- MFI->ensureMaxAlignment(RC->getAlignment());
- }
-
- return true;
-}
-
-bool X86FrameLowering::spillCalleeSavedRegisters(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- DebugLoc DL = MBB.findDebugLoc(MI);
-
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
-
- // Push GPRs. It increases frame size.
- unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i - 1].getReg();
-
- if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
- continue;
- // Add the callee-saved register as live-in. It's killed at the spill.
- MBB.addLiveIn(Reg);
-
- BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- // Spill the XMM regs. X86 has no push/pop for XMM registers, so
- // spill them to the stack frame instead.
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i-1].getReg();
- if (X86::GR64RegClass.contains(Reg) ||
- X86::GR32RegClass.contains(Reg))
- continue;
- // Add the callee-saved register as live-in. It's killed at the spill.
- MBB.addLiveIn(Reg);
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-
- TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
- TRI);
- --MI;
- MI->setFlag(MachineInstr::FrameSetup);
- ++MI;
- }
-
- return true;
-}
-
-bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL = MBB.findDebugLoc(MI);
-
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
-
- // Reload XMMs from stack frame.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (X86::GR64RegClass.contains(Reg) ||
- X86::GR32RegClass.contains(Reg))
- continue;
-
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
- }
-
- // POP GPRs.
- unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (!X86::GR64RegClass.contains(Reg) &&
- !X86::GR32RegClass.contains(Reg))
- continue;
-
- BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
- }
- return true;
-}
-
-void
-X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
- unsigned SlotSize = RegInfo->getSlotSize();
-
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
-
- if (TailCallReturnAddrDelta < 0) {
- // create RETURNADDR area
- // arg
- // arg
- // RETADDR
- // { ...
- // RETADDR area
- // ...
- // }
- // [EBP]
- MFI->CreateFixedObject(-TailCallReturnAddrDelta,
- TailCallReturnAddrDelta - SlotSize, true);
- }
-
- // Spill the BasePtr if it's used.
- if (RegInfo->hasBasePointer(MF))
- MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
-}
-
-static bool
-HasNestArgument(const MachineFunction *MF) {
- const Function *F = MF->getFunction();
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; I++) {
- if (I->hasNestAttr())
- return true;
- }
- return false;
-}
-
-/// GetScratchRegister - Get a temp register for performing work in the
-/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
-/// and the properties of the function either one or two registers will be
-/// needed. Set primary to true for the first register, false for the second.
-static unsigned
-GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
- CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
-
- // Erlang stuff.
- if (CallingConvention == CallingConv::HiPE) {
- if (Is64Bit)
- return Primary ? X86::R14 : X86::R13;
- else
- return Primary ? X86::EBX : X86::EDI;
- }
-
- if (Is64Bit) {
- if (IsLP64)
- return Primary ? X86::R11 : X86::R12;
- else
- return Primary ? X86::R11D : X86::R12D;
- }
-
- bool IsNested = HasNestArgument(&MF);
-
- if (CallingConvention == CallingConv::X86_FastCall ||
- CallingConvention == CallingConv::Fast) {
- if (IsNested)
- report_fatal_error("Segmented stacks does not support fastcall with "
- "nested function.");
- return Primary ? X86::EAX : X86::ECX;
- }
- if (IsNested)
- return Primary ? X86::EDX : X86::EAX;
- return Primary ? X86::ECX : X86::EAX;
-}
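-
-// Illustrative case: a 64-bit LP64 function using the default C calling
-// convention gets X86::R11 as the primary scratch register and X86::R12 as
-// the secondary one.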
-
-// The stack limit in the TCB is set to this many bytes above the actual stack
-// limit.
-static const uint64_t kSplitStackAvailable = 256;
-
-void
-X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
- MachineBasicBlock &prologueMBB = MF.front();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- uint64_t StackSize;
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
- bool Is64Bit = STI.is64Bit();
- const bool IsLP64 = STI.isTarget64BitLP64();
- unsigned TlsReg, TlsOffset;
- DebugLoc DL;
-
- unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
- assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
- "Scratch register is live-in");
-
- if (MF.getFunction()->isVarArg())
- report_fatal_error("Segmented stacks do not support vararg functions.");
- if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
- !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
- !STI.isTargetDragonFly())
- report_fatal_error("Segmented stacks not supported on this platform.");
-
- // Eventually StackSize will be calculated by a link-time pass, which will
- // also decide whether checking code needs to be injected into this particular
- // prologue.
- StackSize = MFI->getStackSize();
-
- // Do not generate a prologue for functions with a stack of size zero
- if (StackSize == 0)
- return;
-
- MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
- MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- bool IsNested = false;
-
- // We need to know if the function has a nest argument only in 64 bit mode.
- if (Is64Bit)
- IsNested = HasNestArgument(&MF);
-
- // The MOV R10, RAX needs to be in a different block, since the RET we emit
- // in allocMBB needs to be the last (terminating) instruction.
-
- for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
- e = prologueMBB.livein_end(); i != e; i++) {
- allocMBB->addLiveIn(*i);
- checkMBB->addLiveIn(*i);
- }
-
- if (IsNested)
- allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
-
- MF.push_front(allocMBB);
- MF.push_front(checkMBB);
-
- // When the frame size is less than 256 we just compare the stack
- // boundary directly to the value of the stack pointer, per gcc.
- bool CompareStackPointer = StackSize < kSplitStackAvailable;
-
- // Read the limit off the current stacklet off the stack_guard location.
- if (Is64Bit) {
- if (STI.isTargetLinux()) {
- TlsReg = X86::FS;
- TlsOffset = IsLP64 ? 0x70 : 0x40;
- } else if (STI.isTargetDarwin()) {
- TlsReg = X86::GS;
- TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
- } else if (STI.isTargetWin64()) {
- TlsReg = X86::GS;
- TlsOffset = 0x28; // pvArbitrary, reserved for application use
- } else if (STI.isTargetFreeBSD()) {
- TlsReg = X86::FS;
- TlsOffset = 0x18;
- } else if (STI.isTargetDragonFly()) {
- TlsReg = X86::FS;
- TlsOffset = 0x20; // use tls_tcb.tcb_segstack
- } else {
- report_fatal_error("Segmented stacks not supported on this platform.");
- }
-
- if (CompareStackPointer)
- ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
- else
- BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
- .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
-
- BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
- .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
- } else {
- if (STI.isTargetLinux()) {
- TlsReg = X86::GS;
- TlsOffset = 0x30;
- } else if (STI.isTargetDarwin()) {
- TlsReg = X86::GS;
- TlsOffset = 0x48 + 90*4;
- } else if (STI.isTargetWin32()) {
- TlsReg = X86::FS;
- TlsOffset = 0x14; // pvArbitrary, reserved for application use
- } else if (STI.isTargetDragonFly()) {
- TlsReg = X86::FS;
- TlsOffset = 0x10; // use tls_tcb.tcb_segstack
- } else if (STI.isTargetFreeBSD()) {
- report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
- } else {
- report_fatal_error("Segmented stacks not supported on this platform.");
- }
-
- if (CompareStackPointer)
- ScratchReg = X86::ESP;
- else
- BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
- .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
-
- if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
- STI.isTargetDragonFly()) {
- BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
- .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
- } else if (STI.isTargetDarwin()) {
-
- // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
- unsigned ScratchReg2;
- bool SaveScratch2;
- if (CompareStackPointer) {
- // The primary scratch register is available for holding the TLS offset.
- ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
- SaveScratch2 = false;
- } else {
- // Need to use a second register to hold the TLS offset
- ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
-
- // Unfortunately, with fastcc the second scratch register may hold an
- // argument.
- SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
- }
-
- // If Scratch2 is live-in then it needs to be saved.
- assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
- "Scratch register is live-in and not saved");
-
- if (SaveScratch2)
- BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
- .addReg(ScratchReg2, RegState::Kill);
-
- BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
- .addImm(TlsOffset);
- BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
- .addReg(ScratchReg)
- .addReg(ScratchReg2).addImm(1).addReg(0)
- .addImm(0)
- .addReg(TlsReg);
-
- if (SaveScratch2)
- BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
- }
- }
-
- // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
- // It jumps to normal execution of the function body.
- BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&prologueMBB);
-
- // On 32 bit we first push the arguments size and then the frame size. On 64
- // bit, we pass the stack frame size in r10 and the argument size in r11.
- if (Is64Bit) {
- // Functions with nested arguments use R10, so it needs to be saved across
- // the call to _morestack
-
- const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
- const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
- const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
- const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
- const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;
-
- if (IsNested)
- BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
-
- BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
- .addImm(StackSize);
- BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
- .addImm(X86FI->getArgumentStackSize());
- MF.getRegInfo().setPhysRegUsed(Reg10);
- MF.getRegInfo().setPhysRegUsed(Reg11);
- } else {
- BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
- .addImm(X86FI->getArgumentStackSize());
- BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
- .addImm(StackSize);
- }
-
- // __morestack is in libgcc
- if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
- // Under the large code model, we cannot assume that __morestack lives
- // within 2^31 bytes of the call site, so we cannot use pc-relative
- // addressing. We cannot perform the call via a temporary register,
- // as the rax register may be used to store the static chain, and all
- // other suitable registers may be either callee-save or used for
- // parameter passing. We cannot use the stack at this point either
- // because __morestack manipulates the stack directly.
- //
- // To avoid these issues, perform an indirect call via a read-only memory
- // location containing the address.
- //
- // This solution is not perfect, as it assumes that the .rodata section
- // is laid out within 2^31 bytes of each function body, but this seems
- // to be sufficient for JIT.
- BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
- .addReg(X86::RIP)
- .addImm(0)
- .addReg(0)
- .addExternalSymbol("__morestack_addr")
- .addReg(0);
- MF.getMMI().setUsesMorestackAddr(true);
- } else {
- if (Is64Bit)
- BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
- .addExternalSymbol("__morestack");
- else
- BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("__morestack");
- }
-
- if (IsNested)
- BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
- else
- BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
-
- allocMBB->addSuccessor(&prologueMBB);
-
- checkMBB->addSuccessor(allocMBB);
- checkMBB->addSuccessor(&prologueMBB);
-
-#ifdef XDEBUG
- MF.verify();
-#endif
-}
-
-/// Erlang programs may need a special prologue to handle the stack size they
-/// might need at runtime. That is because Erlang/OTP does not implement a C
-/// stack but uses a custom implementation of hybrid stack/heap architecture.
-/// (for more information see Eric Stenman's Ph.D. thesis:
-/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
-///
-/// CheckStack:
-/// temp0 = sp - MaxStack
-/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
-/// OldStart:
-/// ...
-/// IncStack:
-/// call inc_stack # doubles the stack space
-/// temp0 = sp - MaxStack
-/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
-void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const unsigned SlotSize =
- static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo())
- ->getSlotSize();
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
- const bool Is64Bit = STI.is64Bit();
- const bool IsLP64 = STI.isTarget64BitLP64();
- DebugLoc DL;
- // HiPE-specific values
- const unsigned HipeLeafWords = 24;
- const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
- const unsigned Guaranteed = HipeLeafWords * SlotSize;
- unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
- MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
- unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;
-
- assert(STI.isTargetLinux() &&
- "HiPE prologue is only supported on Linux operating systems.");
-
- // Compute the largest caller's frame that is needed to fit the callees'
- // frames. This 'MaxStack' is computed from:
- //
- // a) the fixed frame size, which is the space needed for all spilled temps,
- // b) outgoing on-stack parameter areas, and
- // c) the minimum stack space this function needs to make available for the
- // functions it calls (a tunable ABI property).
- if (MFI->hasCalls()) {
- unsigned MoreStackForCalls = 0;
-
- for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
- MBBI != MBBE; ++MBBI)
- for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
- MI != ME; ++MI) {
- if (!MI->isCall())
- continue;
-
- // Get callee operand.
- const MachineOperand &MO = MI->getOperand(0);
-
- // Only take account of global function calls (no closures etc.).
- if (!MO.isGlobal())
- continue;
-
- const Function *F = dyn_cast<Function>(MO.getGlobal());
- if (!F)
- continue;
-
- // Do not update 'MaxStack' for primitive and built-in functions
- // (encoded with names either starting with "erlang."/"bif_" or not
- // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
- // "_", such as the BIF "suspend_0") as they are executed on another
- // stack.
- if (F->getName().find("erlang.") != StringRef::npos ||
- F->getName().find("bif_") != StringRef::npos ||
- F->getName().find_first_of("._") == StringRef::npos)
- continue;
-
- unsigned CalleeStkArity =
- F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
- if (HipeLeafWords - 1 > CalleeStkArity)
- MoreStackForCalls = std::max(MoreStackForCalls,
- (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
- }
- MaxStack += MoreStackForCalls;
- }
-
- // If the stack frame needed is larger than the guaranteed then runtime checks
- // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
- if (MaxStack > Guaranteed) {
- MachineBasicBlock &prologueMBB = MF.front();
- MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
- MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
-
- for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
- E = prologueMBB.livein_end(); I != E; I++) {
- stackCheckMBB->addLiveIn(*I);
- incStackMBB->addLiveIn(*I);
- }
-
- MF.push_front(incStackMBB);
- MF.push_front(stackCheckMBB);
-
- unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
- unsigned LEAop, CMPop, CALLop;
- if (Is64Bit) {
- SPReg = X86::RSP;
- PReg = X86::RBP;
- LEAop = X86::LEA64r;
- CMPop = X86::CMP64rm;
- CALLop = X86::CALL64pcrel32;
- SPLimitOffset = 0x90;
- } else {
- SPReg = X86::ESP;
- PReg = X86::EBP;
- LEAop = X86::LEA32r;
- CMPop = X86::CMP32rm;
- CALLop = X86::CALLpcrel32;
- SPLimitOffset = 0x4c;
- }
-
- ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
- assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
- "HiPE prologue scratch register is live-in");
-
- // Create new MBB for StackCheck:
- addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
- SPReg, false, -MaxStack);
- // SPLimitOffset is in a fixed heap location (pointed by BP).
- addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
- .addReg(ScratchReg), PReg, false, SPLimitOffset);
- BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&prologueMBB);
-
- // Create new MBB for IncStack:
- BuildMI(incStackMBB, DL, TII.get(CALLop)).
- addExternalSymbol("inc_stack_0");
- addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
- SPReg, false, -MaxStack);
- addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
- .addReg(ScratchReg), PReg, false, SPLimitOffset);
- BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);
-
- stackCheckMBB->addSuccessor(&prologueMBB, 99);
- stackCheckMBB->addSuccessor(incStackMBB, 1);
- incStackMBB->addSuccessor(&prologueMBB, 99);
- incStackMBB->addSuccessor(incStackMBB, 1);
- }
-#ifdef XDEBUG
- MF.verify();
-#endif
-}
-
-void X86FrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
- unsigned StackPtr = RegInfo.getStackRegister();
- bool reserveCallFrame = hasReservedCallFrame(MF);
- int Opcode = I->getOpcode();
- bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
- const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
- bool IsLP64 = STI.isTarget64BitLP64();
- DebugLoc DL = I->getDebugLoc();
- uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
- uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
- I = MBB.erase(I);
-
- if (!reserveCallFrame) {
- // If the stack pointer can be changed after prologue, turn the
- // adjcallstackup instruction into a 'sub ESP, <amt>' and the
- // adjcallstackdown instruction into 'add ESP, <amt>'
- if (Amount == 0)
- return;
-
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned StackAlign = MF.getTarget()
- .getSubtargetImpl()
- ->getFrameLowering()
- ->getStackAlignment();
- Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
-
- MachineInstr *New = nullptr;
-
- // Factor out the amount that gets handled inside the sequence
- // (Pushes of argument for frame setup, callee pops for frame destroy)
- Amount -= InternalAmt;
-
- if (Amount) {
- if (Opcode == TII.getCallFrameSetupOpcode()) {
- New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), StackPtr)
- .addReg(StackPtr).addImm(Amount);
- } else {
- assert(Opcode == TII.getCallFrameDestroyOpcode());
-
- unsigned Opc = getADDriOpcode(IsLP64, Amount);
- New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(Amount);
- }
- }
-
- if (New) {
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
-
- // Replace the pseudo instruction with a new instruction.
- MBB.insert(I, New);
- }
-
- return;
- }
-
- if (Opcode == TII.getCallFrameDestroyOpcode() && InternalAmt) {
- // If we are performing frame pointer elimination and if the callee pops
- // something off the stack pointer, add it back. We do this until we have
- // more advanced stack pointer tracking ability.
- unsigned Opc = getSUBriOpcode(IsLP64, InternalAmt);
- MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(InternalAmt);
-
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
-
- // We are not tracking the stack pointer adjustment by the callee, so make
- // sure we restore the stack pointer immediately after the call, there may
- // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
- MachineBasicBlock::iterator B = MBB.begin();
- while (I != B && !std::prev(I)->isCall())
- --I;
- MBB.insert(I, New);
- }
-}
-
+//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the X86 implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "X86FrameLowering.h" +#include "X86InstrBuilder.h" +#include "X86InstrInfo.h" +#include "X86MachineFunctionInfo.h" +#include "X86Subtarget.h" +#include "X86TargetMachine.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Debug.h" +#include <cstdlib> + +using namespace llvm; + +// FIXME: completely move here. +extern cl::opt<bool> ForceStackAlign; + +bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + return !MF.getFrameInfo()->hasVarSizedObjects(); +} + +/// hasFP - Return true if the specified function should have a dedicated frame +/// pointer register. This is true if the function has variable sized allocas +/// or if frame pointer elimination is disabled. +bool X86FrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineModuleInfo &MMI = MF.getMMI(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + + return (MF.getTarget().Options.DisableFramePointerElim(MF) || + RegInfo->needsStackRealignment(MF) || + MFI->hasVarSizedObjects() || + MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() || + MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || + MMI.callsUnwindInit() || MMI.callsEHReturn() || + MFI->hasStackMap() || MFI->hasPatchPoint()); +} + +static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) { + if (IsLP64) { + if (isInt<8>(Imm)) + return X86::SUB64ri8; + return X86::SUB64ri32; + } else { + if (isInt<8>(Imm)) + return X86::SUB32ri8; + return X86::SUB32ri; + } +} + +static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) { + if (IsLP64) { + if (isInt<8>(Imm)) + return X86::ADD64ri8; + return X86::ADD64ri32; + } else { + if (isInt<8>(Imm)) + return X86::ADD32ri8; + return X86::ADD32ri; + } +} + +static unsigned getSUBrrOpcode(unsigned isLP64) { + return isLP64 ? X86::SUB64rr : X86::SUB32rr; +} + +static unsigned getADDrrOpcode(unsigned isLP64) { + return isLP64 ? X86::ADD64rr : X86::ADD32rr; +} + +static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) { + if (IsLP64) { + if (isInt<8>(Imm)) + return X86::AND64ri8; + return X86::AND64ri32; + } + if (isInt<8>(Imm)) + return X86::AND32ri8; + return X86::AND32ri; +} + +static unsigned getPUSHiOpcode(bool IsLP64, MachineOperand MO) { + // We don't support LP64 for now. + assert(!IsLP64); + + if (MO.isImm() && isInt<8>(MO.getImm())) + return X86::PUSH32i8; + + return X86::PUSHi32;; +} + +static unsigned getLEArOpcode(unsigned IsLP64) { + return IsLP64 ? 
X86::LEA64r : X86::LEA32r; +} + +/// findDeadCallerSavedReg - Return a caller-saved register that isn't live +/// when it reaches the "return" instruction. We can then pop a stack object +/// to this register without worry about clobbering it. +static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + const TargetRegisterInfo &TRI, + bool Is64Bit) { + const MachineFunction *MF = MBB.getParent(); + const Function *F = MF->getFunction(); + if (!F || MF->getMMI().callsEHReturn()) + return 0; + + static const uint16_t CallerSavedRegs32Bit[] = { + X86::EAX, X86::EDX, X86::ECX, 0 + }; + + static const uint16_t CallerSavedRegs64Bit[] = { + X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI, + X86::R8, X86::R9, X86::R10, X86::R11, 0 + }; + + unsigned Opc = MBBI->getOpcode(); + switch (Opc) { + default: return 0; + case X86::RETL: + case X86::RETQ: + case X86::RETIL: + case X86::RETIQ: + case X86::TCRETURNdi: + case X86::TCRETURNri: + case X86::TCRETURNmi: + case X86::TCRETURNdi64: + case X86::TCRETURNri64: + case X86::TCRETURNmi64: + case X86::EH_RETURN: + case X86::EH_RETURN64: { + SmallSet<uint16_t, 8> Uses; + for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MBBI->getOperand(i); + if (!MO.isReg() || MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) + Uses.insert(*AI); + } + + const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit; + for (; *CS; ++CS) + if (!Uses.count(*CS)) + return *CS; + } + } + + return 0; +} + +static bool isEAXLiveIn(MachineFunction &MF) { + for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), + EE = MF.getRegInfo().livein_end(); II != EE; ++II) { + unsigned Reg = II->first; + + if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX || + Reg == X86::AH || Reg == X86::AL) + return true; + } + + return false; +} + +/// emitSPUpdate - Emit a series of instructions to increment / decrement the +/// stack pointer by a constant value. +static +void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + unsigned StackPtr, int64_t NumBytes, + bool Is64BitTarget, bool Is64BitStackPtr, bool UseLEA, + const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) { + bool isSub = NumBytes < 0; + uint64_t Offset = isSub ? -NumBytes : NumBytes; + unsigned Opc; + if (UseLEA) + Opc = getLEArOpcode(Is64BitStackPtr); + else + Opc = isSub + ? getSUBriOpcode(Is64BitStackPtr, Offset) + : getADDriOpcode(Is64BitStackPtr, Offset); + + uint64_t Chunk = (1LL << 31) - 1; + DebugLoc DL = MBB.findDebugLoc(MBBI); + + while (Offset) { + if (Offset > Chunk) { + // Rather than emit a long series of instructions for large offsets, + // load the offset into a register and do one sub/add + unsigned Reg = 0; + + if (isSub && !isEAXLiveIn(*MBB.getParent())) + Reg = (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX); + else + Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget); + + if (Reg) { + Opc = Is64BitTarget ? X86::MOV64ri : X86::MOV32ri; + BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg) + .addImm(Offset); + Opc = isSub + ? getSUBrrOpcode(Is64BitTarget) + : getADDrrOpcode(Is64BitTarget); + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr) + .addReg(Reg); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. + Offset = 0; + continue; + } + } + + uint64_t ThisVal = (Offset > Chunk) ? 
Chunk : Offset; + if (ThisVal == (Is64BitTarget ? 8 : 4)) { + // Use push / pop instead. + unsigned Reg = isSub + ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX) + : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget); + if (Reg) { + Opc = isSub + ? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r) + : (Is64BitTarget ? X86::POP64r : X86::POP32r); + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc)) + .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub)); + if (isSub) + MI->setFlag(MachineInstr::FrameSetup); + Offset -= ThisVal; + continue; + } + } + + MachineInstr *MI = nullptr; + + if (UseLEA) { + MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), + StackPtr, false, isSub ? -ThisVal : ThisVal); + } else { + MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr) + .addImm(ThisVal); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. + } + + if (isSub) + MI->setFlag(MachineInstr::FrameSetup); + + Offset -= ThisVal; + } +} + +/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. +static +void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + unsigned StackPtr, uint64_t *NumBytes = nullptr) { + if (MBBI == MBB.begin()) return; + + MachineBasicBlock::iterator PI = std::prev(MBBI); + unsigned Opc = PI->getOpcode(); + if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || + Opc == X86::ADD32ri || Opc == X86::ADD32ri8 || + Opc == X86::LEA32r || Opc == X86::LEA64_32r) && + PI->getOperand(0).getReg() == StackPtr) { + if (NumBytes) + *NumBytes += PI->getOperand(2).getImm(); + MBB.erase(PI); + } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || + Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && + PI->getOperand(0).getReg() == StackPtr) { + if (NumBytes) + *NumBytes -= PI->getOperand(2).getImm(); + MBB.erase(PI); + } +} + +/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower +/// iterator. +static +void mergeSPUpdatesDown(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned StackPtr, uint64_t *NumBytes = nullptr) { + // FIXME: THIS ISN'T RUN!!! + return; + + if (MBBI == MBB.end()) return; + + MachineBasicBlock::iterator NI = std::next(MBBI); + if (NI == MBB.end()) return; + + unsigned Opc = NI->getOpcode(); + if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || + Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && + NI->getOperand(0).getReg() == StackPtr) { + if (NumBytes) + *NumBytes -= NI->getOperand(2).getImm(); + MBB.erase(NI); + MBBI = NI; + } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || + Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && + NI->getOperand(0).getReg() == StackPtr) { + if (NumBytes) + *NumBytes += NI->getOperand(2).getImm(); + MBB.erase(NI); + MBBI = NI; + } +} + +/// mergeSPUpdates - Checks the instruction before/after the passed +/// instruction. If it is an ADD/SUB/LEA instruction it is deleted argument and +/// the stack adjustment is returned as a positive value for ADD/LEA and a +/// negative for SUB. +static int mergeSPUpdates(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, unsigned StackPtr, + bool doMergeWithPrevious) { + if ((doMergeWithPrevious && MBBI == MBB.begin()) || + (!doMergeWithPrevious && MBBI == MBB.end())) + return 0; + + MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI; + MachineBasicBlock::iterator NI = doMergeWithPrevious ? 
nullptr + : std::next(MBBI); + unsigned Opc = PI->getOpcode(); + int Offset = 0; + + if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || + Opc == X86::ADD32ri || Opc == X86::ADD32ri8 || + Opc == X86::LEA32r || Opc == X86::LEA64_32r) && + PI->getOperand(0).getReg() == StackPtr){ + Offset += PI->getOperand(2).getImm(); + MBB.erase(PI); + if (!doMergeWithPrevious) MBBI = NI; + } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || + Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && + PI->getOperand(0).getReg() == StackPtr) { + Offset -= PI->getOperand(2).getImm(); + MBB.erase(PI); + if (!doMergeWithPrevious) MBBI = NI; + } + + return Offset; +} + +void +X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL) const { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + + // Add callee saved registers to move list. + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + if (CSI.empty()) return; + + // Calculate offsets. + for (std::vector<CalleeSavedInfo>::const_iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); + unsigned Reg = I->getReg(); + + unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); + unsigned CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, + Offset)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } +} + +/// usesTheStack - This function checks if any of the users of EFLAGS +/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has +/// to use the stack, and if we don't adjust the stack we clobber the first +/// frame index. +/// See X86InstrInfo::copyPhysReg. +static bool usesTheStack(const MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + for (MachineRegisterInfo::reg_instr_iterator + ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end(); + ri != re; ++ri) + if (ri->isCopy()) + return true; + + return false; +} + +void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL) { + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); + bool Is64Bit = STI.is64Bit(); + bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; + + unsigned CallOp; + if (Is64Bit) + CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32; + else + CallOp = X86::CALLpcrel32; + + const char *Symbol; + if (Is64Bit) { + if (STI.isTargetCygMing()) { + Symbol = "___chkstk_ms"; + } else { + Symbol = "__chkstk"; + } + } else if (STI.isTargetCygMing()) + Symbol = "_alloca"; + else + Symbol = "_chkstk"; + + MachineInstrBuilder CI; + + // All current stack probes take AX and SP as input, clobber flags, and + // preserve all registers. x86_64 probes leave RSP unmodified. + if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { + // For the large code model, we have to call through a register. Use R11, + // as it is scratch in all supported calling conventions. 
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11) + .addExternalSymbol(Symbol); + CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11); + } else { + CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol); + } + + unsigned AX = Is64Bit ? X86::RAX : X86::EAX; + unsigned SP = Is64Bit ? X86::RSP : X86::ESP; + CI.addReg(AX, RegState::Implicit) + .addReg(SP, RegState::Implicit) + .addReg(AX, RegState::Define | RegState::Implicit) + .addReg(SP, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + + if (Is64Bit) { + // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp + // themselves. It also does not clobber %rax so we can reuse it when + // adjusting %rsp. + BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP) + .addReg(X86::RSP) + .addReg(X86::RAX); + } +} + +/// emitPrologue - Push callee-saved registers onto the stack, which +/// automatically adjust the stack pointer. Adjust the stack pointer to allocate +/// space for local variables. Also emit labels used by the exception handler to +/// generate the exception handling frames. + +/* + Here's a gist of what gets emitted: + + ; Establish frame pointer, if needed + [if needs FP] + push %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + .seh_pushreg %rpb + mov %rsp, %rbp + .cfi_def_cfa_register %rbp + + ; Spill general-purpose registers + [for all callee-saved GPRs] + pushq %<reg> + [if not needs FP] + .cfi_def_cfa_offset (offset from RETADDR) + .seh_pushreg %<reg> + + ; If the required stack alignment > default stack alignment + ; rsp needs to be re-aligned. This creates a "re-alignment gap" + ; of unknown size in the stack frame. + [if stack needs re-alignment] + and $MASK, %rsp + + ; Allocate space for locals + [if target is Windows and allocated space > 4096 bytes] + ; Windows needs special care for allocations larger + ; than one page. + mov $NNN, %rax + call ___chkstk_ms/___chkstk + sub %rax, %rsp + [else] + sub $NNN, %rsp + + [if needs FP] + .seh_stackalloc (size of XMM spill slots) + .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots + [else] + .seh_stackalloc NNN + + ; Spill XMMs + ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved, + ; they may get spilled on any platform, if the current function + ; calls @llvm.eh.unwind.init + [if needs FP] + [for all callee-saved XMM registers] + movaps %<xmm reg>, -MMM(%rbp) + [for all callee-saved XMM registers] + .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset) + ; i.e. the offset relative to (%rbp - SEHFrameOffset) + [else] + [for all callee-saved XMM registers] + movaps %<xmm reg>, KKK(%rsp) + [for all callee-saved XMM registers] + .seh_savexmm %<xmm reg>, KKK + + .seh_endprologue + + [if needs base pointer] + mov %rsp, %rbx + [if needs to restore base pointer] + mov %rsp, -MMM(%rbp) + + ; Emit CFI info + [if needs FP] + [for all callee-saved registers] + .cfi_offset %<reg>, (offset from %rbp) + [else] + .cfi_def_cfa_offset (offset from RETADDR) + [for all callee-saved registers] + .cfi_offset %<reg>, (offset from %rsp) + + Notes: + - .seh directives are emitted only for Windows 64 ABI + - .cfi directives are emitted for all other ABIs + - for 32-bit code, substitute %e?? registers for %r?? +*/ + +void X86FrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. 
+ MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const Function *Fn = MF.getFunction(); + const X86RegisterInfo *RegInfo = + static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo()); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. + uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. + bool HasFP = hasFP(MF); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); + bool Is64Bit = STI.is64Bit(); + // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. + const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); + bool IsWin64 = STI.isTargetWin64(); + // Not necessarily synonymous with IsWin64. + bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry(); + bool NeedsDwarfCFI = + !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); + bool UseLEA = STI.useLeaForSP(); + unsigned StackAlign = getStackAlignment(); + unsigned SlotSize = RegInfo->getSlotSize(); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + const unsigned MachineFramePtr = STI.isTarget64BitILP32() ? + getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr; + unsigned StackPtr = RegInfo->getStackRegister(); + unsigned BasePtr = RegInfo->getBaseRegister(); + DebugLoc DL; + + // If we're forcing a stack realignment we can't rely on just the frame + // info, we need to know the ABI stack alignment as well in case we + // have a call out. Otherwise just make sure we have some alignment - we'll + // go with the minimum SlotSize. + if (ForceStackAlign) { + if (MFI->hasCalls()) + MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; + else if (MaxAlign < SlotSize) + MaxAlign = SlotSize; + } + + // Add RETADDR move area to callee saved frame size. + int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + if (TailCallReturnAddrDelta < 0) + X86FI->setCalleeSavedFrameSize( + X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); + + bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO()); + + // The default stack probe size is 4096 if the function has no stackprobesize + // attribute. + unsigned StackProbeSize = 4096; + if (Fn->hasFnAttribute("stack-probe-size")) + Fn->getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, StackProbeSize); + + // If this is x86-64 and the Red Zone is not disabled, if we are a leaf + // function, and use up to 128 bytes of stack space, don't have a frame + // pointer, calls, or dynamic alloca then we do not need to adjust the + // stack pointer (we fit in the Red Zone). We also check that we don't + // push and pop from the stack. + if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoRedZone) && + !RegInfo->needsStackRealignment(MF) && + !MFI->hasVarSizedObjects() && // No dynamic alloca. + !MFI->adjustsStack() && // No calls. + !IsWin64 && // Win64 has no Red Zone + !usesTheStack(MF) && // Don't push and pop. + !MF.shouldSplitStack()) { // Regular stack + uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); + if (HasFP) MinSize += SlotSize; + StackSize = std::max(MinSize, StackSize > 128 ? 
StackSize - 128 : 0); + MFI->setStackSize(StackSize); + } + + // Insert stack pointer adjustment for later moving of return addr. Only + // applies to tail call optimized functions where the callee argument stack + // size is bigger than the callers. + if (TailCallReturnAddrDelta < 0) { + MachineInstr *MI = + BuildMI(MBB, MBBI, DL, + TII.get(getSUBriOpcode(Uses64BitFramePtr, -TailCallReturnAddrDelta)), + StackPtr) + .addReg(StackPtr) + .addImm(-TailCallReturnAddrDelta) + .setMIFlag(MachineInstr::FrameSetup); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. + } + + // Mapping for machine moves: + // + // DST: VirtualFP AND + // SRC: VirtualFP => DW_CFA_def_cfa_offset + // ELSE => DW_CFA_def_cfa + // + // SRC: VirtualFP AND + // DST: Register => DW_CFA_def_cfa_register + // + // ELSE + // OFFSET < 0 => DW_CFA_offset_extended_sf + // REG < 64 => DW_CFA_offset + Reg + // ELSE => DW_CFA_offset_extended + + uint64_t NumBytes = 0; + int stackGrowth = -SlotSize; + + if (HasFP) { + // Calculate required stack adjustment. + uint64_t FrameSize = StackSize - SlotSize; + // If required, include space for extra hidden slot for stashing base pointer. + if (X86FI->getRestoreBasePointer()) + FrameSize += SlotSize; + if (RegInfo->needsStackRealignment(MF)) { + // Callee-saved registers are pushed on stack before the stack + // is realigned. + FrameSize -= X86FI->getCalleeSavedFrameSize(); + NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; + } else { + NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); + } + + // Get the offset of the stack slot for the EBP register, which is + // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. + // Update the frame offset adjustment. + MFI->setOffsetAdjustment(-NumBytes); + + // Save EBP/RBP into the appropriate stack slot. + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) + .addReg(MachineFramePtr, RegState::Kill) + .setMIFlag(MachineInstr::FrameSetup); + + if (NeedsDwarfCFI) { + // Mark the place where EBP/RBP was saved. + // Define the current CFA rule to use the provided offset. + assert(StackSize); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + // Change the rule for the FramePtr to be an "offset" rule. + unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true); + CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createOffset(nullptr, + DwarfFramePtr, 2 * stackGrowth)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + + if (NeedsWinEH) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) + .addImm(FramePtr) + .setMIFlag(MachineInstr::FrameSetup); + } + + // Update EBP with the new base value. + BuildMI(MBB, MBBI, DL, + TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr) + .addReg(StackPtr) + .setMIFlag(MachineInstr::FrameSetup); + + if (NeedsDwarfCFI) { + // Mark effective beginning of when frame pointer becomes valid. + // Define the current CFA to use the EBP/RBP register. + unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + + // Mark the FramePtr as live-in in every block. 
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + I->addLiveIn(MachineFramePtr); + } else { + NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); + } + + // Skip the callee-saved push instructions. + bool PushedRegs = false; + int StackOffset = 2 * stackGrowth; + + while (MBBI != MBB.end() && + (MBBI->getOpcode() == X86::PUSH32r || + MBBI->getOpcode() == X86::PUSH64r)) { + PushedRegs = true; + unsigned Reg = MBBI->getOperand(0).getReg(); + ++MBBI; + + if (!HasFP && NeedsDwarfCFI) { + // Mark callee-saved push instruction. + // Define the current CFA rule to use the provided offset. + assert(StackSize); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + StackOffset += stackGrowth; + } + + if (NeedsWinEH) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag( + MachineInstr::FrameSetup); + } + } + + // Realign stack after we pushed callee-saved registers (so that we'll be + // able to calculate their offsets from the frame pointer). + if (RegInfo->needsStackRealignment(MF)) { + assert(HasFP && "There should be a frame pointer if stack is realigned."); + uint64_t Val = -MaxAlign; + MachineInstr *MI = + BuildMI(MBB, MBBI, DL, + TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), StackPtr) + .addReg(StackPtr) + .addImm(Val) + .setMIFlag(MachineInstr::FrameSetup); + + // The EFLAGS implicit def is dead. + MI->getOperand(3).setIsDead(); + } + + // If there is an SUB32ri of ESP immediately before this instruction, merge + // the two. This can be the case when tail call elimination is enabled and + // the callee has more arguments then the caller. + NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); + + // If there is an ADD32ri or SUB32ri of ESP immediately after this + // instruction, merge the two instructions. + mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); + + // Adjust stack pointer: ESP -= numbytes. + + // Windows and cygwin/mingw require a prologue helper routine when allocating + // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw + // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the + // stack and adjust the stack pointer in one go. The 64-bit version of + // __chkstk is only responsible for probing the stack. The 64-bit prologue is + // responsible for adjusting the stack pointer. Touching the stack at 4K + // increments is necessary to ensure that the guard pages used by the OS + // virtual memory manager are allocated in correct sequence. + if (NumBytes >= StackProbeSize && UseStackProbe) { + // Check whether EAX is livein for this function. + bool isEAXAlive = isEAXLiveIn(MF); + + if (isEAXAlive) { + // Sanity check that EAX is not livein for this function. + // It should not be, so throw an assert. + assert(!Is64Bit && "EAX is livein in x64 case!"); + + // Save EAX + BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) + .addReg(X86::EAX, RegState::Kill) + .setMIFlag(MachineInstr::FrameSetup); + } + + if (Is64Bit) { + // Handle the 64-bit Windows ABI case where we need to call __chkstk. + // Function prologue is responsible for adjusting the stack pointer. + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); + } else { + // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. + // We'll also use 4 already allocated bytes for EAX. 
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) + .setMIFlag(MachineInstr::FrameSetup); + } + + // Save a pointer to the MI where we set AX. + MachineBasicBlock::iterator SetRAX = MBBI; + --SetRAX; + + // Call __chkstk, __chkstk_ms, or __alloca. + emitStackProbeCall(MF, MBB, MBBI, DL); + + // Apply the frame setup flag to all inserted instrs. + for (; SetRAX != MBBI; ++SetRAX) + SetRAX->setFlag(MachineInstr::FrameSetup); + + if (isEAXAlive) { + // Restore EAX + MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), + X86::EAX), + StackPtr, false, NumBytes - 4); + MI->setFlag(MachineInstr::FrameSetup); + MBB.insert(MBBI, MI); + } + } else if (NumBytes) { + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, Uses64BitFramePtr, + UseLEA, TII, *RegInfo); + } + + int SEHFrameOffset = 0; + if (NeedsWinEH) { + if (HasFP) { + // We need to set frame base offset low enough such that all saved + // register offsets would be positive relative to it, but we can't + // just use NumBytes, because .seh_setframe offset must be <=240. + // So we pretend to have only allocated enough space to spill the + // non-volatile registers. + // We don't care about the rest of stack allocation, because unwinder + // will restore SP to (BP - SEHFrameOffset) + for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) { + int offset = MFI->getObjectOffset(Info.getFrameIdx()); + SEHFrameOffset = std::max(SEHFrameOffset, std::abs(offset)); + } + SEHFrameOffset += SEHFrameOffset % 16; // ensure alignmant + + // This only needs to account for XMM spill slots, GPR slots + // are covered by the .seh_pushreg's emitted above. + unsigned Size = SEHFrameOffset - X86FI->getCalleeSavedFrameSize(); + if (Size) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) + .addImm(Size) + .setMIFlag(MachineInstr::FrameSetup); + } + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) + .addImm(FramePtr) + .addImm(SEHFrameOffset) + .setMIFlag(MachineInstr::FrameSetup); + } else { + // SP will be the base register for restoring XMMs + if (NumBytes) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); + } + } + } + + // Skip the rest of register spilling code + while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) + ++MBBI; + + // Emit SEH info for non-GPRs + if (NeedsWinEH) { + for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) { + unsigned Reg = Info.getReg(); + if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) + continue; + assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class"); + + int Offset = getFrameIndexOffset(MF, Info.getFrameIdx()); + Offset += SEHFrameOffset; + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) + .addImm(Reg) + .addImm(Offset) + .setMIFlag(MachineInstr::FrameSetup); + } + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue)) + .setMIFlag(MachineInstr::FrameSetup); + } + + // If we need a base pointer, set it up here. It's whatever the value + // of the stack pointer is at this point. Any variable size objects + // will be allocated after this, so we can still use the base pointer + // to reference locals. + if (RegInfo->hasBasePointer(MF)) { + // Update the base pointer with the current stack pointer. + unsigned Opc = Uses64BitFramePtr ? 
X86::MOV64rr : X86::MOV32rr; + BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) + .addReg(StackPtr) + .setMIFlag(MachineInstr::FrameSetup); + if (X86FI->getRestoreBasePointer()) { + // Stash value of base pointer. Saving RSP instead of EBP shortens dependence chain. + unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), + FramePtr, true, X86FI->getRestoreBasePointerOffset()) + .addReg(StackPtr) + .setMIFlag(MachineInstr::FrameSetup); + } + } + + if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { + // Mark end of stack pointer adjustment. + if (!HasFP && NumBytes) { + // Define the current CFA rule to use the provided offset. + assert(StackSize); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, + -StackSize + stackGrowth)); + + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + + // Emit DWARF info specifying the offsets of the callee-saved registers. + if (PushedRegs) + emitCalleeSavedFrameMoves(MBB, MBBI, DL); + } +} + +void X86FrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + const X86RegisterInfo *RegInfo = + static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo()); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + assert(MBBI != MBB.end() && "Returning block has no instructions"); + unsigned RetOpcode = MBBI->getOpcode(); + DebugLoc DL = MBBI->getDebugLoc(); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); + bool Is64Bit = STI.is64Bit(); + // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. + const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); + const bool Is64BitILP32 = STI.isTarget64BitILP32(); + bool UseLEA = STI.useLeaForSP(); + unsigned StackAlign = getStackAlignment(); + unsigned SlotSize = RegInfo->getSlotSize(); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + unsigned MachineFramePtr = Is64BitILP32 ? + getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr; + unsigned StackPtr = RegInfo->getStackRegister(); + + bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry(); + + switch (RetOpcode) { + default: + llvm_unreachable("Can only insert epilog into returning blocks"); + case X86::RETQ: + case X86::RETL: + case X86::RETIL: + case X86::RETIQ: + case X86::TCRETURNdi: + case X86::TCRETURNri: + case X86::TCRETURNmi: + case X86::TCRETURNdi64: + case X86::TCRETURNri64: + case X86::TCRETURNmi64: + case X86::EH_RETURN: + case X86::EH_RETURN64: + break; // These are ok + } + + // Get the number of bytes to allocate from the FrameInfo. + uint64_t StackSize = MFI->getStackSize(); + uint64_t MaxAlign = MFI->getMaxAlignment(); + unsigned CSSize = X86FI->getCalleeSavedFrameSize(); + uint64_t NumBytes = 0; + + // If we're forcing a stack realignment we can't rely on just the frame + // info, we need to know the ABI stack alignment as well in case we + // have a call out. Otherwise just make sure we have some alignment - we'll + // go with the minimum. + if (ForceStackAlign) { + if (MFI->hasCalls()) + MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; + else + MaxAlign = MaxAlign ? 
MaxAlign : 4; + } + + if (hasFP(MF)) { + // Calculate required stack adjustment. + uint64_t FrameSize = StackSize - SlotSize; + if (RegInfo->needsStackRealignment(MF)) { + // Callee-saved registers were pushed on stack before the stack + // was realigned. + FrameSize -= CSSize; + NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; + } else { + NumBytes = FrameSize - CSSize; + } + + // Pop EBP. + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr); + } else { + NumBytes = StackSize - CSSize; + } + + // Skip the callee-saved pop instructions. + while (MBBI != MBB.begin()) { + MachineBasicBlock::iterator PI = std::prev(MBBI); + unsigned Opc = PI->getOpcode(); + + if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE && + !PI->isTerminator()) + break; + + --MBBI; + } + MachineBasicBlock::iterator FirstCSPop = MBBI; + + DL = MBBI->getDebugLoc(); + + // If there is an ADD32ri or SUB32ri of ESP immediately before this + // instruction, merge the two instructions. + if (NumBytes || MFI->hasVarSizedObjects()) + mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); + + // If dynamic alloca is used, then reset esp to point to the last callee-saved + // slot before popping them off! Same applies for the case, when stack was + // realigned. + if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) { + if (RegInfo->needsStackRealignment(MF)) + MBBI = FirstCSPop; + if (CSSize != 0) { + unsigned Opc = getLEArOpcode(Uses64BitFramePtr); + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), + FramePtr, false, -CSSize); + --MBBI; + } else { + unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr); + BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) + .addReg(FramePtr); + --MBBI; + } + } else if (NumBytes) { + // Adjust stack pointer back: ESP += numbytes. + emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr, UseLEA, + TII, *RegInfo); + --MBBI; + } + + // Windows unwinder will not invoke function's exception handler if IP is + // either in prologue or in epilogue. This behavior causes a problem when a + // call immediately precedes an epilogue, because the return address points + // into the epilogue. To cope with that, we insert an epilogue marker here, + // then replace it with a 'nop' if it ends up immediately after a CALL in the + // final emitted code. + if (NeedsWinEH) + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue)); + + // We're returning from function via eh_return. + if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) { + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &DestAddr = MBBI->getOperand(0); + assert(DestAddr.isReg() && "Offset should be in register!"); + BuildMI(MBB, MBBI, DL, + TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), + StackPtr).addReg(DestAddr.getReg()); + } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || + RetOpcode == X86::TCRETURNmi || + RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 || + RetOpcode == X86::TCRETURNmi64) { + bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64; + // Tail call return: adjust the stack pointer and jump to callee. + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1); + assert(StackAdjust.isImm() && "Expecting immediate value."); + + // Adjust stack pointer. 
+ int StackAdj = StackAdjust.getImm(); + int MaxTCDelta = X86FI->getTCReturnAddrDelta(); + int Offset = 0; + assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); + + // Incoporate the retaddr area. + Offset = StackAdj-MaxTCDelta; + assert(Offset >= 0 && "Offset should never be negative"); + + if (Offset) { + // Check for possible merge with preceding ADD instruction. + Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); + emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr, + UseLEA, TII, *RegInfo); + } + + // Jump to label or value in register. + bool IsWin64 = STI.isTargetWin64(); + if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) { + unsigned Op = (RetOpcode == X86::TCRETURNdi) + ? X86::TAILJMPd + : (IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(Op)); + if (JumpTarget.isGlobal()) + MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + else { + assert(JumpTarget.isSymbol()); + MIB.addExternalSymbol(JumpTarget.getSymbolName(), + JumpTarget.getTargetFlags()); + } + } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) { + unsigned Op = (RetOpcode == X86::TCRETURNmi) + ? X86::TAILJMPm + : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(Op)); + for (unsigned i = 0; i != 5; ++i) + MIB.addOperand(MBBI->getOperand(i)); + } else if (RetOpcode == X86::TCRETURNri64) { + BuildMI(MBB, MBBI, DL, + TII.get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64)) + .addReg(JumpTarget.getReg(), RegState::Kill); + } else { + BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). + addReg(JumpTarget.getReg(), RegState::Kill); + } + + MachineInstr *NewMI = std::prev(MBBI); + NewMI->copyImplicitOps(MF, MBBI); + + // Delete the pseudo instruction TCRETURN. + MBB.erase(MBBI); + } else if ((RetOpcode == X86::RETQ || RetOpcode == X86::RETL || + RetOpcode == X86::RETIQ || RetOpcode == X86::RETIL) && + (X86FI->getTCReturnAddrDelta() < 0)) { + // Add the return addr area delta back since we are not tail calling. + int delta = -1*X86FI->getTCReturnAddrDelta(); + MBBI = MBB.getLastNonDebugInstr(); + + // Check for possible merge with preceding ADD instruction. + delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); + emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, Uses64BitFramePtr, UseLEA, TII, + *RegInfo); + } +} + +int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + const X86RegisterInfo *RegInfo = + static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo()); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea(); + uint64_t StackSize = MFI->getStackSize(); + + if (RegInfo->hasBasePointer(MF)) { + assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!"); + if (FI < 0) { + // Skip the saved EBP. + return Offset + RegInfo->getSlotSize(); + } else { + assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); + return Offset + StackSize; + } + } else if (RegInfo->needsStackRealignment(MF)) { + if (FI < 0) { + // Skip the saved EBP. + return Offset + RegInfo->getSlotSize(); + } else { + assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); + return Offset + StackSize; + } + // FIXME: Support tail calls + } else { + if (!hasFP(MF)) + return Offset + StackSize; + + // Skip the saved EBP. 
+ Offset += RegInfo->getSlotSize(); + + // Skip the RETADDR move area + const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + if (TailCallReturnAddrDelta < 0) + Offset -= TailCallReturnAddrDelta; + } + + return Offset; +} + +int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const { + const X86RegisterInfo *RegInfo = + static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo()); + // We can't calculate offset from frame pointer if the stack is realigned, + // so enforce usage of stack/base pointer. The base pointer is used when we + // have dynamic allocas in addition to dynamic realignment. + if (RegInfo->hasBasePointer(MF)) + FrameReg = RegInfo->getBaseRegister(); + else if (RegInfo->needsStackRealignment(MF)) + FrameReg = RegInfo->getStackRegister(); + else + FrameReg = RegInfo->getFrameRegister(MF); + return getFrameIndexOffset(MF, FI); +} + +// Simplified from getFrameIndexOffset keeping only StackPointer cases +int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + // Does not include any dynamic realign. + const uint64_t StackSize = MFI->getStackSize(); + { +#ifndef NDEBUG + const X86RegisterInfo *RegInfo = + static_cast<const X86RegisterInfo*>(MF.getSubtarget().getRegisterInfo()); + // Note: LLVM arranges the stack as: + // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP) + // > "Stack Slots" (<--SP) + // We can always address StackSlots from RSP. We can usually (unless + // needsStackRealignment) address CSRs from RSP, but sometimes need to + // address them from RBP. FixedObjects can be placed anywhere in the stack + // frame depending on their specific requirements (i.e. we can actually + // refer to arguments to the function which are stored in the *callers* + // frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs + // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject. + + assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case"); + + // We don't handle tail calls, and shouldn't be seeing them + // either. + int TailCallReturnAddrDelta = + MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta(); + assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!"); +#endif + } + + // This is how the math works out: + // + // %rsp grows (i.e. gets lower) left to right. Each box below is + // one word (eight bytes). Obj0 is the stack slot we're trying to + // get to. + // + // ---------------------------------- + // | BP | Obj0 | Obj1 | ... | ObjN | + // ---------------------------------- + // ^ ^ ^ ^ + // A B C E + // + // A is the incoming stack pointer. + // (B - A) is the local area offset (-8 for x86-64) [1] + // (C - A) is the Offset returned by MFI->getObjectOffset for Obj0 [2] + // + // |(E - B)| is the StackSize (absolute value, positive). For a + // stack that grown down, this works out to be (B - E). [3] + // + // E is also the value of %rsp after stack has been set up, and we + // want (C - E) -- the value we can add to %rsp to get to Obj0. 
+  // Now,
+  //   (C - E) == (C - A) - (B - A) + (B - E)
+  //     { Using [1], [2] and [3] above }
+  //   == getObjectOffset - LocalAreaOffset + StackSize
+
+  // Get the Offset from the StackPointer.
+  int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+
+  return Offset + StackSize;
+}
+
+// Simplified from getFrameIndexReference, keeping only the StackPointer cases.
+int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
+                                                   int FI,
+                                                   unsigned &FrameReg) const {
+  const X86RegisterInfo *RegInfo =
+      static_cast<const X86RegisterInfo*>(MF.getSubtarget().getRegisterInfo());
+
+  assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
+
+  FrameReg = RegInfo->getStackRegister();
+  return getFrameIndexOffsetFromSP(MF, FI);
+}
+
+bool X86FrameLowering::assignCalleeSavedSpillSlots(
+    MachineFunction &MF, const TargetRegisterInfo *TRI,
+    std::vector<CalleeSavedInfo> &CSI) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const X86RegisterInfo *RegInfo =
+      static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+  unsigned SlotSize = RegInfo->getSlotSize();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+
+  unsigned CalleeSavedFrameSize = 0;
+  int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
+
+  if (hasFP(MF)) {
+    // emitPrologue always spills the frame register first.
+    SpillSlotOffset -= SlotSize;
+    MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+
+    // Since emitPrologue and emitEpilogue will handle spilling and restoring
+    // of the frame register, we can delete it from the CSI list and not have
+    // to worry about avoiding it later.
+    unsigned FPReg = RegInfo->getFrameRegister(MF);
+    for (unsigned i = 0; i < CSI.size(); ++i) {
+      if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
+        CSI.erase(CSI.begin() + i);
+        break;
+      }
+    }
+  }
+
+  // Assign slots for GPRs. It increases frame size.
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i - 1].getReg();
+
+    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
+      continue;
+
+    SpillSlotOffset -= SlotSize;
+    CalleeSavedFrameSize += SlotSize;
+
+    int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+    CSI[i - 1].setFrameIdx(SlotIndex);
+  }
+
+  X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
+
+  // Assign slots for XMMs.
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i - 1].getReg();
+    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
+      continue;
+
+    const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+    // Ensure alignment.
+    SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment();
+    // Spill into the slot.
+    SpillSlotOffset -= RC->getSize();
+    int SlotIndex =
+        MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
+    CSI[i - 1].setFrameIdx(SlotIndex);
+    MFI->ensureMaxAlignment(RC->getAlignment());
+  }
+
+  return true;
+}
+
+bool X86FrameLowering::spillCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    const std::vector<CalleeSavedInfo> &CSI,
+    const TargetRegisterInfo *TRI) const {
+  DebugLoc DL = MBB.findDebugLoc(MI);
+
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+
+  // Push GPRs. It increases frame size.
+  unsigned Opc = STI.is64Bit() ?
+      X86::PUSH64r : X86::PUSH32r;
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i - 1].getReg();
+
+    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
+      continue;
+    // Add the callee-saved register as live-in. It's killed at the spill.
+    MBB.addLiveIn(Reg);
+
+    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
+  // Spill XMM regs. X86 has no push/pop instructions for XMM registers,
+  // so store them to the stack frame instead.
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i - 1].getReg();
+    if (X86::GR64RegClass.contains(Reg) ||
+        X86::GR32RegClass.contains(Reg))
+      continue;
+    // Add the callee-saved register as live-in. It's killed at the spill.
+    MBB.addLiveIn(Reg);
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+
+    TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
+                            TRI);
+    --MI;
+    MI->setFlag(MachineInstr::FrameSetup);
+    ++MI;
+  }
+
+  return true;
+}
+
+bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MI,
+                                       const std::vector<CalleeSavedInfo> &CSI,
+                                       const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL = MBB.findDebugLoc(MI);
+
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+
+  // Reload XMMs from the stack frame.
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    if (X86::GR64RegClass.contains(Reg) ||
+        X86::GR32RegClass.contains(Reg))
+      continue;
+
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
+  }
+
+  // POP GPRs.
+  unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    if (!X86::GR64RegClass.contains(Reg) &&
+        !X86::GR32RegClass.contains(Reg))
+      continue;
+
+    BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
+  }
+  return true;
+}
+
+void
+X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                       RegScavenger *RS) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const X86RegisterInfo *RegInfo =
+      static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+  unsigned SlotSize = RegInfo->getSlotSize();
+
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
+  if (TailCallReturnAddrDelta < 0) {
+    // Create the RETURNADDR area:
+    //   arg
+    //   arg
+    //   RETADDR
+    //   { ...
+    //     RETADDR area
+    //     ...
+    //   }
+    //   [EBP]
+    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
+                           TailCallReturnAddrDelta - SlotSize, true);
+  }
+
+  // Spill the BasePtr if it's used.
+  if (RegInfo->hasBasePointer(MF))
+    MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
+}
+
+static bool
+HasNestArgument(const MachineFunction *MF) {
+  const Function *F = MF->getFunction();
+  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+       I != E; I++) {
+    if (I->hasNestAttr())
+      return true;
+  }
+  return false;
+}
+
+/// GetScratchRegister - Get a temp register for performing work in the
+/// segmented stack and the Erlang/HiPE stack prologue. Depending on the
+/// platform and the properties of the function, either one or two registers
+/// will be needed.
+/// Set primary to true for the first register, false for the second.
+static unsigned
+GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF,
+                   bool Primary) {
+  CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
+
+  // Erlang stuff.
+  if (CallingConvention == CallingConv::HiPE) {
+    if (Is64Bit)
+      return Primary ? X86::R14 : X86::R13;
+    else
+      return Primary ? X86::EBX : X86::EDI;
+  }
+
+  if (Is64Bit) {
+    if (IsLP64)
+      return Primary ? X86::R11 : X86::R12;
+    else
+      return Primary ? X86::R11D : X86::R12D;
+  }
+
+  bool IsNested = HasNestArgument(&MF);
+
+  if (CallingConvention == CallingConv::X86_FastCall ||
+      CallingConvention == CallingConv::Fast) {
+    if (IsNested)
+      report_fatal_error("Segmented stacks do not support fastcall with "
+                         "nested functions.");
+    return Primary ? X86::EAX : X86::ECX;
+  }
+  if (IsNested)
+    return Primary ? X86::EDX : X86::EAX;
+  return Primary ? X86::ECX : X86::EAX;
+}
+
+// The stack limit in the TCB is set to this many bytes above the actual stack
+// limit.
+static const uint64_t kSplitStackAvailable = 256;
+
+void
+X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
+  MachineBasicBlock &prologueMBB = MF.front();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  uint64_t StackSize;
+  const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+  bool Is64Bit = STI.is64Bit();
+  const bool IsLP64 = STI.isTarget64BitLP64();
+  unsigned TlsReg, TlsOffset;
+  DebugLoc DL;
+
+  unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
+  assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
+         "Scratch register is live-in");
+
+  if (MF.getFunction()->isVarArg())
+    report_fatal_error("Segmented stacks do not support vararg functions.");
+  if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
+      !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
+      !STI.isTargetDragonFly())
+    report_fatal_error("Segmented stacks not supported on this platform.");
+
+  // Eventually StackSize will be calculated by a link-time pass, which will
+  // also decide whether checking code needs to be injected into this
+  // particular prologue.
+  StackSize = MFI->getStackSize();
+
+  // Do not generate a prologue for functions with a stack of size zero.
+  if (StackSize == 0)
+    return;
+
+  MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
+  MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  bool IsNested = false;
+
+  // We need to know if the function has a nest argument only in 64-bit mode.
+  if (Is64Bit)
+    IsNested = HasNestArgument(&MF);
+
+  // The MOV R10, RAX needs to be in a different block, since the RET we emit
+  // in allocMBB needs to be the last (terminating) instruction.
+
+  for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
+         e = prologueMBB.livein_end(); i != e; i++) {
+    allocMBB->addLiveIn(*i);
+    checkMBB->addLiveIn(*i);
+  }
+
+  if (IsNested)
+    allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
+
+  MF.push_front(allocMBB);
+  MF.push_front(checkMBB);
+
+  // When the frame size is less than 256 we just compare the stack
+  // boundary directly to the value of the stack pointer, per gcc.
+  bool CompareStackPointer = StackSize < kSplitStackAvailable;
+
+  // Read the limit of the current stacklet from the stack_guard location.
+  if (Is64Bit) {
+    if (STI.isTargetLinux()) {
+      TlsReg = X86::FS;
+      TlsOffset = IsLP64 ?
+          0x70 : 0x40;
+    } else if (STI.isTargetDarwin()) {
+      TlsReg = X86::GS;
+      TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
+    } else if (STI.isTargetWin64()) {
+      TlsReg = X86::GS;
+      TlsOffset = 0x28; // pvArbitrary, reserved for application use
+    } else if (STI.isTargetFreeBSD()) {
+      TlsReg = X86::FS;
+      TlsOffset = 0x18;
+    } else if (STI.isTargetDragonFly()) {
+      TlsReg = X86::FS;
+      TlsOffset = 0x20; // use tls_tcb.tcb_segstack
+    } else {
+      report_fatal_error("Segmented stacks not supported on this platform.");
+    }
+
+    if (CompareStackPointer)
+      ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
+    else
+      BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
+              ScratchReg).addReg(X86::RSP)
+          .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
+
+    BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
+        .addReg(ScratchReg)
+        .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+  } else {
+    if (STI.isTargetLinux()) {
+      TlsReg = X86::GS;
+      TlsOffset = 0x30;
+    } else if (STI.isTargetDarwin()) {
+      TlsReg = X86::GS;
+      TlsOffset = 0x48 + 90*4;
+    } else if (STI.isTargetWin32()) {
+      TlsReg = X86::FS;
+      TlsOffset = 0x14; // pvArbitrary, reserved for application use
+    } else if (STI.isTargetDragonFly()) {
+      TlsReg = X86::FS;
+      TlsOffset = 0x10; // use tls_tcb.tcb_segstack
+    } else if (STI.isTargetFreeBSD()) {
+      report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
+    } else {
+      report_fatal_error("Segmented stacks not supported on this platform.");
+    }
+
+    if (CompareStackPointer)
+      ScratchReg = X86::ESP;
+    else
+      BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
+          .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
+
+    if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
+        STI.isTargetDragonFly()) {
+      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
+          .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+    } else if (STI.isTargetDarwin()) {
+
+      // TlsOffset doesn't fit into a mod r/m byte, so we need an extra
+      // register.
+      unsigned ScratchReg2;
+      bool SaveScratch2;
+      if (CompareStackPointer) {
+        // The primary scratch register is available for holding the TLS
+        // offset.
+        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
+        SaveScratch2 = false;
+      } else {
+        // Need to use a second register to hold the TLS offset.
+        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
+
+        // Unfortunately, with fastcc the second scratch register may hold an
+        // argument.
+        SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
+      }
+
+      // If Scratch2 is live-in then it needs to be saved.
+      assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
+             "Scratch register is live-in and not saved");
+
+      if (SaveScratch2)
+        BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
+            .addReg(ScratchReg2, RegState::Kill);
+
+      BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
+          .addImm(TlsOffset);
+      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
+          .addReg(ScratchReg)
+          .addReg(ScratchReg2).addImm(1).addReg(0)
+          .addImm(0)
+          .addReg(TlsReg);
+
+      if (SaveScratch2)
+        BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
+    }
+  }
+
+  // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
+  // It jumps to normal execution of the function body.
+  BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&prologueMBB);
+
+  // On 32-bit we first push the argument size and then the frame size. On
+  // 64-bit, we pass the stack frame size in r10 and the argument size in r11.
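+  // (Roughly, the 64-bit alloc block boils down to a sequence like the
+  // following -- illustrative only; the actual immediates depend on the
+  // function:
+  //   mov $StackSize, %r10
+  //   mov $ArgSize,   %r11
+  //   call __morestack
+  //   ret
+  // __morestack allocates a fresh stacklet, copies the stack-passed
+  // arguments over, and re-invokes the function on the new stack; the
+  // trailing ret then returns directly to the original caller.)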
+  if (Is64Bit) {
+    // Functions with nested arguments use R10, so it needs to be saved across
+    // the call to _morestack.
+
+    const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
+    const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
+    const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
+    const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
+    const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;
+
+    if (IsNested)
+      BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
+
+    BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
+        .addImm(StackSize);
+    BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
+        .addImm(X86FI->getArgumentStackSize());
+    MF.getRegInfo().setPhysRegUsed(Reg10);
+    MF.getRegInfo().setPhysRegUsed(Reg11);
+  } else {
+    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
+        .addImm(X86FI->getArgumentStackSize());
+    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
+        .addImm(StackSize);
+  }
+
+  // __morestack is in libgcc.
+  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
+    // Under the large code model, we cannot assume that __morestack lives
+    // within 2^31 bytes of the call site, so we cannot use pc-relative
+    // addressing. We cannot perform the call via a temporary register,
+    // as the rax register may be used to store the static chain, and all
+    // other suitable registers may be either callee-save or used for
+    // parameter passing. We cannot use the stack at this point either
+    // because __morestack manipulates the stack directly.
+    //
+    // To avoid these issues, perform an indirect call via a read-only memory
+    // location containing the address.
+    //
+    // This solution is not perfect, as it assumes that the .rodata section
+    // is laid out within 2^31 bytes of each function body, but this seems
+    // to be sufficient for JIT.
+    BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
+        .addReg(X86::RIP)
+        .addImm(0)
+        .addReg(0)
+        .addExternalSymbol("__morestack_addr")
+        .addReg(0);
+    MF.getMMI().setUsesMorestackAddr(true);
+  } else {
+    if (Is64Bit)
+      BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
+          .addExternalSymbol("__morestack");
+    else
+      BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
+          .addExternalSymbol("__morestack");
+  }
+
+  if (IsNested)
+    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
+  else
+    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
+
+  allocMBB->addSuccessor(&prologueMBB);
+
+  checkMBB->addSuccessor(allocMBB);
+  checkMBB->addSuccessor(&prologueMBB);
+
+#ifdef XDEBUG
+  MF.verify();
+#endif
+}
+
+/// Erlang programs may need a special prologue to handle the stack size they
+/// might need at runtime. That is because Erlang/OTP does not implement a C
+/// stack but uses a custom implementation of a hybrid stack/heap architecture.
+/// (For more information see Eric Stenman's Ph.D. thesis:
+/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
+///
+/// CheckStack:
+///       temp0 = sp - MaxStack
+///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+/// OldStart:
+///       ...
+/// IncStack:
+///       call inc_stack   # doubles the stack space
+///       temp0 = sp - MaxStack
+///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const unsigned SlotSize =
+      static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo())
+          ->getSlotSize();
+  const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+  const bool Is64Bit = STI.is64Bit();
+  const bool IsLP64 = STI.isTarget64BitLP64();
+  DebugLoc DL;
+  // HiPE-specific values.
+  const unsigned HipeLeafWords = 24;
+  const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
+  const unsigned Guaranteed = HipeLeafWords * SlotSize;
+  unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
+      MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
+  unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;
+
+  assert(STI.isTargetLinux() &&
+         "HiPE prologue is only supported on Linux operating systems.");
+
+  // Compute the largest caller's frame that is needed to fit the callees'
+  // frames. This 'MaxStack' is computed from:
+  //
+  // a) the fixed frame size, which is the space needed for all spilled temps,
+  // b) outgoing on-stack parameter areas, and
+  // c) the minimum stack space this function needs to make available for the
+  //    functions it calls (a tunable ABI property).
+  if (MFI->hasCalls()) {
+    unsigned MoreStackForCalls = 0;
+
+    for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
+         MBBI != MBBE; ++MBBI)
+      for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
+           MI != ME; ++MI) {
+        if (!MI->isCall())
+          continue;
+
+        // Get the callee operand.
+        const MachineOperand &MO = MI->getOperand(0);
+
+        // Only take account of global function calls (no closures etc.).
+        if (!MO.isGlobal())
+          continue;
+
+        const Function *F = dyn_cast<Function>(MO.getGlobal());
+        if (!F)
+          continue;
+
+        // Do not update 'MaxStack' for primitive and built-in functions
+        // (encoded with names either starting with "erlang."/"bif_" or not
+        // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
+        // "_", such as the BIF "suspend_0") as they are executed on another
+        // stack.
+        if (F->getName().find("erlang.") != StringRef::npos ||
+            F->getName().find("bif_") != StringRef::npos ||
+            F->getName().find_first_of("._") == StringRef::npos)
+          continue;
+
+        unsigned CalleeStkArity =
+            F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
+        if (HipeLeafWords - 1 > CalleeStkArity)
+          MoreStackForCalls = std::max(MoreStackForCalls,
+              (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
+      }
+    MaxStack += MoreStackForCalls;
+  }
+
+  // If the stack frame needed is larger than the guaranteed size, then runtime
+  // checks and calls to the "inc_stack_0" BIF should be inserted in the
+  // assembly prologue.
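+  // (Worked example with illustrative numbers: on x86-64, SlotSize = 8, so a
+  // leaf function with a 40-byte frame and 8 formal arguments -- 2 beyond the
+  // 6 passed in registers -- gets MaxStack = 40 + 2*8 + 8 = 64 bytes. That is
+  // well under the Guaranteed 24*8 = 192 bytes, so no check blocks would be
+  // emitted for it.)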
+  if (MaxStack > Guaranteed) {
+    MachineBasicBlock &prologueMBB = MF.front();
+    MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
+    MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
+
+    for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
+           E = prologueMBB.livein_end(); I != E; I++) {
+      stackCheckMBB->addLiveIn(*I);
+      incStackMBB->addLiveIn(*I);
+    }
+
+    MF.push_front(incStackMBB);
+    MF.push_front(stackCheckMBB);
+
+    unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
+    unsigned LEAop, CMPop, CALLop;
+    if (Is64Bit) {
+      SPReg = X86::RSP;
+      PReg = X86::RBP;
+      LEAop = X86::LEA64r;
+      CMPop = X86::CMP64rm;
+      CALLop = X86::CALL64pcrel32;
+      SPLimitOffset = 0x90;
+    } else {
+      SPReg = X86::ESP;
+      PReg = X86::EBP;
+      LEAop = X86::LEA32r;
+      CMPop = X86::CMP32rm;
+      CALLop = X86::CALLpcrel32;
+      SPLimitOffset = 0x4c;
+    }
+
+    ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
+    assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
+           "HiPE prologue scratch register is live-in");
+
+    // Create new MBB for StackCheck:
+    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
+                 SPReg, false, -MaxStack);
+    // SPLimitOffset is in a fixed heap location (pointed to by BP).
+    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
+                     .addReg(ScratchReg), PReg, false, SPLimitOffset);
+    BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&prologueMBB);
+
+    // Create new MBB for IncStack:
+    BuildMI(incStackMBB, DL, TII.get(CALLop)).
+        addExternalSymbol("inc_stack_0");
+    addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
+                 SPReg, false, -MaxStack);
+    addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
+                     .addReg(ScratchReg), PReg, false, SPLimitOffset);
+    BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);
+
+    stackCheckMBB->addSuccessor(&prologueMBB, 99);
+    stackCheckMBB->addSuccessor(incStackMBB, 1);
+    incStackMBB->addSuccessor(&prologueMBB, 99);
+    incStackMBB->addSuccessor(incStackMBB, 1);
+  }
+#ifdef XDEBUG
+  MF.verify();
+#endif
+}
+
+bool X86FrameLowering::
+convertArgMovsToPushes(MachineFunction &MF, MachineBasicBlock &MBB,
+                       MachineBasicBlock::iterator I, uint64_t Amount) const {
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
+      MF.getSubtarget().getRegisterInfo());
+  unsigned StackPtr = RegInfo.getStackRegister();
+
+  // Scan the call setup sequence for the pattern we're looking for.
+  // We only handle a simple case now - a sequence of MOV32mi or MOV32mr
+  // instructions that push a sequence of 32-bit values onto the stack, with
+  // no gaps.
+  std::map<int64_t, MachineBasicBlock::iterator> MovMap;
+  do {
+    int Opcode = I->getOpcode();
+    if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
+      break;
+
+    // We only want movs of the form:
+    //   movl imm/r32, k(%esp)
+    // If we run into something else, bail out.
+    // Note that AddrBaseReg may, counterintuitively, not be a register...
+    if (!I->getOperand(X86::AddrBaseReg).isReg() ||
+        (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
+        !I->getOperand(X86::AddrScaleAmt).isImm() ||
+        (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
+        (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
+        (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
+        !I->getOperand(X86::AddrDisp).isImm())
+      return false;
+
+    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
+
+    // We don't want to consider the unaligned case.
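+    // (A push always writes a full 4-byte slot at a 4-byte offset within the
+    // sequence, so e.g. a mov to 6(%esp) could never be reproduced by pushes;
+    // bail out on any displacement that isn't a multiple of 4.)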
+    if (StackDisp % 4)
+      return false;
+
+    // If the same stack slot is being filled twice, something's fishy.
+    if (!MovMap.insert(std::pair<int64_t, MachineInstr*>(StackDisp, I)).second)
+      return false;
+
+    ++I;
+  } while (I != MBB.end());
+
+  // We now expect the end of the sequence - a call and a stack adjust.
+  if (I == MBB.end())
+    return false;
+  if (!I->isCall())
+    return false;
+  MachineBasicBlock::iterator Call = I;
+  if ((++I)->getOpcode() != TII.getCallFrameDestroyOpcode())
+    return false;
+
+  // Now, go through the map, and see that we don't have any gaps,
+  // but only a series of 32-bit MOVs.
+  // Since std::map provides ordered iteration, the original order
+  // of the MOVs doesn't matter.
+  int64_t ExpectedDist = 0;
+  for (auto MMI = MovMap.begin(), MME = MovMap.end(); MMI != MME;
+       ++MMI, ExpectedDist += 4)
+    if (MMI->first != ExpectedDist)
+      return false;
+
+  // Ok, everything looks fine. Do the transformation.
+  DebugLoc DL = I->getDebugLoc();
+
+  // It's possible the original stack adjustment amount was larger than
+  // that done by the pushes. If so, we still need a SUB.
+  Amount -= ExpectedDist;
+  if (Amount) {
+    MachineInstr* Sub = BuildMI(MBB, Call, DL,
+                                TII.get(getSUBriOpcode(false, Amount)),
+                                StackPtr)
+                            .addReg(StackPtr).addImm(Amount);
+    Sub->getOperand(3).setIsDead();
+  }
+
+  // Now, iterate through the map in reverse order, and replace the movs
+  // with pushes. MOVmi/MOVmr doesn't have any defs, so there is no need to
+  // replace uses.
+  for (auto MMI = MovMap.rbegin(), MME = MovMap.rend(); MMI != MME; ++MMI) {
+    MachineBasicBlock::iterator MOV = MMI->second;
+    MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
+
+    // Replace MOVmr with PUSH32r, and MOVmi with a PUSHi of the appropriate
+    // size.
+    int PushOpcode = X86::PUSH32r;
+    if (MOV->getOpcode() == X86::MOV32mi)
+      PushOpcode = getPUSHiOpcode(false, PushOp);
+
+    BuildMI(MBB, Call, DL, TII.get(PushOpcode)).addOperand(PushOp);
+    MBB.erase(MOV);
+  }
+
+  return true;
+}
+
+void X86FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
+      MF.getSubtarget().getRegisterInfo());
+  unsigned StackPtr = RegInfo.getStackRegister();
+  bool reserveCallFrame = hasReservedCallFrame(MF);
+  int Opcode = I->getOpcode();
+  bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
+  const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+  bool IsLP64 = STI.isTarget64BitLP64();
+  DebugLoc DL = I->getDebugLoc();
+  uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
+  uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
+  I = MBB.erase(I);
+
+  if (!reserveCallFrame) {
+    // If the stack pointer can be changed after the prologue, turn the
+    // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
+    // adjcallstackup instruction into an 'add ESP, <amt>'.
+    if (Amount == 0)
+      return;
+
+    // We need to keep the stack aligned properly. To do this, we round the
+    // amount of space needed for the outgoing arguments up to the next
+    // alignment boundary.
+    unsigned StackAlign = MF.getTarget()
+                              .getSubtargetImpl()
+                              ->getFrameLowering()
+                              ->getStackAlignment();
+    Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
+
+    MachineInstr *New = nullptr;
+    if (Opcode == TII.getCallFrameSetupOpcode()) {
+      // Try to convert movs to the stack into pushes.
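+      // (e.g., with a hypothetical 8-byte call frame, instead of the
+      // explicit adjustment plus stores
+      //   subl $8, %esp
+      //   movl $42, 4(%esp)
+      //   movl %eax, (%esp)
+      // convertArgMovsToPushes would leave
+      //   pushl $42
+      //   pushl %eax
+      // which both adjusts the stack and stores the arguments, and is
+      // usually smaller to encode.)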
+      // We currently only look for a pattern that appears in 32-bit
+      // calling conventions.
+      if (!IsLP64 && convertArgMovsToPushes(MF, MBB, I, Amount))
+        return;
+
+      New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
+                    StackPtr)
+                .addReg(StackPtr)
+                .addImm(Amount);
+    } else {
+      assert(Opcode == TII.getCallFrameDestroyOpcode());
+
+      // Factor out the amount the callee already popped.
+      Amount -= CalleeAmt;
+
+      if (Amount) {
+        unsigned Opc = getADDriOpcode(IsLP64, Amount);
+        New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+                  .addReg(StackPtr).addImm(Amount);
+      }
+    }
+
+    if (New) {
+      // The EFLAGS implicit def is dead.
+      New->getOperand(3).setIsDead();
+
+      // Replace the pseudo instruction with a new instruction.
+      MBB.insert(I, New);
+    }
+
+    return;
+  }
+
+  if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
+    // If we are performing frame pointer elimination and if the callee pops
+    // something off the stack pointer, add it back. We do this until we have
+    // more advanced stack pointer tracking ability.
+    unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt);
+    MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+                            .addReg(StackPtr).addImm(CalleeAmt);
+
+    // The EFLAGS implicit def is dead.
+    New->getOperand(3).setIsDead();
+
+    // We are not tracking the stack pointer adjustment by the callee, so make
+    // sure we restore the stack pointer immediately after the call; there may
+    // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
+    MachineBasicBlock::iterator B = MBB.begin();
+    while (I != B && !std::prev(I)->isCall())
+      --I;
+    MBB.insert(I, New);
+  }
+}