diff options
author | Hans Wennborg <hans@hanshq.net> | 2016-03-25 01:10:56 +0000 |
---|---|---|
committer | Hans Wennborg <hans@hanshq.net> | 2016-03-25 01:10:56 +0000 |
commit | 4ae5119eeba206e2b1d88491e8ed76ed0f80721f (patch) | |
tree | f6dbb98d4d296d8e03752195c99274ab4b233bd2 /llvm/lib/Target | |
parent | 9651813ee0d9633e12accb7ae673a8a3b944f35c (diff) | |
download | bcm5719-llvm-4ae5119eeba206e2b1d88491e8ed76ed0f80721f.tar.gz bcm5719-llvm-4ae5119eeba206e2b1d88491e8ed76ed0f80721f.zip |
X86: Use push-pop for materializing 8-bit immediates for minsize (take 2)
This is the same as r255936, with added logic for avoiding clobbering of the
red zone (PR26023).
Differential Revision: http://reviews.llvm.org/D18246
llvm-svn: 264375
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86FrameLowering.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrCompiler.td | 15 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 58 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86MachineFunctionInfo.h | 6 |
7 files changed, 93 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index dd8b8134c43..9beb916d23d 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -958,6 +958,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, !MF.shouldSplitStack()) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; + X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0); StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); MFI->setStackSize(StackSize); } diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index b5cc3f5d30e..4105191147a 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -157,9 +157,13 @@ namespace { /// performance. bool OptForSize; + /// If true, selector should try to optimize for minimum code size. + bool OptForMinSize; + public: explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), OptForSize(false) {} + : SelectionDAGISel(tm, OptLevel), OptForSize(false), + OptForMinSize(false) {} const char *getPassName() const override { return "X86 DAG->DAG Instruction Selection"; @@ -530,8 +534,10 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { } void X86DAGToDAGISel::PreprocessISelDAG() { - // OptForSize is used in pattern predicates that isel is matching. + // OptFor[Min]Size are used in pattern predicates that isel is matching. OptForSize = MF->getFunction()->optForSize(); + OptForMinSize = MF->getFunction()->optForMinSize(); + assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize"); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 2c92cc370b5..c2a00aa5b09 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -250,7 +250,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), // Alias instruction mapping movr0 to xor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, - isPseudo = 1 in + isPseudo = 1, AddedComplexity = 20 in def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; @@ -263,7 +263,7 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> { } let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode], - AddedComplexity = 1 in { + AddedComplexity = 15 in { // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC, // which only require 3 bytes compared to MOV32ri which requires 5. let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in { @@ -278,6 +278,17 @@ let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode], def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>; } +let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in { +// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1. +// FIXME: Add itinerary class and Schedule. +def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "", + [(set GR32:$dst, i32immSExt8:$src)]>, + Requires<[OptForMinSize, NotWin64WithoutFP]>; +def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "", + [(set GR64:$dst, i64immSExt8:$src)]>, + Requires<[OptForMinSize, NotWin64WithoutFP]>; +} + // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however // that would make it more difficult to rematerialize. diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 46e67bb982d..fbfcb37b1cd 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DerivedTypes.h" @@ -5391,6 +5392,60 @@ static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, return true; } +bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const { + MachineBasicBlock &MBB = *MIB->getParent(); + DebugLoc DL = MIB->getDebugLoc(); + int64_t Imm = MIB->getOperand(1).getImm(); + assert(Imm != 0 && "Using push/pop for 0 is not efficient."); + MachineBasicBlock::iterator I = MIB.getInstr(); + + int StackAdjustment; + + if (Subtarget.is64Bit()) { + assert(MIB->getOpcode() == X86::MOV64ImmSExti8 || + MIB->getOpcode() == X86::MOV32ImmSExti8); + + // Can't use push/pop lowering if the function might write to the red zone. + X86MachineFunctionInfo *X86FI = + MBB.getParent()->getInfo<X86MachineFunctionInfo>(); + if (X86FI->getUsesRedZone()) { + MIB->setDesc(get(MIB->getOpcode() == X86::MOV32ImmSExti8 ? X86::MOV32ri + : X86::MOV64ri)); + return true; + } + + // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and + // widen the register if necessary. + StackAdjustment = 8; + BuildMI(MBB, I, DL, get(X86::PUSH64i8)).addImm(Imm); + MIB->setDesc(get(X86::POP64r)); + MIB->getOperand(0) + .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64)); + } else { + assert(MIB->getOpcode() == X86::MOV32ImmSExti8); + StackAdjustment = 4; + BuildMI(MBB, I, DL, get(X86::PUSH32i8)).addImm(Imm); + MIB->setDesc(get(X86::POP32r)); + } + + // Build CFI if necessary. + MachineFunction &MF = *MBB.getParent(); + const X86FrameLowering *TFL = Subtarget.getFrameLowering(); + bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + bool NeedsDwarfCFI = + !IsWin64Prologue && + (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry()); + bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI; + if (EmitCFI) { + TFL->BuildCFI(MBB, I, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment)); + TFL->BuildCFI(MBB, std::next(I), DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment)); + } + + return true; +} + // LoadStackGuard has so far only been implemented for 64-bit MachO. Different // code sequence is needed for other targets. static void expandLoadStackGuard(MachineInstrBuilder &MIB, @@ -5423,6 +5478,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { return expandMOV32r1(MIB, *this, /*MinusOne=*/ false); case X86::MOV32r_1: return expandMOV32r1(MIB, *this, /*MinusOne=*/ true); + case X86::MOV32ImmSExti8: + case X86::MOV64ImmSExti8: + return ExpandMOVImmSExti8(MIB); case X86::SETB_C8r: return Expand2AddrUndef(MIB, get(X86::SBB8rr)); case X86::SETB_C16r: diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 7439fa2f740..79603bf5638 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -23,6 +23,7 @@ #include "X86GenInstrInfo.inc" namespace llvm { + class MachineInstrBuilder; class X86RegisterInfo; class X86Subtarget; @@ -564,6 +565,9 @@ private: /// operand and follow operands form a reference to the stack frame. bool isFrameOperand(const MachineInstr *MI, unsigned int Op, int &FrameIndex) const; + + /// Expand the MOVImmSExti8 pseudo-instructions. + bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const; }; } // End llvm namespace diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index a4506610b7a..2788906a57a 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -865,6 +865,8 @@ def In32BitMode : Predicate<"Subtarget->is32Bit()">, AssemblerPredicate<"Mode32Bit", "32-bit mode">; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; +def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||" + "Subtarget->getFrameLowering()->hasFP(*MF)">; def IsPS4 : Predicate<"Subtarget->isTargetPS4()">; def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; @@ -878,6 +880,7 @@ def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||" def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">; def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">; def OptForSize : Predicate<"OptForSize">; +def OptForMinSize : Predicate<"OptForMinSize">; def OptForSpeed : Predicate<"!OptForSize">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h index 00515dde556..61586784541 100644 --- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -96,6 +96,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// copies. bool IsSplitCSR = false; + /// True if this function uses the red zone. + bool UsesRedZone = false; + private: /// ForwardedMustTailRegParms - A list of virtual and physical registers /// that must be forwarded to every musttail call. @@ -167,6 +170,9 @@ public: bool isSplitCSR() const { return IsSplitCSR; } void setIsSplitCSR(bool s) { IsSplitCSR = s; } + + bool getUsesRedZone() const { return UsesRedZone; } + void setUsesRedZone(bool V) { UsesRedZone = V; } }; } // End llvm namespace |