author | Michael Kuperstein <michael.m.kuperstein@intel.com> | 2015-02-01 11:44:44 +0000 |
---|---|---|
committer | Michael Kuperstein <michael.m.kuperstein@intel.com> | 2015-02-01 11:44:44 +0000 |
commit | bd57186c763f9c47a6da570f56aceabd830e32a0 (patch) | |
tree | e50de967a38d0d8a6a12df08fd6a6628bd7f7d49 /llvm/lib/Target/X86/X86FastISel.cpp | |
parent | af3c256ffa7110965cd05dcf49a9f4f70c7f225c (diff) | |
[X86] Convert esp-relative movs of function arguments to pushes, step 2
This moves the transformation introduced in r223757 into a separate MI pass.
This allows it to cover many more cases (not only cases where there must be a
reserved call frame) and to perform rudimentary call folding. It still doesn't
have a heuristic, so it is enabled only for optsize/minsize, with stack
alignment <= 8, where it ought to be a fairly clear win.
Differential Revision: http://reviews.llvm.org/D6789
llvm-svn: 227728
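As a quick illustration of what the transformation does (this example is not part of the patch, and the function names are made up), consider a 32-bit cdecl call compiled at optsize:

```c++
// Illustrative only -- not code from this patch. Hypothetical functions
// showing the codegen effect of the mov->push conversion on 32-bit x86.
extern void callee(int a, int b, int c);

void caller() {
  callee(1, 2, 3);
}

// Before (esp-relative movs into a reserved call frame), roughly:
//   subl  $12, %esp
//   movl  $3, 8(%esp)      (8-byte encoding)
//   movl  $2, 4(%esp)
//   movl  $1, (%esp)
//   calll callee
//   addl  $12, %esp
//
// After the pass converts the movs to pushes, roughly:
//   pushl $3               (2-byte encoding)
//   pushl $2
//   pushl $1
//   calll callee
//   addl  $12, %esp
```

Each push of a small immediate is several bytes shorter than the corresponding esp-relative mov, hence the size win at optsize/minsize. The restriction to stack alignment <= 8 presumably avoids cases where extra padding would be needed to keep the outgoing stack aligned, which would eat into that win.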
Diffstat (limited to 'llvm/lib/Target/X86/X86FastISel.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86FastISel.cpp | 6716 |
1 file changed, 3358 insertions, 3358 deletions
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 227cacd24eb..220ba312197 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -1,3358 +1,3358 @@ -//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the X86-specific support for the FastISel class. Much -// of the target-specific code is generated by tablegen in the file -// X86GenFastISel.inc, which is #included here. -// -//===----------------------------------------------------------------------===// - -#include "X86.h" -#include "X86CallingConv.h" -#include "X86InstrBuilder.h" -#include "X86InstrInfo.h" -#include "X86MachineFunctionInfo.h" -#include "X86RegisterInfo.h" -#include "X86Subtarget.h" -#include "X86TargetMachine.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/FastISel.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Operator.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetOptions.h" -using namespace llvm; - -namespace { - -class X86FastISel final : public FastISel { - /// Subtarget - Keep a pointer to the X86Subtarget around so that we can - /// make the right decision when generating code for different targets. - const X86Subtarget *Subtarget; - - /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 - /// floating point ops. - /// When SSE is available, use it for f32 operations. - /// When SSE2 is available, use it for f64 operations. - bool X86ScalarSSEf64; - bool X86ScalarSSEf32; - -public: - explicit X86FastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) - : FastISel(funcInfo, libInfo) { - Subtarget = &TM.getSubtarget<X86Subtarget>(); - X86ScalarSSEf64 = Subtarget->hasSSE2(); - X86ScalarSSEf32 = Subtarget->hasSSE1(); - } - - bool fastSelectInstruction(const Instruction *I) override; - - /// \brief The specified machine instr operand is a vreg, and that - /// vreg is being provided by the specified load instruction. If possible, - /// try to fold the load as an operand to the instruction, returning true if - /// possible. 
- bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI) override; - - bool fastLowerArguments() override; - bool fastLowerCall(CallLoweringInfo &CLI) override; - bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; - -#include "X86GenFastISel.inc" - -private: - bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT, DebugLoc DL); - - bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO, - unsigned &ResultReg); - - bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM, - MachineMemOperand *MMO = nullptr, bool Aligned = false); - bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, - const X86AddressMode &AM, - MachineMemOperand *MMO = nullptr, bool Aligned = false); - - bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, - unsigned &ResultReg); - - bool X86SelectAddress(const Value *V, X86AddressMode &AM); - bool X86SelectCallAddress(const Value *V, X86AddressMode &AM); - - bool X86SelectLoad(const Instruction *I); - - bool X86SelectStore(const Instruction *I); - - bool X86SelectRet(const Instruction *I); - - bool X86SelectCmp(const Instruction *I); - - bool X86SelectZExt(const Instruction *I); - - bool X86SelectBranch(const Instruction *I); - - bool X86SelectShift(const Instruction *I); - - bool X86SelectDivRem(const Instruction *I); - - bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I); - - bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I); - - bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I); - - bool X86SelectSelect(const Instruction *I); - - bool X86SelectTrunc(const Instruction *I); - - bool X86SelectFPExt(const Instruction *I); - bool X86SelectFPTrunc(const Instruction *I); - - const X86InstrInfo *getInstrInfo() const { - return getTargetMachine()->getSubtargetImpl()->getInstrInfo(); - } - const X86TargetMachine *getTargetMachine() const { - return static_cast<const X86TargetMachine *>(&TM); - } - - bool handleConstantAddresses(const Value *V, X86AddressMode &AM); - - unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT); - unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT); - unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT); - unsigned fastMaterializeConstant(const Constant *C) override; - - unsigned fastMaterializeAlloca(const AllocaInst *C) override; - - unsigned fastMaterializeFloatZero(const ConstantFP *CF) override; - - /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is - /// computed in an SSE register, not on the X87 floating point stack. - bool isScalarFPTypeInSSEReg(EVT VT) const { - return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 - (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 - } - - bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false); - - bool IsMemcpySmall(uint64_t Len); - - bool TryEmitSmallMemcpy(X86AddressMode DestAM, - X86AddressMode SrcAM, uint64_t Len); - - bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, - const Value *Cond); -}; - -} // end anonymous namespace. 
- -static std::pair<X86::CondCode, bool> -getX86ConditionCode(CmpInst::Predicate Predicate) { - X86::CondCode CC = X86::COND_INVALID; - bool NeedSwap = false; - switch (Predicate) { - default: break; - // Floating-point Predicates - case CmpInst::FCMP_UEQ: CC = X86::COND_E; break; - case CmpInst::FCMP_OLT: NeedSwap = true; // fall-through - case CmpInst::FCMP_OGT: CC = X86::COND_A; break; - case CmpInst::FCMP_OLE: NeedSwap = true; // fall-through - case CmpInst::FCMP_OGE: CC = X86::COND_AE; break; - case CmpInst::FCMP_UGT: NeedSwap = true; // fall-through - case CmpInst::FCMP_ULT: CC = X86::COND_B; break; - case CmpInst::FCMP_UGE: NeedSwap = true; // fall-through - case CmpInst::FCMP_ULE: CC = X86::COND_BE; break; - case CmpInst::FCMP_ONE: CC = X86::COND_NE; break; - case CmpInst::FCMP_UNO: CC = X86::COND_P; break; - case CmpInst::FCMP_ORD: CC = X86::COND_NP; break; - case CmpInst::FCMP_OEQ: // fall-through - case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break; - - // Integer Predicates - case CmpInst::ICMP_EQ: CC = X86::COND_E; break; - case CmpInst::ICMP_NE: CC = X86::COND_NE; break; - case CmpInst::ICMP_UGT: CC = X86::COND_A; break; - case CmpInst::ICMP_UGE: CC = X86::COND_AE; break; - case CmpInst::ICMP_ULT: CC = X86::COND_B; break; - case CmpInst::ICMP_ULE: CC = X86::COND_BE; break; - case CmpInst::ICMP_SGT: CC = X86::COND_G; break; - case CmpInst::ICMP_SGE: CC = X86::COND_GE; break; - case CmpInst::ICMP_SLT: CC = X86::COND_L; break; - case CmpInst::ICMP_SLE: CC = X86::COND_LE; break; - } - - return std::make_pair(CC, NeedSwap); -} - -static std::pair<unsigned, bool> -getX86SSEConditionCode(CmpInst::Predicate Predicate) { - unsigned CC; - bool NeedSwap = false; - - // SSE Condition code mapping: - // 0 - EQ - // 1 - LT - // 2 - LE - // 3 - UNORD - // 4 - NEQ - // 5 - NLT - // 6 - NLE - // 7 - ORD - switch (Predicate) { - default: llvm_unreachable("Unexpected predicate"); - case CmpInst::FCMP_OEQ: CC = 0; break; - case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through - case CmpInst::FCMP_OLT: CC = 1; break; - case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through - case CmpInst::FCMP_OLE: CC = 2; break; - case CmpInst::FCMP_UNO: CC = 3; break; - case CmpInst::FCMP_UNE: CC = 4; break; - case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through - case CmpInst::FCMP_UGE: CC = 5; break; - case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through - case CmpInst::FCMP_UGT: CC = 6; break; - case CmpInst::FCMP_ORD: CC = 7; break; - case CmpInst::FCMP_UEQ: - case CmpInst::FCMP_ONE: CC = 8; break; - } - - return std::make_pair(CC, NeedSwap); -} - -/// \brief Check if it is possible to fold the condition from the XALU intrinsic -/// into the user. The condition code will only be updated on success. 
-bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, - const Value *Cond) { - if (!isa<ExtractValueInst>(Cond)) - return false; - - const auto *EV = cast<ExtractValueInst>(Cond); - if (!isa<IntrinsicInst>(EV->getAggregateOperand())) - return false; - - const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); - MVT RetVT; - const Function *Callee = II->getCalledFunction(); - Type *RetTy = - cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); - if (!isTypeLegal(RetTy, RetVT)) - return false; - - if (RetVT != MVT::i32 && RetVT != MVT::i64) - return false; - - X86::CondCode TmpCC; - switch (II->getIntrinsicID()) { - default: return false; - case Intrinsic::sadd_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::smul_with_overflow: - case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break; - case Intrinsic::uadd_with_overflow: - case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break; - } - - // Check if both instructions are in the same basic block. - if (II->getParent() != I->getParent()) - return false; - - // Make sure nothing is in the way - BasicBlock::const_iterator Start = I; - BasicBlock::const_iterator End = II; - for (auto Itr = std::prev(Start); Itr != End; --Itr) { - // We only expect extractvalue instructions between the intrinsic and the - // instruction to be selected. - if (!isa<ExtractValueInst>(Itr)) - return false; - - // Check that the extractvalue operand comes from the intrinsic. - const auto *EVI = cast<ExtractValueInst>(Itr); - if (EVI->getAggregateOperand() != II) - return false; - } - - CC = TmpCC; - return true; -} - -bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { - EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true); - if (evt == MVT::Other || !evt.isSimple()) - // Unhandled type. Halt "fast" selection and bail. - return false; - - VT = evt.getSimpleVT(); - // For now, require SSE/SSE2 for performing floating-point operations, - // since x87 requires additional work. - if (VT == MVT::f64 && !X86ScalarSSEf64) - return false; - if (VT == MVT::f32 && !X86ScalarSSEf32) - return false; - // Similarly, no f80 support yet. - if (VT == MVT::f80) - return false; - // We only handle legal types. For example, on x86-32 the instruction - // selector contains all of the 64-bit instructions from x86-64, - // under the assumption that i64 won't be used if the target doesn't - // support it. - return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT); -} - -#include "X86GenCallingConv.inc" - -/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT. -/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV. -/// Return true and the result register by reference if it is possible. -bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, - MachineMemOperand *MMO, unsigned &ResultReg) { - // Get opcode and regclass of the output for the given load instruction. - unsigned Opc = 0; - const TargetRegisterClass *RC = nullptr; - switch (VT.getSimpleVT().SimpleTy) { - default: return false; - case MVT::i1: - case MVT::i8: - Opc = X86::MOV8rm; - RC = &X86::GR8RegClass; - break; - case MVT::i16: - Opc = X86::MOV16rm; - RC = &X86::GR16RegClass; - break; - case MVT::i32: - Opc = X86::MOV32rm; - RC = &X86::GR32RegClass; - break; - case MVT::i64: - // Must be in x86-64 mode. - Opc = X86::MOV64rm; - RC = &X86::GR64RegClass; - break; - case MVT::f32: - if (X86ScalarSSEf32) { - Opc = Subtarget->hasAVX() ? 
X86::VMOVSSrm : X86::MOVSSrm; - RC = &X86::FR32RegClass; - } else { - Opc = X86::LD_Fp32m; - RC = &X86::RFP32RegClass; - } - break; - case MVT::f64: - if (X86ScalarSSEf64) { - Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm; - RC = &X86::FR64RegClass; - } else { - Opc = X86::LD_Fp64m; - RC = &X86::RFP64RegClass; - } - break; - case MVT::f80: - // No f80 support yet. - return false; - } - - ResultReg = createResultReg(RC); - MachineInstrBuilder MIB = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); - addFullAddress(MIB, AM); - if (MMO) - MIB->addMemOperand(*FuncInfo.MF, MMO); - return true; -} - -/// X86FastEmitStore - Emit a machine instruction to store a value Val of -/// type VT. The address is either pre-computed, consisted of a base ptr, Ptr -/// and a displacement offset, or a GlobalAddress, -/// i.e. V. Return true if it is possible. -bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, - const X86AddressMode &AM, - MachineMemOperand *MMO, bool Aligned) { - // Get opcode and regclass of the output for the given store instruction. - unsigned Opc = 0; - switch (VT.getSimpleVT().SimpleTy) { - case MVT::f80: // No f80 support yet. - default: return false; - case MVT::i1: { - // Mask out all but lowest bit. - unsigned AndResult = createResultReg(&X86::GR8RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(X86::AND8ri), AndResult) - .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1); - ValReg = AndResult; - } - // FALLTHROUGH, handling i1 as i8. - case MVT::i8: Opc = X86::MOV8mr; break; - case MVT::i16: Opc = X86::MOV16mr; break; - case MVT::i32: Opc = X86::MOV32mr; break; - case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode. - case MVT::f32: - Opc = X86ScalarSSEf32 ? - (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m; - break; - case MVT::f64: - Opc = X86ScalarSSEf64 ? - (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m; - break; - case MVT::v4f32: - if (Aligned) - Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr; - else - Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr; - break; - case MVT::v2f64: - if (Aligned) - Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr; - else - Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr; - break; - case MVT::v4i32: - case MVT::v2i64: - case MVT::v8i16: - case MVT::v16i8: - if (Aligned) - Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr; - else - Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr; - break; - } - - MachineInstrBuilder MIB = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); - addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill)); - if (MMO) - MIB->addMemOperand(*FuncInfo.MF, MMO); - - return true; -} - -bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, - const X86AddressMode &AM, - MachineMemOperand *MMO, bool Aligned) { - // Handle 'null' like i32/i64 0. - if (isa<ConstantPointerNull>(Val)) - Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext())); - - // If this is a store of a simple constant, fold the constant into the store. - if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { - unsigned Opc = 0; - bool Signed = true; - switch (VT.getSimpleVT().SimpleTy) { - default: break; - case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8. 
- case MVT::i8: Opc = X86::MOV8mi; break; - case MVT::i16: Opc = X86::MOV16mi; break; - case MVT::i32: Opc = X86::MOV32mi; break; - case MVT::i64: - // Must be a 32-bit sign extended value. - if (isInt<32>(CI->getSExtValue())) - Opc = X86::MOV64mi32; - break; - } - - if (Opc) { - MachineInstrBuilder MIB = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); - addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue() - : CI->getZExtValue()); - if (MMO) - MIB->addMemOperand(*FuncInfo.MF, MMO); - return true; - } - } - - unsigned ValReg = getRegForValue(Val); - if (ValReg == 0) - return false; - - bool ValKill = hasTrivialKill(Val); - return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned); -} - -/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of -/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g. -/// ISD::SIGN_EXTEND). -bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, - unsigned Src, EVT SrcVT, - unsigned &ResultReg) { - unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, - Src, /*TODO: Kill=*/false); - if (RR == 0) - return false; - - ResultReg = RR; - return true; -} - -bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) { - // Handle constant address. - if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { - // Can't handle alternate code models yet. - if (TM.getCodeModel() != CodeModel::Small) - return false; - - // Can't handle TLS yet. - if (GV->isThreadLocal()) - return false; - - // RIP-relative addresses can't have additional register operands, so if - // we've already folded stuff into the addressing mode, just force the - // global value into its own register, which we can use as the basereg. - if (!Subtarget->isPICStyleRIPRel() || - (AM.Base.Reg == 0 && AM.IndexReg == 0)) { - // Okay, we've committed to selecting this global. Set up the address. - AM.GV = GV; - - // Allow the subtarget to classify the global. - unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM); - - // If this reference is relative to the pic base, set it now. - if (isGlobalRelativeToPICBase(GVFlags)) { - // FIXME: How do we know Base.Reg is free?? - AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); - } - - // Unless the ABI requires an extra load, return a direct reference to - // the global. - if (!isGlobalStubReference(GVFlags)) { - if (Subtarget->isPICStyleRIPRel()) { - // Use rip-relative addressing if we can. Above we verified that the - // base and index registers are unused. - assert(AM.Base.Reg == 0 && AM.IndexReg == 0); - AM.Base.Reg = X86::RIP; - } - AM.GVOpFlags = GVFlags; - return true; - } - - // Ok, we need to do a load from a stub. If we've already loaded from - // this stub, reuse the loaded pointer, otherwise emit the load now. - DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V); - unsigned LoadReg; - if (I != LocalValueMap.end() && I->second != 0) { - LoadReg = I->second; - } else { - // Issue load from stub. - unsigned Opc = 0; - const TargetRegisterClass *RC = nullptr; - X86AddressMode StubAM; - StubAM.Base.Reg = AM.Base.Reg; - StubAM.GV = GV; - StubAM.GVOpFlags = GVFlags; - - // Prepare for inserting code in the local-value area. 
- SavePoint SaveInsertPt = enterLocalValueArea(); - - if (TLI.getPointerTy() == MVT::i64) { - Opc = X86::MOV64rm; - RC = &X86::GR64RegClass; - - if (Subtarget->isPICStyleRIPRel()) - StubAM.Base.Reg = X86::RIP; - } else { - Opc = X86::MOV32rm; - RC = &X86::GR32RegClass; - } - - LoadReg = createResultReg(RC); - MachineInstrBuilder LoadMI = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg); - addFullAddress(LoadMI, StubAM); - - // Ok, back to normal mode. - leaveLocalValueArea(SaveInsertPt); - - // Prevent loading GV stub multiple times in same MBB. - LocalValueMap[V] = LoadReg; - } - - // Now construct the final address. Note that the Disp, Scale, - // and Index values may already be set here. - AM.Base.Reg = LoadReg; - AM.GV = nullptr; - return true; - } - } - - // If all else fails, try to materialize the value in a register. - if (!AM.GV || !Subtarget->isPICStyleRIPRel()) { - if (AM.Base.Reg == 0) { - AM.Base.Reg = getRegForValue(V); - return AM.Base.Reg != 0; - } - if (AM.IndexReg == 0) { - assert(AM.Scale == 1 && "Scale with no index!"); - AM.IndexReg = getRegForValue(V); - return AM.IndexReg != 0; - } - } - - return false; -} - -/// X86SelectAddress - Attempt to fill in an address from the given value. -/// -bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { - SmallVector<const Value *, 32> GEPs; -redo_gep: - const User *U = nullptr; - unsigned Opcode = Instruction::UserOp1; - if (const Instruction *I = dyn_cast<Instruction>(V)) { - // Don't walk into other basic blocks; it's possible we haven't - // visited them yet, so the instructions may not yet be assigned - // virtual registers. - if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) || - FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { - Opcode = I->getOpcode(); - U = I; - } - } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { - Opcode = C->getOpcode(); - U = C; - } - - if (PointerType *Ty = dyn_cast<PointerType>(V->getType())) - if (Ty->getAddressSpace() > 255) - // Fast instruction selection doesn't support the special - // address spaces. - return false; - - switch (Opcode) { - default: break; - case Instruction::BitCast: - // Look past bitcasts. - return X86SelectAddress(U->getOperand(0), AM); - - case Instruction::IntToPtr: - // Look past no-op inttoptrs. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) - return X86SelectAddress(U->getOperand(0), AM); - break; - - case Instruction::PtrToInt: - // Look past no-op ptrtoints. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) - return X86SelectAddress(U->getOperand(0), AM); - break; - - case Instruction::Alloca: { - // Do static allocas. - const AllocaInst *A = cast<AllocaInst>(V); - DenseMap<const AllocaInst *, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(A); - if (SI != FuncInfo.StaticAllocaMap.end()) { - AM.BaseType = X86AddressMode::FrameIndexBase; - AM.Base.FrameIndex = SI->second; - return true; - } - break; - } - - case Instruction::Add: { - // Adds of constants are common and easy enough. - if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { - uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue(); - // They have to fit in the 32-bit signed displacement field though. - if (isInt<32>(Disp)) { - AM.Disp = (uint32_t)Disp; - return X86SelectAddress(U->getOperand(0), AM); - } - } - break; - } - - case Instruction::GetElementPtr: { - X86AddressMode SavedAM = AM; - - // Pattern-match simple GEPs. 
- uint64_t Disp = (int32_t)AM.Disp; - unsigned IndexReg = AM.IndexReg; - unsigned Scale = AM.Scale; - gep_type_iterator GTI = gep_type_begin(U); - // Iterate through the indices, folding what we can. Constants can be - // folded, and one dynamic index can be handled, if the scale is supported. - for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); - i != e; ++i, ++GTI) { - const Value *Op = *i; - if (StructType *STy = dyn_cast<StructType>(*GTI)) { - const StructLayout *SL = DL.getStructLayout(STy); - Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue()); - continue; - } - - // A array/variable index is always of the form i*S where S is the - // constant scale size. See if we can push the scale into immediates. - uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); - for (;;) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { - // Constant-offset addressing. - Disp += CI->getSExtValue() * S; - break; - } - if (canFoldAddIntoGEP(U, Op)) { - // A compatible add with a constant operand. Fold the constant. - ConstantInt *CI = - cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); - Disp += CI->getSExtValue() * S; - // Iterate on the other operand. - Op = cast<AddOperator>(Op)->getOperand(0); - continue; - } - if (IndexReg == 0 && - (!AM.GV || !Subtarget->isPICStyleRIPRel()) && - (S == 1 || S == 2 || S == 4 || S == 8)) { - // Scaled-index addressing. - Scale = S; - IndexReg = getRegForGEPIndex(Op).first; - if (IndexReg == 0) - return false; - break; - } - // Unsupported. - goto unsupported_gep; - } - } - - // Check for displacement overflow. - if (!isInt<32>(Disp)) - break; - - AM.IndexReg = IndexReg; - AM.Scale = Scale; - AM.Disp = (uint32_t)Disp; - GEPs.push_back(V); - - if (const GetElementPtrInst *GEP = - dyn_cast<GetElementPtrInst>(U->getOperand(0))) { - // Ok, the GEP indices were covered by constant-offset and scaled-index - // addressing. Update the address state and move on to examining the base. - V = GEP; - goto redo_gep; - } else if (X86SelectAddress(U->getOperand(0), AM)) { - return true; - } - - // If we couldn't merge the gep value into this addr mode, revert back to - // our address and just match the value instead of completely failing. - AM = SavedAM; - - for (SmallVectorImpl<const Value *>::reverse_iterator - I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I) - if (handleConstantAddresses(*I, AM)) - return true; - - return false; - unsupported_gep: - // Ok, the GEP indices weren't all covered. - break; - } - } - - return handleConstantAddresses(V, AM); -} - -/// X86SelectCallAddress - Attempt to fill in an address from the given value. -/// -bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { - const User *U = nullptr; - unsigned Opcode = Instruction::UserOp1; - const Instruction *I = dyn_cast<Instruction>(V); - // Record if the value is defined in the same basic block. - // - // This information is crucial to know whether or not folding an - // operand is valid. - // Indeed, FastISel generates or reuses a virtual register for all - // operands of all instructions it selects. Obviously, the definition and - // its uses must use the same virtual register otherwise the produced - // code is incorrect. - // Before instruction selection, FunctionLoweringInfo::set sets the virtual - // registers for values that are alive across basic blocks. 
This ensures - // that the values are consistently set between across basic block, even - // if different instruction selection mechanisms are used (e.g., a mix of - // SDISel and FastISel). - // For values local to a basic block, the instruction selection process - // generates these virtual registers with whatever method is appropriate - // for its needs. In particular, FastISel and SDISel do not share the way - // local virtual registers are set. - // Therefore, this is impossible (or at least unsafe) to share values - // between basic blocks unless they use the same instruction selection - // method, which is not guarantee for X86. - // Moreover, things like hasOneUse could not be used accurately, if we - // allow to reference values across basic blocks whereas they are not - // alive across basic blocks initially. - bool InMBB = true; - if (I) { - Opcode = I->getOpcode(); - U = I; - InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); - } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { - Opcode = C->getOpcode(); - U = C; - } - - switch (Opcode) { - default: break; - case Instruction::BitCast: - // Look past bitcasts if its operand is in the same BB. - if (InMBB) - return X86SelectCallAddress(U->getOperand(0), AM); - break; - - case Instruction::IntToPtr: - // Look past no-op inttoptrs if its operand is in the same BB. - if (InMBB && - TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) - return X86SelectCallAddress(U->getOperand(0), AM); - break; - - case Instruction::PtrToInt: - // Look past no-op ptrtoints if its operand is in the same BB. - if (InMBB && - TLI.getValueType(U->getType()) == TLI.getPointerTy()) - return X86SelectCallAddress(U->getOperand(0), AM); - break; - } - - // Handle constant address. - if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { - // Can't handle alternate code models yet. - if (TM.getCodeModel() != CodeModel::Small) - return false; - - // RIP-relative addresses can't have additional register operands. - if (Subtarget->isPICStyleRIPRel() && - (AM.Base.Reg != 0 || AM.IndexReg != 0)) - return false; - - // Can't handle DLL Import. - if (GV->hasDLLImportStorageClass()) - return false; - - // Can't handle TLS. - if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) - if (GVar->isThreadLocal()) - return false; - - // Okay, we've committed to selecting this global. Set up the basic address. - AM.GV = GV; - - // No ABI requires an extra load for anything other than DLLImport, which - // we rejected above. Return a direct reference to the global. - if (Subtarget->isPICStyleRIPRel()) { - // Use rip-relative addressing if we can. Above we verified that the - // base and index registers are unused. - assert(AM.Base.Reg == 0 && AM.IndexReg == 0); - AM.Base.Reg = X86::RIP; - } else if (Subtarget->isPICStyleStubPIC()) { - AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET; - } else if (Subtarget->isPICStyleGOT()) { - AM.GVOpFlags = X86II::MO_GOTOFF; - } - - return true; - } - - // If all else fails, try to materialize the value in a register. - if (!AM.GV || !Subtarget->isPICStyleRIPRel()) { - if (AM.Base.Reg == 0) { - AM.Base.Reg = getRegForValue(V); - return AM.Base.Reg != 0; - } - if (AM.IndexReg == 0) { - assert(AM.Scale == 1 && "Scale with no index!"); - AM.IndexReg = getRegForValue(V); - return AM.IndexReg != 0; - } - } - - return false; -} - - -/// X86SelectStore - Select and emit code to implement store instructions. -bool X86FastISel::X86SelectStore(const Instruction *I) { - // Atomic stores need special handling. 
- const StoreInst *S = cast<StoreInst>(I); - - if (S->isAtomic()) - return false; - - const Value *Val = S->getValueOperand(); - const Value *Ptr = S->getPointerOperand(); - - MVT VT; - if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true)) - return false; - - unsigned Alignment = S->getAlignment(); - unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType()); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = ABIAlignment; - bool Aligned = Alignment >= ABIAlignment; - - X86AddressMode AM; - if (!X86SelectAddress(Ptr, AM)) - return false; - - return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned); -} - -/// X86SelectRet - Select and emit code to implement ret instructions. -bool X86FastISel::X86SelectRet(const Instruction *I) { - const ReturnInst *Ret = cast<ReturnInst>(I); - const Function &F = *I->getParent()->getParent(); - const X86MachineFunctionInfo *X86MFInfo = - FuncInfo.MF->getInfo<X86MachineFunctionInfo>(); - - if (!FuncInfo.CanLowerReturn) - return false; - - CallingConv::ID CC = F.getCallingConv(); - if (CC != CallingConv::C && - CC != CallingConv::Fast && - CC != CallingConv::X86_FastCall && - CC != CallingConv::X86_64_SysV) - return false; - - if (Subtarget->isCallingConvWin64(CC)) - return false; - - // Don't handle popping bytes on return for now. - if (X86MFInfo->getBytesToPopOnReturn() != 0) - return false; - - // fastcc with -tailcallopt is intended to provide a guaranteed - // tail call optimization. Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) - return false; - - // Let SDISel handle vararg functions. - if (F.isVarArg()) - return false; - - // Build a list of return value registers. - SmallVector<unsigned, 4> RetRegs; - - if (Ret->getNumOperands() > 0) { - SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); - - // Analyze operands of the call, assigning locations to each operand. - SmallVector<CCValAssign, 16> ValLocs; - CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); - CCInfo.AnalyzeReturn(Outs, RetCC_X86); - - const Value *RV = Ret->getOperand(0); - unsigned Reg = getRegForValue(RV); - if (Reg == 0) - return false; - - // Only handle a single return value for now. - if (ValLocs.size() != 1) - return false; - - CCValAssign &VA = ValLocs[0]; - - // Don't bother handling odd stuff for now. - if (VA.getLocInfo() != CCValAssign::Full) - return false; - // Only handle register returns for now. - if (!VA.isRegLoc()) - return false; - - // The calling-convention tables for x87 returns don't tell - // the whole story. - if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) - return false; - - unsigned SrcReg = Reg + VA.getValNo(); - EVT SrcVT = TLI.getValueType(RV->getType()); - EVT DstVT = VA.getValVT(); - // Special handling for extended integers. - if (SrcVT != DstVT) { - if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16) - return false; - - if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) - return false; - - assert(DstVT == MVT::i32 && "X86 should always ext to i32"); - - if (SrcVT == MVT::i1) { - if (Outs[0].Flags.isSExt()) - return false; - SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false); - SrcVT = MVT::i8; - } - unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND : - ISD::SIGN_EXTEND; - SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op, - SrcReg, /*TODO: Kill=*/false); - } - - // Make the copy. 
- unsigned DstReg = VA.getLocReg(); - const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); - // Avoid a cross-class copy. This is very unlikely. - if (!SrcRC->contains(DstReg)) - return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg); - - // Add register to return instruction. - RetRegs.push_back(VA.getLocReg()); - } - - // The x86-64 ABI for returning structs by value requires that we copy - // the sret argument into %rax for the return. We saved the argument into - // a virtual register in the entry block, so now we copy the value out - // and into %rax. We also do the same with %eax for Win32. - if (F.hasStructRetAttr() && - (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) { - unsigned Reg = X86MFInfo->getSRetReturnReg(); - assert(Reg && - "SRetReturnReg should have been set in LowerFormalArguments()!"); - unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), RetReg).addReg(Reg); - RetRegs.push_back(RetReg); - } - - // Now emit the RET. - MachineInstrBuilder MIB = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL)); - for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) - MIB.addReg(RetRegs[i], RegState::Implicit); - return true; -} - -/// X86SelectLoad - Select and emit code to implement load instructions. -/// -bool X86FastISel::X86SelectLoad(const Instruction *I) { - const LoadInst *LI = cast<LoadInst>(I); - - // Atomic loads need special handling. - if (LI->isAtomic()) - return false; - - MVT VT; - if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true)) - return false; - - const Value *Ptr = LI->getPointerOperand(); - - X86AddressMode AM; - if (!X86SelectAddress(Ptr, AM)) - return false; - - unsigned ResultReg = 0; - if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg)) - return false; - - updateValueMap(I, ResultReg); - return true; -} - -static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { - bool HasAVX = Subtarget->hasAVX(); - bool X86ScalarSSEf32 = Subtarget->hasSSE1(); - bool X86ScalarSSEf64 = Subtarget->hasSSE2(); - - switch (VT.getSimpleVT().SimpleTy) { - default: return 0; - case MVT::i8: return X86::CMP8rr; - case MVT::i16: return X86::CMP16rr; - case MVT::i32: return X86::CMP32rr; - case MVT::i64: return X86::CMP64rr; - case MVT::f32: - return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0; - case MVT::f64: - return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0; - } -} - -/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS -/// of the comparison, return an opcode that works for the compare (e.g. -/// CMP32ri) otherwise return 0. -static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) { - switch (VT.getSimpleVT().SimpleTy) { - // Otherwise, we can't fold the immediate into this comparison. - default: return 0; - case MVT::i8: return X86::CMP8ri; - case MVT::i16: return X86::CMP16ri; - case MVT::i32: return X86::CMP32ri; - case MVT::i64: - // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext - // field. 
- if ((int)RHSC->getSExtValue() == RHSC->getSExtValue()) - return X86::CMP64ri32; - return 0; - } -} - -bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, - EVT VT, DebugLoc CurDbgLoc) { - unsigned Op0Reg = getRegForValue(Op0); - if (Op0Reg == 0) return false; - - // Handle 'null' like i32/i64 0. - if (isa<ConstantPointerNull>(Op1)) - Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext())); - - // We have two options: compare with register or immediate. If the RHS of - // the compare is an immediate that we can fold into this compare, use - // CMPri, otherwise use CMPrr. - if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { - if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc)) - .addReg(Op0Reg) - .addImm(Op1C->getSExtValue()); - return true; - } - } - - unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget); - if (CompareOpc == 0) return false; - - unsigned Op1Reg = getRegForValue(Op1); - if (Op1Reg == 0) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc)) - .addReg(Op0Reg) - .addReg(Op1Reg); - - return true; -} - -bool X86FastISel::X86SelectCmp(const Instruction *I) { - const CmpInst *CI = cast<CmpInst>(I); - - MVT VT; - if (!isTypeLegal(I->getOperand(0)->getType(), VT)) - return false; - - // Try to optimize or fold the cmp. - CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); - unsigned ResultReg = 0; - switch (Predicate) { - default: break; - case CmpInst::FCMP_FALSE: { - ResultReg = createResultReg(&X86::GR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0), - ResultReg); - ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true, - X86::sub_8bit); - if (!ResultReg) - return false; - break; - } - case CmpInst::FCMP_TRUE: { - ResultReg = createResultReg(&X86::GR8RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri), - ResultReg).addImm(1); - break; - } - } - - if (ResultReg) { - updateValueMap(I, ResultReg); - return true; - } - - const Value *LHS = CI->getOperand(0); - const Value *RHS = CI->getOperand(1); - - // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0. - // We don't have to materialize a zero constant for this case and can just use - // %x again on the RHS. - if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { - const auto *RHSC = dyn_cast<ConstantFP>(RHS); - if (RHSC && RHSC->isNullValue()) - RHS = LHS; - } - - // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. 
- static unsigned SETFOpcTable[2][3] = { - { X86::SETEr, X86::SETNPr, X86::AND8rr }, - { X86::SETNEr, X86::SETPr, X86::OR8rr } - }; - unsigned *SETFOpc = nullptr; - switch (Predicate) { - default: break; - case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break; - case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break; - } - - ResultReg = createResultReg(&X86::GR8RegClass); - if (SETFOpc) { - if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc())) - return false; - - unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); - unsigned FlagReg2 = createResultReg(&X86::GR8RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]), - FlagReg1); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]), - FlagReg2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]), - ResultReg).addReg(FlagReg1).addReg(FlagReg2); - updateValueMap(I, ResultReg); - return true; - } - - X86::CondCode CC; - bool SwapArgs; - std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate); - assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); - unsigned Opc = X86::getSETFromCond(CC); - - if (SwapArgs) - std::swap(LHS, RHS); - - // Emit a compare of LHS/RHS. - if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc())) - return false; - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); - updateValueMap(I, ResultReg); - return true; -} - -bool X86FastISel::X86SelectZExt(const Instruction *I) { - EVT DstVT = TLI.getValueType(I->getType()); - if (!TLI.isTypeLegal(DstVT)) - return false; - - unsigned ResultReg = getRegForValue(I->getOperand(0)); - if (ResultReg == 0) - return false; - - // Handle zero-extension from i1 to i8, which is common. - MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType()); - if (SrcVT.SimpleTy == MVT::i1) { - // Set the high bits to zero. - ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); - SrcVT = MVT::i8; - - if (ResultReg == 0) - return false; - } - - if (DstVT == MVT::i64) { - // Handle extension to 64-bits via sub-register shenanigans. - unsigned MovInst; - - switch (SrcVT.SimpleTy) { - case MVT::i8: MovInst = X86::MOVZX32rr8; break; - case MVT::i16: MovInst = X86::MOVZX32rr16; break; - case MVT::i32: MovInst = X86::MOV32rr; break; - default: llvm_unreachable("Unexpected zext to i64 source type"); - } - - unsigned Result32 = createResultReg(&X86::GR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32) - .addReg(ResultReg); - - ResultReg = createResultReg(&X86::GR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG), - ResultReg) - .addImm(0).addReg(Result32).addImm(X86::sub_32bit); - } else if (DstVT != MVT::i8) { - ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND, - ResultReg, /*Kill=*/true); - if (ResultReg == 0) - return false; - } - - updateValueMap(I, ResultReg); - return true; -} - -bool X86FastISel::X86SelectBranch(const Instruction *I) { - // Unconditional branches are selected by tablegen-generated code. - // Handle a conditional branch. - const BranchInst *BI = cast<BranchInst>(I); - MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; - MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - - // Fold the common case of a conditional branch with a comparison - // in the same block (values defined on other blocks may not have - // initialized registers). 
- X86::CondCode CC; - if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { - if (CI->hasOneUse() && CI->getParent() == I->getParent()) { - EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); - - // Try to optimize or fold the cmp. - CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); - switch (Predicate) { - default: break; - case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true; - case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true; - } - - const Value *CmpLHS = CI->getOperand(0); - const Value *CmpRHS = CI->getOperand(1); - - // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, - // 0.0. - // We don't have to materialize a zero constant for this case and can just - // use %x again on the RHS. - if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { - const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS); - if (CmpRHSC && CmpRHSC->isNullValue()) - CmpRHS = CmpLHS; - } - - // Try to take advantage of fallthrough opportunities. - if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { - std::swap(TrueMBB, FalseMBB); - Predicate = CmpInst::getInversePredicate(Predicate); - } - - // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition - // code check. Instead two branch instructions are required to check all - // the flags. First we change the predicate to a supported condition code, - // which will be the first branch. Later one we will emit the second - // branch. - bool NeedExtraBranch = false; - switch (Predicate) { - default: break; - case CmpInst::FCMP_OEQ: - std::swap(TrueMBB, FalseMBB); // fall-through - case CmpInst::FCMP_UNE: - NeedExtraBranch = true; - Predicate = CmpInst::FCMP_ONE; - break; - } - - bool SwapArgs; - unsigned BranchOpc; - std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate); - assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); - - BranchOpc = X86::GetCondBranchFromCond(CC); - if (SwapArgs) - std::swap(CmpLHS, CmpRHS); - - // Emit a compare of the LHS and RHS, setting the flags. - if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc())) - return false; - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc)) - .addMBB(TrueMBB); - - // X86 requires a second branch to handle UNE (and OEQ, which is mapped - // to UNE above). - if (NeedExtraBranch) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1)) - .addMBB(TrueMBB); - } - - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TrueMBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); - - // Emits an unconditional branch to the FalseBB, obtains the branch - // weight, and adds it to the successor list. - fastEmitBranch(FalseMBB, DbgLoc); - - return true; - } - } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) { - // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which - // typically happen for _Bool and C++ bools. 
- MVT SourceVT; - if (TI->hasOneUse() && TI->getParent() == I->getParent() && - isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) { - unsigned TestOpc = 0; - switch (SourceVT.SimpleTy) { - default: break; - case MVT::i8: TestOpc = X86::TEST8ri; break; - case MVT::i16: TestOpc = X86::TEST16ri; break; - case MVT::i32: TestOpc = X86::TEST32ri; break; - case MVT::i64: TestOpc = X86::TEST64ri32; break; - } - if (TestOpc) { - unsigned OpReg = getRegForValue(TI->getOperand(0)); - if (OpReg == 0) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc)) - .addReg(OpReg).addImm(1); - - unsigned JmpOpc = X86::JNE_1; - if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { - std::swap(TrueMBB, FalseMBB); - JmpOpc = X86::JE_1; - } - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc)) - .addMBB(TrueMBB); - fastEmitBranch(FalseMBB, DbgLoc); - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TrueMBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); - return true; - } - } - } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) { - // Fake request the condition, otherwise the intrinsic might be completely - // optimized away. - unsigned TmpReg = getRegForValue(BI->getCondition()); - if (TmpReg == 0) - return false; - - unsigned BranchOpc = X86::GetCondBranchFromCond(CC); - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc)) - .addMBB(TrueMBB); - fastEmitBranch(FalseMBB, DbgLoc); - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TrueMBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); - return true; - } - - // Otherwise do a clumsy setcc and re-test it. - // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used - // in an explicit cast, so make sure to handle that correctly. 
- unsigned OpReg = getRegForValue(BI->getCondition()); - if (OpReg == 0) return false; - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) - .addReg(OpReg).addImm(1); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1)) - .addMBB(TrueMBB); - fastEmitBranch(FalseMBB, DbgLoc); - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TrueMBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); - return true; -} - -bool X86FastISel::X86SelectShift(const Instruction *I) { - unsigned CReg = 0, OpReg = 0; - const TargetRegisterClass *RC = nullptr; - if (I->getType()->isIntegerTy(8)) { - CReg = X86::CL; - RC = &X86::GR8RegClass; - switch (I->getOpcode()) { - case Instruction::LShr: OpReg = X86::SHR8rCL; break; - case Instruction::AShr: OpReg = X86::SAR8rCL; break; - case Instruction::Shl: OpReg = X86::SHL8rCL; break; - default: return false; - } - } else if (I->getType()->isIntegerTy(16)) { - CReg = X86::CX; - RC = &X86::GR16RegClass; - switch (I->getOpcode()) { - case Instruction::LShr: OpReg = X86::SHR16rCL; break; - case Instruction::AShr: OpReg = X86::SAR16rCL; break; - case Instruction::Shl: OpReg = X86::SHL16rCL; break; - default: return false; - } - } else if (I->getType()->isIntegerTy(32)) { - CReg = X86::ECX; - RC = &X86::GR32RegClass; - switch (I->getOpcode()) { - case Instruction::LShr: OpReg = X86::SHR32rCL; break; - case Instruction::AShr: OpReg = X86::SAR32rCL; break; - case Instruction::Shl: OpReg = X86::SHL32rCL; break; - default: return false; - } - } else if (I->getType()->isIntegerTy(64)) { - CReg = X86::RCX; - RC = &X86::GR64RegClass; - switch (I->getOpcode()) { - case Instruction::LShr: OpReg = X86::SHR64rCL; break; - case Instruction::AShr: OpReg = X86::SAR64rCL; break; - case Instruction::Shl: OpReg = X86::SHL64rCL; break; - default: return false; - } - } else { - return false; - } - - MVT VT; - if (!isTypeLegal(I->getType(), VT)) - return false; - - unsigned Op0Reg = getRegForValue(I->getOperand(0)); - if (Op0Reg == 0) return false; - - unsigned Op1Reg = getRegForValue(I->getOperand(1)); - if (Op1Reg == 0) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), - CReg).addReg(Op1Reg); - - // The shift instruction uses X86::CL. If we defined a super-register - // of X86::CL, emit a subreg KILL to precisely describe what we're doing here. - if (CReg != X86::CL) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::KILL), X86::CL) - .addReg(CReg, RegState::Kill); - - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg) - .addReg(Op0Reg); - updateValueMap(I, ResultReg); - return true; -} - -bool X86FastISel::X86SelectDivRem(const Instruction *I) { - const static unsigned NumTypes = 4; // i8, i16, i32, i64 - const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem - const static bool S = true; // IsSigned - const static bool U = false; // !IsSigned - const static unsigned Copy = TargetOpcode::COPY; - // For the X86 DIV/IDIV instruction, in most cases the dividend - // (numerator) must be in a specific register pair highreg:lowreg, - // producing the quotient in lowreg and the remainder in highreg. - // For most data types, to set up the instruction, the dividend is - // copied into lowreg, and lowreg is sign-extended or zero-extended - // into highreg. 
The exception is i8, where the dividend is defined - // as a single register rather than a register pair, and we - // therefore directly sign-extend or zero-extend the dividend into - // lowreg, instead of copying, and ignore the highreg. - const static struct DivRemEntry { - // The following portion depends only on the data type. - const TargetRegisterClass *RC; - unsigned LowInReg; // low part of the register pair - unsigned HighInReg; // high part of the register pair - // The following portion depends on both the data type and the operation. - struct DivRemResult { - unsigned OpDivRem; // The specific DIV/IDIV opcode to use. - unsigned OpSignExtend; // Opcode for sign-extending lowreg into - // highreg, or copying a zero into highreg. - unsigned OpCopy; // Opcode for copying dividend into lowreg, or - // zero/sign-extending into lowreg for i8. - unsigned DivRemResultReg; // Register containing the desired result. - bool IsOpSigned; // Whether to use signed or unsigned form. - } ResultTable[NumOps]; - } OpTable[NumTypes] = { - { &X86::GR8RegClass, X86::AX, 0, { - { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv - { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem - { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv - { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem - } - }, // i8 - { &X86::GR16RegClass, X86::AX, X86::DX, { - { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv - { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem - { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv - { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem - } - }, // i16 - { &X86::GR32RegClass, X86::EAX, X86::EDX, { - { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv - { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem - { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv - { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem - } - }, // i32 - { &X86::GR64RegClass, X86::RAX, X86::RDX, { - { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv - { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem - { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv - { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem - } - }, // i64 - }; - - MVT VT; - if (!isTypeLegal(I->getType(), VT)) - return false; - - unsigned TypeIndex, OpIndex; - switch (VT.SimpleTy) { - default: return false; - case MVT::i8: TypeIndex = 0; break; - case MVT::i16: TypeIndex = 1; break; - case MVT::i32: TypeIndex = 2; break; - case MVT::i64: TypeIndex = 3; - if (!Subtarget->is64Bit()) - return false; - break; - } - - switch (I->getOpcode()) { - default: llvm_unreachable("Unexpected div/rem opcode"); - case Instruction::SDiv: OpIndex = 0; break; - case Instruction::SRem: OpIndex = 1; break; - case Instruction::UDiv: OpIndex = 2; break; - case Instruction::URem: OpIndex = 3; break; - } - - const DivRemEntry &TypeEntry = OpTable[TypeIndex]; - const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex]; - unsigned Op0Reg = getRegForValue(I->getOperand(0)); - if (Op0Reg == 0) - return false; - unsigned Op1Reg = getRegForValue(I->getOperand(1)); - if (Op1Reg == 0) - return false; - - // Move op0 into low-order input register. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg); - // Zero-extend or sign-extend into high-order input register. 
- if (OpEntry.OpSignExtend) { - if (OpEntry.IsOpSigned) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(OpEntry.OpSignExtend)); - else { - unsigned Zero32 = createResultReg(&X86::GR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(X86::MOV32r0), Zero32); - - // Copy the zero into the appropriate sub/super/identical physical - // register. Unfortunately the operations needed are not uniform enough - // to fit neatly into the table above. - if (VT.SimpleTy == MVT::i16) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Copy), TypeEntry.HighInReg) - .addReg(Zero32, 0, X86::sub_16bit); - } else if (VT.SimpleTy == MVT::i32) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Copy), TypeEntry.HighInReg) - .addReg(Zero32); - } else if (VT.SimpleTy == MVT::i64) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg) - .addImm(0).addReg(Zero32).addImm(X86::sub_32bit); - } - } - } - // Generate the DIV/IDIV instruction. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(OpEntry.OpDivRem)).addReg(Op1Reg); - // For i8 remainder, we can't reference AH directly, as we'll end - // up with bogus copies like %R9B = COPY %AH. Reference AX - // instead to prevent AH references in a REX instruction. - // - // The current assumption of the fast register allocator is that isel - // won't generate explicit references to the GPR8_NOREX registers. If - // the allocator and/or the backend get enhanced to be more robust in - // that regard, this can be, and should be, removed. - unsigned ResultReg = 0; - if ((I->getOpcode() == Instruction::SRem || - I->getOpcode() == Instruction::URem) && - OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) { - unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass); - unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Copy), SourceSuperReg).addReg(X86::AX); - - // Shift AX right by 8 bits instead of using AH. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri), - ResultSuperReg).addReg(SourceSuperReg).addImm(8); - - // Now reference the 8-bit subreg of the result. - ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg, - /*Kill=*/true, X86::sub_8bit); - } - // Copy the result out of the physreg if we haven't already. - if (!ResultReg) { - ResultReg = createResultReg(TypeEntry.RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg) - .addReg(OpEntry.DivRemResultReg); - } - updateValueMap(I, ResultReg); - - return true; -} - -/// \brief Emit a conditional move instruction (if the are supported) to lower -/// the select. -bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { - // Check if the subtarget supports these instructions. - if (!Subtarget->hasCMov()) - return false; - - // FIXME: Add support for i8. - if (RetVT < MVT::i16 || RetVT > MVT::i64) - return false; - - const Value *Cond = I->getOperand(0); - const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); - bool NeedTest = true; - X86::CondCode CC = X86::COND_NE; - - // Optimize conditions coming from a compare if both instructions are in the - // same basic block (values defined in other basic blocks may not have - // initialized registers). 
- const auto *CI = dyn_cast<CmpInst>(Cond); - if (CI && (CI->getParent() == I->getParent())) { - CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); - - // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. - static unsigned SETFOpcTable[2][3] = { - { X86::SETNPr, X86::SETEr , X86::TEST8rr }, - { X86::SETPr, X86::SETNEr, X86::OR8rr } - }; - unsigned *SETFOpc = nullptr; - switch (Predicate) { - default: break; - case CmpInst::FCMP_OEQ: - SETFOpc = &SETFOpcTable[0][0]; - Predicate = CmpInst::ICMP_NE; - break; - case CmpInst::FCMP_UNE: - SETFOpc = &SETFOpcTable[1][0]; - Predicate = CmpInst::ICMP_NE; - break; - } - - bool NeedSwap; - std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate); - assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); - - const Value *CmpLHS = CI->getOperand(0); - const Value *CmpRHS = CI->getOperand(1); - if (NeedSwap) - std::swap(CmpLHS, CmpRHS); - - EVT CmpVT = TLI.getValueType(CmpLHS->getType()); - // Emit a compare of the LHS and RHS, setting the flags. - if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc())) - return false; - - if (SETFOpc) { - unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); - unsigned FlagReg2 = createResultReg(&X86::GR8RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]), - FlagReg1); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]), - FlagReg2); - auto const &II = TII.get(SETFOpc[2]); - if (II.getNumDefs()) { - unsigned TmpReg = createResultReg(&X86::GR8RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg) - .addReg(FlagReg2).addReg(FlagReg1); - } else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(FlagReg2).addReg(FlagReg1); - } - } - NeedTest = false; - } else if (foldX86XALUIntrinsic(CC, I, Cond)) { - // Fake request the condition, otherwise the intrinsic might be completely - // optimized away. - unsigned TmpReg = getRegForValue(Cond); - if (TmpReg == 0) - return false; - - NeedTest = false; - } - - if (NeedTest) { - // Selects operate on i1, however, CondReg is 8 bits width and may contain - // garbage. Indeed, only the less significant bit is supposed to be - // accurate. If we read more than the lsb, we may see non-zero values - // whereas lsb is zero. Therefore, we have to truncate Op0Reg to i1 for - // the select. This is achieved by performing TEST against 1. - unsigned CondReg = getRegForValue(Cond); - if (CondReg == 0) - return false; - bool CondIsKill = hasTrivialKill(Cond); - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) - .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1); - } - - const Value *LHS = I->getOperand(1); - const Value *RHS = I->getOperand(2); - - unsigned RHSReg = getRegForValue(RHS); - bool RHSIsKill = hasTrivialKill(RHS); - - unsigned LHSReg = getRegForValue(LHS); - bool LHSIsKill = hasTrivialKill(LHS); - - if (!LHSReg || !RHSReg) - return false; - - unsigned Opc = X86::getCMovFromCond(CC, RC->getSize()); - unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill, - LHSReg, LHSIsKill); - updateValueMap(I, ResultReg); - return true; -} - -/// \brief Emit SSE instructions to lower the select. -/// -/// Try to use SSE1/SSE2 instructions to simulate a select without branches. -/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary -/// SSE instructions are available. 
-bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { - // Optimize conditions coming from a compare if both instructions are in the - // same basic block (values defined in other basic blocks may not have - // initialized registers). - const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0)); - if (!CI || (CI->getParent() != I->getParent())) - return false; - - if (I->getType() != CI->getOperand(0)->getType() || - !((Subtarget->hasSSE1() && RetVT == MVT::f32) || - (Subtarget->hasSSE2() && RetVT == MVT::f64))) - return false; - - const Value *CmpLHS = CI->getOperand(0); - const Value *CmpRHS = CI->getOperand(1); - CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); - - // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0. - // We don't have to materialize a zero constant for this case and can just use - // %x again on the RHS. - if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { - const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS); - if (CmpRHSC && CmpRHSC->isNullValue()) - CmpRHS = CmpLHS; - } - - unsigned CC; - bool NeedSwap; - std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate); - if (CC > 7) - return false; - - if (NeedSwap) - std::swap(CmpLHS, CmpRHS); - - static unsigned OpcTable[2][2][4] = { - { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr }, - { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } }, - { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }, - { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } } - }; - - bool HasAVX = Subtarget->hasAVX(); - unsigned *Opc = nullptr; - switch (RetVT.SimpleTy) { - default: return false; - case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break; - case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break; - } - - const Value *LHS = I->getOperand(1); - const Value *RHS = I->getOperand(2); - - unsigned LHSReg = getRegForValue(LHS); - bool LHSIsKill = hasTrivialKill(LHS); - - unsigned RHSReg = getRegForValue(RHS); - bool RHSIsKill = hasTrivialKill(RHS); - - unsigned CmpLHSReg = getRegForValue(CmpLHS); - bool CmpLHSIsKill = hasTrivialKill(CmpLHS); - - unsigned CmpRHSReg = getRegForValue(CmpRHS); - bool CmpRHSIsKill = hasTrivialKill(CmpRHS); - - if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS) - return false; - - const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); - unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill, - CmpRHSReg, CmpRHSIsKill, CC); - unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false, - LHSReg, LHSIsKill); - unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true, - RHSReg, RHSIsKill); - unsigned ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true, - AndReg, /*IsKill=*/true); - updateValueMap(I, ResultReg); - return true; -} - -bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) { - // These are pseudo CMOV instructions and will be later expanded into control- - // flow. 
- unsigned Opc; - switch (RetVT.SimpleTy) { - default: return false; - case MVT::i8: Opc = X86::CMOV_GR8; break; - case MVT::i16: Opc = X86::CMOV_GR16; break; - case MVT::i32: Opc = X86::CMOV_GR32; break; - case MVT::f32: Opc = X86::CMOV_FR32; break; - case MVT::f64: Opc = X86::CMOV_FR64; break; - } - - const Value *Cond = I->getOperand(0); - X86::CondCode CC = X86::COND_NE; - - // Optimize conditions coming from a compare if both instructions are in the - // same basic block (values defined in other basic blocks may not have - // initialized registers). - const auto *CI = dyn_cast<CmpInst>(Cond); - if (CI && (CI->getParent() == I->getParent())) { - bool NeedSwap; - std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate()); - if (CC > X86::LAST_VALID_COND) - return false; - - const Value *CmpLHS = CI->getOperand(0); - const Value *CmpRHS = CI->getOperand(1); - - if (NeedSwap) - std::swap(CmpLHS, CmpRHS); - - EVT CmpVT = TLI.getValueType(CmpLHS->getType()); - if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc())) - return false; - } else { - unsigned CondReg = getRegForValue(Cond); - if (CondReg == 0) - return false; - bool CondIsKill = hasTrivialKill(Cond); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) - .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1); - } - - const Value *LHS = I->getOperand(1); - const Value *RHS = I->getOperand(2); - - unsigned LHSReg = getRegForValue(LHS); - bool LHSIsKill = hasTrivialKill(LHS); - - unsigned RHSReg = getRegForValue(RHS); - bool RHSIsKill = hasTrivialKill(RHS); - - if (!LHSReg || !RHSReg) - return false; - - const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); - - unsigned ResultReg = - fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC); - updateValueMap(I, ResultReg); - return true; -} - -bool X86FastISel::X86SelectSelect(const Instruction *I) { - MVT RetVT; - if (!isTypeLegal(I->getType(), RetVT)) - return false; - - // Check if we can fold the select. - if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) { - CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); - const Value *Opnd = nullptr; - switch (Predicate) { - default: break; - case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break; - case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break; - } - // No need for a select anymore - this is an unconditional move. - if (Opnd) { - unsigned OpReg = getRegForValue(Opnd); - if (OpReg == 0) - return false; - bool OpIsKill = hasTrivialKill(Opnd); - const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg) - .addReg(OpReg, getKillRegState(OpIsKill)); - updateValueMap(I, ResultReg); - return true; - } - } - - // First try to use real conditional move instructions. - if (X86FastEmitCMoveSelect(RetVT, I)) - return true; - - // Try to use a sequence of SSE instructions to simulate a conditional move. - if (X86FastEmitSSESelect(RetVT, I)) - return true; - - // Fall-back to pseudo conditional move instructions, which will be later - // converted to control-flow. - if (X86FastEmitPseudoSelect(RetVT, I)) - return true; - - return false; -} - -bool X86FastISel::X86SelectFPExt(const Instruction *I) { - // fpext from float to double. 
- if (X86ScalarSSEf64 && - I->getType()->isDoubleTy()) { - const Value *V = I->getOperand(0); - if (V->getType()->isFloatTy()) { - unsigned OpReg = getRegForValue(V); - if (OpReg == 0) return false; - unsigned ResultReg = createResultReg(&X86::FR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(X86::CVTSS2SDrr), ResultReg) - .addReg(OpReg); - updateValueMap(I, ResultReg); - return true; - } - } - - return false; -} - -bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { - if (X86ScalarSSEf64) { - if (I->getType()->isFloatTy()) { - const Value *V = I->getOperand(0); - if (V->getType()->isDoubleTy()) { - unsigned OpReg = getRegForValue(V); - if (OpReg == 0) return false; - unsigned ResultReg = createResultReg(&X86::FR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(X86::CVTSD2SSrr), ResultReg) - .addReg(OpReg); - updateValueMap(I, ResultReg); - return true; - } - } - } - - return false; -} - -bool X86FastISel::X86SelectTrunc(const Instruction *I) { - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(I->getType()); - - // This code only handles truncation to byte. - if (DstVT != MVT::i8 && DstVT != MVT::i1) - return false; - if (!TLI.isTypeLegal(SrcVT)) - return false; - - unsigned InputReg = getRegForValue(I->getOperand(0)); - if (!InputReg) - // Unhandled operand. Halt "fast" selection and bail. - return false; - - if (SrcVT == MVT::i8) { - // Truncate from i8 to i1; no code needed. - updateValueMap(I, InputReg); - return true; - } - - if (!Subtarget->is64Bit()) { - // If we're on x86-32; we can't extract an i8 from a general register. - // First issue a copy to GR16_ABCD or GR32_ABCD. - const TargetRegisterClass *CopyRC = - (SrcVT == MVT::i16) ? &X86::GR16_ABCDRegClass : &X86::GR32_ABCDRegClass; - unsigned CopyReg = createResultReg(CopyRC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg); - InputReg = CopyReg; - } - - // Issue an extract_subreg. - unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8, - InputReg, /*Kill=*/true, - X86::sub_8bit); - if (!ResultReg) - return false; - - updateValueMap(I, ResultReg); - return true; -} - -bool X86FastISel::IsMemcpySmall(uint64_t Len) { - return Len <= (Subtarget->is64Bit() ? 32 : 16); -} - -bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, - X86AddressMode SrcAM, uint64_t Len) { - - // Make sure we don't bloat code by inlining very large memcpy's. - if (!IsMemcpySmall(Len)) - return false; - - bool i64Legal = Subtarget->is64Bit(); - - // We don't care about alignment here since we just emit integer accesses. - while (Len) { - MVT VT; - if (Len >= 8 && i64Legal) - VT = MVT::i64; - else if (Len >= 4) - VT = MVT::i32; - else if (Len >= 2) - VT = MVT::i16; - else - VT = MVT::i8; - - unsigned Reg; - bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg); - RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM); - assert(RV && "Failed to emit load or store??"); - - unsigned Size = VT.getSizeInBits()/8; - Len -= Size; - DestAM.Disp += Size; - SrcAM.Disp += Size; - } - - return true; -} - -bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { - // FIXME: Handle more intrinsics. 
- switch (II->getIntrinsicID()) { - default: return false; - case Intrinsic::frameaddress: { - Type *RetTy = II->getCalledFunction()->getReturnType(); - - MVT VT; - if (!isTypeLegal(RetTy, VT)) - return false; - - unsigned Opc; - const TargetRegisterClass *RC = nullptr; - - switch (VT.SimpleTy) { - default: llvm_unreachable("Invalid result type for frameaddress."); - case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break; - case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break; - } - - // This needs to be set before we call getPtrSizedFrameRegister, otherwise - // we get the wrong frame register. - MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo(); - MFI->setFrameAddressIsTaken(true); - - const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>( - TM.getSubtargetImpl()->getRegisterInfo()); - unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*(FuncInfo.MF)); - assert(((FrameReg == X86::RBP && VT == MVT::i64) || - (FrameReg == X86::EBP && VT == MVT::i32)) && - "Invalid Frame Register!"); - - // Always make a copy of the frame register to to a vreg first, so that we - // never directly reference the frame register (the TwoAddressInstruction- - // Pass doesn't like that). - unsigned SrcReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg); - - // Now recursively load from the frame address. - // movq (%rbp), %rax - // movq (%rax), %rax - // movq (%rax), %rax - // ... - unsigned DestReg; - unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); - while (Depth--) { - DestReg = createResultReg(RC); - addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Opc), DestReg), SrcReg); - SrcReg = DestReg; - } - - updateValueMap(II, SrcReg); - return true; - } - case Intrinsic::memcpy: { - const MemCpyInst *MCI = cast<MemCpyInst>(II); - // Don't handle volatile or variable length memcpys. - if (MCI->isVolatile()) - return false; - - if (isa<ConstantInt>(MCI->getLength())) { - // Small memcpy's are common enough that we want to do them - // without a call if possible. - uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue(); - if (IsMemcpySmall(Len)) { - X86AddressMode DestAM, SrcAM; - if (!X86SelectAddress(MCI->getRawDest(), DestAM) || - !X86SelectAddress(MCI->getRawSource(), SrcAM)) - return false; - TryEmitSmallMemcpy(DestAM, SrcAM, Len); - return true; - } - } - - unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32; - if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth)) - return false; - - if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255) - return false; - - return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2); - } - case Intrinsic::memset: { - const MemSetInst *MSI = cast<MemSetInst>(II); - - if (MSI->isVolatile()) - return false; - - unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32; - if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth)) - return false; - - if (MSI->getDestAddressSpace() > 255) - return false; - - return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); - } - case Intrinsic::stackprotector: { - // Emit code to store the stack guard onto the stack. - EVT PtrTy = TLI.getPointerTy(); - - const Value *Op1 = II->getArgOperand(0); // The guard's value. - const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1)); - - MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]); - - // Grab the frame index. 
- X86AddressMode AM; - if (!X86SelectAddress(Slot, AM)) return false; - if (!X86FastEmitStore(PtrTy, Op1, AM)) return false; - return true; - } - case Intrinsic::dbg_declare: { - const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); - X86AddressMode AM; - assert(DI->getAddress() && "Null address should be checked earlier!"); - if (!X86SelectAddress(DI->getAddress(), AM)) - return false; - const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); - // FIXME may need to add RegState::Debug to any registers produced, - // although ESP/EBP should be the only ones at the moment. - addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM) - .addImm(0) - .addMetadata(DI->getVariable()) - .addMetadata(DI->getExpression()); - return true; - } - case Intrinsic::trap: { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP)); - return true; - } - case Intrinsic::sqrt: { - if (!Subtarget->hasSSE1()) - return false; - - Type *RetTy = II->getCalledFunction()->getReturnType(); - - MVT VT; - if (!isTypeLegal(RetTy, VT)) - return false; - - // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT - // is not generated by FastISel yet. - // FIXME: Update this code once tablegen can handle it. - static const unsigned SqrtOpc[2][2] = { - {X86::SQRTSSr, X86::VSQRTSSr}, - {X86::SQRTSDr, X86::VSQRTSDr} - }; - bool HasAVX = Subtarget->hasAVX(); - unsigned Opc; - const TargetRegisterClass *RC; - switch (VT.SimpleTy) { - default: return false; - case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break; - case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break; - } - - const Value *SrcVal = II->getArgOperand(0); - unsigned SrcReg = getRegForValue(SrcVal); - - if (SrcReg == 0) - return false; - - unsigned ImplicitDefReg = 0; - if (HasAVX) { - ImplicitDefReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); - } - - unsigned ResultReg = createResultReg(RC); - MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), - ResultReg); - - if (ImplicitDefReg) - MIB.addReg(ImplicitDefReg); - - MIB.addReg(SrcReg); - - updateValueMap(II, ResultReg); - return true; - } - case Intrinsic::sadd_with_overflow: - case Intrinsic::uadd_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::usub_with_overflow: - case Intrinsic::smul_with_overflow: - case Intrinsic::umul_with_overflow: { - // This implements the basic lowering of the xalu with overflow intrinsics - // into add/sub/mul followed by either seto or setb. - const Function *Callee = II->getCalledFunction(); - auto *Ty = cast<StructType>(Callee->getReturnType()); - Type *RetTy = Ty->getTypeAtIndex(0U); - Type *CondTy = Ty->getTypeAtIndex(1); - - MVT VT; - if (!isTypeLegal(RetTy, VT)) - return false; - - if (VT < MVT::i8 || VT > MVT::i64) - return false; - - const Value *LHS = II->getArgOperand(0); - const Value *RHS = II->getArgOperand(1); - - // Canonicalize immediate to the RHS. - if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && - isCommutativeIntrinsic(II)) - std::swap(LHS, RHS); - - bool UseIncDec = false; - if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne()) - UseIncDec = true; - - unsigned BaseOpc, CondOpc; - switch (II->getIntrinsicID()) { - default: llvm_unreachable("Unexpected intrinsic!"); - case Intrinsic::sadd_with_overflow: - BaseOpc = UseIncDec ? 
unsigned(X86ISD::INC) : unsigned(ISD::ADD); - CondOpc = X86::SETOr; - break; - case Intrinsic::uadd_with_overflow: - BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break; - case Intrinsic::ssub_with_overflow: - BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB); - CondOpc = X86::SETOr; - break; - case Intrinsic::usub_with_overflow: - BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break; - case Intrinsic::smul_with_overflow: - BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break; - case Intrinsic::umul_with_overflow: - BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break; - } - - unsigned LHSReg = getRegForValue(LHS); - if (LHSReg == 0) - return false; - bool LHSIsKill = hasTrivialKill(LHS); - - unsigned ResultReg = 0; - // Check if we have an immediate version. - if (const auto *CI = dyn_cast<ConstantInt>(RHS)) { - static const unsigned Opc[2][4] = { - { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r }, - { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r } - }; - - if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) { - ResultReg = createResultReg(TLI.getRegClassFor(VT)); - bool IsDec = BaseOpc == X86ISD::DEC; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg) - .addReg(LHSReg, getKillRegState(LHSIsKill)); - } else - ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill, - CI->getZExtValue()); - } - - unsigned RHSReg; - bool RHSIsKill; - if (!ResultReg) { - RHSReg = getRegForValue(RHS); - if (RHSReg == 0) - return false; - RHSIsKill = hasTrivialKill(RHS); - ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg, - RHSIsKill); - } - - // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit - // it manually. - if (BaseOpc == X86ISD::UMUL && !ResultReg) { - static const unsigned MULOpc[] = - { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r }; - static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX }; - // First copy the first operand into RAX, which is an implicit input to - // the X86::MUL*r instruction. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8]) - .addReg(LHSReg, getKillRegState(LHSIsKill)); - ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8], - TLI.getRegClassFor(VT), RHSReg, RHSIsKill); - } else if (BaseOpc == X86ISD::SMUL && !ResultReg) { - static const unsigned MULOpc[] = - { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr }; - if (VT == MVT::i8) { - // Copy the first operand into AL, which is an implicit input to the - // X86::IMUL8r instruction. 
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), X86::AL) - .addReg(LHSReg, getKillRegState(LHSIsKill)); - ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg, - RHSIsKill); - } else - ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8], - TLI.getRegClassFor(VT), LHSReg, LHSIsKill, - RHSReg, RHSIsKill); - } - - if (!ResultReg) - return false; - - unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy); - assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers."); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc), - ResultReg2); - - updateValueMap(II, ResultReg, 2); - return true; - } - case Intrinsic::x86_sse_cvttss2si: - case Intrinsic::x86_sse_cvttss2si64: - case Intrinsic::x86_sse2_cvttsd2si: - case Intrinsic::x86_sse2_cvttsd2si64: { - bool IsInputDouble; - switch (II->getIntrinsicID()) { - default: llvm_unreachable("Unexpected intrinsic."); - case Intrinsic::x86_sse_cvttss2si: - case Intrinsic::x86_sse_cvttss2si64: - if (!Subtarget->hasSSE1()) - return false; - IsInputDouble = false; - break; - case Intrinsic::x86_sse2_cvttsd2si: - case Intrinsic::x86_sse2_cvttsd2si64: - if (!Subtarget->hasSSE2()) - return false; - IsInputDouble = true; - break; - } - - Type *RetTy = II->getCalledFunction()->getReturnType(); - MVT VT; - if (!isTypeLegal(RetTy, VT)) - return false; - - static const unsigned CvtOpc[2][2][2] = { - { { X86::CVTTSS2SIrr, X86::VCVTTSS2SIrr }, - { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } }, - { { X86::CVTTSD2SIrr, X86::VCVTTSD2SIrr }, - { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr } } - }; - bool HasAVX = Subtarget->hasAVX(); - unsigned Opc; - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected result type."); - case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break; - case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break; - } - - // Check if we can fold insertelement instructions into the convert. - const Value *Op = II->getArgOperand(0); - while (auto *IE = dyn_cast<InsertElementInst>(Op)) { - const Value *Index = IE->getOperand(2); - if (!isa<ConstantInt>(Index)) - break; - unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); - - if (Idx == 0) { - Op = IE->getOperand(1); - break; - } - Op = IE->getOperand(0); - } - - unsigned Reg = getRegForValue(Op); - if (Reg == 0) - return false; - - unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(Reg); - - updateValueMap(II, ResultReg); - return true; - } - } -} - -bool X86FastISel::fastLowerArguments() { - if (!FuncInfo.CanLowerReturn) - return false; - - const Function *F = FuncInfo.Fn; - if (F->isVarArg()) - return false; - - CallingConv::ID CC = F->getCallingConv(); - if (CC != CallingConv::C) - return false; - - if (Subtarget->isCallingConvWin64(CC)) - return false; - - if (!Subtarget->is64Bit()) - return false; - - // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. - unsigned GPRCnt = 0; - unsigned FPRCnt = 0; - unsigned Idx = 0; - for (auto const &Arg : F->args()) { - // The first argument is at index 1. 
- ++Idx; - if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || - F->getAttributes().hasAttribute(Idx, Attribute::InReg) || - F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || - F->getAttributes().hasAttribute(Idx, Attribute::Nest)) - return false; - - Type *ArgTy = Arg.getType(); - if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) - return false; - - EVT ArgVT = TLI.getValueType(ArgTy); - if (!ArgVT.isSimple()) return false; - switch (ArgVT.getSimpleVT().SimpleTy) { - default: return false; - case MVT::i32: - case MVT::i64: - ++GPRCnt; - break; - case MVT::f32: - case MVT::f64: - if (!Subtarget->hasSSE1()) - return false; - ++FPRCnt; - break; - } - - if (GPRCnt > 6) - return false; - - if (FPRCnt > 8) - return false; - } - - static const MCPhysReg GPR32ArgRegs[] = { - X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D - }; - static const MCPhysReg GPR64ArgRegs[] = { - X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9 - }; - static const MCPhysReg XMMArgRegs[] = { - X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, - X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 - }; - - unsigned GPRIdx = 0; - unsigned FPRIdx = 0; - for (auto const &Arg : F->args()) { - MVT VT = TLI.getSimpleValueType(Arg.getType()); - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); - unsigned SrcReg; - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type."); - case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break; - case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break; - case MVT::f32: // fall-through - case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break; - } - unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); - // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. - // Without this, EmitLiveInCopies may eliminate the livein if its only - // use is a bitcast (which isn't turned into an instruction). - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg) - .addReg(DstReg, getKillRegState(true)); - updateValueMap(&Arg, ResultReg); - } - return true; -} - -static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget, - CallingConv::ID CC, - ImmutableCallSite *CS) { - if (Subtarget->is64Bit()) - return 0; - if (Subtarget->getTargetTriple().isOSMSVCRT()) - return 0; - if (CC == CallingConv::Fast || CC == CallingConv::GHC || - CC == CallingConv::HiPE) - return 0; - if (CS && !CS->paramHasAttr(1, Attribute::StructRet)) - return 0; - if (CS && CS->paramHasAttr(1, Attribute::InReg)) - return 0; - return 4; -} - -bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { - auto &OutVals = CLI.OutVals; - auto &OutFlags = CLI.OutFlags; - auto &OutRegs = CLI.OutRegs; - auto &Ins = CLI.Ins; - auto &InRegs = CLI.InRegs; - CallingConv::ID CC = CLI.CallConv; - bool &IsTailCall = CLI.IsTailCall; - bool IsVarArg = CLI.IsVarArg; - const Value *Callee = CLI.Callee; - const char *SymName = CLI.SymName; - - bool Is64Bit = Subtarget->is64Bit(); - bool IsWin64 = Subtarget->isCallingConvWin64(CC); - - // Handle only C, fastcc, and webkit_js calling conventions for now. - switch (CC) { - default: return false; - case CallingConv::C: - case CallingConv::Fast: - case CallingConv::WebKit_JS: - case CallingConv::X86_FastCall: - case CallingConv::X86_64_Win64: - case CallingConv::X86_64_SysV: - break; - } - - // Allow SelectionDAG isel to handle tail calls. 
- if (IsTailCall) - return false; - - // fastcc with -tailcallopt is intended to provide a guaranteed - // tail call optimization. Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) - return false; - - // Don't know how to handle Win64 varargs yet. Nothing special needed for - // x86-32. Special handling for x86-64 is implemented. - if (IsVarArg && IsWin64) - return false; - - // Don't know about inalloca yet. - if (CLI.CS && CLI.CS->hasInAllocaArgument()) - return false; - - // Fast-isel doesn't know about callee-pop yet. - if (X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg, - TM.Options.GuaranteedTailCallOpt)) - return false; - - SmallVector<MVT, 16> OutVTs; - SmallVector<unsigned, 16> ArgRegs; - - // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra - // instruction. This is safe because it is common to all FastISel supported - // calling conventions on x86. - for (int i = 0, e = OutVals.size(); i != e; ++i) { - Value *&Val = OutVals[i]; - ISD::ArgFlagsTy Flags = OutFlags[i]; - if (auto *CI = dyn_cast<ConstantInt>(Val)) { - if (CI->getBitWidth() < 32) { - if (Flags.isSExt()) - Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext())); - else - Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext())); - } - } - - // Passing bools around ends up doing a trunc to i1 and passing it. - // Codegen this as an argument + "and 1". - MVT VT; - auto *TI = dyn_cast<TruncInst>(Val); - unsigned ResultReg; - if (TI && TI->getType()->isIntegerTy(1) && CLI.CS && - (TI->getParent() == CLI.CS->getInstruction()->getParent()) && - TI->hasOneUse()) { - Value *PrevVal = TI->getOperand(0); - ResultReg = getRegForValue(PrevVal); - - if (!ResultReg) - return false; - - if (!isTypeLegal(PrevVal->getType(), VT)) - return false; - - ResultReg = - fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1); - } else { - if (!isTypeLegal(Val->getType(), VT)) - return false; - ResultReg = getRegForValue(Val); - } - - if (!ResultReg) - return false; - - ArgRegs.push_back(ResultReg); - OutVTs.push_back(VT); - } - - // Analyze operands of the call, assigning locations to each operand. - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext()); - - // Allocate shadow area for Win64 - if (IsWin64) - CCInfo.AllocateStack(32, 8); - - CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86); - - // Get a count of how many bytes are to be pushed on the stack. - unsigned NumBytes = CCInfo.getNextStackOffset(); - - // Issue CALLSEQ_START - unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) - .addImm(NumBytes); - - // Walk the register/memloc assignments, inserting copies/loads. - const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>( - TM.getSubtargetImpl()->getRegisterInfo()); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign const &VA = ArgLocs[i]; - const Value *ArgVal = OutVals[VA.getValNo()]; - MVT ArgVT = OutVTs[VA.getValNo()]; - - if (ArgVT == MVT::x86mmx) - return false; - - unsigned ArgReg = ArgRegs[VA.getValNo()]; - - // Promote the value if needed. 
- switch (VA.getLocInfo()) { - case CCValAssign::Full: break; - case CCValAssign::SExt: { - assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && - "Unexpected extend"); - bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg, - ArgVT, ArgReg); - assert(Emitted && "Failed to emit a sext!"); (void)Emitted; - ArgVT = VA.getLocVT(); - break; - } - case CCValAssign::ZExt: { - assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && - "Unexpected extend"); - bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg, - ArgVT, ArgReg); - assert(Emitted && "Failed to emit a zext!"); (void)Emitted; - ArgVT = VA.getLocVT(); - break; - } - case CCValAssign::AExt: { - assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && - "Unexpected extend"); - bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg, - ArgVT, ArgReg); - if (!Emitted) - Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg, - ArgVT, ArgReg); - if (!Emitted) - Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg, - ArgVT, ArgReg); - - assert(Emitted && "Failed to emit a aext!"); (void)Emitted; - ArgVT = VA.getLocVT(); - break; - } - case CCValAssign::BCvt: { - ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg, - /*TODO: Kill=*/false); - assert(ArgReg && "Failed to emit a bitcast!"); - ArgVT = VA.getLocVT(); - break; - } - case CCValAssign::VExt: - // VExt has not been implemented, so this should be impossible to reach - // for now. However, fallback to Selection DAG isel once implemented. - return false; - case CCValAssign::AExtUpper: - case CCValAssign::SExtUpper: - case CCValAssign::ZExtUpper: - case CCValAssign::FPExt: - llvm_unreachable("Unexpected loc info!"); - case CCValAssign::Indirect: - // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully - // support this. - return false; - } - - if (VA.isRegLoc()) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); - OutRegs.push_back(VA.getLocReg()); - } else { - assert(VA.isMemLoc()); - - // Don't emit stores for undef values. - if (isa<UndefValue>(ArgVal)) - continue; - - unsigned LocMemOffset = VA.getLocMemOffset(); - X86AddressMode AM; - AM.Base.Reg = RegInfo->getStackRegister(); - AM.Disp = LocMemOffset; - ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()]; - unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); - MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getStack(LocMemOffset), MachineMemOperand::MOStore, - ArgVT.getStoreSize(), Alignment); - if (Flags.isByVal()) { - X86AddressMode SrcAM; - SrcAM.Base.Reg = ArgReg; - if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize())) - return false; - } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) { - // If this is a really simple value, emit this with the Value* version - // of X86FastEmitStore. If it isn't simple, we don't want to do this, - // as it can cause us to reevaluate the argument. - if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO)) - return false; - } else { - bool ValIsKill = hasTrivialKill(ArgVal); - if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO)) - return false; - } - } - } - - // ELF / PIC requires GOT in the EBX register before function calls via PLT - // GOT pointer. 
- if (Subtarget->isPICStyleGOT()) { - unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base); - } - - if (Is64Bit && IsVarArg && !IsWin64) { - // From AMD64 ABI document: - // For calls that may call functions that use varargs or stdargs - // (prototype-less calls or calls to functions containing ellipsis (...) in - // the declaration) %al is used as hidden argument to specify the number - // of SSE registers used. The contents of %al do not need to match exactly - // the number of registers, but must be an ubound on the number of SSE - // registers used and is in the range 0 - 8 inclusive. - - // Count the number of XMM registers allocated. - static const MCPhysReg XMMArgRegs[] = { - X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, - X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 - }; - unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); - assert((Subtarget->hasSSE1() || !NumXMMRegs) - && "SSE registers cannot be used when SSE is disabled"); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri), - X86::AL).addImm(NumXMMRegs); - } - - // Materialize callee address in a register. FIXME: GV address can be - // handled with a CALLpcrel32 instead. - X86AddressMode CalleeAM; - if (!X86SelectCallAddress(Callee, CalleeAM)) - return false; - - unsigned CalleeOp = 0; - const GlobalValue *GV = nullptr; - if (CalleeAM.GV != nullptr) { - GV = CalleeAM.GV; - } else if (CalleeAM.Base.Reg != 0) { - CalleeOp = CalleeAM.Base.Reg; - } else - return false; - - // Issue the call. - MachineInstrBuilder MIB; - if (CalleeOp) { - // Register-indirect call. - unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)) - .addReg(CalleeOp); - } else { - // Direct call. - assert(GV && "Not a direct call"); - unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; - - // See if we need any target-specific flags on the GV operand. - unsigned char OpFlags = 0; - - // On ELF targets, in both X86-64 and X86-32 mode, direct calls to - // external symbols most go through the PLT in PIC mode. If the symbol - // has hidden or protected visibility, or if it is static or local, then - // we don't need to use the PLT - we can directly call it. - if (Subtarget->isTargetELF() && - TM.getRelocationModel() == Reloc::PIC_ && - GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { - OpFlags = X86II::MO_PLT; - } else if (Subtarget->isPICStyleStubAny() && - (GV->isDeclaration() || GV->isWeakForLinker()) && - (!Subtarget->getTargetTriple().isMacOSX() || - Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { - // PC-relative references to external symbols should go through $stub, - // unless we're building with the leopard linker or later, which - // automatically synthesizes these stubs. - OpFlags = X86II::MO_DARWIN_STUB; - } - - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)); - if (SymName) - MIB.addExternalSymbol(SymName, OpFlags); - else - MIB.addGlobalAddress(GV, 0, OpFlags); - } - - // Add a register mask operand representing the call-preserved registers. - // Proper defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CC)); - - // Add an implicit use GOT pointer in EBX. 
- if (Subtarget->isPICStyleGOT()) - MIB.addReg(X86::EBX, RegState::Implicit); - - if (Is64Bit && IsVarArg && !IsWin64) - MIB.addReg(X86::AL, RegState::Implicit); - - // Add implicit physical register uses to the call. - for (auto Reg : OutRegs) - MIB.addReg(Reg, RegState::Implicit); - - // Issue CALLSEQ_END - unsigned NumBytesForCalleeToPop = - computeBytesPoppedByCallee(Subtarget, CC, CLI.CS); - unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) - .addImm(NumBytes).addImm(NumBytesForCalleeToPop); - - // Now handle call return values. - SmallVector<CCValAssign, 16> RVLocs; - CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, - CLI.RetTy->getContext()); - CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86); - - // Copy all of the result registers out of their specified physreg. - unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy); - for (unsigned i = 0; i != RVLocs.size(); ++i) { - CCValAssign &VA = RVLocs[i]; - EVT CopyVT = VA.getValVT(); - unsigned CopyReg = ResultReg + i; - - // If this is x86-64, and we disabled SSE, we can't return FP values - if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && - ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { - report_fatal_error("SSE register return with SSE disabled"); - } - - // If we prefer to use the value in xmm registers, copy it out as f80 and - // use a truncate to move it from fp stack reg to xmm reg. - if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && - isScalarFPTypeInSSEReg(VA.getValVT())) { - CopyVT = MVT::f80; - CopyReg = createResultReg(&X86::RFP80RegClass); - } - - // Copy out the result. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg()); - InRegs.push_back(VA.getLocReg()); - - // Round the f80 to the right size, which also moves it to the appropriate - // xmm register. This is accomplished by storing the f80 value in memory - // and then loading it back. - if (CopyVT != VA.getValVT()) { - EVT ResVT = VA.getValVT(); - unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64; - unsigned MemSize = ResVT.getSizeInBits()/8; - int FI = MFI.CreateStackObject(MemSize, MemSize, false); - addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Opc)), FI) - .addReg(CopyReg); - Opc = ResVT == MVT::f32 ? 
X86::MOVSSrm : X86::MOVSDrm; - addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Opc), ResultReg + i), FI); - } - } - - CLI.ResultReg = ResultReg; - CLI.NumResultRegs = RVLocs.size(); - CLI.Call = MIB; - - return true; -} - -bool -X86FastISel::fastSelectInstruction(const Instruction *I) { - switch (I->getOpcode()) { - default: break; - case Instruction::Load: - return X86SelectLoad(I); - case Instruction::Store: - return X86SelectStore(I); - case Instruction::Ret: - return X86SelectRet(I); - case Instruction::ICmp: - case Instruction::FCmp: - return X86SelectCmp(I); - case Instruction::ZExt: - return X86SelectZExt(I); - case Instruction::Br: - return X86SelectBranch(I); - case Instruction::LShr: - case Instruction::AShr: - case Instruction::Shl: - return X86SelectShift(I); - case Instruction::SDiv: - case Instruction::UDiv: - case Instruction::SRem: - case Instruction::URem: - return X86SelectDivRem(I); - case Instruction::Select: - return X86SelectSelect(I); - case Instruction::Trunc: - return X86SelectTrunc(I); - case Instruction::FPExt: - return X86SelectFPExt(I); - case Instruction::FPTrunc: - return X86SelectFPTrunc(I); - case Instruction::IntToPtr: // Deliberate fall-through. - case Instruction::PtrToInt: { - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(I->getType()); - if (DstVT.bitsGT(SrcVT)) - return X86SelectZExt(I); - if (DstVT.bitsLT(SrcVT)) - return X86SelectTrunc(I); - unsigned Reg = getRegForValue(I->getOperand(0)); - if (Reg == 0) return false; - updateValueMap(I, Reg); - return true; - } - } - - return false; -} - -unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) { - if (VT > MVT::i64) - return 0; - - uint64_t Imm = CI->getZExtValue(); - if (Imm == 0) { - unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass); - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type"); - case MVT::i1: - case MVT::i8: - return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true, - X86::sub_8bit); - case MVT::i16: - return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true, - X86::sub_16bit); - case MVT::i32: - return SrcReg; - case MVT::i64: { - unsigned ResultReg = createResultReg(&X86::GR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg) - .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit); - return ResultReg; - } - } - } - - unsigned Opc = 0; - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type"); - case MVT::i1: VT = MVT::i8; // fall-through - case MVT::i8: Opc = X86::MOV8ri; break; - case MVT::i16: Opc = X86::MOV16ri; break; - case MVT::i32: Opc = X86::MOV32ri; break; - case MVT::i64: { - if (isUInt<32>(Imm)) - Opc = X86::MOV32ri; - else if (isInt<32>(Imm)) - Opc = X86::MOV64ri32; - else - Opc = X86::MOV64ri; - break; - } - } - if (VT == MVT::i64 && Opc == X86::MOV32ri) { - unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm); - unsigned ResultReg = createResultReg(&X86::GR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg) - .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit); - return ResultReg; - } - return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); -} - -unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) { - if (CFP->isNullValue()) - return fastMaterializeFloatZero(CFP); - - // Can't handle alternate code models yet. 
- CodeModel::Model CM = TM.getCodeModel(); - if (CM != CodeModel::Small && CM != CodeModel::Large) - return 0; - - // Get opcode and regclass of the output for the given load instruction. - unsigned Opc = 0; - const TargetRegisterClass *RC = nullptr; - switch (VT.SimpleTy) { - default: return 0; - case MVT::f32: - if (X86ScalarSSEf32) { - Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm; - RC = &X86::FR32RegClass; - } else { - Opc = X86::LD_Fp32m; - RC = &X86::RFP32RegClass; - } - break; - case MVT::f64: - if (X86ScalarSSEf64) { - Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm; - RC = &X86::FR64RegClass; - } else { - Opc = X86::LD_Fp64m; - RC = &X86::RFP64RegClass; - } - break; - case MVT::f80: - // No f80 support yet. - return 0; - } - - // MachineConstantPool wants an explicit alignment. - unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); - if (Align == 0) { - // Alignment of vector types. FIXME! - Align = DL.getTypeAllocSize(CFP->getType()); - } - - // x86-32 PIC requires a PIC base register for constant pools. - unsigned PICBase = 0; - unsigned char OpFlag = 0; - if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic - OpFlag = X86II::MO_PIC_BASE_OFFSET; - PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); - } else if (Subtarget->isPICStyleGOT()) { - OpFlag = X86II::MO_GOTOFF; - PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); - } else if (Subtarget->isPICStyleRIPRel() && - TM.getCodeModel() == CodeModel::Small) { - PICBase = X86::RIP; - } - - // Create the load from the constant pool. - unsigned CPI = MCP.getConstantPoolIndex(CFP, Align); - unsigned ResultReg = createResultReg(RC); - - if (CM == CodeModel::Large) { - unsigned AddrReg = createResultReg(&X86::GR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri), - AddrReg) - .addConstantPoolIndex(CPI, 0, OpFlag); - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Opc), ResultReg); - addDirectMem(MIB, AddrReg); - MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, - TM.getDataLayout()->getPointerSize(), Align); - MIB->addMemOperand(*FuncInfo.MF, MMO); - return ResultReg; - } - - addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Opc), ResultReg), - CPI, PICBase, OpFlag); - return ResultReg; -} - -unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) { - // Can't handle alternate code models yet. - if (TM.getCodeModel() != CodeModel::Small) - return 0; - - // Materialize addresses with LEA/MOV instructions. - X86AddressMode AM; - if (X86SelectAddress(GV, AM)) { - // If the expression is just a basereg, then we're done, otherwise we need - // to emit an LEA. - if (AM.BaseType == X86AddressMode::RegBase && - AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr) - return AM.Base.Reg; - - unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); - if (TM.getRelocationModel() == Reloc::Static && - TLI.getPointerTy() == MVT::i64) { - // The displacement code could be more than 32 bits away so we need to use - // an instruction with a 64 bit immediate - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri), - ResultReg) - .addGlobalAddress(GV); - } else { - unsigned Opc = TLI.getPointerTy() == MVT::i32 - ? (Subtarget->isTarget64BitILP32() - ? 
X86::LEA64_32r : X86::LEA32r) - : X86::LEA64r; - addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Opc), ResultReg), AM); - } - return ResultReg; - } - return 0; -} - -unsigned X86FastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); - - // Only handle simple types. - if (!CEVT.isSimple()) - return 0; - MVT VT = CEVT.getSimpleVT(); - - if (const auto *CI = dyn_cast<ConstantInt>(C)) - return X86MaterializeInt(CI, VT); - else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) - return X86MaterializeFP(CFP, VT); - else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) - return X86MaterializeGV(GV, VT); - - return 0; -} - -unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) { - // Fail on dynamic allocas. At this point, getRegForValue has already - // checked its CSE maps, so if we're here trying to handle a dynamic - // alloca, we're not going to succeed. X86SelectAddress has a - // check for dynamic allocas, because it's called directly from - // various places, but targetMaterializeAlloca also needs a check - // in order to avoid recursion between getRegForValue, - // X86SelectAddrss, and targetMaterializeAlloca. - if (!FuncInfo.StaticAllocaMap.count(C)) - return 0; - assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?"); - - X86AddressMode AM; - if (!X86SelectAddress(C, AM)) - return 0; - unsigned Opc = TLI.getPointerTy() == MVT::i32 - ? (Subtarget->isTarget64BitILP32() - ? X86::LEA64_32r : X86::LEA32r) - : X86::LEA64r; - const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); - unsigned ResultReg = createResultReg(RC); - addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Opc), ResultReg), AM); - return ResultReg; -} - -unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) { - MVT VT; - if (!isTypeLegal(CF->getType(), VT)) - return 0; - - // Get opcode and regclass for the given zero. - unsigned Opc = 0; - const TargetRegisterClass *RC = nullptr; - switch (VT.SimpleTy) { - default: return 0; - case MVT::f32: - if (X86ScalarSSEf32) { - Opc = X86::FsFLD0SS; - RC = &X86::FR32RegClass; - } else { - Opc = X86::LD_Fp032; - RC = &X86::RFP32RegClass; - } - break; - case MVT::f64: - if (X86ScalarSSEf64) { - Opc = X86::FsFLD0SD; - RC = &X86::FR64RegClass; - } else { - Opc = X86::LD_Fp064; - RC = &X86::RFP64RegClass; - } - break; - case MVT::f80: - // No f80 support yet. 
- return 0; - } - - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); - return ResultReg; -} - - -bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI) { - const Value *Ptr = LI->getPointerOperand(); - X86AddressMode AM; - if (!X86SelectAddress(Ptr, AM)) - return false; - - const X86InstrInfo &XII = (const X86InstrInfo &)TII; - - unsigned Size = DL.getTypeAllocSize(LI->getType()); - unsigned Alignment = LI->getAlignment(); - - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = DL.getABITypeAlignment(LI->getType()); - - SmallVector<MachineOperand, 8> AddrOps; - AM.getFullAddress(AddrOps); - - MachineInstr *Result = - XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, - Size, Alignment, /*AllowCommute=*/true); - if (!Result) - return false; - - Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI)); - FuncInfo.MBB->insert(FuncInfo.InsertPt, Result); - MI->eraseFromParent(); - return true; -} - - -namespace llvm { - FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) { - return new X86FastISel(funcInfo, libInfo); - } -} +//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86-specific support for the FastISel class. Much
+// of the target-specific code is generated by tablegen in the file
+// X86GenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86CallingConv.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+
+class X86FastISel final : public FastISel {
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE and x87
+ /// floating-point ops.
+ /// When SSE is available, use it for f32 operations.
+ /// When SSE2 is available, use it for f64 operations.
+ bool X86ScalarSSEf64;
+ bool X86ScalarSSEf32;
+
+public:
+ explicit X86FastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FastISel(funcInfo, libInfo) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ X86ScalarSSEf64 = Subtarget->hasSSE2();
+ X86ScalarSSEf32 = Subtarget->hasSSE1();
+ }
+
+ bool fastSelectInstruction(const Instruction *I) override;
+
+ /// \brief The specified machine instr operand is a vreg, and that
+ /// vreg is being provided by the specified load instruction. If possible,
+ /// try to fold the load as an operand to the instruction, returning true if
+ /// successful.
+ bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) override;
+
+ bool fastLowerArguments() override;
+ bool fastLowerCall(CallLoweringInfo &CLI) override;
+ bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
+
+#include "X86GenFastISel.inc"
+
+private:
+ bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT, DebugLoc DL);
+
+ bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
+ unsigned &ResultReg);
+
+ bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
+ MachineMemOperand *MMO = nullptr, bool Aligned = false);
+ bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
+ const X86AddressMode &AM,
+ MachineMemOperand *MMO = nullptr, bool Aligned = false);
+
+ bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
+ unsigned &ResultReg);
+
+ bool X86SelectAddress(const Value *V, X86AddressMode &AM);
+ bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
+
+ bool X86SelectLoad(const Instruction *I);
+
+ bool X86SelectStore(const Instruction *I);
+
+ bool X86SelectRet(const Instruction *I);
+
+ bool X86SelectCmp(const Instruction *I);
+
+ bool X86SelectZExt(const Instruction *I);
+
+ bool X86SelectBranch(const Instruction *I);
+
+ bool X86SelectShift(const Instruction *I);
+
+ bool X86SelectDivRem(const Instruction *I);
+
+ bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
+
+ bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
+
+ bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
+
+ bool X86SelectSelect(const Instruction *I);
+
+ bool X86SelectTrunc(const Instruction *I);
+
+ bool X86SelectFPExt(const Instruction *I);
+ bool X86SelectFPTrunc(const Instruction *I);
+
+ const X86InstrInfo *getInstrInfo() const {
+ return getTargetMachine()->getSubtargetImpl()->getInstrInfo();
+ }
+ const X86TargetMachine *getTargetMachine() const {
+ return static_cast<const X86TargetMachine *>(&TM);
+ }
+
+ bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
+
+ unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
+ unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
+ unsigned fastMaterializeConstant(const Constant *C) override;
+
+ unsigned fastMaterializeAlloca(const AllocaInst *C) override;
+
+ unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
+
+ /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
+ /// computed in an SSE register, not on the X87 floating point stack.
+ bool isScalarFPTypeInSSEReg(EVT VT) const {
+ return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is legal when SSE2 is available
+ (VT == MVT::f32 && X86ScalarSSEf32); // f32 is legal when SSE1 is available
+ }
+
+ bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
+
+ bool IsMemcpySmall(uint64_t Len);
+
+ bool TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len);
+
+ bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
+ const Value *Cond);
+};
+
+} // end anonymous namespace.
+
+static std::pair<X86::CondCode, bool>
+getX86ConditionCode(CmpInst::Predicate Predicate) {
+ X86::CondCode CC = X86::COND_INVALID;
+ bool NeedSwap = false;
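+ // NeedSwap asks the caller to swap the comparison operands so that the
+ // predicate can be expressed with the returned condition code.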
+ switch (Predicate) {
+ default: break;
+ // Floating-point Predicates
+ case CmpInst::FCMP_UEQ: CC = X86::COND_E; break;
+ case CmpInst::FCMP_OLT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OGT: CC = X86::COND_A; break;
+ case CmpInst::FCMP_OLE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OGE: CC = X86::COND_AE; break;
+ case CmpInst::FCMP_UGT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_ULT: CC = X86::COND_B; break;
+ case CmpInst::FCMP_UGE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_ULE: CC = X86::COND_BE; break;
+ case CmpInst::FCMP_ONE: CC = X86::COND_NE; break;
+ case CmpInst::FCMP_UNO: CC = X86::COND_P; break;
+ case CmpInst::FCMP_ORD: CC = X86::COND_NP; break;
+ case CmpInst::FCMP_OEQ: // fall-through
+ case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;
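+ // OEQ and UNE cannot be tested with a single flag check (they need both ZF
+ // and PF), so return COND_INVALID and let callers emit a two-check sequence.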
+
+ // Integer Predicates
+ case CmpInst::ICMP_EQ: CC = X86::COND_E; break;
+ case CmpInst::ICMP_NE: CC = X86::COND_NE; break;
+ case CmpInst::ICMP_UGT: CC = X86::COND_A; break;
+ case CmpInst::ICMP_UGE: CC = X86::COND_AE; break;
+ case CmpInst::ICMP_ULT: CC = X86::COND_B; break;
+ case CmpInst::ICMP_ULE: CC = X86::COND_BE; break;
+ case CmpInst::ICMP_SGT: CC = X86::COND_G; break;
+ case CmpInst::ICMP_SGE: CC = X86::COND_GE; break;
+ case CmpInst::ICMP_SLT: CC = X86::COND_L; break;
+ case CmpInst::ICMP_SLE: CC = X86::COND_LE; break;
+ }
+
+ return std::make_pair(CC, NeedSwap);
+}
+
+static std::pair<unsigned, bool>
+getX86SSEConditionCode(CmpInst::Predicate Predicate) {
+ unsigned CC;
+ bool NeedSwap = false;
+
+ // SSE Condition code mapping:
+ // 0 - EQ
+ // 1 - LT
+ // 2 - LE
+ // 3 - UNORD
+ // 4 - NEQ
+ // 5 - NLT
+ // 6 - NLE
+ // 7 - ORD
+ switch (Predicate) {
+ default: llvm_unreachable("Unexpected predicate");
+ case CmpInst::FCMP_OEQ: CC = 0; break;
+ case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OLT: CC = 1; break;
+ case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OLE: CC = 2; break;
+ case CmpInst::FCMP_UNO: CC = 3; break;
+ case CmpInst::FCMP_UNE: CC = 4; break;
+ case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_UGE: CC = 5; break;
+ case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_UGT: CC = 6; break;
+ case CmpInst::FCMP_ORD: CC = 7; break;
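+ // UEQ and ONE need two SSE compares; return 8, which is outside the 3-bit
+ // immediate range, so callers can detect and reject them via CC > 7.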
+ case CmpInst::FCMP_UEQ:
+ case CmpInst::FCMP_ONE: CC = 8; break;
+ }
+
+ return std::make_pair(CC, NeedSwap);
+}
+
+/// \brief Check if it is possible to fold the condition from the XALU intrinsic
+/// into the user. The condition code will only be updated on success.
+bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
+ const Value *Cond) {
+ if (!isa<ExtractValueInst>(Cond))
+ return false;
+
+ const auto *EV = cast<ExtractValueInst>(Cond);
+ if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
+ return false;
+
+ const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
+ MVT RetVT;
+ const Function *Callee = II->getCalledFunction();
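+ // The *.with.overflow intrinsics return a {result, overflow-bit} struct;
+ // only the result element (index 0) needs a legal integer type here.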
+ Type *RetTy =
+ cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
+ if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return false;
+
+ X86::CondCode TmpCC;
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
+ }
+
+ // Check if both instructions are in the same basic block.
+ if (II->getParent() != I->getParent())
+ return false;
+
+ // Make sure nothing is in the way
+ BasicBlock::const_iterator Start = I;
+ BasicBlock::const_iterator End = II;
+ for (auto Itr = std::prev(Start); Itr != End; --Itr) {
+ // We only expect extractvalue instructions between the intrinsic and the
+ // instruction to be selected.
+ if (!isa<ExtractValueInst>(Itr))
+ return false;
+
+ // Check that the extractvalue operand comes from the intrinsic.
+ const auto *EVI = cast<ExtractValueInst>(Itr);
+ if (EVI->getAggregateOperand() != II)
+ return false;
+ }
+
+ CC = TmpCC;
+ return true;
+}
+
+bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
+ EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
+ if (evt == MVT::Other || !evt.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ VT = evt.getSimpleVT();
+ // For now, require SSE/SSE2 for performing floating-point operations,
+ // since x87 requires additional work.
+ if (VT == MVT::f64 && !X86ScalarSSEf64)
+ return false;
+ if (VT == MVT::f32 && !X86ScalarSSEf32)
+ return false;
+ // Similarly, no f80 support yet.
+ if (VT == MVT::f80)
+ return false;
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
+}
+
+#include "X86GenCallingConv.inc"
+
+/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
+/// The address to load from is described by AM. Return true and the result
+/// register by reference if it is possible.
+bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
+ MachineMemOperand *MMO, unsigned &ResultReg) {
+ // Get opcode and regclass of the output for the given load instruction.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = nullptr;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ Opc = X86::MOV8rm;
+ RC = &X86::GR8RegClass;
+ break;
+ case MVT::i16:
+ Opc = X86::MOV16rm;
+ RC = &X86::GR16RegClass;
+ break;
+ case MVT::i32:
+ Opc = X86::MOV32rm;
+ RC = &X86::GR32RegClass;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = X86::MOV64rm;
+ RC = &X86::GR64RegClass;
+ break;
+ case MVT::f32:
+ if (X86ScalarSSEf32) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
+ RC = &X86::FR32RegClass;
+ } else {
+ Opc = X86::LD_Fp32m;
+ RC = &X86::RFP32RegClass;
+ }
+ break;
+ case MVT::f64:
+ if (X86ScalarSSEf64) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
+ RC = &X86::FR64RegClass;
+ } else {
+ Opc = X86::LD_Fp64m;
+ RC = &X86::RFP64RegClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ ResultReg = createResultReg(RC);
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
+ addFullAddress(MIB, AM);
+ if (MMO)
+ MIB->addMemOperand(*FuncInfo.MF, MMO);
+ return true;
+}
+
+/// X86FastEmitStore - Emit a machine instruction to store a value Val of
+/// type VT. The address to store to is described by AM. Return true if it
+/// is possible.
+bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
+ const X86AddressMode &AM,
+ MachineMemOperand *MMO, bool Aligned) {
+ // Get opcode and regclass of the output for the given store instruction.
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f80: // No f80 support yet.
+ default: return false;
+ case MVT::i1: {
+ // Mask out all but lowest bit.
+ unsigned AndResult = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(X86::AND8ri), AndResult)
+ .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
+ ValReg = AndResult;
+ }
+ // FALLTHROUGH, handling i1 as i8.
+ case MVT::i8: Opc = X86::MOV8mr; break;
+ case MVT::i16: Opc = X86::MOV16mr; break;
+ case MVT::i32: Opc = X86::MOV32mr; break;
+ case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
+ case MVT::f32:
+ Opc = X86ScalarSSEf32 ?
+ (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
+ break;
+ case MVT::f64:
+ Opc = X86ScalarSSEf64 ?
+ (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
+ break;
+ case MVT::v4f32:
+ if (Aligned)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
+ break;
+ case MVT::v2f64:
+ if (Aligned)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
+ break;
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (Aligned)
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
+ break;
+ }
+
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
+ addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
+ if (MMO)
+ MIB->addMemOperand(*FuncInfo.MF, MMO);
+
+ return true;
+}
+
+bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
+ const X86AddressMode &AM,
+ MachineMemOperand *MMO, bool Aligned) {
+ // Handle 'null' like i32/i64 0.
+ if (isa<ConstantPointerNull>(Val))
+ Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
+
+ // If this is a store of a simple constant, fold the constant into the store.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ unsigned Opc = 0;
+ bool Signed = true;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8.
+ case MVT::i8: Opc = X86::MOV8mi; break;
+ case MVT::i16: Opc = X86::MOV16mi; break;
+ case MVT::i32: Opc = X86::MOV32mi; break;
+ case MVT::i64:
+ // Must be a 32-bit sign extended value.
+ if (isInt<32>(CI->getSExtValue()))
+ Opc = X86::MOV64mi32;
+ break;
+ }
+
+ if (Opc) {
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
+ addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
+ : CI->getZExtValue());
+ if (MMO)
+ MIB->addMemOperand(*FuncInfo.MF, MMO);
+ return true;
+ }
+ }
+
+ unsigned ValReg = getRegForValue(Val);
+ if (ValReg == 0)
+ return false;
+
+ bool ValKill = hasTrivialKill(Val);
+ return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
+}
+
+/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
+/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
+/// ISD::SIGN_EXTEND).
+bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
+ unsigned Src, EVT SrcVT,
+ unsigned &ResultReg) {
+ unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ Src, /*TODO: Kill=*/false);
+ if (RR == 0)
+ return false;
+
+ ResultReg = RR;
+ return true;
+}
+
+bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
+ // Handle constant address.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ // Can't handle TLS yet.
+ if (GV->isThreadLocal())
+ return false;
+
+ // RIP-relative addresses can't have additional register operands, so if
+ // we've already folded stuff into the addressing mode, just force the
+ // global value into its own register, which we can use as the basereg.
+ if (!Subtarget->isPICStyleRIPRel() ||
+ (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
+ // Okay, we've committed to selecting this global. Set up the address.
+ AM.GV = GV;
+
+ // Allow the subtarget to classify the global.
+ unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+
+ // If this reference is relative to the pic base, set it now.
+ if (isGlobalRelativeToPICBase(GVFlags)) {
+ // FIXME: How do we know Base.Reg is free??
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ }
+
+ // Unless the ABI requires an extra load, return a direct reference to
+ // the global.
+ if (!isGlobalStubReference(GVFlags)) {
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ }
+ AM.GVOpFlags = GVFlags;
+ return true;
+ }
+
+ // Ok, we need to do a load from a stub. If we've already loaded from
+ // this stub, reuse the loaded pointer, otherwise emit the load now.
+ DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
+ unsigned LoadReg;
+ if (I != LocalValueMap.end() && I->second != 0) {
+ LoadReg = I->second;
+ } else {
+ // Issue load from stub.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = nullptr;
+ X86AddressMode StubAM;
+ StubAM.Base.Reg = AM.Base.Reg;
+ StubAM.GV = GV;
+ StubAM.GVOpFlags = GVFlags;
+
+ // Prepare for inserting code in the local-value area.
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ if (TLI.getPointerTy() == MVT::i64) {
+ Opc = X86::MOV64rm;
+ RC = &X86::GR64RegClass;
+
+ if (Subtarget->isPICStyleRIPRel())
+ StubAM.Base.Reg = X86::RIP;
+ } else {
+ Opc = X86::MOV32rm;
+ RC = &X86::GR32RegClass;
+ }
+
+ LoadReg = createResultReg(RC);
+ MachineInstrBuilder LoadMI =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
+ addFullAddress(LoadMI, StubAM);
+
+ // Ok, back to normal mode.
+ leaveLocalValueArea(SaveInsertPt);
+
+ // Prevent loading GV stub multiple times in same MBB.
+ LocalValueMap[V] = LoadReg;
+ }
+
+ // Now construct the final address. Note that the Disp, Scale,
+ // and Index values may already be set here.
+ AM.Base.Reg = LoadReg;
+ AM.GV = nullptr;
+ return true;
+ }
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
+
+ return false;
+}
+
+/// X86SelectAddress - Attempt to fill in an address from the given value.
+///
+bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
+ SmallVector<const Value *, 32> GEPs;
+redo_gep:
+ const User *U = nullptr;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Don't walk into other basic blocks; it's possible we haven't
+ // visited them yet, so the instructions may not yet be assigned
+ // virtual registers.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past bitcasts.
+ return X86SelectAddress(U->getOperand(0), AM);
+
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return X86SelectAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return X86SelectAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::Alloca: {
+ // Do static allocas.
+ const AllocaInst *A = cast<AllocaInst>(V);
+ DenseMap<const AllocaInst *, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(A);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = SI->second;
+ return true;
+ }
+ break;
+ }
+
+ case Instruction::Add: {
+ // Adds of constants are common and easy enough.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
+ // They have to fit in the 32-bit signed displacement field though.
+ if (isInt<32>(Disp)) {
+ AM.Disp = (uint32_t)Disp;
+ return X86SelectAddress(U->getOperand(0), AM);
+ }
+ }
+ break;
+ }
+
+ case Instruction::GetElementPtr: {
+ X86AddressMode SavedAM = AM;
+
+ // Pattern-match simple GEPs.
+ uint64_t Disp = (int32_t)AM.Disp;
+ unsigned IndexReg = AM.IndexReg;
+ unsigned Scale = AM.Scale;
+ gep_type_iterator GTI = gep_type_begin(U);
+ // Iterate through the indices, folding what we can. Constants can be
+ // folded, and one dynamic index can be handled, if the scale is supported.
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ const Value *Op = *i;
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
+ continue;
+ }
+
+ // An array/variable index is always of the form i*S where S is the
+ // constant scale size. See if we can push the scale into immediates.
+ uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ Disp += CI->getSExtValue() * S;
+ break;
+ }
+ if (canFoldAddIntoGEP(U, Op)) {
+ // A compatible add with a constant operand. Fold the constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ Disp += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ if (IndexReg == 0 &&
+ (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
+ (S == 1 || S == 2 || S == 4 || S == 8)) {
+ // Scaled-index addressing.
+ Scale = S;
+ IndexReg = getRegForGEPIndex(Op).first;
+ if (IndexReg == 0)
+ return false;
+ break;
+ }
+ // Unsupported.
+ goto unsupported_gep;
+ }
+ }
+
+ // Check for displacement overflow.
+ if (!isInt<32>(Disp))
+ break;
+
+ AM.IndexReg = IndexReg;
+ AM.Scale = Scale;
+ AM.Disp = (uint32_t)Disp;
+ GEPs.push_back(V);
+
+ if (const GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
+ // Ok, the GEP indices were covered by constant-offset and scaled-index
+ // addressing. Update the address state and move on to examining the base.
+ V = GEP;
+ goto redo_gep;
+ } else if (X86SelectAddress(U->getOperand(0), AM)) {
+ return true;
+ }
+
+ // If we couldn't merge the gep value into this addr mode, revert back to
+ // our address and just match the value instead of completely failing.
+ AM = SavedAM;
+
+ for (SmallVectorImpl<const Value *>::reverse_iterator
+ I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I)
+ if (handleConstantAddresses(*I, AM))
+ return true;
+
+ return false;
+ unsupported_gep:
+ // Ok, the GEP indices weren't all covered.
+ break;
+ }
+ }
+
+ return handleConstantAddresses(V, AM);
+}
+
+/// X86SelectCallAddress - Attempt to fill in an address from the given value.
+///
+bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
+ const User *U = nullptr;
+ unsigned Opcode = Instruction::UserOp1;
+ const Instruction *I = dyn_cast<Instruction>(V);
+ // Record if the value is defined in the same basic block.
+ //
+ // This information is crucial to know whether or not folding an
+ // operand is valid.
+ // Indeed, FastISel generates or reuses a virtual register for all
+ // operands of all instructions it selects. Obviously, the definition and
+ // its uses must use the same virtual register otherwise the produced
+ // code is incorrect.
+ // Before instruction selection, FunctionLoweringInfo::set sets the virtual
+ // registers for values that are alive across basic blocks. This ensures
+ // that the values are set consistently across basic blocks, even
+ // if different instruction selection mechanisms are used (e.g., a mix of
+ // SDISel and FastISel).
+ // For values local to a basic block, the instruction selection process
+ // generates these virtual registers with whatever method is appropriate
+ // for its needs. In particular, FastISel and SDISel do not share the way
+ // local virtual registers are set.
+ // Therefore, it is impossible (or at least unsafe) to share values
+ // between basic blocks unless they use the same instruction selection
+ // method, which is not guaranteed for X86.
+ // Moreover, things like hasOneUse could not be used accurately if we
+ // allowed references to values across basic blocks when they are not
+ // initially alive across basic blocks.
+ bool InMBB = true;
+ if (I) {
+ Opcode = I->getOpcode();
+ U = I;
+ InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past bitcasts if its operand is in the same BB.
+ if (InMBB)
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs if its operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
+
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints if its operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
+ }
+
+ // Handle constant address.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ // RIP-relative addresses can't have additional register operands.
+ if (Subtarget->isPICStyleRIPRel() &&
+ (AM.Base.Reg != 0 || AM.IndexReg != 0))
+ return false;
+
+ // Can't handle DLL Import.
+ if (GV->hasDLLImportStorageClass())
+ return false;
+
+ // Can't handle TLS.
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // Okay, we've committed to selecting this global. Set up the basic address.
+ AM.GV = GV;
+
+ // No ABI requires an extra load for anything other than DLLImport, which
+ // we rejected above. Return a direct reference to the global.
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ } else if (Subtarget->isPICStyleStubPIC()) {
+ AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
+ } else if (Subtarget->isPICStyleGOT()) {
+ AM.GVOpFlags = X86II::MO_GOTOFF;
+ }
+
+ return true;
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
+
+ return false;
+}
+
+
+/// X86SelectStore - Select and emit code to implement store instructions.
+bool X86FastISel::X86SelectStore(const Instruction *I) {
+ // Atomic stores need special handling.
+ const StoreInst *S = cast<StoreInst>(I);
+
+ if (S->isAtomic())
+ return false;
+
+ const Value *Val = S->getValueOperand();
+ const Value *Ptr = S->getPointerOperand();
+
+ MVT VT;
+ if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
+ return false;
+
+ unsigned Alignment = S->getAlignment();
+ unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = ABIAlignment;
+ bool Aligned = Alignment >= ABIAlignment;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(Ptr, AM))
+ return false;
+
+ return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
+}
+
+/// X86SelectRet - Select and emit code to implement ret instructions.
+bool X86FastISel::X86SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+ const X86MachineFunctionInfo *X86MFInfo =
+ FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (CC != CallingConv::C &&
+ CC != CallingConv::Fast &&
+ CC != CallingConv::X86_FastCall &&
+ CC != CallingConv::X86_64_SysV)
+ return false;
+
+ if (Subtarget->isCallingConvWin64(CC))
+ return false;
+
+ // Don't handle popping bytes on return for now.
+ if (X86MFInfo->getBytesToPopOnReturn() != 0)
+ return false;
+
+ // fastcc with -tailcallopt is intended to provide a guaranteed
+ // tail call optimization. Fastisel doesn't know how to do that.
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
+ return false;
+
+ // Let SDISel handle vararg functions.
+ if (F.isVarArg())
+ return false;
+
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_X86);
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+
+ // The calling-convention tables for x87 returns don't tell
+ // the whole story.
+ if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
+ return false;
+
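+ // FastISel assigns consecutive virtual registers to the pieces of a
+ // multi-register value, so offset the base register by the value number.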
+ unsigned SrcReg = Reg + VA.getValNo();
+ EVT SrcVT = TLI.getValueType(RV->getType());
+ EVT DstVT = VA.getValVT();
+ // Special handling for extended integers.
+ if (SrcVT != DstVT) {
+ if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
+ return false;
+
+ if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
+ return false;
+
+ assert(DstVT == MVT::i32 && "X86 should always ext to i32");
+
+ if (SrcVT == MVT::i1) {
+ if (Outs[0].Flags.isSExt())
+ return false;
+ SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
+ SrcVT = MVT::i8;
+ }
+ unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
+ ISD::SIGN_EXTEND;
+ SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
+ SrcReg, /*TODO: Kill=*/false);
+ }
+
+ // Make the copy.
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
+
+ // Add register to return instruction.
+ RetRegs.push_back(VA.getLocReg());
+ }
+
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into %rax. We also do the same with %eax for Win32.
+ if (F.hasStructRetAttr() &&
+ (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
+ unsigned Reg = X86MFInfo->getSRetReturnReg();
+ assert(Reg &&
+ "SRetReturnReg should have been set in LowerFormalArguments()!");
+ unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
+ RetRegs.push_back(RetReg);
+ }
+
+ // Now emit the RET.
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
+ return true;
+}
+
+/// X86SelectLoad - Select and emit code to implement load instructions.
+///
+bool X86FastISel::X86SelectLoad(const Instruction *I) {
+ const LoadInst *LI = cast<LoadInst>(I);
+
+ // Atomic loads need special handling.
+ if (LI->isAtomic())
+ return false;
+
+ MVT VT;
+ if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
+ return false;
+
+ const Value *Ptr = LI->getPointerOperand();
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(Ptr, AM))
+ return false;
+
+ unsigned ResultReg = 0;
+ if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
+ bool HasAVX = Subtarget->hasAVX();
+ bool X86ScalarSSEf32 = Subtarget->hasSSE1();
+ bool X86ScalarSSEf64 = Subtarget->hasSSE2();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8: return X86::CMP8rr;
+ case MVT::i16: return X86::CMP16rr;
+ case MVT::i32: return X86::CMP32rr;
+ case MVT::i64: return X86::CMP64rr;
+ case MVT::f32:
+ return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
+ case MVT::f64:
+ return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
+ }
+}
+
+/// X86ChooseCmpImmediateOpcode - If the RHS of a comparison is the constant
+/// RHSC, return an opcode that can fold it into the compare (e.g. CMP32ri);
+/// otherwise return 0.
+static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ // Otherwise, we can't fold the immediate into this comparison.
+ default: return 0;
+ case MVT::i8: return X86::CMP8ri;
+ case MVT::i16: return X86::CMP16ri;
+ case MVT::i32: return X86::CMP32ri;
+ case MVT::i64:
+ // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
+ // field.
+ if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
+ return X86::CMP64ri32;
+ return 0;
+ }
+}
+
+bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
+ EVT VT, DebugLoc CurDbgLoc) {
+ unsigned Op0Reg = getRegForValue(Op0);
+ if (Op0Reg == 0) return false;
+
+ // Handle 'null' like i32/i64 0.
+ if (isa<ConstantPointerNull>(Op1))
+ Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
+
+ // We have two options: compare with register or immediate. If the RHS of
+ // the compare is an immediate that we can fold into this compare, use
+ // CMPri, otherwise use CMPrr.
+ if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
+ .addReg(Op0Reg)
+ .addImm(Op1C->getSExtValue());
+ return true;
+ }
+ }
+
+ unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
+ if (CompareOpc == 0) return false;
+
+ unsigned Op1Reg = getRegForValue(Op1);
+ if (Op1Reg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
+ .addReg(Op0Reg)
+ .addReg(Op1Reg);
+
+ return true;
+}
+
+bool X86FastISel::X86SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ MVT VT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ // Try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+ unsigned ResultReg = 0;
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_FALSE: {
+ ResultReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
+ ResultReg);
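+ // MOV32r0 is the canonical zero idiom (an XOR under the hood); the i8
+ // result is taken from its low 8-bit subregister.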
+ ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
+ X86::sub_8bit);
+ if (!ResultReg)
+ return false;
+ break;
+ }
+ case CmpInst::FCMP_TRUE: {
+ ResultReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
+ ResultReg).addImm(1);
+ break;
+ }
+ }
+
+ if (ResultReg) {
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ const Value *LHS = CI->getOperand(0);
+ const Value *RHS = CI->getOperand(1);
+
+ // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
+ // We don't have to materialize a zero constant for this case and can just use
+ // %x again on the RHS.
+ if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
+ const auto *RHSC = dyn_cast<ConstantFP>(RHS);
+ if (RHSC && RHSC->isNullValue())
+ RHS = LHS;
+ }
+
+ // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
+ static unsigned SETFOpcTable[2][3] = {
+ { X86::SETEr, X86::SETNPr, X86::AND8rr },
+ { X86::SETNEr, X86::SETPr, X86::OR8rr }
+ };
+ unsigned *SETFOpc = nullptr;
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
+ case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
+ }
+
+ ResultReg = createResultReg(&X86::GR8RegClass);
+ if (SETFOpc) {
+ if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
+ return false;
+
+ unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
+ unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
+ FlagReg1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
+ FlagReg2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
+ ResultReg).addReg(FlagReg1).addReg(FlagReg2);
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ X86::CondCode CC;
+ bool SwapArgs;
+ std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
+ assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
+ unsigned Opc = X86::getSETFromCond(CC);
+
+ if (SwapArgs)
+ std::swap(LHS, RHS);
+
+ // Emit a compare of LHS/RHS.
+ if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
+ return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectZExt(const Instruction *I) {
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ unsigned ResultReg = getRegForValue(I->getOperand(0));
+ if (ResultReg == 0)
+ return false;
+
+ // Handle zero-extension from i1 to i8, which is common.
+ MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType());
+ if (SrcVT.SimpleTy == MVT::i1) {
+ // Set the high bits to zero.
+ ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
+ SrcVT = MVT::i8;
+
+ if (ResultReg == 0)
+ return false;
+ }
+
+ if (DstVT == MVT::i64) {
+ // Handle extension to 64-bits via sub-register shenanigans.
+ unsigned MovInst;
+
+ switch (SrcVT.SimpleTy) {
+ case MVT::i8: MovInst = X86::MOVZX32rr8; break;
+ case MVT::i16: MovInst = X86::MOVZX32rr16; break;
+ case MVT::i32: MovInst = X86::MOV32rr; break;
+ default: llvm_unreachable("Unexpected zext to i64 source type");
+ }
+
+ unsigned Result32 = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
+ .addReg(ResultReg);
+
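+ // A 32-bit register write implicitly zeroes bits 63:32, so SUBREG_TO_REG
+ // with a zero immediate yields the zero-extended 64-bit value.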
+ ResultReg = createResultReg(&X86::GR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
+ ResultReg)
+ .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
+ } else if (DstVT != MVT::i8) {
+ ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
+ ResultReg, /*Kill=*/true);
+ if (ResultReg == 0)
+ return false;
+ }
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectBranch(const Instruction *I) {
+ // Unconditional branches are selected by tablegen-generated code.
+ // Handle a conditional branch.
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Fold the common case of a conditional branch with a comparison
+ // in the same block (values defined on other blocks may not have
+ // initialized registers).
+ X86::CondCode CC;
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
+ EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+
+ // Try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
+ case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
+ }
+
+ const Value *CmpLHS = CI->getOperand(0);
+ const Value *CmpRHS = CI->getOperand(1);
+
+ // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
+ // 0.0.
+ // We don't have to materialize a zero constant for this case and can just
+ // use %x again on the RHS.
+ if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
+ const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
+ if (CmpRHSC && CmpRHSC->isNullValue())
+ CmpRHS = CmpLHS;
+ }
+
+ // Try to take advantage of fallthrough opportunities.
+ if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
+ std::swap(TrueMBB, FalseMBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
+ // code check. Instead two branch instructions are required to check all
+ // the flags. First we change the predicate to a supported condition code,
+ // which will be the first branch. Later one we will emit the second
+ // branch.
+ bool NeedExtraBranch = false;
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_OEQ:
+ std::swap(TrueMBB, FalseMBB); // fall-through
+ case CmpInst::FCMP_UNE:
+ NeedExtraBranch = true;
+ Predicate = CmpInst::FCMP_ONE;
+ break;
+ }
+
+ bool SwapArgs;
+ unsigned BranchOpc;
+ std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
+ assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
+
+ BranchOpc = X86::GetCondBranchFromCond(CC);
+ if (SwapArgs)
+ std::swap(CmpLHS, CmpRHS);
+
+ // Emit a compare of the LHS and RHS, setting the flags.
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
+ return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
+ .addMBB(TrueMBB);
+
+ // X86 requires a second branch to handle UNE (and OEQ, which is mapped
+ // to UNE above).
+ if (NeedExtraBranch) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
+ .addMBB(TrueMBB);
+ }
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+
+ // Emits an unconditional branch to the FalseBB, obtains the branch
+ // weight, and adds it to the successor list.
+ fastEmitBranch(FalseMBB, DbgLoc);
+
+ return true;
+ }
+ } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+ // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
+ // typically happen for _Bool and C++ bools.
+ MVT SourceVT;
+ if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+ isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
+ unsigned TestOpc = 0;
+ switch (SourceVT.SimpleTy) {
+ default: break;
+ case MVT::i8: TestOpc = X86::TEST8ri; break;
+ case MVT::i16: TestOpc = X86::TEST16ri; break;
+ case MVT::i32: TestOpc = X86::TEST32ri; break;
+ case MVT::i64: TestOpc = X86::TEST64ri32; break;
+ }
+ if (TestOpc) {
+ unsigned OpReg = getRegForValue(TI->getOperand(0));
+ if (OpReg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
+ .addReg(OpReg).addImm(1);
+
+ unsigned JmpOpc = X86::JNE_1;
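+ // Take advantage of a fallthrough opportunity: if TrueMBB is the layout
+ // successor, invert the condition and branch to FalseMBB instead.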
+ if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
+ std::swap(TrueMBB, FalseMBB);
+ JmpOpc = X86::JE_1;
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
+ .addMBB(TrueMBB);
+ fastEmitBranch(FalseMBB, DbgLoc);
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+ return true;
+ }
+ }
+ } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
+ // Fake request the condition, otherwise the intrinsic might be completely
+ // optimized away.
+ unsigned TmpReg = getRegForValue(BI->getCondition());
+ if (TmpReg == 0)
+ return false;
+
+ unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
+ .addMBB(TrueMBB);
+ fastEmitBranch(FalseMBB, DbgLoc);
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+ return true;
+ }
+
+ // Otherwise do a clumsy setcc and re-test it.
+ // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
+ // in an explicit cast, so make sure to handle that correctly.
+ unsigned OpReg = getRegForValue(BI->getCondition());
+ if (OpReg == 0) return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+ .addReg(OpReg).addImm(1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
+ .addMBB(TrueMBB);
+ fastEmitBranch(FalseMBB, DbgLoc);
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+ return true;
+}
+
+bool X86FastISel::X86SelectShift(const Instruction *I) {
+ unsigned CReg = 0, OpReg = 0;
+ const TargetRegisterClass *RC = nullptr;
+ if (I->getType()->isIntegerTy(8)) {
+ CReg = X86::CL;
+ RC = &X86::GR8RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR8rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR8rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL8rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(16)) {
+ CReg = X86::CX;
+ RC = &X86::GR16RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR16rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR16rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL16rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(32)) {
+ CReg = X86::ECX;
+ RC = &X86::GR32RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR32rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR32rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL32rCL; break;
+ default: return false;
+ }
+ } else if (I->getType()->isIntegerTy(64)) {
+ CReg = X86::RCX;
+ RC = &X86::GR64RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR64rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR64rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL64rCL; break;
+ default: return false;
+ }
+ } else {
+ return false;
+ }
+
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (Op0Reg == 0) return false;
+
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
+ CReg).addReg(Op1Reg);
+
+ // The shift instruction uses X86::CL. If we defined a super-register
+ // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
+ if (CReg != X86::CL)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::KILL), X86::CL)
+ .addReg(CReg, RegState::Kill);
+
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
+ .addReg(Op0Reg);
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectDivRem(const Instruction *I) {
+ const static unsigned NumTypes = 4; // i8, i16, i32, i64
+ const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
+ const static bool S = true; // IsSigned
+ const static bool U = false; // !IsSigned
+ const static unsigned Copy = TargetOpcode::COPY;
+ // For the X86 DIV/IDIV instruction, in most cases the dividend
+ // (numerator) must be in a specific register pair highreg:lowreg,
+ // producing the quotient in lowreg and the remainder in highreg.
+ // For most data types, to set up the instruction, the dividend is
+ // copied into lowreg, and lowreg is sign-extended or zero-extended
+ // into highreg. The exception is i8, where the dividend is defined
+ // as a single register rather than a register pair, and we
+ // therefore directly sign-extend or zero-extend the dividend into
+ // lowreg, instead of copying, and ignore the highreg.
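+ // For example, a 32-bit sdiv copies the dividend into EAX, sign-extends it
+ // into EDX with CDQ, issues IDIV32r, and reads the quotient from EAX (the
+ // remainder would come from EDX).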
+ const static struct DivRemEntry {
+ // The following portion depends only on the data type.
+ const TargetRegisterClass *RC;
+ unsigned LowInReg; // low part of the register pair
+ unsigned HighInReg; // high part of the register pair
+ // The following portion depends on both the data type and the operation.
+ struct DivRemResult {
+ unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
+ unsigned OpSignExtend; // Opcode for sign-extending lowreg into
+ // highreg, or copying a zero into highreg.
+ unsigned OpCopy; // Opcode for copying dividend into lowreg, or
+ // zero/sign-extending into lowreg for i8.
+ unsigned DivRemResultReg; // Register containing the desired result.
+ bool IsOpSigned; // Whether to use signed or unsigned form.
+ } ResultTable[NumOps];
+ } OpTable[NumTypes] = {
+ { &X86::GR8RegClass, X86::AX, 0, {
+ { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
+ { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
+ { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
+ { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
+ }
+ }, // i8
+ { &X86::GR16RegClass, X86::AX, X86::DX, {
+ { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
+ { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
+ { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
+ { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
+ }
+ }, // i16
+ { &X86::GR32RegClass, X86::EAX, X86::EDX, {
+ { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
+ { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
+ { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
+ { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
+ }
+ }, // i32
+ { &X86::GR64RegClass, X86::RAX, X86::RDX, {
+ { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
+ { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
+ { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
+ { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
+ }
+ }, // i64
+ };
+
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ unsigned TypeIndex, OpIndex;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::i8: TypeIndex = 0; break;
+ case MVT::i16: TypeIndex = 1; break;
+ case MVT::i32: TypeIndex = 2; break;
+ case MVT::i64: TypeIndex = 3;
+ if (!Subtarget->is64Bit())
+ return false;
+ break;
+ }
+
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unexpected div/rem opcode");
+ case Instruction::SDiv: OpIndex = 0; break;
+ case Instruction::SRem: OpIndex = 1; break;
+ case Instruction::UDiv: OpIndex = 2; break;
+ case Instruction::URem: OpIndex = 3; break;
+ }
+
+ const DivRemEntry &TypeEntry = OpTable[TypeIndex];
+ const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (Op0Reg == 0)
+ return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0)
+ return false;
+
+ // Move op0 into low-order input register.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
+ // Zero-extend or sign-extend into high-order input register.
+ if (OpEntry.OpSignExtend) {
+ if (OpEntry.IsOpSigned)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(OpEntry.OpSignExtend));
+ else {
+ unsigned Zero32 = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(X86::MOV32r0), Zero32);
+
+ // Copy the zero into the appropriate sub/super/identical physical
+ // register. Unfortunately the operations needed are not uniform enough
+ // to fit neatly into the table above.
+ if (VT.SimpleTy == MVT::i16) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Copy), TypeEntry.HighInReg)
+ .addReg(Zero32, 0, X86::sub_16bit);
+ } else if (VT.SimpleTy == MVT::i32) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Copy), TypeEntry.HighInReg)
+ .addReg(Zero32);
+ } else if (VT.SimpleTy == MVT::i64) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
+ .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
+ }
+ }
+ }
+ // Generate the DIV/IDIV instruction.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
+ // For i8 remainder, we can't reference AH directly, as we'll end
+ // up with bogus copies like %R9B = COPY %AH. Reference AX
+ // instead to prevent AH references in a REX instruction.
+ //
+ // The current assumption of the fast register allocator is that isel
+ // won't generate explicit references to the GPR8_NOREX registers. If
+ // the allocator and/or the backend get enhanced to be more robust in
+ // that regard, this can be, and should be, removed.
+ unsigned ResultReg = 0;
+ if ((I->getOpcode() == Instruction::SRem ||
+ I->getOpcode() == Instruction::URem) &&
+ OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
+ unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
+ unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Copy), SourceSuperReg).addReg(X86::AX);
+
+ // Shift AX right by 8 bits instead of using AH.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
+ ResultSuperReg).addReg(SourceSuperReg).addImm(8);
+
+ // Now reference the 8-bit subreg of the result.
+ ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
+ /*Kill=*/true, X86::sub_8bit);
+ }
+ // Copy the result out of the physreg if we haven't already.
+ if (!ResultReg) {
+ ResultReg = createResultReg(TypeEntry.RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
+ .addReg(OpEntry.DivRemResultReg);
+ }
+ updateValueMap(I, ResultReg);
+
+ return true;
+}
+
+/// \brief Emit a conditional move instruction (if they are supported) to lower
+/// the select.
+bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
+ // Check if the subtarget supports these instructions.
+ if (!Subtarget->hasCMov())
+ return false;
+
+ // FIXME: Add support for i8.
+ if (RetVT < MVT::i16 || RetVT > MVT::i64)
+ return false;
+
+ const Value *Cond = I->getOperand(0);
+ const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+ bool NeedTest = true;
+ X86::CondCode CC = X86::COND_NE;
+
+ // Optimize conditions coming from a compare if both instructions are in the
+ // same basic block (values defined in other basic blocks may not have
+ // initialized registers).
+ const auto *CI = dyn_cast<CmpInst>(Cond);
+ if (CI && (CI->getParent() == I->getParent())) {
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+
+ // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
+ static unsigned SETFOpcTable[2][3] = {
+ { X86::SETNPr, X86::SETEr , X86::TEST8rr },
+ { X86::SETPr, X86::SETNEr, X86::OR8rr }
+ };
+ unsigned *SETFOpc = nullptr;
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_OEQ:
+ SETFOpc = &SETFOpcTable[0][0];
+ Predicate = CmpInst::ICMP_NE;
+ break;
+ case CmpInst::FCMP_UNE:
+ SETFOpc = &SETFOpcTable[1][0];
+ Predicate = CmpInst::ICMP_NE;
+ break;
+ }
+
+ bool NeedSwap;
+ std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate);
+ assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
+
+ const Value *CmpLHS = CI->getOperand(0);
+ const Value *CmpRHS = CI->getOperand(1);
+ if (NeedSwap)
+ std::swap(CmpLHS, CmpRHS);
+
+ EVT CmpVT = TLI.getValueType(CmpLHS->getType());
+ // Emit a compare of the LHS and RHS, setting the flags.
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
+ return false;
+
+ if (SETFOpc) {
+ unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
+ unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
+ FlagReg1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
+ FlagReg2);
+ auto const &II = TII.get(SETFOpc[2]);
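+ // OR8rr defines a result register, whereas TEST8rr only sets EFLAGS, so a
+ // scratch destination is needed only when the opcode has a def.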
+ if (II.getNumDefs()) {
+ unsigned TmpReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
+ .addReg(FlagReg2).addReg(FlagReg1);
+ } else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(FlagReg2).addReg(FlagReg1);
+ }
+ }
+ NeedTest = false;
+ } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
+ // Fake request the condition, otherwise the intrinsic might be completely
+ // optimized away.
+ unsigned TmpReg = getRegForValue(Cond);
+ if (TmpReg == 0)
+ return false;
+
+ NeedTest = false;
+ }
+
+ if (NeedTest) {
+ // Selects operate on i1; however, CondReg is 8 bits wide and may contain
+ // garbage. Only the least significant bit is supposed to be accurate. If
+ // we read more than the lsb, we may see non-zero values where the lsb is
+ // zero. Therefore, we have to truncate CondReg to i1 for the select.
+ // This is achieved by performing TEST against 1.
+ unsigned CondReg = getRegForValue(Cond);
+ if (CondReg == 0)
+ return false;
+ bool CondIsKill = hasTrivialKill(Cond);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+ .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
+ }
+
+ const Value *LHS = I->getOperand(1);
+ const Value *RHS = I->getOperand(2);
+
+ unsigned RHSReg = getRegForValue(RHS);
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ if (!LHSReg || !RHSReg)
+ return false;
+
+ unsigned Opc = X86::getCMovFromCond(CC, RC->getSize());
+ unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
+ LHSReg, LHSIsKill);
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+/// \brief Emit SSE instructions to lower the select.
+///
+/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
+/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
+/// SSE instructions are available.
+bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
+ // Optimize conditions coming from a compare if both instructions are in the
+ // same basic block (values defined in other basic blocks may not have
+ // initialized registers).
+ const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
+ if (!CI || (CI->getParent() != I->getParent()))
+ return false;
+
+ if (I->getType() != CI->getOperand(0)->getType() ||
+ !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
+ (Subtarget->hasSSE2() && RetVT == MVT::f64)))
+ return false;
+
+ const Value *CmpLHS = CI->getOperand(0);
+ const Value *CmpRHS = CI->getOperand(1);
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+
+ // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
+ // We don't have to materialize a zero constant for this case and can just use
+ // %x again on the RHS.
+ if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
+ const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
+ if (CmpRHSC && CmpRHSC->isNullValue())
+ CmpRHS = CmpLHS;
+ }
+
+ unsigned CC;
+ bool NeedSwap;
+ std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
+ if (CC > 7)
+ return false;
+
+ if (NeedSwap)
+ std::swap(CmpLHS, CmpRHS);
+
+ static unsigned OpcTable[2][2][4] = {
+ { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
+ { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } },
+ { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr },
+ { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } }
+ };
+
+ bool HasAVX = Subtarget->hasAVX();
+ unsigned *Opc = nullptr;
+ switch (RetVT.SimpleTy) {
+ default: return false;
+ case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
+ case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
+ }
+
+ const Value *LHS = I->getOperand(1);
+ const Value *RHS = I->getOperand(2);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ unsigned RHSReg = getRegForValue(RHS);
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ unsigned CmpLHSReg = getRegForValue(CmpLHS);
+ bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
+
+ unsigned CmpRHSReg = getRegForValue(CmpRHS);
+ bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
+
+ if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
+ return false;
+
+ const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+ unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+ unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+ LHSReg, LHSIsKill);
+ unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+ RHSReg, RHSIsKill);
+ unsigned ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
+ AndReg, /*IsKill=*/true);
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
+ // These are pseudo CMOV instructions and will be later expanded into control-
+ // flow.
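+ // (Illustrative: the expansion, done later by the target's custom inserter,
+ // replaces each pseudo CMOV with a conditional branch around a copy and a
+ // PHI that merges the two values.)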
+ unsigned Opc;
+ switch (RetVT.SimpleTy) {
+ default: return false;
+ case MVT::i8: Opc = X86::CMOV_GR8; break;
+ case MVT::i16: Opc = X86::CMOV_GR16; break;
+ case MVT::i32: Opc = X86::CMOV_GR32; break;
+ case MVT::f32: Opc = X86::CMOV_FR32; break;
+ case MVT::f64: Opc = X86::CMOV_FR64; break;
+ }
+
+ const Value *Cond = I->getOperand(0);
+ X86::CondCode CC = X86::COND_NE;
+
+ // Optimize conditions coming from a compare if both instructions are in the
+ // same basic block (values defined in other basic blocks may not have
+ // initialized registers).
+ const auto *CI = dyn_cast<CmpInst>(Cond);
+ if (CI && (CI->getParent() == I->getParent())) {
+ bool NeedSwap;
+ std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate());
+ if (CC > X86::LAST_VALID_COND)
+ return false;
+
+ const Value *CmpLHS = CI->getOperand(0);
+ const Value *CmpRHS = CI->getOperand(1);
+
+ if (NeedSwap)
+ std::swap(CmpLHS, CmpRHS);
+
+ EVT CmpVT = TLI.getValueType(CmpLHS->getType());
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
+ return false;
+ } else {
+ unsigned CondReg = getRegForValue(Cond);
+ if (CondReg == 0)
+ return false;
+ bool CondIsKill = hasTrivialKill(Cond);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+ .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
+ }
+
+ const Value *LHS = I->getOperand(1);
+ const Value *RHS = I->getOperand(2);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ unsigned RHSReg = getRegForValue(RHS);
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ if (!LHSReg || !RHSReg)
+ return false;
+
+ const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+
+ unsigned ResultReg =
+ fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectSelect(const Instruction *I) {
+ MVT RetVT;
+ if (!isTypeLegal(I->getType(), RetVT))
+ return false;
+
+ // Check if we can fold the select.
+ if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+ const Value *Opnd = nullptr;
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
+ case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
+ }
+ // No need for a select anymore - this is an unconditional move.
+ if (Opnd) {
+ unsigned OpReg = getRegForValue(Opnd);
+ if (OpReg == 0)
+ return false;
+ bool OpIsKill = hasTrivialKill(Opnd);
+ const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(OpReg, getKillRegState(OpIsKill));
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ // First try to use real conditional move instructions.
+ if (X86FastEmitCMoveSelect(RetVT, I))
+ return true;
+
+ // Try to use a sequence of SSE instructions to simulate a conditional move.
+ if (X86FastEmitSSESelect(RetVT, I))
+ return true;
+
+ // Fall-back to pseudo conditional move instructions, which will be later
+ // converted to control-flow.
+ if (X86FastEmitPseudoSelect(RetVT, I))
+ return true;
+
+ return false;
+}
+
+bool X86FastISel::X86SelectFPExt(const Instruction *I) {
+ // fpext from float to double.
+ if (X86ScalarSSEf64 &&
+ I->getType()->isDoubleTy()) {
+ const Value *V = I->getOperand(0);
+ if (V->getType()->isFloatTy()) {
+ unsigned OpReg = getRegForValue(V);
+ if (OpReg == 0) return false;
+ unsigned ResultReg = createResultReg(&X86::FR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(X86::CVTSS2SDrr), ResultReg)
+ .addReg(OpReg);
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
+ if (X86ScalarSSEf64) {
+ if (I->getType()->isFloatTy()) {
+ const Value *V = I->getOperand(0);
+ if (V->getType()->isDoubleTy()) {
+ unsigned OpReg = getRegForValue(V);
+ if (OpReg == 0) return false;
+ unsigned ResultReg = createResultReg(&X86::FR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(X86::CVTSD2SSrr), ResultReg)
+ .addReg(OpReg);
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86SelectTrunc(const Instruction *I) {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+
+ // This code only handles truncation to byte.
+ if (DstVT != MVT::i8 && DstVT != MVT::i1)
+ return false;
+ if (!TLI.isTypeLegal(SrcVT))
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (SrcVT == MVT::i8) {
+ // Truncate from i8 to i1; no code needed.
+ updateValueMap(I, InputReg);
+ return true;
+ }
+
+ if (!Subtarget->is64Bit()) {
+ // If we're on x86-32, we can't extract an i8 from a general register.
+ // First issue a copy to GR16_ABCD or GR32_ABCD.
+ const TargetRegisterClass *CopyRC =
+ (SrcVT == MVT::i16) ? &X86::GR16_ABCDRegClass : &X86::GR32_ABCDRegClass;
+ unsigned CopyReg = createResultReg(CopyRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg);
+ InputReg = CopyReg;
+ }
+
+ // Issue an extract_subreg.
+ unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
+ InputReg, /*Kill=*/true,
+ X86::sub_8bit);
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::IsMemcpySmall(uint64_t Len) {
+ return Len <= (Subtarget->is64Bit() ? 32 : 16);
+}
+
+bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len) {
+
+ // Make sure we don't bloat code by inlining very large memcpy's.
+ if (!IsMemcpySmall(Len))
+ return false;
+
+ bool i64Legal = Subtarget->is64Bit();
+
+ // We don't care about alignment here since we just emit integer accesses.
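+ // For example (illustrative), a 7-byte copy on x86-64 decomposes into one
+ // i32 access, one i16 access, and one i8 access.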
+ while (Len) {
+ MVT VT;
+ if (Len >= 8 && i64Legal)
+ VT = MVT::i64;
+ else if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else
+ VT = MVT::i8;
+
+ unsigned Reg;
+ bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
+ RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
+ assert(RV && "Failed to emit load or store??");
+
+ unsigned Size = VT.getSizeInBits()/8;
+ Len -= Size;
+ DestAM.Disp += Size;
+ SrcAM.Disp += Size;
+ }
+
+ return true;
+}
+
+bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
+ // FIXME: Handle more intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::frameaddress: {
+ Type *RetTy = II->getCalledFunction()->getReturnType();
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ unsigned Opc;
+ const TargetRegisterClass *RC = nullptr;
+
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Invalid result type for frameaddress.");
+ case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
+ case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
+ }
+
+ // This needs to be set before we call getPtrSizedFrameRegister, otherwise
+ // we get the wrong frame register.
+ MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+ unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*(FuncInfo.MF));
+ assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
+ (FrameReg == X86::EBP && VT == MVT::i32)) &&
+ "Invalid Frame Register!");
+
+ // Always make a copy of the frame register to a vreg first, so that we
+ // never directly reference the frame register (the TwoAddressInstruction-
+ // Pass doesn't like that).
+ unsigned SrcReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
+
+ // Now recursively load from the frame address.
+ // movq (%rbp), %rax
+ // movq (%rax), %rax
+ // movq (%rax), %rax
+ // ...
+ unsigned DestReg;
+ unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
+ while (Depth--) {
+ DestReg = createResultReg(RC);
+ addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), DestReg), SrcReg);
+ SrcReg = DestReg;
+ }
+
+ updateValueMap(II, SrcReg);
+ return true;
+ }
+ case Intrinsic::memcpy: {
+ const MemCpyInst *MCI = cast<MemCpyInst>(II);
+ // Don't handle volatile or variable length memcpys.
+ if (MCI->isVolatile())
+ return false;
+
+ if (isa<ConstantInt>(MCI->getLength())) {
+ // Small memcpy's are common enough that we want to do them
+ // without a call if possible.
+ uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
+ if (IsMemcpySmall(Len)) {
+ X86AddressMode DestAM, SrcAM;
+ if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
+ !X86SelectAddress(MCI->getRawSource(), SrcAM))
+ return false;
+ TryEmitSmallMemcpy(DestAM, SrcAM, Len);
+ return true;
+ }
+ }
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
+ return false;
+
+ if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
+ return false;
+
+ return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2);
+ }
+ case Intrinsic::memset: {
+ const MemSetInst *MSI = cast<MemSetInst>(II);
+
+ if (MSI->isVolatile())
+ return false;
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
+ return false;
+
+ if (MSI->getDestAddressSpace() > 255)
+ return false;
+
+ return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
+ }
+ case Intrinsic::stackprotector: {
+ // Emit code to store the stack guard onto the stack.
+ EVT PtrTy = TLI.getPointerTy();
+
+ const Value *Op1 = II->getArgOperand(0); // The guard's value.
+ const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
+
+ MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
+
+ // Grab the frame index.
+ X86AddressMode AM;
+ if (!X86SelectAddress(Slot, AM)) return false;
+ if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
+ return true;
+ }
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
+ X86AddressMode AM;
+ assert(DI->getAddress() && "Null address should be checked earlier!");
+ if (!X86SelectAddress(DI->getAddress(), AM))
+ return false;
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ // FIXME may need to add RegState::Debug to any registers produced,
+ // although ESP/EBP should be the only ones at the moment.
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
+ .addImm(0)
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
+ return true;
+ }
+ case Intrinsic::trap: {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
+ return true;
+ }
+ case Intrinsic::sqrt: {
+ if (!Subtarget->hasSSE1())
+ return false;
+
+ Type *RetTy = II->getCalledFunction()->getReturnType();
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
+ // is not generated by FastISel yet.
+ // FIXME: Update this code once tablegen can handle it.
+ static const unsigned SqrtOpc[2][2] = {
+ {X86::SQRTSSr, X86::VSQRTSSr},
+ {X86::SQRTSDr, X86::VSQRTSDr}
+ };
+ bool HasAVX = Subtarget->hasAVX();
+ unsigned Opc;
+ const TargetRegisterClass *RC;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
+ case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
+ }
+
+ const Value *SrcVal = II->getArgOperand(0);
+ unsigned SrcReg = getRegForValue(SrcVal);
+
+ if (SrcReg == 0)
+ return false;
+
+ unsigned ImplicitDefReg = 0;
+ if (HasAVX) {
+ ImplicitDefReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+ }
+
+ unsigned ResultReg = createResultReg(RC);
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+ ResultReg);
+
+ if (ImplicitDefReg)
+ MIB.addReg(ImplicitDefReg);
+
+ MIB.addReg(SrcReg);
+
+ updateValueMap(II, ResultReg);
+ return true;
+ }
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ // This implements the basic lowering of the xalu with overflow intrinsics
+ // into add/sub/mul followed by either seto or setb.
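+ // For example (illustrative), @llvm.uadd.with.overflow.i32 on two register
+ // operands becomes an ADD32rr followed by a SETBr that captures the carry.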
+ const Function *Callee = II->getCalledFunction();
+ auto *Ty = cast<StructType>(Callee->getReturnType());
+ Type *RetTy = Ty->getTypeAtIndex(0U);
+ Type *CondTy = Ty->getTypeAtIndex(1);
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ if (VT < MVT::i8 || VT > MVT::i64)
+ return false;
+
+ const Value *LHS = II->getArgOperand(0);
+ const Value *RHS = II->getArgOperand(1);
+
+ // Canonicalize immediate to the RHS.
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
+ isCommutativeIntrinsic(II))
+ std::swap(LHS, RHS);
+
+ bool UseIncDec = false;
+ if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
+ UseIncDec = true;
+
+ unsigned BaseOpc, CondOpc;
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::sadd_with_overflow:
+ BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
+ CondOpc = X86::SETOr;
+ break;
+ case Intrinsic::uadd_with_overflow:
+ BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
+ case Intrinsic::ssub_with_overflow:
+ BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
+ CondOpc = X86::SETOr;
+ break;
+ case Intrinsic::usub_with_overflow:
+ BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
+ case Intrinsic::smul_with_overflow:
+ BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
+ case Intrinsic::umul_with_overflow:
+ BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
+ }
+
+ unsigned LHSReg = getRegForValue(LHS);
+ if (LHSReg == 0)
+ return false;
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ unsigned ResultReg = 0;
+ // Check if we have an immediate version.
+ if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
+ static const unsigned Opc[2][4] = {
+ { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
+ { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
+ };
+
+ if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
+ ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ bool IsDec = BaseOpc == X86ISD::DEC;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
+ .addReg(LHSReg, getKillRegState(LHSIsKill));
+ } else
+ ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
+ CI->getZExtValue());
+ }
+
+ unsigned RHSReg;
+ bool RHSIsKill;
+ if (!ResultReg) {
+ RHSReg = getRegForValue(RHS);
+ if (RHSReg == 0)
+ return false;
+ RHSIsKill = hasTrivialKill(RHS);
+ ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill);
+ }
+
+ // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
+ // it manually.
+ if (BaseOpc == X86ISD::UMUL && !ResultReg) {
+ static const unsigned MULOpc[] =
+ { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
+ static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
+ // First copy the first operand into AL/AX/EAX/RAX, which is the implicit
+ // input to the corresponding X86::MUL*r instruction.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
+ .addReg(LHSReg, getKillRegState(LHSIsKill));
+ ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
+ TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
+ } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
+ static const unsigned MULOpc[] =
+ { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
+ if (VT == MVT::i8) {
+ // Copy the first operand into AL, which is an implicit input to the
+ // X86::IMUL8r instruction.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), X86::AL)
+ .addReg(LHSReg, getKillRegState(LHSIsKill));
+ ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
+ RHSIsKill);
+ } else
+ ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
+ TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
+ RHSReg, RHSIsKill);
+ }
+
+ if (!ResultReg)
+ return false;
+
+ unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
+ assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
+ ResultReg2);
+
+ updateValueMap(II, ResultReg, 2);
+ return true;
+ }
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64: {
+ bool IsInputDouble;
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Unexpected intrinsic.");
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ if (!Subtarget->hasSSE1())
+ return false;
+ IsInputDouble = false;
+ break;
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ if (!Subtarget->hasSSE2())
+ return false;
+ IsInputDouble = true;
+ break;
+ }
+
+ Type *RetTy = II->getCalledFunction()->getReturnType();
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ static const unsigned CvtOpc[2][2][2] = {
+ { { X86::CVTTSS2SIrr, X86::VCVTTSS2SIrr },
+ { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } },
+ { { X86::CVTTSD2SIrr, X86::VCVTTSD2SIrr },
+ { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr } }
+ };
+ bool HasAVX = Subtarget->hasAVX();
+ unsigned Opc;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected result type.");
+ case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break;
+ case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break;
+ }
+
+ // Check if we can fold insertelement instructions into the convert.
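+ // For example (illustrative), given
+ //   %v = insertelement <4 x float> undef, float %x, i32 0
+ // the conversion can read %x directly instead of going through %v.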
+ const Value *Op = II->getArgOperand(0);
+ while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
+ const Value *Index = IE->getOperand(2);
+ if (!isa<ConstantInt>(Index))
+ break;
+ unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+
+ if (Idx == 0) {
+ Op = IE->getOperand(1);
+ break;
+ }
+ Op = IE->getOperand(0);
+ }
+
+ unsigned Reg = getRegForValue(Op);
+ if (Reg == 0)
+ return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addReg(Reg);
+
+ updateValueMap(II, ResultReg);
+ return true;
+ }
+ }
+}
+
+bool X86FastISel::fastLowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const Function *F = FuncInfo.Fn;
+ if (F->isVarArg())
+ return false;
+
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC != CallingConv::C)
+ return false;
+
+ if (Subtarget->isCallingConvWin64(CC))
+ return false;
+
+ if (!Subtarget->is64Bit())
+ return false;
+
+ // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments and up
+ // to 8 f32/f64 arguments.
+ unsigned GPRCnt = 0;
+ unsigned FPRCnt = 0;
+ unsigned Idx = 0;
+ for (auto const &Arg : F->args()) {
+ // The first argument is at index 1.
+ ++Idx;
+ if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::Nest))
+ return false;
+
+ Type *ArgTy = Arg.getType();
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+ return false;
+
+ EVT ArgVT = TLI.getValueType(ArgTy);
+ if (!ArgVT.isSimple()) return false;
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i32:
+ case MVT::i64:
+ ++GPRCnt;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (!Subtarget->hasSSE1())
+ return false;
+ ++FPRCnt;
+ break;
+ }
+
+ if (GPRCnt > 6)
+ return false;
+
+ if (FPRCnt > 8)
+ return false;
+ }
+
+ static const MCPhysReg GPR32ArgRegs[] = {
+ X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
+ };
+ static const MCPhysReg GPR64ArgRegs[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
+ };
+ static const MCPhysReg XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+
+ unsigned GPRIdx = 0;
+ unsigned FPRIdx = 0;
+ for (auto const &Arg : F->args()) {
+ MVT VT = TLI.getSimpleValueType(Arg.getType());
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ unsigned SrcReg;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type.");
+ case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
+ case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
+ case MVT::f32: // fall-through
+ case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
+ }
+ unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+ // Without this, EmitLiveInCopies may eliminate the livein if its only
+ // use is a bitcast (which isn't turned into an instruction).
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(DstReg, getKillRegState(true));
+ updateValueMap(&Arg, ResultReg);
+ }
+ return true;
+}
+
+static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget,
+ CallingConv::ID CC,
+ ImmutableCallSite *CS) {
+ if (Subtarget->is64Bit())
+ return 0;
+ if (Subtarget->getTargetTriple().isOSMSVCRT())
+ return 0;
+ if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
+ CC == CallingConv::HiPE)
+ return 0;
+ if (CS && !CS->paramHasAttr(1, Attribute::StructRet))
+ return 0;
+ if (CS && CS->paramHasAttr(1, Attribute::InReg))
+ return 0;
+ return 4;
+}
+
+bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
+ auto &OutVals = CLI.OutVals;
+ auto &OutFlags = CLI.OutFlags;
+ auto &OutRegs = CLI.OutRegs;
+ auto &Ins = CLI.Ins;
+ auto &InRegs = CLI.InRegs;
+ CallingConv::ID CC = CLI.CallConv;
+ bool &IsTailCall = CLI.IsTailCall;
+ bool IsVarArg = CLI.IsVarArg;
+ const Value *Callee = CLI.Callee;
+ const char *SymName = CLI.SymName;
+
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsWin64 = Subtarget->isCallingConvWin64(CC);
+
+ // Handle only C, fastcc, and webkit_js calling conventions for now.
+ switch (CC) {
+ default: return false;
+ case CallingConv::C:
+ case CallingConv::Fast:
+ case CallingConv::WebKit_JS:
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_64_Win64:
+ case CallingConv::X86_64_SysV:
+ break;
+ }
+
+ // Allow SelectionDAG isel to handle tail calls.
+ if (IsTailCall)
+ return false;
+
+ // fastcc with -tailcallopt is intended to provide a guaranteed
+ // tail call optimization. FastISel doesn't know how to do that.
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
+ return false;
+
+ // Don't know how to handle Win64 varargs yet. Nothing special needed for
+ // x86-32. Special handling for x86-64 is implemented.
+ if (IsVarArg && IsWin64)
+ return false;
+
+ // Don't know about inalloca yet.
+ if (CLI.CS && CLI.CS->hasInAllocaArgument())
+ return false;
+
+ // Fast-isel doesn't know about callee-pop yet.
+ if (X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
+ TM.Options.GuaranteedTailCallOpt))
+ return false;
+
+ SmallVector<MVT, 16> OutVTs;
+ SmallVector<unsigned, 16> ArgRegs;
+
+ // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
+ // instruction. This is safe because it is common to all FastISel supported
+ // calling conventions on x86.
+ for (int i = 0, e = OutVals.size(); i != e; ++i) {
+ Value *&Val = OutVals[i];
+ ISD::ArgFlagsTy Flags = OutFlags[i];
+ if (auto *CI = dyn_cast<ConstantInt>(Val)) {
+ if (CI->getBitWidth() < 32) {
+ if (Flags.isSExt())
+ Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
+ else
+ Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
+ }
+ }
+
+ // Passing bools around ends up doing a trunc to i1 and passing it.
+ // Codegen this as an argument + "and 1".
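+ // For example (illustrative), if an argument is %b = trunc i32 %x to i1,
+ // we reuse the register holding %x and emit an AND with 1 instead of
+ // materializing the truncation separately.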
+ MVT VT;
+ auto *TI = dyn_cast<TruncInst>(Val);
+ unsigned ResultReg;
+ if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
+ (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
+ TI->hasOneUse()) {
+ Value *PrevVal = TI->getOperand(0);
+ ResultReg = getRegForValue(PrevVal);
+
+ if (!ResultReg)
+ return false;
+
+ if (!isTypeLegal(PrevVal->getType(), VT))
+ return false;
+
+ ResultReg =
+ fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
+ } else {
+ if (!isTypeLegal(Val->getType(), VT))
+ return false;
+ ResultReg = getRegForValue(Val);
+ }
+
+ if (!ResultReg)
+ return false;
+
+ ArgRegs.push_back(ResultReg);
+ OutVTs.push_back(VT);
+ }
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
+
+ // Allocate shadow area for Win64
+ if (IsWin64)
+ CCInfo.AllocateStack(32, 8);
+
+ CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
+ .addImm(NumBytes).addImm(0);
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign const &VA = ArgLocs[i];
+ const Value *ArgVal = OutVals[VA.getValNo()];
+ MVT ArgVT = OutVTs[VA.getValNo()];
+
+ if (ArgVT == MVT::x86mmx)
+ return false;
+
+ unsigned ArgReg = ArgRegs[VA.getValNo()];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
+ bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
+ ArgVT, ArgReg);
+ assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::ZExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
+ bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
+ ArgVT, ArgReg);
+ assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::AExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
+ bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
+ ArgVT, ArgReg);
+ if (!Emitted)
+ Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
+ ArgVT, ArgReg);
+ if (!Emitted)
+ Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
+ ArgVT, ArgReg);
+
+ assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::BCvt: {
+ ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
+ /*TODO: Kill=*/false);
+ assert(ArgReg && "Failed to emit a bitcast!");
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::VExt:
+ // VExt has not been implemented, so this should be impossible to reach
+ // for now. Once it is implemented, fall back to SelectionDAG isel.
+ return false;
+ case CCValAssign::AExtUpper:
+ case CCValAssign::SExtUpper:
+ case CCValAssign::ZExtUpper:
+ case CCValAssign::FPExt:
+ llvm_unreachable("Unexpected loc info!");
+ case CCValAssign::Indirect:
+ // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
+ // support this.
+ return false;
+ }
+
+ if (VA.isRegLoc()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
+ OutRegs.push_back(VA.getLocReg());
+ } else {
+ assert(VA.isMemLoc());
+
+ // Don't emit stores for undef values.
+ if (isa<UndefValue>(ArgVal))
+ continue;
+
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ X86AddressMode AM;
+ AM.Base.Reg = RegInfo->getStackRegister();
+ AM.Disp = LocMemOffset;
+ ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
+ unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getStack(LocMemOffset), MachineMemOperand::MOStore,
+ ArgVT.getStoreSize(), Alignment);
+ if (Flags.isByVal()) {
+ X86AddressMode SrcAM;
+ SrcAM.Base.Reg = ArgReg;
+ if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
+ return false;
+ } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
+ // If this is a really simple value, emit this with the Value* version
+ // of X86FastEmitStore. If it isn't simple, we don't want to do this,
+ // as it can cause us to reevaluate the argument.
+ if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
+ return false;
+ } else {
+ bool ValIsKill = hasTrivialKill(ArgVal);
+ if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
+ return false;
+ }
+ }
+ }
+
+ // ELF / PIC requires the GOT pointer to be in the EBX register before
+ // function calls made via the PLT.
+ if (Subtarget->isPICStyleGOT()) {
+ unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
+ }
+
+ if (Is64Bit && IsVarArg && !IsWin64) {
+ // From AMD64 ABI document:
+ // For calls that may call functions that use varargs or stdargs
+ // (prototype-less calls or calls to functions containing ellipsis (...) in
+ // the declaration) %al is used as hidden argument to specify the number
+ // of SSE registers used. The contents of %al do not need to match exactly
+ // the number of registers, but must be an upper bound on the number of SSE
+ // registers used and is in the range 0 - 8 inclusive.
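+ // For example (illustrative), a vararg call that passes two doubles in XMM
+ // registers is preceded by "movb $2, %al".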
+
+ // Count the number of XMM registers allocated.
+ static const MCPhysReg XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ assert((Subtarget->hasSSE1() || !NumXMMRegs)
+ && "SSE registers cannot be used when SSE is disabled");
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
+ X86::AL).addImm(NumXMMRegs);
+ }
+
+ // Materialize callee address in a register. FIXME: GV address can be
+ // handled with a CALLpcrel32 instead.
+ X86AddressMode CalleeAM;
+ if (!X86SelectCallAddress(Callee, CalleeAM))
+ return false;
+
+ unsigned CalleeOp = 0;
+ const GlobalValue *GV = nullptr;
+ if (CalleeAM.GV != nullptr) {
+ GV = CalleeAM.GV;
+ } else if (CalleeAM.Base.Reg != 0) {
+ CalleeOp = CalleeAM.Base.Reg;
+ } else
+ return false;
+
+ // Issue the call.
+ MachineInstrBuilder MIB;
+ if (CalleeOp) {
+ // Register-indirect call.
+ unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
+ .addReg(CalleeOp);
+ } else {
+ // Direct call.
+ assert(GV && "Not a direct call");
+ unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+
+ // See if we need any target-specific flags on the GV operand.
+ unsigned char OpFlags = 0;
+
+ // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+ // external symbols must go through the PLT in PIC mode. If the symbol
+ // has hidden or protected visibility, or if it is static or local, then
+ // we don't need to use the PLT - we can directly call it.
+ if (Subtarget->isTargetELF() &&
+ TM.getRelocationModel() == Reloc::PIC_ &&
+ GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+ OpFlags = X86II::MO_PLT;
+ } else if (Subtarget->isPICStyleStubAny() &&
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = X86II::MO_DARWIN_STUB;
+ }
+
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
+ if (SymName)
+ MIB.addExternalSymbol(SymName, OpFlags);
+ else
+ MIB.addGlobalAddress(GV, 0, OpFlags);
+ }
+
+ // Add a register mask operand representing the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
+ // Add an implicit use GOT pointer in EBX.
+ if (Subtarget->isPICStyleGOT())
+ MIB.addReg(X86::EBX, RegState::Implicit);
+
+ if (Is64Bit && IsVarArg && !IsWin64)
+ MIB.addReg(X86::AL, RegState::Implicit);
+
+ // Add implicit physical register uses to the call.
+ for (auto Reg : OutRegs)
+ MIB.addReg(Reg, RegState::Implicit);
+
+ // Issue CALLSEQ_END
+ unsigned NumBytesForCalleeToPop =
+ computeBytesPoppedByCallee(Subtarget, CC, CLI.CS);
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
+
+ // Now handle call return values.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
+ CLI.RetTy->getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
+
+ // Copy all of the result registers out of their specified physreg.
+ unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ EVT CopyVT = VA.getValVT();
+ unsigned CopyReg = ResultReg + i;
+
+ // If this is x86-64, and we disabled SSE, we can't return FP values
+ if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
+ ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
+ report_fatal_error("SSE register return with SSE disabled");
+ }
+
+ // If we prefer to use the value in xmm registers, copy it out as f80 and
+ // use a truncate to move it from fp stack reg to xmm reg.
+ if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
+ isScalarFPTypeInSSEReg(VA.getValVT())) {
+ CopyVT = MVT::f80;
+ CopyReg = createResultReg(&X86::RFP80RegClass);
+ }
+
+ // Copy out the result.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg());
+ InRegs.push_back(VA.getLocReg());
+
+ // Round the f80 to the right size, which also moves it to the appropriate
+ // xmm register. This is accomplished by storing the f80 value in memory
+ // and then loading it back.
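+ // Illustrative sequence for an f32 value returned in %st(0) (roughly):
+ //   fstps <stack-slot>         ; spill, rounding f80 down to f32
+ //   movss <stack-slot>, <result>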
+ if (CopyVT != VA.getValVT()) {
+ EVT ResVT = VA.getValVT();
+ unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
+ unsigned MemSize = ResVT.getSizeInBits()/8;
+ int FI = MFI.CreateStackObject(MemSize, MemSize, false);
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc)), FI)
+ .addReg(CopyReg);
+ Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg + i), FI);
+ }
+ }
+
+ CLI.ResultReg = ResultReg;
+ CLI.NumResultRegs = RVLocs.size();
+ CLI.Call = MIB;
+
+ return true;
+}
+
+bool
+X86FastISel::fastSelectInstruction(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::Load:
+ return X86SelectLoad(I);
+ case Instruction::Store:
+ return X86SelectStore(I);
+ case Instruction::Ret:
+ return X86SelectRet(I);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return X86SelectCmp(I);
+ case Instruction::ZExt:
+ return X86SelectZExt(I);
+ case Instruction::Br:
+ return X86SelectBranch(I);
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Shl:
+ return X86SelectShift(I);
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ return X86SelectDivRem(I);
+ case Instruction::Select:
+ return X86SelectSelect(I);
+ case Instruction::Trunc:
+ return X86SelectTrunc(I);
+ case Instruction::FPExt:
+ return X86SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return X86SelectFPTrunc(I);
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return X86SelectZExt(I);
+ if (DstVT.bitsLT(SrcVT))
+ return X86SelectTrunc(I);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ updateValueMap(I, Reg);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
+ if (VT > MVT::i64)
+ return 0;
+
+ uint64_t Imm = CI->getZExtValue();
+ if (Imm == 0) {
+ unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type");
+ case MVT::i1:
+ case MVT::i8:
+ return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
+ X86::sub_8bit);
+ case MVT::i16:
+ return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
+ X86::sub_16bit);
+ case MVT::i32:
+ return SrcReg;
+ case MVT::i64: {
+ unsigned ResultReg = createResultReg(&X86::GR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
+ .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
+ return ResultReg;
+ }
+ }
+ }
+
+ unsigned Opc = 0;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type");
+ case MVT::i1: VT = MVT::i8; // fall-through
+ case MVT::i8: Opc = X86::MOV8ri; break;
+ case MVT::i16: Opc = X86::MOV16ri; break;
+ case MVT::i32: Opc = X86::MOV32ri; break;
+ case MVT::i64: {
+ if (isUInt<32>(Imm))
+ Opc = X86::MOV32ri;
+ else if (isInt<32>(Imm))
+ Opc = X86::MOV64ri32;
+ else
+ Opc = X86::MOV64ri;
+ break;
+ }
+ }
+ if (VT == MVT::i64 && Opc == X86::MOV32ri) {
+ unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm);
+ unsigned ResultReg = createResultReg(&X86::GR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
+ .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
+ return ResultReg;
+ }
+ return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
+}
+
+unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
+ if (CFP->isNullValue())
+ return fastMaterializeFloatZero(CFP);
+
+ // Can't handle alternate code models yet.
+ CodeModel::Model CM = TM.getCodeModel();
+ if (CM != CodeModel::Small && CM != CodeModel::Large)
+ return 0;
+
+ // Get opcode and regclass of the output for the given load instruction.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = nullptr;
+ switch (VT.SimpleTy) {
+ default: return 0;
+ case MVT::f32:
+ if (X86ScalarSSEf32) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
+ RC = &X86::FR32RegClass;
+ } else {
+ Opc = X86::LD_Fp32m;
+ RC = &X86::RFP32RegClass;
+ }
+ break;
+ case MVT::f64:
+ if (X86ScalarSSEf64) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
+ RC = &X86::FR64RegClass;
+ } else {
+ Opc = X86::LD_Fp64m;
+ RC = &X86::RFP64RegClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return 0;
+ }
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = DL.getTypeAllocSize(CFP->getType());
+ }
+
+ // x86-32 PIC requires a PIC base register for constant pools.
+ unsigned PICBase = 0;
+ unsigned char OpFlag = 0;
+ if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
+ OpFlag = X86II::MO_PIC_BASE_OFFSET;
+ PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ } else if (Subtarget->isPICStyleGOT()) {
+ OpFlag = X86II::MO_GOTOFF;
+ PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ TM.getCodeModel() == CodeModel::Small) {
+ PICBase = X86::RIP;
+ }
+
+ // Create the load from the constant pool.
+ unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
+ unsigned ResultReg = createResultReg(RC);
+
+ if (CM == CodeModel::Large) {
+ unsigned AddrReg = createResultReg(&X86::GR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
+ AddrReg)
+ .addConstantPoolIndex(CPI, 0, OpFlag);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg);
+ addDirectMem(MIB, AddrReg);
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
+ TM.getDataLayout()->getPointerSize(), Align);
+ MIB->addMemOperand(*FuncInfo.MF, MMO);
+ return ResultReg;
+ }
+
+ addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg),
+ CPI, PICBase, OpFlag);
+ return ResultReg;
+}
+
+unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return 0;
+
+ // Materialize addresses with LEA/MOV instructions.
+ X86AddressMode AM;
+ if (X86SelectAddress(GV, AM)) {
+ // If the expression is just a basereg, then we're done, otherwise we need
+ // to emit an LEA.
+ if (AM.BaseType == X86AddressMode::RegBase &&
+ AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
+ return AM.Base.Reg;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ if (TM.getRelocationModel() == Reloc::Static &&
+ TLI.getPointerTy() == MVT::i64) {
+ // The displacement code could be more than 32 bits away, so we need to use
+ // an instruction with a 64-bit immediate.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
+ ResultReg)
+ .addGlobalAddress(GV);
+ } else {
+ unsigned Opc = TLI.getPointerTy() == MVT::i32
+ ? (Subtarget->isTarget64BitILP32()
+ ? X86::LEA64_32r : X86::LEA32r)
+ : X86::LEA64r;
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg), AM);
+ }
+ return ResultReg;
+ }
+ return 0;
+}
+
+unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
+ EVT CEVT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!CEVT.isSimple())
+ return 0;
+ MVT VT = CEVT.getSimpleVT();
+
+ if (const auto *CI = dyn_cast<ConstantInt>(C))
+ return X86MaterializeInt(CI, VT);
+ else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return X86MaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return X86MaterializeGV(GV, VT);
+
+ return 0;
+}
+
+unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
+ // Fail on dynamic allocas. At this point, getRegForValue has already
+ // checked its CSE maps, so if we're here trying to handle a dynamic
+ // alloca, we're not going to succeed. X86SelectAddress has a
+ // check for dynamic allocas, because it's called directly from
+ // various places, but fastMaterializeAlloca also needs a check
+ // in order to avoid recursion between getRegForValue,
+ // X86SelectAddress, and fastMaterializeAlloca.
+ if (!FuncInfo.StaticAllocaMap.count(C))
+ return 0;
+ assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(C, AM))
+ return 0;
+ unsigned Opc = TLI.getPointerTy() == MVT::i32
+ ? (Subtarget->isTarget64BitILP32()
+ ? X86::LEA64_32r : X86::LEA32r)
+ : X86::LEA64r;
+ const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg), AM);
+ return ResultReg;
+}
+
+unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
+ MVT VT;
+ if (!isTypeLegal(CF->getType(), VT))
+ return 0;
+
+ // Get opcode and regclass for the given zero.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = nullptr;
+ switch (VT.SimpleTy) {
+ default: return 0;
+ case MVT::f32:
+ if (X86ScalarSSEf32) {
+ Opc = X86::FsFLD0SS;
+ RC = &X86::FR32RegClass;
+ } else {
+ Opc = X86::LD_Fp032;
+ RC = &X86::RFP32RegClass;
+ }
+ break;
+ case MVT::f64:
+ if (X86ScalarSSEf64) {
+ Opc = X86::FsFLD0SD;
+ RC = &X86::FR64RegClass;
+ } else {
+ Opc = X86::LD_Fp064;
+ RC = &X86::RFP64RegClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return 0;
+ }
+
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
+ return ResultReg;
+}
+
+
+bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ const Value *Ptr = LI->getPointerOperand();
+ X86AddressMode AM;
+ if (!X86SelectAddress(Ptr, AM))
+ return false;
+
+ const X86InstrInfo &XII = (const X86InstrInfo &)TII;
+
+ unsigned Size = DL.getTypeAllocSize(LI->getType());
+ unsigned Alignment = LI->getAlignment();
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = DL.getABITypeAlignment(LI->getType());
+
+ SmallVector<MachineOperand, 8> AddrOps;
+ AM.getFullAddress(AddrOps);
+
+ MachineInstr *Result =
+ XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps,
+ Size, Alignment, /*AllowCommute=*/true);
+ if (!Result)
+ return false;
+
+ Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
+ FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
+ MI->eraseFromParent();
+ return true;
+}
+
+
+namespace llvm {
+ FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) {
+ return new X86FastISel(funcInfo, libInfo);
+ }
+}