diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 54 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.h | 15 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMSubtarget.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/Alpha/AlphaISelLowering.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/CellSPU/SPUISelLowering.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/IA64/IA64ISelLowering.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/Mips/MipsISelLowering.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/Target/Sparc/SparcISelLowering.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 271 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 20 |
11 files changed, 167 insertions, 221 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 7218560cc6c..0095352c415 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -197,11 +197,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - // Expand mem operations genericly. - setOperationAction(ISD::MEMSET , MVT::Other, Expand); - setOperationAction(ISD::MEMCPY , MVT::Other, Custom); - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); - // Use the default implementation. setOperationAction(ISD::VASTART , MVT::Other, Custom); setOperationAction(ISD::VAARG , MVT::Other, Expand); @@ -1246,18 +1241,30 @@ static SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::CNEG, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); } -SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, - SDOperand Dest, - SDOperand Source, - unsigned Size, - unsigned Align, - SelectionDAG &DAG) { +SDOperand +ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + bool AlwaysInline, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff){ // Do repeated 4-byte loads and stores. To be improved. - assert((Align & 3) == 0 && "Expected 4-byte aligned addresses!"); - unsigned BytesLeft = Size & 3; - unsigned NumMemOps = Size >> 2; + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDOperand(); + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDOperand(); + uint64_t SizeVal = ConstantSize->getValue(); + if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) + return SDOperand(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; unsigned EmittedNumMemOps = 0; - unsigned SrcOff = 0, DstOff = 0; MVT::ValueType VT = MVT::i32; unsigned VTSize = 4; unsigned i = 0; @@ -1272,9 +1279,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { Loads[i] = DAG.getLoad(VT, Chain, - DAG.getNode(ISD::ADD, MVT::i32, Source, + DAG.getNode(ISD::ADD, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - NULL, 0); + SrcSV, SrcOff); TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } @@ -1283,9 +1290,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { TFOps[i] = DAG.getStore(Chain, Loads[i], - DAG.getNode(ISD::ADD, MVT::i32, Dest, + DAG.getNode(ISD::ADD, MVT::i32, Dst, DAG.getConstant(DstOff, MVT::i32)), - NULL, 0); + DstSV, DstOff); DstOff += VTSize; } Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &TFOps[0], i); @@ -1309,9 +1316,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, } Loads[i] = DAG.getLoad(VT, Chain, - DAG.getNode(ISD::ADD, MVT::i32, Source, + DAG.getNode(ISD::ADD, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - NULL, 0); + SrcSV, SrcOff); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; @@ -1331,9 +1338,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, } TFOps[i] = DAG.getStore(Chain, Loads[i], - DAG.getNode(ISD::ADD, MVT::i32, Dest, + DAG.getNode(ISD::ADD, MVT::i32, Dst, DAG.getConstant(DstOff, MVT::i32)), - NULL, 0); + DstSV, DstOff); ++i; DstOff += VTSize; BytesLeft -= VTSize; @@ -1409,7 +1416,6 @@ SDOperand 
ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::RETURNADDR: break; case ISD::FRAMEADDR: break; case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); - case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 285a20d23f8..58d8d8c6c86 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -119,8 +119,8 @@ namespace llvm { getRegClassForInlineAsmConstraint(const std::string &Constraint, MVT::ValueType VT) const; - virtual const TargetSubtarget* getSubtarget() { - return static_cast<const TargetSubtarget*>(Subtarget); + virtual const ARMSubtarget* getSubtarget() { + return Subtarget; } private: @@ -143,11 +143,14 @@ namespace llvm { SDOperand LowerGLOBAL_OFFSET_TABLE(SDOperand Op, SelectionDAG &DAG); SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG); SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG); - SDOperand LowerMEMCPYInline(SDOperand Chain, SDOperand Dest, - SDOperand Source, unsigned Size, - unsigned Align, SelectionDAG &DAG); - + SDOperand EmitTargetCodeForMemcpy(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + bool AlwaysInline, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff); }; } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index c43924b53df..fbc9e579df1 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -62,6 +62,8 @@ protected: /// ARMSubtarget(const Module &M, const std::string &FS, bool thumb); + /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size + /// that still makes it profitable to inline the call. 
unsigned getMaxInlineSizeThreshold() const { // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb. // Change this once Thumb ldmia / stmia support is added. diff --git a/llvm/lib/Target/Alpha/AlphaISelLowering.cpp b/llvm/lib/Target/Alpha/AlphaISelLowering.cpp index d208f59e4f6..91b118029a3 100644 --- a/llvm/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/llvm/lib/Target/Alpha/AlphaISelLowering.cpp @@ -87,10 +87,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) setOperationAction(ISD::SDIV , MVT::i64, Custom); setOperationAction(ISD::UDIV , MVT::i64, Custom); - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); - setOperationAction(ISD::MEMSET , MVT::Other, Expand); - setOperationAction(ISD::MEMCPY , MVT::Other, Expand); - // We don't support sin/cos/sqrt/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); diff --git a/llvm/lib/Target/CellSPU/SPUISelLowering.cpp b/llvm/lib/Target/CellSPU/SPUISelLowering.cpp index 29226092688..1cb691882de 100644 --- a/llvm/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/llvm/lib/Target/CellSPU/SPUISelLowering.cpp @@ -175,9 +175,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); // SPU has no intrinsics for these particular operations: - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); - setOperationAction(ISD::MEMSET, MVT::Other, Expand); - setOperationAction(ISD::MEMCPY, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); // PowerPC has no SREM/UREM instructions diff --git a/llvm/lib/Target/IA64/IA64ISelLowering.cpp b/llvm/lib/Target/IA64/IA64ISelLowering.cpp index 2ec08b60b6f..c53f3b44eba 100644 --- a/llvm/lib/Target/IA64/IA64ISelLowering.cpp +++ b/llvm/lib/Target/IA64/IA64ISelLowering.cpp @@ -65,9 +65,6 @@ IA64TargetLowering::IA64TargetLowering(TargetMachine &TM) setOperationAction(ISD::UREM , MVT::f32 , Expand); 
setOperationAction(ISD::UREM , MVT::f64 , Expand); - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); - setOperationAction(ISD::MEMSET , MVT::Other, Expand); - setOperationAction(ISD::MEMCPY , MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand); setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 5c2e1c0190a..5ea9cdd9c25 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -80,9 +80,6 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); // Mips not supported intrinsics. - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); - setOperationAction(ISD::MEMSET, MVT::Other, Expand); - setOperationAction(ISD::MEMCPY, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ddc8e1a7859..e42e9dcba05 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -78,9 +78,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); // PowerPC has no intrinsics for these particular operations - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); - setOperationAction(ISD::MEMSET, MVT::Other, Expand); - setOperationAction(ISD::MEMCPY, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); // PowerPC has no SREM/UREM instructions @@ -1735,10 +1732,9 @@ static SDOperand CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, unsigned Size) { - SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32); - SDOperand SizeNode = 
DAG.getConstant(Size, MVT::i32); - SDOperand AlwaysInline = DAG.getConstant(0, MVT::i32); - return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline); + SDOperand SizeNode = DAG.getConstant(Size, MVT::i32); + return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(), false, + NULL, 0, NULL, 0); } SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 1d4fe0bc8cb..3d5ad0b7402 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -570,9 +570,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); // SPARC has no intrinsics for these particular operations. - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); - setOperationAction(ISD::MEMSET, MVT::Other, Expand); - setOperationAction(ISD::MEMCPY, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 66384f921c2..9db0288c4e3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -206,7 +206,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::BRCOND , MVT::Other, Custom); setOperationAction(ISD::BR_CC , MVT::Other, Expand); setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); if (Subtarget->is64Bit()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); @@ -281,9 +280,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom); setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom); } - // X86 wants to expand memset / 
memcpy itself. - setOperationAction(ISD::MEMSET , MVT::Other, Custom); - setOperationAction(ISD::MEMCPY , MVT::Other, Custom); if (Subtarget->hasSSE1()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); @@ -1113,10 +1109,10 @@ CopyTailCallClobberedArgumentsToVRegs(SDOperand Chain, static SDOperand CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG) { - SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32); SDOperand SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); - SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32); - return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline); + return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(), + /*AlwaysInline=*/true, + NULL, 0, NULL, 0); } SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG, @@ -4557,52 +4553,51 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2); } -SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { - SDOperand InFlag(0, 0); - SDOperand Chain = Op.getOperand(0); - unsigned Align = - (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); - if (Align == 0) Align = 1; - - ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); - // If not DWORD aligned or size is more than the threshold, call memset. - // The libc version is likely to be faster for these cases. It can use the - // address value and run time information about the CPU. 
- if ((Align & 3) != 0 || - (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) { +SDOperand +X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + Value *DstSV, uint64_t DstOff) { + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + + /// If not DWORD aligned or size is more than the threshold, call the library. + /// The libc version is likely to be faster for these cases. It can use the + /// address value and run time information about the CPU. + if ((Align & 3) != 0 || + !ConstantSize || + ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) { + SDOperand InFlag(0, 0); // Check to see if there is a specialized entry-point for memory zeroing. - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Op.getOperand(2)); - const char *bzeroEntry = - V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0; - - MVT::ValueType IntPtr = getPointerTy(); - const Type *IntPtrTy = getTargetData()->getIntPtrType(); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Op.getOperand(1); - Entry.Ty = IntPtrTy; - Args.push_back(Entry); - - if (!bzeroEntry) { - // Extend the unsigned i8 argument to be an int value for the call. - Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); + ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); + if (const char *bzeroEntry = + V && V->isNullValue() ?
Subtarget->getBZeroEntry() : 0) { + MVT::ValueType IntPtr = getPointerTy(); + const Type *IntPtrTy = getTargetData()->getIntPtrType(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + std::pair<SDOperand,SDOperand> CallResult = + LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C, + false, DAG.getExternalSymbol(bzeroEntry, IntPtr), + Args, DAG); + return CallResult.second; } - Entry.Node = Op.getOperand(3); - Args.push_back(Entry); - const char *Name = bzeroEntry ? bzeroEntry : "memset"; - std::pair<SDOperand,SDOperand> CallResult = - LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C, - false, DAG.getExternalSymbol(Name, IntPtr), Args, DAG); - return CallResult.second; + // Otherwise have the target-independent code call memset. + return SDOperand(); } + uint64_t SizeVal = ConstantSize->getValue(); + SDOperand InFlag(0, 0); MVT::ValueType AVT; SDOperand Count; - ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); unsigned BytesLeft = 0; bool TwoRepStos = false; if (ValC) { @@ -4630,22 +4625,14 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { default: // Byte aligned AVT = MVT::i8; ValReg = X86::AL; - Count = Op.getOperand(3); + Count = Size; break; } if (AVT > MVT::i8) { - if (I) { - unsigned UBytes = MVT::getSizeInBits(AVT) / 8; - Count = DAG.getIntPtrConstant(I->getValue() / UBytes); - BytesLeft = I->getValue() % UBytes; - } else { - assert(AVT >= MVT::i32 && - "Do not use rep;stos if not at least DWORD aligned"); - Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), - Op.getOperand(3), DAG.getConstant(2, MVT::i8)); - TwoRepStos = true; - } + unsigned UBytes = MVT::getSizeInBits(AVT) / 8; + Count = DAG.getIntPtrConstant(SizeVal / UBytes); + BytesLeft = SizeVal % UBytes; } Chain = 
DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), @@ -4653,8 +4640,8 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { InFlag = Chain.getValue(1); } else { AVT = MVT::i8; - Count = Op.getOperand(3); - Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); + Count = Size; + Chain = DAG.getCopyToReg(Chain, X86::AL, Src, InFlag); InFlag = Chain.getValue(1); } @@ -4662,7 +4649,7 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, - Op.getOperand(1), InFlag); + Dst, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); @@ -4674,7 +4661,7 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { if (TwoRepStos) { InFlag = Chain.getValue(1); - Count = Op.getOperand(3); + Count = Size; MVT::ValueType CVT = Count.getValueType(); SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); @@ -4688,79 +4675,68 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { Ops.push_back(InFlag); Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); } else if (BytesLeft) { - // Issue stores for the last 1 - 7 bytes. 
- SDOperand Value; - unsigned Val = ValC->getValue() & 255; - unsigned Offset = I->getValue() - BytesLeft; - SDOperand DstAddr = Op.getOperand(1); - MVT::ValueType AddrVT = DstAddr.getValueType(); - if (BytesLeft >= 4) { - Val = (Val << 8) | Val; - Val = (Val << 16) | Val; - Value = DAG.getConstant(Val, MVT::i32); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, AddrVT, DstAddr, - DAG.getConstant(Offset, AddrVT)), - NULL, 0); - BytesLeft -= 4; - Offset += 4; - } - if (BytesLeft >= 2) { - Value = DAG.getConstant((Val << 8) | Val, MVT::i16); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, AddrVT, DstAddr, - DAG.getConstant(Offset, AddrVT)), - NULL, 0); - BytesLeft -= 2; - Offset += 2; - } - if (BytesLeft == 1) { - Value = DAG.getConstant(Val, MVT::i8); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, AddrVT, DstAddr, - DAG.getConstant(Offset, AddrVT)), - NULL, 0); - } + // Handle the last 1 - 7 bytes. + unsigned Offset = SizeVal - BytesLeft; + MVT::ValueType AddrVT = Dst.getValueType(); + MVT::ValueType SizeVT = Size.getValueType(); + + Chain = DAG.getMemset(Chain, + DAG.getNode(ISD::ADD, AddrVT, Dst, + DAG.getConstant(Offset, AddrVT)), + Src, + DAG.getConstant(BytesLeft, SizeVT), + Align, DstSV, Offset); } + // TODO: Use a TokenFactor, as in memcpy, instead of a single chain. return Chain; } -SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, - SDOperand Dest, - SDOperand Source, - unsigned Size, - unsigned Align, - SelectionDAG &DAG) { +SDOperand +X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + bool AlwaysInline, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff){ + + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDOperand(); + uint64_t SizeVal = ConstantSize->getValue(); + if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) + return SDOperand(); + + SmallVector<SDOperand, 4> Results; + MVT::ValueType AVT; unsigned BytesLeft = 0; - switch (Align & 3) { - case 2: // WORD aligned - AVT = MVT::i16; - break; - case 0: // DWORD aligned - AVT = MVT::i32; - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned - AVT = MVT::i64; - break; - default: // Byte aligned - AVT = MVT::i8; - break; - } + if (Align >= 8 && Subtarget->is64Bit()) + AVT = MVT::i64; + else if (Align >= 4) + AVT = MVT::i32; + else if (Align >= 2) + AVT = MVT::i16; + else + AVT = MVT::i8; unsigned UBytes = MVT::getSizeInBits(AVT) / 8; - SDOperand Count = DAG.getIntPtrConstant(Size / UBytes); - BytesLeft = Size % UBytes; + unsigned CountVal = SizeVal / UBytes; + SDOperand Count = DAG.getIntPtrConstant(CountVal); + BytesLeft = SizeVal % UBytes; SDOperand InFlag(0, 0); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, - Dest, InFlag); + Dst, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, - Source, InFlag); + Src, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); @@ -4768,57 +4744,28 @@ SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, Ops.push_back(Chain); Ops.push_back(DAG.getValueType(AVT)); Ops.push_back(InFlag); - Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); + Results.push_back(DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size())); if (BytesLeft) { - // Issue loads and stores for the last 1 - 7 bytes. 
- unsigned Offset = Size - BytesLeft; - SDOperand DstAddr = Dest; - MVT::ValueType DstVT = DstAddr.getValueType(); - SDOperand SrcAddr = Source; - MVT::ValueType SrcVT = SrcAddr.getValueType(); - SDOperand Value; - if (BytesLeft >= 4) { - Value = DAG.getLoad(MVT::i32, Chain, - DAG.getNode(ISD::ADD, SrcVT, SrcAddr, - DAG.getConstant(Offset, SrcVT)), - NULL, 0); - Chain = Value.getValue(1); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, DstVT, DstAddr, - DAG.getConstant(Offset, DstVT)), - NULL, 0); - BytesLeft -= 4; - Offset += 4; - } - if (BytesLeft >= 2) { - Value = DAG.getLoad(MVT::i16, Chain, - DAG.getNode(ISD::ADD, SrcVT, SrcAddr, - DAG.getConstant(Offset, SrcVT)), - NULL, 0); - Chain = Value.getValue(1); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, DstVT, DstAddr, - DAG.getConstant(Offset, DstVT)), - NULL, 0); - BytesLeft -= 2; - Offset += 2; - } + // Handle the last 1 - 7 bytes. + unsigned Offset = SizeVal - BytesLeft; + MVT::ValueType DstVT = Dst.getValueType(); + MVT::ValueType SrcVT = Src.getValueType(); + MVT::ValueType SizeVT = Size.getValueType(); - if (BytesLeft == 1) { - Value = DAG.getLoad(MVT::i8, Chain, - DAG.getNode(ISD::ADD, SrcVT, SrcAddr, - DAG.getConstant(Offset, SrcVT)), - NULL, 0); - Chain = Value.getValue(1); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, DstVT, DstAddr, - DAG.getConstant(Offset, DstVT)), - NULL, 0); - } + Results.push_back(DAG.getMemcpy(Chain, + DAG.getNode(ISD::ADD, DstVT, Dst, + DAG.getConstant(Offset, + DstVT)), + DAG.getNode(ISD::ADD, SrcVT, Src, + DAG.getConstant(Offset, + SrcVT)), + DAG.getConstant(BytesLeft, SizeVT), + Align, AlwaysInline, + DstSV, Offset, SrcSV, Offset)); } - return Chain; + return DAG.getNode(ISD::TokenFactor, MVT::Other, &Results[0], Results.size()); } /// Expand the result of: i64,outchain = READCYCLECOUNTER inchain @@ -5430,8 +5377,6 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::CALL: return LowerCALL(Op, 
DAG); case ISD::RET: return LowerRET(Op, DAG); case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); - case ISD::MEMSET: return LowerMEMSET(Op, DAG); - case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index d8099506fd6..2abe237ed82 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -441,8 +441,8 @@ namespace llvm { SDOperand Ret, SelectionDAG &DAG) const; - virtual const TargetSubtarget* getSubtarget() { - return static_cast<const TargetSubtarget*>(Subtarget); + virtual const X86Subtarget* getSubtarget() { + return Subtarget; } /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is @@ -512,9 +512,6 @@ namespace llvm { SDOperand LowerSELECT(SDOperand Op, SelectionDAG &DAG); SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG); SDOperand LowerMEMSET(SDOperand Op, SelectionDAG &DAG); - SDOperand LowerMEMCPYInline(SDOperand Dest, SDOperand Source, - SDOperand Chain, unsigned Size, unsigned Align, - SelectionDAG &DAG); SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG); SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG); SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG); @@ -535,6 +532,19 @@ namespace llvm { SDNode *ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG); SDNode *ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG); SDNode *ExpandATOMIC_LCS(SDNode *N, SelectionDAG &DAG); + + SDOperand EmitTargetCodeForMemset(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + Value *DstSV, uint64_t DstOff); + SDOperand EmitTargetCodeForMemcpy(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + bool AlwaysInline, + Value *DstSV, 
uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff); }; } |