| author | Dan Gohman <gohman@apple.com> | 2008-04-12 04:36:06 +0000 |
|---|---|---|
| committer | Dan Gohman <gohman@apple.com> | 2008-04-12 04:36:06 +0000 |
| commit | 544ab2c50ba1acb803e57519ebf7ec81c3340f79 (patch) | |
| tree | 53c0609d21cde14b6d7c7bd5e809f3b3a4060d25 /llvm/lib/Target | |
| parent | 8c7cf88f7ea574d5c3831e0c50655e5ab60af85d (diff) | |
Drop ISD::MEMSET, ISD::MEMMOVE, and ISD::MEMCPY, which are not Legal
on any current target and aren't optimized in DAGCombiner. Instead
of going through intermediate nodes, expand the operations
immediately, choosing between simple loads/stores, target-specific
code, and library calls.
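
The choice now happens in the target-independent lowering code rather than through custom DAG nodes. Below is a rough sketch of that three-way decision with hypothetical helper names (the real code lives in SelectionDAG/TargetLowering, which is outside this diffstat), assuming the LLVM 2.x SDOperand API:

```cpp
// Illustrative only: expansion order when a memcpy is lowered.
//   1) small constant sizes  -> plain loads/stores,
//   2) otherwise             -> target hook (rep;movs, ldm/stm, ...),
//   3) otherwise             -> call the memcpy library function.
SDOperand LowerMemcpyCall(SelectionDAG &DAG, TargetLowering &TLI,
                          SDOperand Chain, SDOperand Dst, SDOperand Src,
                          SDOperand Size, unsigned Align, bool AlwaysInline,
                          Value *DstSV, Value *SrcSV) {
  // (1) Generic expansion into simple loads/stores for small constant sizes.
  SDOperand Result = ExpandMemcpyAsLoadsAndStores(DAG, Chain, Dst, Src, Size,
                                                  Align, DstSV, SrcSV);
  if (Result.Val)
    return Result;
  // (2) Target-specific code; a null SDOperand means the target declines.
  Result = TLI.EmitTargetCodeForMemcpy(DAG, Chain, Dst, Src, Size, Align,
                                       AlwaysInline, DstSV, 0, SrcSV, 0);
  if (Result.Val)
    return Result;
  // (3) Fall back to a libcall to memcpy.
  return EmitMemcpyLibcall(DAG, TLI, Chain, Dst, Src, Size);
}
```

`ExpandMemcpyAsLoadsAndStores` and `EmitMemcpyLibcall` are stand-ins for this sketch; `EmitTargetCodeForMemcpy` is the hook the diff below adds to the ARM and X86 backends.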
Previously, the code that emitted optimized sequences for these
operations was used only at initial SelectionDAG construction time;
now it is used whenever the operations are lowered. This fixes some
cases where rep;movs was used for small copies for which simple
loads/stores would be better.
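
For example, the new X86 hook in the diff below emits rep;movs only when the size is a known constant within the subtarget's inline threshold; otherwise it declines and the copy is done with plain loads/stores or a library call. Excerpt adapted from the diff (not standalone code):

```cpp
// Guard at the top of X86TargetLowering::EmitTargetCodeForMemcpy:
// returning a null SDOperand hands the job back to generic lowering.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (!ConstantSize)
  return SDOperand();
uint64_t SizeVal = ConstantSize->getValue();
if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
  return SDOperand();
```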
This also cleans up the code that checked for alignments less than
4, letting the targets make that decision instead of the
target-independent code. This allows x86 to use rep;movs in
low-alignment cases.
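
The two hooks in this patch show the difference: ARM still requires 4-byte alignment and simply declines otherwise, while X86 narrows the rep;movs element type instead of refusing low-alignment copies. Both excerpts are adapted from the diff below:

```cpp
// ARM: the repeated 4-byte load/store expansion needs 4-byte alignment,
// so the hook bails out and lets generic lowering handle the copy.
if ((Align & 3) != 0)
  return SDOperand();

// X86: choose the widest element the alignment allows rather than
// rejecting the operation.
MVT::ValueType AVT;
if (Align >= 8 && Subtarget->is64Bit())
  AVT = MVT::i64;           // QWORD aligned on x86-64
else if (Align >= 4)
  AVT = MVT::i32;           // DWORD aligned
else if (Align >= 2)
  AVT = MVT::i16;           // WORD aligned
else
  AVT = MVT::i8;            // byte aligned
```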
Also, this fixes a bug that resulted in the use of rep;stos for
memsets of 0 with non-constant memory size when the alignment was
at least 4. It's better to use the library in this case, which
can be significantly faster when the size is large.
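
In the new X86 memset hook (see the diff below), a non-constant or over-threshold size never reaches rep;stos; the hook either calls a specialized zeroing entry point or declines so the generic code emits the memset libcall. A trimmed sketch of that guard:

```cpp
// Adapted from X86TargetLowering::EmitTargetCodeForMemset in this patch.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (!ConstantSize ||
    ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) {
  // If the value being stored is zero and the subtarget provides a bzero
  // entry point, a direct call to it is emitted here (elided).
  // Otherwise, decline: the target-independent code will call memset.
  return SDOperand();
}
```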
This also preserves more SourceValue information when memory
intrinsics are lowered into simple loads/stores.
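
Concretely, the loads and stores emitted by the ARM inline-copy path now carry the original Value* and running offset instead of NULL, 0, so alias information survives the expansion. From the ARM diff below:

```cpp
// Before this patch the last argument pair was NULL, 0; now the
// SourceValue and offset of the original memcpy operands are preserved.
Loads[i] = DAG.getLoad(VT, Chain,
                       DAG.getNode(ISD::ADD, MVT::i32, Src,
                                   DAG.getConstant(SrcOff, MVT::i32)),
                       SrcSV, SrcOff);
TFOps[i] = Loads[i].getValue(1);
SrcOff += VTSize;
```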
llvm-svn: 49572
Diffstat (limited to 'llvm/lib/Target')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 54 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.h | 15 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMSubtarget.h | 2 |
| -rw-r--r-- | llvm/lib/Target/Alpha/AlphaISelLowering.cpp | 4 |
| -rw-r--r-- | llvm/lib/Target/CellSPU/SPUISelLowering.cpp | 3 |
| -rw-r--r-- | llvm/lib/Target/IA64/IA64ISelLowering.cpp | 3 |
| -rw-r--r-- | llvm/lib/Target/Mips/MipsISelLowering.cpp | 3 |
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 10 |
| -rw-r--r-- | llvm/lib/Target/Sparc/SparcISelLowering.cpp | 3 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 271 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 20 |
11 files changed, 167 insertions, 221 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 7218560cc6c..0095352c415 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -197,11 +197,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - // Expand mem operations genericly. - setOperationAction(ISD::MEMSET , MVT::Other, Expand); - setOperationAction(ISD::MEMCPY , MVT::Other, Custom); - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); - // Use the default implementation. setOperationAction(ISD::VASTART , MVT::Other, Custom); setOperationAction(ISD::VAARG , MVT::Other, Expand); @@ -1246,18 +1241,30 @@ static SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::CNEG, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); } -SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, - SDOperand Dest, - SDOperand Source, - unsigned Size, - unsigned Align, - SelectionDAG &DAG) { +SDOperand +ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + bool AlwaysInline, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff){ // Do repeated 4-byte loads and stores. To be improved. - assert((Align & 3) == 0 && "Expected 4-byte aligned addresses!"); - unsigned BytesLeft = Size & 3; - unsigned NumMemOps = Size >> 2; + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDOperand(); + // This requires the copy size to be a constant, preferrably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDOperand(); + uint64_t SizeVal = ConstantSize->getValue(); + if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) + return SDOperand(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; unsigned EmittedNumMemOps = 0; - unsigned SrcOff = 0, DstOff = 0; MVT::ValueType VT = MVT::i32; unsigned VTSize = 4; unsigned i = 0; @@ -1272,9 +1279,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { Loads[i] = DAG.getLoad(VT, Chain, - DAG.getNode(ISD::ADD, MVT::i32, Source, + DAG.getNode(ISD::ADD, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - NULL, 0); + SrcSV, SrcOff); TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } @@ -1283,9 +1290,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { TFOps[i] = DAG.getStore(Chain, Loads[i], - DAG.getNode(ISD::ADD, MVT::i32, Dest, + DAG.getNode(ISD::ADD, MVT::i32, Dst, DAG.getConstant(DstOff, MVT::i32)), - NULL, 0); + DstSV, DstOff); DstOff += VTSize; } Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &TFOps[0], i); @@ -1309,9 +1316,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, } Loads[i] = DAG.getLoad(VT, Chain, - DAG.getNode(ISD::ADD, MVT::i32, Source, + DAG.getNode(ISD::ADD, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - NULL, 0); + SrcSV, SrcOff); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; @@ -1331,9 +1338,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, } TFOps[i] = DAG.getStore(Chain, Loads[i], - DAG.getNode(ISD::ADD, MVT::i32, Dest, + DAG.getNode(ISD::ADD, MVT::i32, Dst, 
DAG.getConstant(DstOff, MVT::i32)), - NULL, 0); + DstSV, DstOff); ++i; DstOff += VTSize; BytesLeft -= VTSize; @@ -1409,7 +1416,6 @@ SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::RETURNADDR: break; case ISD::FRAMEADDR: break; case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); - case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 285a20d23f8..58d8d8c6c86 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -119,8 +119,8 @@ namespace llvm { getRegClassForInlineAsmConstraint(const std::string &Constraint, MVT::ValueType VT) const; - virtual const TargetSubtarget* getSubtarget() { - return static_cast<const TargetSubtarget*>(Subtarget); + virtual const ARMSubtarget* getSubtarget() { + return Subtarget; } private: @@ -143,11 +143,14 @@ namespace llvm { SDOperand LowerGLOBAL_OFFSET_TABLE(SDOperand Op, SelectionDAG &DAG); SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG); SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG); - SDOperand LowerMEMCPYInline(SDOperand Chain, SDOperand Dest, - SDOperand Source, unsigned Size, - unsigned Align, SelectionDAG &DAG); - + SDOperand EmitTargetCodeForMemcpy(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + bool AlwaysInline, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff); }; } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index c43924b53df..fbc9e579df1 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -62,6 +62,8 @@ protected: /// ARMSubtarget(const Module &M, const std::string &FS, bool thumb); + /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size + /// that still makes it profitable to inline the call. unsigned getMaxInlineSizeThreshold() const { // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb. // Change this once Thumb ldmia / stmia support is added. 
diff --git a/llvm/lib/Target/Alpha/AlphaISelLowering.cpp b/llvm/lib/Target/Alpha/AlphaISelLowering.cpp index d208f59e4f6..91b118029a3 100644 --- a/llvm/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/llvm/lib/Target/Alpha/AlphaISelLowering.cpp @@ -87,10 +87,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) setOperationAction(ISD::SDIV , MVT::i64, Custom); setOperationAction(ISD::UDIV , MVT::i64, Custom); - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); - setOperationAction(ISD::MEMSET , MVT::Other, Expand); - setOperationAction(ISD::MEMCPY , MVT::Other, Expand); - // We don't support sin/cos/sqrt/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); diff --git a/llvm/lib/Target/CellSPU/SPUISelLowering.cpp b/llvm/lib/Target/CellSPU/SPUISelLowering.cpp index 29226092688..1cb691882de 100644 --- a/llvm/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/llvm/lib/Target/CellSPU/SPUISelLowering.cpp @@ -175,9 +175,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); // SPU has no intrinsics for these particular operations: - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); - setOperationAction(ISD::MEMSET, MVT::Other, Expand); - setOperationAction(ISD::MEMCPY, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); // PowerPC has no SREM/UREM instructions diff --git a/llvm/lib/Target/IA64/IA64ISelLowering.cpp b/llvm/lib/Target/IA64/IA64ISelLowering.cpp index 2ec08b60b6f..c53f3b44eba 100644 --- a/llvm/lib/Target/IA64/IA64ISelLowering.cpp +++ b/llvm/lib/Target/IA64/IA64ISelLowering.cpp @@ -65,9 +65,6 @@ IA64TargetLowering::IA64TargetLowering(TargetMachine &TM) setOperationAction(ISD::UREM , MVT::f32 , Expand); setOperationAction(ISD::UREM , MVT::f64 , Expand); - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); - setOperationAction(ISD::MEMSET , MVT::Other, Expand); - setOperationAction(ISD::MEMCPY , MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand); setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 5c2e1c0190a..5ea9cdd9c25 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -80,9 +80,6 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); // Mips not supported intrinsics. 
- setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); - setOperationAction(ISD::MEMSET, MVT::Other, Expand); - setOperationAction(ISD::MEMCPY, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ddc8e1a7859..e42e9dcba05 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -78,9 +78,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); // PowerPC has no intrinsics for these particular operations - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); - setOperationAction(ISD::MEMSET, MVT::Other, Expand); - setOperationAction(ISD::MEMCPY, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); // PowerPC has no SREM/UREM instructions @@ -1735,10 +1732,9 @@ static SDOperand CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, unsigned Size) { - SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32); - SDOperand SizeNode = DAG.getConstant(Size, MVT::i32); - SDOperand AlwaysInline = DAG.getConstant(0, MVT::i32); - return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline); + SDOperand SizeNode = DAG.getConstant(Size, MVT::i32); + return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(), false, + NULL, 0, NULL, 0); } SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 1d4fe0bc8cb..3d5ad0b7402 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -570,9 +570,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); // SPARC has no intrinsics for these particular operations. - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); - setOperationAction(ISD::MEMSET, MVT::Other, Expand); - setOperationAction(ISD::MEMCPY, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 66384f921c2..9db0288c4e3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -206,7 +206,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::BRCOND , MVT::Other, Custom); setOperationAction(ISD::BR_CC , MVT::Other, Expand); setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); if (Subtarget->is64Bit()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); @@ -281,9 +280,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom); setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom); } - // X86 wants to expand memset / memcpy itself. 
- setOperationAction(ISD::MEMSET , MVT::Other, Custom); - setOperationAction(ISD::MEMCPY , MVT::Other, Custom); if (Subtarget->hasSSE1()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); @@ -1113,10 +1109,10 @@ CopyTailCallClobberedArgumentsToVRegs(SDOperand Chain, static SDOperand CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG) { - SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32); SDOperand SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); - SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32); - return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline); + return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(), + /*AlwaysInline=*/true, + NULL, 0, NULL, 0); } SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG, @@ -4557,52 +4553,51 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2); } -SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { - SDOperand InFlag(0, 0); - SDOperand Chain = Op.getOperand(0); - unsigned Align = - (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); - if (Align == 0) Align = 1; - - ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); - // If not DWORD aligned or size is more than the threshold, call memset. - // The libc version is likely to be faster for these cases. It can use the - // address value and run time information about the CPU. - if ((Align & 3) != 0 || - (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) { +SDOperand +X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + Value *DstSV, uint64_t DstOff) { + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + + /// If not DWORD aligned or size is more than the threshold, call the library. + /// The libc version is likely to be faster for these cases. It can use the + /// address value and run time information about the CPU. + if ((Align & 3) == 0 || + !ConstantSize || + ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) { + SDOperand InFlag(0, 0); // Check to see if there is a specialized entry-point for memory zeroing. - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Op.getOperand(2)); - const char *bzeroEntry = - V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0; - - MVT::ValueType IntPtr = getPointerTy(); - const Type *IntPtrTy = getTargetData()->getIntPtrType(); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Op.getOperand(1); - Entry.Ty = IntPtrTy; - Args.push_back(Entry); - - if (!bzeroEntry) { - // Extend the unsigned i8 argument to be an int value for the call. - Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); + ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); + if (const char *bzeroEntry = + V && V->isNullValue() ? 
Subtarget->getBZeroEntry() : 0) { + MVT::ValueType IntPtr = getPointerTy(); + const Type *IntPtrTy = getTargetData()->getIntPtrType(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + std::pair<SDOperand,SDOperand> CallResult = + LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C, + false, DAG.getExternalSymbol(bzeroEntry, IntPtr), + Args, DAG); + return CallResult.second; } - Entry.Node = Op.getOperand(3); - Args.push_back(Entry); - const char *Name = bzeroEntry ? bzeroEntry : "memset"; - std::pair<SDOperand,SDOperand> CallResult = - LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C, - false, DAG.getExternalSymbol(Name, IntPtr), Args, DAG); - return CallResult.second; + // Otherwise have the target-independent code call memset. + return SDOperand(); } + uint64_t SizeVal = ConstantSize->getValue(); + SDOperand InFlag(0, 0); MVT::ValueType AVT; SDOperand Count; - ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); unsigned BytesLeft = 0; bool TwoRepStos = false; if (ValC) { @@ -4630,22 +4625,14 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { default: // Byte aligned AVT = MVT::i8; ValReg = X86::AL; - Count = Op.getOperand(3); + Count = Size; break; } if (AVT > MVT::i8) { - if (I) { - unsigned UBytes = MVT::getSizeInBits(AVT) / 8; - Count = DAG.getIntPtrConstant(I->getValue() / UBytes); - BytesLeft = I->getValue() % UBytes; - } else { - assert(AVT >= MVT::i32 && - "Do not use rep;stos if not at least DWORD aligned"); - Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), - Op.getOperand(3), DAG.getConstant(2, MVT::i8)); - TwoRepStos = true; - } + unsigned UBytes = MVT::getSizeInBits(AVT) / 8; + Count = DAG.getIntPtrConstant(SizeVal / UBytes); + BytesLeft = SizeVal % UBytes; } Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), @@ -4653,8 +4640,8 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { InFlag = Chain.getValue(1); } else { AVT = MVT::i8; - Count = Op.getOperand(3); - Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); + Count = Size; + Chain = DAG.getCopyToReg(Chain, X86::AL, Src, InFlag); InFlag = Chain.getValue(1); } @@ -4662,7 +4649,7 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, - Op.getOperand(1), InFlag); + Dst, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); @@ -4674,7 +4661,7 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { if (TwoRepStos) { InFlag = Chain.getValue(1); - Count = Op.getOperand(3); + Count = Size; MVT::ValueType CVT = Count.getValueType(); SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); @@ -4688,79 +4675,68 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { Ops.push_back(InFlag); Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); } else if (BytesLeft) { - // Issue stores for the last 1 - 7 bytes. 
- SDOperand Value; - unsigned Val = ValC->getValue() & 255; - unsigned Offset = I->getValue() - BytesLeft; - SDOperand DstAddr = Op.getOperand(1); - MVT::ValueType AddrVT = DstAddr.getValueType(); - if (BytesLeft >= 4) { - Val = (Val << 8) | Val; - Val = (Val << 16) | Val; - Value = DAG.getConstant(Val, MVT::i32); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, AddrVT, DstAddr, - DAG.getConstant(Offset, AddrVT)), - NULL, 0); - BytesLeft -= 4; - Offset += 4; - } - if (BytesLeft >= 2) { - Value = DAG.getConstant((Val << 8) | Val, MVT::i16); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, AddrVT, DstAddr, - DAG.getConstant(Offset, AddrVT)), - NULL, 0); - BytesLeft -= 2; - Offset += 2; - } - if (BytesLeft == 1) { - Value = DAG.getConstant(Val, MVT::i8); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, AddrVT, DstAddr, - DAG.getConstant(Offset, AddrVT)), - NULL, 0); - } + // Handle the last 1 - 7 bytes. + unsigned Offset = SizeVal - BytesLeft; + MVT::ValueType AddrVT = Dst.getValueType(); + MVT::ValueType SizeVT = Size.getValueType(); + + Chain = DAG.getMemset(Chain, + DAG.getNode(ISD::ADD, AddrVT, Dst, + DAG.getConstant(Offset, AddrVT)), + Src, + DAG.getConstant(BytesLeft, SizeVT), + Align, DstSV, Offset); } + // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. return Chain; } -SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, - SDOperand Dest, - SDOperand Source, - unsigned Size, - unsigned Align, - SelectionDAG &DAG) { +SDOperand +X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + bool AlwaysInline, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff){ + + // This requires the copy size to be a constant, preferrably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDOperand(); + uint64_t SizeVal = ConstantSize->getValue(); + if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) + return SDOperand(); + + SmallVector<SDOperand, 4> Results; + MVT::ValueType AVT; unsigned BytesLeft = 0; - switch (Align & 3) { - case 2: // WORD aligned - AVT = MVT::i16; - break; - case 0: // DWORD aligned - AVT = MVT::i32; - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned - AVT = MVT::i64; - break; - default: // Byte aligned - AVT = MVT::i8; - break; - } + if (Align >= 8 && Subtarget->is64Bit()) + AVT = MVT::i64; + else if (Align >= 4) + AVT = MVT::i32; + else if (Align >= 2) + AVT = MVT::i16; + else + AVT = MVT::i8; unsigned UBytes = MVT::getSizeInBits(AVT) / 8; - SDOperand Count = DAG.getIntPtrConstant(Size / UBytes); - BytesLeft = Size % UBytes; + unsigned CountVal = SizeVal / UBytes; + SDOperand Count = DAG.getIntPtrConstant(CountVal); + BytesLeft = SizeVal % UBytes; SDOperand InFlag(0, 0); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, - Dest, InFlag); + Dst, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? 
X86::RSI : X86::ESI, - Source, InFlag); + Src, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); @@ -4768,57 +4744,28 @@ SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, Ops.push_back(Chain); Ops.push_back(DAG.getValueType(AVT)); Ops.push_back(InFlag); - Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); + Results.push_back(DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size())); if (BytesLeft) { - // Issue loads and stores for the last 1 - 7 bytes. - unsigned Offset = Size - BytesLeft; - SDOperand DstAddr = Dest; - MVT::ValueType DstVT = DstAddr.getValueType(); - SDOperand SrcAddr = Source; - MVT::ValueType SrcVT = SrcAddr.getValueType(); - SDOperand Value; - if (BytesLeft >= 4) { - Value = DAG.getLoad(MVT::i32, Chain, - DAG.getNode(ISD::ADD, SrcVT, SrcAddr, - DAG.getConstant(Offset, SrcVT)), - NULL, 0); - Chain = Value.getValue(1); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, DstVT, DstAddr, - DAG.getConstant(Offset, DstVT)), - NULL, 0); - BytesLeft -= 4; - Offset += 4; - } - if (BytesLeft >= 2) { - Value = DAG.getLoad(MVT::i16, Chain, - DAG.getNode(ISD::ADD, SrcVT, SrcAddr, - DAG.getConstant(Offset, SrcVT)), - NULL, 0); - Chain = Value.getValue(1); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, DstVT, DstAddr, - DAG.getConstant(Offset, DstVT)), - NULL, 0); - BytesLeft -= 2; - Offset += 2; - } + // Handle the last 1 - 7 bytes. + unsigned Offset = SizeVal - BytesLeft; + MVT::ValueType DstVT = Dst.getValueType(); + MVT::ValueType SrcVT = Src.getValueType(); + MVT::ValueType SizeVT = Size.getValueType(); - if (BytesLeft == 1) { - Value = DAG.getLoad(MVT::i8, Chain, - DAG.getNode(ISD::ADD, SrcVT, SrcAddr, - DAG.getConstant(Offset, SrcVT)), - NULL, 0); - Chain = Value.getValue(1); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, DstVT, DstAddr, - DAG.getConstant(Offset, DstVT)), - NULL, 0); - } + Results.push_back(DAG.getMemcpy(Chain, + DAG.getNode(ISD::ADD, DstVT, Dst, + DAG.getConstant(Offset, + DstVT)), + DAG.getNode(ISD::ADD, SrcVT, Src, + DAG.getConstant(Offset, + SrcVT)), + DAG.getConstant(BytesLeft, SizeVT), + Align, AlwaysInline, + DstSV, Offset, SrcSV, Offset)); } - return Chain; + return DAG.getNode(ISD::TokenFactor, MVT::Other, &Results[0], Results.size()); } /// Expand the result of: i64,outchain = READCYCLECOUNTER inchain @@ -5430,8 +5377,6 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::CALL: return LowerCALL(Op, DAG); case ISD::RET: return LowerRET(Op, DAG); case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); - case ISD::MEMSET: return LowerMEMSET(Op, DAG); - case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index d8099506fd6..2abe237ed82 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -441,8 +441,8 @@ namespace llvm { SDOperand Ret, SelectionDAG &DAG) const; - virtual const TargetSubtarget* getSubtarget() { - return static_cast<const TargetSubtarget*>(Subtarget); + virtual const X86Subtarget* getSubtarget() { + return Subtarget; } /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is @@ -512,9 +512,6 @@ namespace llvm { SDOperand LowerSELECT(SDOperand Op, SelectionDAG &DAG); SDOperand 
LowerBRCOND(SDOperand Op, SelectionDAG &DAG); SDOperand LowerMEMSET(SDOperand Op, SelectionDAG &DAG); - SDOperand LowerMEMCPYInline(SDOperand Dest, SDOperand Source, - SDOperand Chain, unsigned Size, unsigned Align, - SelectionDAG &DAG); SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG); SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG); SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG); @@ -535,6 +532,19 @@ namespace llvm { SDNode *ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG); SDNode *ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG); SDNode *ExpandATOMIC_LCS(SDNode *N, SelectionDAG &DAG); + + SDOperand EmitTargetCodeForMemset(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + Value *DstSV, uint64_t DstOff); + SDOperand EmitTargetCodeForMemcpy(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + bool AlwaysInline, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff); }; } |

