path: root/llvm/lib/Target/ARM
author    Dan Gohman <gohman@apple.com>  2008-04-12 04:36:06 +0000
committer Dan Gohman <gohman@apple.com>  2008-04-12 04:36:06 +0000
commit    544ab2c50ba1acb803e57519ebf7ec81c3340f79 (patch)
tree      53c0609d21cde14b6d7c7bd5e809f3b3a4060d25 /llvm/lib/Target/ARM
parent    8c7cf88f7ea574d5c3831e0c50655e5ab60af85d (diff)
download  bcm5719-llvm-544ab2c50ba1acb803e57519ebf7ec81c3340f79.tar.gz
download  bcm5719-llvm-544ab2c50ba1acb803e57519ebf7ec81c3340f79.zip
Drop ISD::MEMSET, ISD::MEMMOVE, and ISD::MEMCPY, which are not Legal
on any current target and aren't optimized in DAGCombiner. Instead of
using intermediate nodes, expand the operations, choosing between
simple loads/stores, target-specific code, and library calls,
immediately.

Previously, the code to emit optimized code for these operations was
only used at initial SelectionDAG construction time; now it is used at
all times. This fixes some cases where rep;movs was being used for
small copies where simple loads/stores would be better.

This also cleans up code that checks for alignments less than 4; let
the targets make that decision instead of doing it in
target-independent code. This allows x86 to use rep;movs in
low-alignment cases.

Also, this fixes a bug that resulted in the use of rep;stos for
memsets of 0 with non-constant memory size when the alignment was at
least 4. It's better to use the library in this case, which can be
significantly faster when the size is large.

This also preserves more SourceValue information when memory
intrinsics are lowered into simple loads/stores.

llvm-svn: 49572
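For reference, the general shape of the new per-target hook, as a minimal
sketch distilled from the ARM override in the diff below (the class name
SomeTargetLowering is hypothetical; the signature follows the ARM one, and
the body shows only the decline-and-fall-back pattern, not a full
implementation):

    // Sketch only: returning a null SDOperand tells the target-independent
    // expansion that this target has no special code for the operation, so
    // the caller falls back to simple loads/stores or a library call.
    SDOperand
    SomeTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
                                                SDOperand Chain,
                                                SDOperand Dst, SDOperand Src,
                                                SDOperand Size, unsigned Align,
                                                bool AlwaysInline,
                                                Value *DstSV, uint64_t DstOff,
                                                Value *SrcSV, uint64_t SrcOff) {
      // Only constant-size copies within the subtarget's inline threshold
      // are handled specially; everything else is declined.
      ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
      if (!ConstantSize)
        return SDOperand();
      if (!AlwaysInline &&
          ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold())
        return SDOperand();
      // ... emit target-specific loads/stores here, threading the chain
      // through each operation and tagging memory operands with DstSV/SrcSV
      // so SourceValue information is preserved ...
      return Chain; // placeholder for the final TokenFactor chain
    }
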
Diffstat (limited to 'llvm/lib/Target/ARM')
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp  54
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.h     15
-rw-r--r--  llvm/lib/Target/ARM/ARMSubtarget.h         2
3 files changed, 41 insertions, 30 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 7218560cc6c..0095352c415 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -197,11 +197,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
- // Expand mem operations genericly.
- setOperationAction(ISD::MEMSET , MVT::Other, Expand);
- setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
- setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
-
// Use the default implementation.
setOperationAction(ISD::VASTART , MVT::Other, Custom);
setOperationAction(ISD::VAARG , MVT::Other, Expand);
@@ -1246,18 +1241,30 @@ static SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::CNEG, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
}
-SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain,
- SDOperand Dest,
- SDOperand Source,
- unsigned Size,
- unsigned Align,
- SelectionDAG &DAG) {
+SDOperand
+ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
+ SDOperand Chain,
+ SDOperand Dst, SDOperand Src,
+ SDOperand Size, unsigned Align,
+ bool AlwaysInline,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff){
// Do repeated 4-byte loads and stores. To be improved.
- assert((Align & 3) == 0 && "Expected 4-byte aligned addresses!");
- unsigned BytesLeft = Size & 3;
- unsigned NumMemOps = Size >> 2;
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDOperand();
+ // This requires the copy size to be a constant, preferrably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDOperand();
+ uint64_t SizeVal = ConstantSize->getValue();
+ if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
+ return SDOperand();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
unsigned EmittedNumMemOps = 0;
- unsigned SrcOff = 0, DstOff = 0;
MVT::ValueType VT = MVT::i32;
unsigned VTSize = 4;
unsigned i = 0;
@@ -1272,9 +1279,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain,
for (i = 0;
i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
Loads[i] = DAG.getLoad(VT, Chain,
- DAG.getNode(ISD::ADD, MVT::i32, Source,
+ DAG.getNode(ISD::ADD, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
- NULL, 0);
+ SrcSV, SrcOff);
TFOps[i] = Loads[i].getValue(1);
SrcOff += VTSize;
}
@@ -1283,9 +1290,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain,
for (i = 0;
i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
TFOps[i] = DAG.getStore(Chain, Loads[i],
- DAG.getNode(ISD::ADD, MVT::i32, Dest,
+ DAG.getNode(ISD::ADD, MVT::i32, Dst,
DAG.getConstant(DstOff, MVT::i32)),
- NULL, 0);
+ DstSV, DstOff);
DstOff += VTSize;
}
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &TFOps[0], i);
@@ -1309,9 +1316,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain,
}
Loads[i] = DAG.getLoad(VT, Chain,
- DAG.getNode(ISD::ADD, MVT::i32, Source,
+ DAG.getNode(ISD::ADD, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
- NULL, 0);
+ SrcSV, SrcOff);
TFOps[i] = Loads[i].getValue(1);
++i;
SrcOff += VTSize;
@@ -1331,9 +1338,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain,
}
TFOps[i] = DAG.getStore(Chain, Loads[i],
- DAG.getNode(ISD::ADD, MVT::i32, Dest,
+ DAG.getNode(ISD::ADD, MVT::i32, Dst,
DAG.getConstant(DstOff, MVT::i32)),
- NULL, 0);
+ DstSV, DstOff);
++i;
DstOff += VTSize;
BytesLeft -= VTSize;
@@ -1409,7 +1416,6 @@ SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
case ISD::RETURNADDR: break;
case ISD::FRAMEADDR: break;
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
- case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 285a20d23f8..58d8d8c6c86 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -119,8 +119,8 @@ namespace llvm {
getRegClassForInlineAsmConstraint(const std::string &Constraint,
MVT::ValueType VT) const;
- virtual const TargetSubtarget* getSubtarget() {
- return static_cast<const TargetSubtarget*>(Subtarget);
+ virtual const ARMSubtarget* getSubtarget() {
+ return Subtarget;
}
private:
@@ -143,11 +143,14 @@ namespace llvm {
SDOperand LowerGLOBAL_OFFSET_TABLE(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG);
- SDOperand LowerMEMCPYInline(SDOperand Chain, SDOperand Dest,
- SDOperand Source, unsigned Size,
- unsigned Align, SelectionDAG &DAG);
-
+ SDOperand EmitTargetCodeForMemcpy(SelectionDAG &DAG,
+ SDOperand Chain,
+ SDOperand Dst, SDOperand Src,
+ SDOperand Size, unsigned Align,
+ bool AlwaysInline,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff);
};
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index c43924b53df..fbc9e579df1 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -62,6 +62,8 @@ protected:
///
ARMSubtarget(const Module &M, const std::string &FS, bool thumb);
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
unsigned getMaxInlineSizeThreshold() const {
// FIXME: For now, we don't lower memcpy's to loads / stores for Thumb.
// Change this once Thumb ldmia / stmia support is added.
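
For context, a hedged sketch of how this accessor is meant to be consulted
(mirroring the size check added to EmitTargetCodeForMemcpy above; the helper
function here is hypothetical and not part of the commit):

    // Hypothetical helper, mirroring the gate in EmitTargetCodeForMemcpy:
    // copies larger than the subtarget threshold are declined so that the
    // generic expansion falls back to the memcpy library call, which the
    // commit message notes can be significantly faster for large sizes.
    static bool shouldInlineMemcpy(uint64_t SizeVal, bool AlwaysInline,
                                   const ARMSubtarget *ST) {
      return AlwaysInline || SizeVal <= ST->getMaxInlineSizeThreshold();
    }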