path: root/llvm/lib/Target/X86
author    Sanjay Patel <spatel@rotateright.com>    2017-06-20 15:58:30 +0000
committer Sanjay Patel <spatel@rotateright.com>    2017-06-20 15:58:30 +0000
commit    0656629b870ae9933e350c5f4edc733012e7ece0 (patch)
tree      82fc84249670cc54145d258342581ecd40b81bec /llvm/lib/Target/X86
parent    4822b5b649f0086aa8339c2def1dbdd303dcb257 (diff)
[x86] enable CGP memcmp() expansion for 2/4/8 byte sizes
There are a couple of potential improvements as seen in the IR and asm:
1. We're unnecessarily extending to a larger type to compare values.
2. The codegen for (select cond, 1, -1) could avoid a cmov.
(or we could change the order of the compares, so we have a select with 0 operand)

llvm-svn: 305802
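For context, a minimal C++ sketch (not part of the patch; the function name cmp4 is illustrative) of the kind of fixed-size memcmp call that CGP can now expand on x86:

#include <cstring>

int cmp4(const void *a, const void *b) {
  // With this change, CGP may expand the libcall roughly into: load a
  // 32-bit value from each pointer, byte-swap both so memory order is
  // compared correctly on a little-endian target, then produce -1/0/1
  // via compares and selects.
  // The commit notes two follow-ups visible in that output: the loaded
  // values are extended to a wider type than necessary before comparing,
  // and (select cond, 1, -1) currently codegens to a cmov.
  return std::memcmp(a, b, 4);
}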
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp         | 6
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.cpp  | 6
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.h    | 2
3 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index eacf2e55143..8dfaf3f080e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1662,6 +1662,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = 4;
+
+ // TODO: These control memcmp expansion in CGP and are set low to prevent
+ // altering the vector expansion for 16/32 byte memcmp in SelectionDAGBuilder.
+ MaxLoadsPerMemcmp = 1;
+ MaxLoadsPerMemcmpOptSize = 1;
+
// Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 1d58cccc308..f13933e9288 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2232,6 +2232,12 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
return (CallerBits & CalleeBits) == CalleeBits;
}
+bool X86TTIImpl::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) {
+ // TODO: We can increase these based on available vector ops.
+ MaxLoadSize = ST->is64Bit() ? 8 : 4;
+ return true;
+}
+
bool X86TTIImpl::enableInterleavedAccessVectorization() {
// TODO: We expect this to be beneficial regardless of arch,
// but there are currently some unexplained performance artifacts on Atom.
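Illustrative only (not from the patch; the function name equal8 is hypothetical): how the limits above play out at the source level for a fixed-size memcmp call.

#include <cstring>

bool equal8(const char *a, const char *b) {
  // With MaxLoadSize = 8 on 64-bit targets, this can be expanded into a
  // single 64-bit load from each side plus one compare. On 32-bit x86 the
  // largest single load is 4 bytes, and with MaxLoadsPerMemcmp = 1 an
  // 8-byte compare presumably is not expanded here and remains a libcall.
  return std::memcmp(a, b, 8) == 0;
}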
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index b907b7556a1..375fb924c2c 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -107,7 +107,7 @@ public:
bool isLegalMaskedScatter(Type *DataType);
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
-
+ bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize);
bool enableInterleavedAccessVectorization();
private:
int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,