path: root/llvm/lib/Target/X86
author    Sanjay Patel <spatel@rotateright.com>    2017-06-20 15:58:30 +0000
committer Sanjay Patel <spatel@rotateright.com>    2017-06-20 15:58:30 +0000
commit    0656629b870ae9933e350c5f4edc733012e7ece0 (patch)
tree      82fc84249670cc54145d258342581ecd40b81bec /llvm/lib/Target/X86
parent    4822b5b649f0086aa8339c2def1dbdd303dcb257 (diff)
[x86] enable CGP memcmp() expansion for 2/4/8 byte sizes
There are a couple of potential improvements as seen in the IR and asm:
1. We're unnecessarily extending to a larger type to compare values.
2. The codegen for (select cond, 1, -1) could avoid a cmov.
(or we could change the order of the compares, so we have a select with 0 operand)

llvm-svn: 305802
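For context, a minimal C++ sketch (not part of the patch; the function name cmp4 is illustrative) of the kind of fixed-size memcmp call that CGP can now expand on x86:

#include <cstring>

int cmp4(const void *a, const void *b) {
  // With this change, CGP may expand the libcall roughly into: load a
  // 32-bit value from each pointer, byte-swap both so memory order is
  // compared correctly on a little-endian target, then produce -1/0/1
  // via compares and selects.
  // The commit notes two follow-ups visible in that output: the loaded
  // values are extended to a wider type than necessary before comparing,
  // and (select cond, 1, -1) currently codegens to a cmov.
  return std::memcmp(a, b, 4);
}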
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp         | 6
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.cpp  | 6
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.h    | 2
3 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index eacf2e55143..8dfaf3f080e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1662,6 +1662,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = 4;
+
+ // TODO: These control memcmp expansion in CGP and are set low to prevent
+ // altering the vector expansion for 16/32 byte memcmp in SelectionDAGBuilder.
+ MaxLoadsPerMemcmp = 1;
+ MaxLoadsPerMemcmpOptSize = 1;
+
// Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 1d58cccc308..f13933e9288 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2232,6 +2232,12 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
return (CallerBits & CalleeBits) == CalleeBits;
}
+bool X86TTIImpl::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) {
+ // TODO: We can increase these based on available vector ops.
+ MaxLoadSize = ST->is64Bit() ? 8 : 4;
+ return true;
+}
+
bool X86TTIImpl::enableInterleavedAccessVectorization() {
// TODO: We expect this to be beneficial regardless of arch,
// but there are currently some unexplained performance artifacts on Atom.
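Illustrative only (not from the patch; the function name equal8 is hypothetical): how the limits above play out at the source level for a fixed-size memcmp call.

#include <cstring>

bool equal8(const char *a, const char *b) {
  // With MaxLoadSize = 8 on 64-bit targets, this can be expanded into a
  // single 64-bit load from each side plus one compare. On 32-bit x86 the
  // largest single load is 4 bytes, and with MaxLoadsPerMemcmp = 1 an
  // 8-byte compare presumably is not expanded here and remains a libcall.
  return std::memcmp(a, b, 8) == 0;
}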
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index b907b7556a1..375fb924c2c 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -107,7 +107,7 @@ public:
bool isLegalMaskedScatter(Type *DataType);
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
-
+ bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize);
bool enableInterleavedAccessVectorization();
private:
int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,