summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp  14
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h  3
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.cpp  33
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.h  3
4 files changed, 44 insertions, 9 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 52c5b688d35..43b9892fc5a 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -226,9 +226,17 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
return LoopHasReductions;
}
-bool PPCTTIImpl::enableMemCmpExpansion(unsigned &MaxLoadSize) {
- MaxLoadSize = 8;
- return true;
+/// Returns the memcmp expansion options for PowerPC.
+///
+/// \param IsZeroCmp not consulted here: the same option set is returned for
+///        both values.
+/// \returns a pointer to a function-local static that is built on the first
+///          call; its load sizes are 8, 4, 2 and 1, in that order.
+const PPCTTIImpl::TTI::MemCmpExpansionOptions *
+PPCTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
+  static const TTI::MemCmpExpansionOptions PPCOptions = [] {
+    // Load sizes, listed from the widest to the narrowest.
+    const unsigned SizesWidestFirst[] = {8, 4, 2, 1};
+    TTI::MemCmpExpansionOptions Result;
+    for (const unsigned Size : SizesWidestFirst)
+      Result.LoadSizes.push_back(Size);
+    return Result;
+  }();
+  return &PPCOptions;
}
bool PPCTTIImpl::enableInterleavedAccessVectorization() {
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 60dea0b0226..acf5066bc6d 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -63,7 +63,8 @@ public:
/// @{
bool enableAggressiveInterleaving(bool LoopHasReductions);
- bool enableMemCmpExpansion(unsigned &MaxLoadSize);
+ const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
+ bool IsZeroCmp) const;
bool enableInterleavedAccessVectorization();
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector) const;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index f54728d4482..effbd07fa31 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2536,10 +2536,35 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
return (CallerBits & CalleeBits) == CalleeBits;
}
-bool X86TTIImpl::enableMemCmpExpansion(unsigned &MaxLoadSize) {
- // TODO: We can increase these based on available vector ops.
- MaxLoadSize = ST->is64Bit() ? 8 : 4;
- return true;
+/// Returns the memcmp expansion options that match the current subtarget.
+///
+/// \param IsZeroCmp when true, the option set that may include vector-sized
+///        loads (32 bytes with AVX2, 16 bytes with SSE2) is returned;
+///        otherwise only scalar load sizes are offered.
+/// \returns a pointer to an immutable, function-local static entry. Load
+///          sizes are listed from the widest to the narrowest.
+const X86TTIImpl::TTI::MemCmpExpansionOptions *
+X86TTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
+  // Only enable vector loads for equality comparison.
+  // Right now the vector version is not as fast, see #33329.
+
+  // The options are returned by pointer, so they live in function-local
+  // statics. A static is initialized only once per process, so its
+  // initializer must not read `ST`: doing so would freeze the features of
+  // whichever subtarget happened to call first and hand them to every later
+  // caller. Instead, one entry is built per feature combination by a helper
+  // that captures nothing, and the entry for this subtarget is selected on
+  // every call.
+  const auto MakeOptions = [](bool HasAVX2, bool HasSSE2, bool Is64Bit) {
+    TTI::MemCmpExpansionOptions Options;
+    // TODO: enable AVX512 when the DAG is ready.
+    // if (HasAVX512) Options.LoadSizes.push_back(64);
+    if (HasAVX2)
+      Options.LoadSizes.push_back(32);
+    if (HasSSE2)
+      Options.LoadSizes.push_back(16);
+    if (Is64Bit)
+      Options.LoadSizes.push_back(8);
+    Options.LoadSizes.push_back(4);
+    Options.LoadSizes.push_back(2);
+    Options.LoadSizes.push_back(1);
+    return Options;
+  };
+
+  // Scalar-only options, indexed by [Is64Bit].
+  static const TTI::MemCmpExpansionOptions ThreeWayOptions[2] = {
+      MakeOptions(false, false, false), MakeOptions(false, false, true)};
+
+  // Options for equality comparison, indexed by [HasAVX2][HasSSE2][Is64Bit].
+  static const TTI::MemCmpExpansionOptions EqZeroOptions[2][2][2] = {
+      {{MakeOptions(false, false, false), MakeOptions(false, false, true)},
+       {MakeOptions(false, true, false), MakeOptions(false, true, true)}},
+      {{MakeOptions(true, false, false), MakeOptions(true, false, true)},
+       {MakeOptions(true, true, false), MakeOptions(true, true, true)}}};
+
+  const unsigned Is64Bit = ST->is64Bit() ? 1 : 0;
+  if (!IsZeroCmp)
+    return &ThreeWayOptions[Is64Bit];
+
+  const unsigned HasAVX2 = ST->hasAVX2() ? 1 : 0;
+  const unsigned HasSSE2 = ST->hasSSE2() ? 1 : 0;
+  return &EqZeroOptions[HasAVX2][HasSSE2][Is64Bit];
}
bool X86TTIImpl::enableInterleavedAccessVectorization() {
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 0d2c90dc58b..5cb5c0cc298 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -127,7 +127,8 @@ public:
bool hasDivRemOp(Type *DataType, bool IsSigned);
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
- bool enableMemCmpExpansion(unsigned &MaxLoadSize);
+ const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
+ bool IsZeroCmp) const;
bool enableInterleavedAccessVectorization();
private:
int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
OpenPOWER on IntegriCloud