summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
diff options
context:
space:
mode:
authorClement Courbet <courbet@google.com>2017-10-30 14:19:33 +0000
committerClement Courbet <courbet@google.com>2017-10-30 14:19:33 +0000
commitb2c3eb8cf18fd749eb6f71314a7f657dcbfb4b84 (patch)
treed54c47b5bd661ab5a5fa46864fad656e58b0cc72 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parentbef1c56724dbb566af0f0ce18fc30cf43ba82fda (diff)
downloadbcm5719-llvm-b2c3eb8cf18fd749eb6f71314a7f657dcbfb4b84.tar.gz
bcm5719-llvm-b2c3eb8cf18fd749eb6f71314a7f657dcbfb4b84.zip
[CodeGen][ExpandMemcmp] Allow memcmp to expand to vector loads (2).
- Targets that want to support memcmp expansions now return the list of supported load sizes. - Expansion codegen does not assume that all power-of-two load sizes smaller than the max load size are valid. For examples, this is not the case for x86(32bit)+sse2. Fixes PR34887. llvm-svn: 316905
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp33
1 files changed, 29 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index f54728d4482..effbd07fa31 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2536,10 +2536,35 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
return (CallerBits & CalleeBits) == CalleeBits;
}
-bool X86TTIImpl::enableMemCmpExpansion(unsigned &MaxLoadSize) {
- // TODO: We can increase these based on available vector ops.
- MaxLoadSize = ST->is64Bit() ? 8 : 4;
- return true;
+const X86TTIImpl::TTI::MemCmpExpansionOptions *
+X86TTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
+ // Only enable vector loads for equality comparison.
+ // Right now the vector version is not as fast, see #33329.
+ static const auto ThreeWayOptions = [this]() {
+ TTI::MemCmpExpansionOptions Options;
+ if (ST->is64Bit()) {
+ Options.LoadSizes.push_back(8);
+ }
+ Options.LoadSizes.push_back(4);
+ Options.LoadSizes.push_back(2);
+ Options.LoadSizes.push_back(1);
+ return Options;
+ }();
+ static const auto EqZeroOptions = [this]() {
+ TTI::MemCmpExpansionOptions Options;
+ // TODO: enable AVX512 when the DAG is ready.
+ // if (ST->hasAVX512()) Options.LoadSizes.push_back(64);
+ if (ST->hasAVX2()) Options.LoadSizes.push_back(32);
+ if (ST->hasSSE2()) Options.LoadSizes.push_back(16);
+ if (ST->is64Bit()) {
+ Options.LoadSizes.push_back(8);
+ }
+ Options.LoadSizes.push_back(4);
+ Options.LoadSizes.push_back(2);
+ Options.LoadSizes.push_back(1);
+ return Options;
+ }();
+ return IsZeroCmp ? &EqZeroOptions : &ThreeWayOptions;
}
bool X86TTIImpl::enableInterleavedAccessVectorization() {
OpenPOWER on IntegriCloud