diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 14 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 33 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.h | 3 |
4 files changed, 44 insertions, 9 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 52c5b688d35..43b9892fc5a 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -226,9 +226,17 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) { return LoopHasReductions; } -bool PPCTTIImpl::enableMemCmpExpansion(unsigned &MaxLoadSize) { - MaxLoadSize = 8; - return true; +const PPCTTIImpl::TTI::MemCmpExpansionOptions * +PPCTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const { + static const auto Options = []() { + TTI::MemCmpExpansionOptions Options; + Options.LoadSizes.push_back(8); + Options.LoadSizes.push_back(4); + Options.LoadSizes.push_back(2); + Options.LoadSizes.push_back(1); + return Options; + }(); + return &Options; } bool PPCTTIImpl::enableInterleavedAccessVectorization() { diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 60dea0b0226..acf5066bc6d 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -63,7 +63,8 @@ public: /// @{ bool enableAggressiveInterleaving(bool LoopHasReductions); - bool enableMemCmpExpansion(unsigned &MaxLoadSize); + const TTI::MemCmpExpansionOptions *enableMemCmpExpansion( + bool IsZeroCmp) const; bool enableInterleavedAccessVectorization(); unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector) const; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index f54728d4482..effbd07fa31 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2536,10 +2536,35 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller, return (CallerBits & CalleeBits) == CalleeBits; } -bool X86TTIImpl::enableMemCmpExpansion(unsigned &MaxLoadSize) { - // TODO: We can increase these based on available vector ops. - MaxLoadSize = ST->is64Bit() ? 8 : 4; - return true; +const X86TTIImpl::TTI::MemCmpExpansionOptions * +X86TTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const { + // Only enable vector loads for equality comparison. + // Right now the vector version is not as fast, see #33329. + static const auto ThreeWayOptions = [this]() { + TTI::MemCmpExpansionOptions Options; + if (ST->is64Bit()) { + Options.LoadSizes.push_back(8); + } + Options.LoadSizes.push_back(4); + Options.LoadSizes.push_back(2); + Options.LoadSizes.push_back(1); + return Options; + }(); + static const auto EqZeroOptions = [this]() { + TTI::MemCmpExpansionOptions Options; + // TODO: enable AVX512 when the DAG is ready. + // if (ST->hasAVX512()) Options.LoadSizes.push_back(64); + if (ST->hasAVX2()) Options.LoadSizes.push_back(32); + if (ST->hasSSE2()) Options.LoadSizes.push_back(16); + if (ST->is64Bit()) { + Options.LoadSizes.push_back(8); + } + Options.LoadSizes.push_back(4); + Options.LoadSizes.push_back(2); + Options.LoadSizes.push_back(1); + return Options; + }(); + return IsZeroCmp ? &EqZeroOptions : &ThreeWayOptions; } bool X86TTIImpl::enableInterleavedAccessVectorization() { diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 0d2c90dc58b..5cb5c0cc298 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -127,7 +127,8 @@ public: bool hasDivRemOp(Type *DataType, bool IsSigned); bool areInlineCompatible(const Function *Caller, const Function *Callee) const; - bool enableMemCmpExpansion(unsigned &MaxLoadSize); + const TTI::MemCmpExpansionOptions *enableMemCmpExpansion( + bool IsZeroCmp) const; bool enableInterleavedAccessVectorization(); private: int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask, |

