summary refs log tree commit diff stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/TargetTransformInfo.cpp          |  5
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp                | 51
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 14
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h   |  3
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.cpp     | 33
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.h       |  3
-rw-r--r--llvm/lib/Transforms/Scalar/MergeICmps.cpp3
7 files changed, 74 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index c3185bf2bbd..53bedfe3f63 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -250,8 +250,9 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}
-bool TargetTransformInfo::enableMemCmpExpansion(unsigned &MaxLoadSize) const {
- return TTIImpl->enableMemCmpExpansion(MaxLoadSize);
+const TargetTransformInfo::MemCmpExpansionOptions *
+TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const {
+ return TTIImpl->enableMemCmpExpansion(IsZeroCmp);
}
bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 1fbe57820c4..0a417e34084 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1758,9 +1758,10 @@ class MemCmpExpansion {
Value *getMemCmpOneBlock();
public:
- MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize,
- unsigned MaxNumLoads, unsigned NumLoadsPerBlock,
- const DataLayout &DL);
+ MemCmpExpansion(CallInst *CI, uint64_t Size,
+ const TargetTransformInfo::MemCmpExpansionOptions &Options,
+ unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
+ unsigned NumLoadsPerBlock, const DataLayout &DL);
unsigned getNumBlocks();
uint64_t getNumLoads() const { return LoadSequence.size(); }
@@ -1778,29 +1779,32 @@ class MemCmpExpansion {
// return from.
// 3. ResultBlock, block to branch to for early exit when a
// LoadCmpBlock finds a difference.
-MemCmpExpansion::MemCmpExpansion(CallInst *const CI, uint64_t Size,
- const unsigned MaxLoadSize,
- const unsigned MaxNumLoads,
- const unsigned LoadsPerBlock,
- const DataLayout &TheDataLayout)
+MemCmpExpansion::MemCmpExpansion(
+ CallInst *const CI, uint64_t Size,
+ const TargetTransformInfo::MemCmpExpansionOptions &Options,
+ const unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
+ const unsigned NumLoadsPerBlock, const DataLayout &TheDataLayout)
: CI(CI),
Size(Size),
- MaxLoadSize(MaxLoadSize),
+ MaxLoadSize(0),
NumLoadsNonOneByte(0),
- NumLoadsPerBlock(LoadsPerBlock),
- IsUsedForZeroCmp(isOnlyUsedInZeroEqualityComparison(CI)),
+ NumLoadsPerBlock(NumLoadsPerBlock),
+ IsUsedForZeroCmp(IsUsedForZeroCmp),
DL(TheDataLayout),
Builder(CI) {
assert(Size > 0 && "zero blocks");
// Scale the max size down if the target can load more bytes than we need.
- while (this->MaxLoadSize > Size) {
- this->MaxLoadSize /= 2;
+ size_t LoadSizeIndex = 0;
+ while (LoadSizeIndex < Options.LoadSizes.size() &&
+ Options.LoadSizes[LoadSizeIndex] > Size) {
+ ++LoadSizeIndex;
}
+ this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex];
// Compute the decomposition.
- unsigned LoadSize = this->MaxLoadSize;
uint64_t CurSize = Size;
uint64_t Offset = 0;
- while (CurSize) {
+ while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) {
+ const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex];
assert(LoadSize > 0 && "zero load size");
const uint64_t NumLoadsForThisSize = CurSize / LoadSize;
if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
@@ -1821,11 +1825,7 @@ MemCmpExpansion::MemCmpExpansion(CallInst *const CI, uint64_t Size,
}
CurSize = CurSize % LoadSize;
}
- // FIXME: This can result in a non-native load size (e.g. X86-32+SSE can
- // load 16 and 4 but not 8), which throws the load count off (e.g. in the
- // aforementioned case, 16 bytes will count for 2 loads but will generate
- // 4).
- LoadSize /= 2;
+ ++LoadSizeIndex;
}
assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
}
@@ -2362,15 +2362,16 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
}
// TTI call to check if target would like to expand memcmp. Also, get the
- // max LoadSize.
- unsigned MaxLoadSize;
- if (!TTI->enableMemCmpExpansion(MaxLoadSize)) return false;
+ // available load sizes.
+ const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
+ const auto *const Options = TTI->enableMemCmpExpansion(IsUsedForZeroCmp);
+ if (!Options) return false;
const unsigned MaxNumLoads =
TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize());
- MemCmpExpansion Expansion(CI, SizeVal, MaxLoadSize, MaxNumLoads,
- MemCmpNumLoadsPerBlock, *DL);
+ MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads,
+ IsUsedForZeroCmp, MemCmpNumLoadsPerBlock, *DL);
// Don't expand if this will require more loads than desired by the target.
if (Expansion.getNumLoads() == 0) {
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 52c5b688d35..43b9892fc5a 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -226,9 +226,17 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
return LoopHasReductions;
}
-bool PPCTTIImpl::enableMemCmpExpansion(unsigned &MaxLoadSize) {
- MaxLoadSize = 8;
- return true;
+const PPCTTIImpl::TTI::MemCmpExpansionOptions *
+PPCTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
+ static const auto Options = []() {
+ TTI::MemCmpExpansionOptions Options;
+ Options.LoadSizes.push_back(8);
+ Options.LoadSizes.push_back(4);
+ Options.LoadSizes.push_back(2);
+ Options.LoadSizes.push_back(1);
+ return Options;
+ }();
+ return &Options;
}
bool PPCTTIImpl::enableInterleavedAccessVectorization() {
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 60dea0b0226..acf5066bc6d 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -63,7 +63,8 @@ public:
/// @{
bool enableAggressiveInterleaving(bool LoopHasReductions);
- bool enableMemCmpExpansion(unsigned &MaxLoadSize);
+ const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
+ bool IsZeroCmp) const;
bool enableInterleavedAccessVectorization();
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector) const;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index f54728d4482..effbd07fa31 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2536,10 +2536,35 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
return (CallerBits & CalleeBits) == CalleeBits;
}
-bool X86TTIImpl::enableMemCmpExpansion(unsigned &MaxLoadSize) {
- // TODO: We can increase these based on available vector ops.
- MaxLoadSize = ST->is64Bit() ? 8 : 4;
- return true;
+const X86TTIImpl::TTI::MemCmpExpansionOptions *
+X86TTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
+ // Only enable vector loads for equality comparison.
+ // Right now the vector version is not as fast, see #33329.
+ static const auto ThreeWayOptions = [this]() {
+ TTI::MemCmpExpansionOptions Options;
+ if (ST->is64Bit()) {
+ Options.LoadSizes.push_back(8);
+ }
+ Options.LoadSizes.push_back(4);
+ Options.LoadSizes.push_back(2);
+ Options.LoadSizes.push_back(1);
+ return Options;
+ }();
+ static const auto EqZeroOptions = [this]() {
+ TTI::MemCmpExpansionOptions Options;
+ // TODO: enable AVX512 when the DAG is ready.
+ // if (ST->hasAVX512()) Options.LoadSizes.push_back(64);
+ if (ST->hasAVX2()) Options.LoadSizes.push_back(32);
+ if (ST->hasSSE2()) Options.LoadSizes.push_back(16);
+ if (ST->is64Bit()) {
+ Options.LoadSizes.push_back(8);
+ }
+ Options.LoadSizes.push_back(4);
+ Options.LoadSizes.push_back(2);
+ Options.LoadSizes.push_back(1);
+ return Options;
+ }();
+ return IsZeroCmp ? &EqZeroOptions : &ThreeWayOptions;
}
bool X86TTIImpl::enableInterleavedAccessVectorization() {
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 0d2c90dc58b..5cb5c0cc298 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -127,7 +127,8 @@ public:
bool hasDivRemOp(Type *DataType, bool IsSigned);
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
- bool enableMemCmpExpansion(unsigned &MaxLoadSize);
+ const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
+ bool IsZeroCmp) const;
bool enableInterleavedAccessVectorization();
private:
int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index 1244a9776fa..f4de036059e 100644
--- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -625,8 +625,7 @@ PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,
// We only try merging comparisons if the target wants to expand memcmp later.
// The rationale is to avoid turning small chains into memcmp calls.
- unsigned MaxLoadSize;
- if (!TTI->enableMemCmpExpansion(MaxLoadSize)) return PreservedAnalyses::all();
+ if (!TTI->enableMemCmpExpansion(true)) return PreservedAnalyses::all();
bool MadeChange = false;
OpenPOWER on IntegriCloud