author      Clement Courbet <courbet@google.com>    2017-10-27 12:34:18 +0000
committer   Clement Courbet <courbet@google.com>    2017-10-27 12:34:18 +0000
commit      be684eee8291a44fc4f65f53335e224a2feb7e71 (patch)
tree        e894bed3faac94708351aea52a636a320bd7dccb /llvm/lib/CodeGen/CodeGenPrepare.cpp
parent      131f98f054e480cf2533e9754f625c131dc72c95 (diff)
[CodeGen][ExpandMemCmp][NFC] Simplify load sequence generation.
llvm-svn: 316763
Diffstat (limited to 'llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r--    llvm/lib/CodeGen/CodeGenPrepare.cpp    73
1 file changed, 33 insertions(+), 40 deletions(-)
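The patch folds the old two-pass scheme (a callback-based getDecomposition() to count loads, followed by computeLoadSequence() to materialize them) into a single loop in the constructor that fills LoadSequence directly and clears it as soon as the target's load budget (MaxNumLoads) would be exceeded. As a rough illustration, the greedy decomposition can be modeled outside of LLVM by the following standalone sketch; decompose() and its return type are hypothetical names chosen for the example, not part of the patch:

#include <cstdint>
#include <utility>
#include <vector>

// Split Size bytes into loads, starting at the largest load size and halving
// it while bytes remain; give up (return an empty sequence) once more than
// MaxNumLoads loads would be required.
static std::vector<std::pair<unsigned, uint64_t>> // {LoadSize, Offset}
decompose(uint64_t Size, unsigned MaxLoadSize, unsigned MaxNumLoads) {
  std::vector<std::pair<unsigned, uint64_t>> Seq;
  while (MaxLoadSize > Size)   // scale down if the target loads more than needed
    MaxLoadSize /= 2;
  unsigned LoadSize = MaxLoadSize;
  uint64_t Offset = 0;
  for (uint64_t CurSize = Size; CurSize != 0; LoadSize /= 2) {
    const uint64_t NumLoads = CurSize / LoadSize;
    if (Seq.size() + NumLoads > MaxNumLoads)
      return {};               // over budget: the caller keeps the libcall
    for (uint64_t I = 0; I < NumLoads; ++I) {
      Seq.push_back({LoadSize, Offset});
      Offset += LoadSize;
    }
    CurSize %= LoadSize;
  }
  return Seq;
}

For example, decompose(22, /*MaxLoadSize=*/8, /*MaxNumLoads=*/20) yields {8,0}, {8,8}, {4,16}, {2,20}, whereas a budget of 3 loads makes it bail out with an empty sequence, which mirrors the new getNumLoads() == 0 check in expandMemCmp() below.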
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 1e5f15397bb..346248ffc06 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1743,7 +1743,6 @@ class MemCmpExpansion {
const uint64_t Offset;
};
SmallVector<LoadEntry, 8> LoadSequence;
- void computeLoadSequence();
void createLoadCmpBlocks();
void createResultBlock();
@@ -1759,18 +1758,13 @@ class MemCmpExpansion {
Value *getMemCmpEqZeroOneBlock();
Value *getMemCmpOneBlock();
- // Computes the decomposition. THis is the common code to compute the number
- // of loads and the actual load sequence. `callback` is called with each load
- // size and number of loads for the block size.
- template <typename CallBackT>
- void getDecomposition(CallBackT callback) const;
-
public:
MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize,
- unsigned NumLoadsPerBlock, const DataLayout &DL);
+ unsigned MaxNumLoads, unsigned NumLoadsPerBlock,
+ const DataLayout &DL);
unsigned getNumBlocks();
- uint64_t getNumLoads() const { return NumLoads; }
+ uint64_t getNumLoads() const { return LoadSequence.size(); }
Value *getMemCmpExpansion();
};
@@ -1787,6 +1781,7 @@ class MemCmpExpansion {
// LoadCmpBlock finds a difference.
MemCmpExpansion::MemCmpExpansion(CallInst *const CI, uint64_t Size,
const unsigned MaxLoadSize,
+ const unsigned MaxNumLoads,
const unsigned LoadsPerBlock,
const DataLayout &TheDataLayout)
: CI(CI),
@@ -1798,27 +1793,34 @@ MemCmpExpansion::MemCmpExpansion(CallInst *const CI, uint64_t Size,
IsUsedForZeroCmp(isOnlyUsedInZeroEqualityComparison(CI)),
DL(TheDataLayout),
Builder(CI) {
+ assert(Size > 0 && "zero blocks");
// Scale the max size down if the target can load more bytes than we need.
while (this->MaxLoadSize > Size) {
this->MaxLoadSize /= 2;
}
- // Compute the number of loads. At that point we don't want to compute the
- // actual decomposition because it might be too large to fit in memory.
- getDecomposition([this](unsigned LoadSize, uint64_t NumLoadsForSize) {
- NumLoads += NumLoadsForSize;
- });
-}
-
-template <typename CallBackT>
-void MemCmpExpansion::getDecomposition(CallBackT callback) const {
+ // Compute the decomposition.
unsigned LoadSize = this->MaxLoadSize;
- assert(Size > 0 && "zero blocks");
uint64_t CurSize = Size;
+ uint64_t Offset = 0;
while (CurSize) {
assert(LoadSize > 0 && "zero load size");
const uint64_t NumLoadsForThisSize = CurSize / LoadSize;
+ if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
+ // Do not expand if the total number of loads is larger than what the
+ // target allows. Note that it's important that we exit before completing
+ // the expansion to avoid using a ton of memory to store the expansion for
+ // large sizes.
+ LoadSequence.clear();
+ return;
+ }
if (NumLoadsForThisSize > 0) {
- callback(LoadSize, NumLoadsForThisSize);
+ for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
+ LoadSequence.push_back({LoadSize, Offset});
+ Offset += LoadSize;
+ }
+ if (LoadSize > 1) {
+ ++NumLoadsNonOneByte;
+ }
CurSize = CurSize % LoadSize;
}
// FIXME: This can result in a non-native load size (e.g. X86-32+SSE can
@@ -1827,21 +1829,7 @@ void MemCmpExpansion::getDecomposition(CallBackT callback) const {
// 4).
LoadSize /= 2;
}
-}
-
-void MemCmpExpansion::computeLoadSequence() {
- uint64_t Offset = 0;
- getDecomposition(
- [this, &Offset](unsigned LoadSize, uint64_t NumLoadsForSize) {
- for (uint64_t I = 0; I < NumLoadsForSize; ++I) {
- LoadSequence.push_back({LoadSize, Offset});
- Offset += LoadSize;
- }
- if (LoadSize > 1) {
- ++NumLoadsNonOneByte;
- }
- });
- assert(LoadSequence.size() == getNumLoads() && "mismatch in numbe rof loads");
+ assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
}
unsigned MemCmpExpansion::getNumBlocks() {
@@ -2241,7 +2229,6 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
// This function expands the memcmp call into an inline expansion and returns
// the memcmp result.
Value *MemCmpExpansion::getMemCmpExpansion() {
- computeLoadSequence();
// A memcmp with zero-comparison with only one block of load and compare does
// not need to set up any extra blocks. This case could be handled in the DAG,
// but since we have all of the machinery to flexibly expand any memcpy here,
@@ -2372,17 +2359,23 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
}
const uint64_t SizeVal = SizeCast->getZExtValue();
+ if (SizeVal == 0) {
+ return false;
+ }
+
// TTI call to check if target would like to expand memcmp. Also, get the
// max LoadSize.
unsigned MaxLoadSize;
if (!TTI->enableMemCmpExpansion(MaxLoadSize)) return false;
- MemCmpExpansion Expansion(CI, SizeVal, MaxLoadSize, MemCmpNumLoadsPerBlock,
- *DL);
+ const unsigned MaxNumLoads =
+ TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize());
+
+ MemCmpExpansion Expansion(CI, SizeVal, MaxLoadSize, MaxNumLoads,
+ MemCmpNumLoadsPerBlock, *DL);
// Don't expand if this will require more loads than desired by the target.
- if (Expansion.getNumLoads() >
- TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) {
+ if (Expansion.getNumLoads() == 0) {
NumMemCmpGreaterThanMax++;
return false;
}
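Seen from the call site, the comparison against the target limit collapses to an emptiness test. A simplified sketch of the consuming pattern, with variable names taken from the diff above and the usual replace/erase epilogue of the expansion added for context (error paths and statistics elided):

  const unsigned MaxNumLoads =
      TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize());
  MemCmpExpansion Expansion(CI, SizeVal, MaxLoadSize, MaxNumLoads,
                            MemCmpNumLoadsPerBlock, *DL);
  if (Expansion.getNumLoads() == 0)
    return false;              // over budget: leave the memcmp libcall in place
  Value *Res = Expansion.getMemCmpExpansion();
  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return true;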