diff options
author | Clement Courbet <courbet@google.com> | 2017-10-27 12:34:18 +0000 |
---|---|---|
committer | Clement Courbet <courbet@google.com> | 2017-10-27 12:34:18 +0000 |
commit | be684eee8291a44fc4f65f53335e224a2feb7e71 (patch) | |
tree | e894bed3faac94708351aea52a636a320bd7dccb /llvm/lib/CodeGen | |
parent | 131f98f054e480cf2533e9754f625c131dc72c95 (diff) | |
download | bcm5719-llvm-be684eee8291a44fc4f65f53335e224a2feb7e71.tar.gz bcm5719-llvm-be684eee8291a44fc4f65f53335e224a2feb7e71.zip |
[CodeGen][ExpandMemCmp][NFC] Simplify load sequence generation.
llvm-svn: 316763
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 73 |
1 files changed, 33 insertions, 40 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 1e5f15397bb..346248ffc06 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1743,7 +1743,6 @@ class MemCmpExpansion { const uint64_t Offset; }; SmallVector<LoadEntry, 8> LoadSequence; - void computeLoadSequence(); void createLoadCmpBlocks(); void createResultBlock(); @@ -1759,18 +1758,13 @@ class MemCmpExpansion { Value *getMemCmpEqZeroOneBlock(); Value *getMemCmpOneBlock(); - // Computes the decomposition. THis is the common code to compute the number - // of loads and the actual load sequence. `callback` is called with each load - // size and number of loads for the block size. - template <typename CallBackT> - void getDecomposition(CallBackT callback) const; - public: MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize, - unsigned NumLoadsPerBlock, const DataLayout &DL); + unsigned MaxNumLoads, unsigned NumLoadsPerBlock, + const DataLayout &DL); unsigned getNumBlocks(); - uint64_t getNumLoads() const { return NumLoads; } + uint64_t getNumLoads() const { return LoadSequence.size(); } Value *getMemCmpExpansion(); }; @@ -1787,6 +1781,7 @@ class MemCmpExpansion { // LoadCmpBlock finds a difference. MemCmpExpansion::MemCmpExpansion(CallInst *const CI, uint64_t Size, const unsigned MaxLoadSize, + const unsigned MaxNumLoads, const unsigned LoadsPerBlock, const DataLayout &TheDataLayout) : CI(CI), @@ -1798,27 +1793,34 @@ MemCmpExpansion::MemCmpExpansion(CallInst *const CI, uint64_t Size, IsUsedForZeroCmp(isOnlyUsedInZeroEqualityComparison(CI)), DL(TheDataLayout), Builder(CI) { + assert(Size > 0 && "zero blocks"); // Scale the max size down if the target can load more bytes than we need. while (this->MaxLoadSize > Size) { this->MaxLoadSize /= 2; } - // Compute the number of loads. At that point we don't want to compute the - // actual decomposition because it might be too large to fit in memory. - getDecomposition([this](unsigned LoadSize, uint64_t NumLoadsForSize) { - NumLoads += NumLoadsForSize; - }); -} - -template <typename CallBackT> -void MemCmpExpansion::getDecomposition(CallBackT callback) const { + // Compute the decomposition. unsigned LoadSize = this->MaxLoadSize; - assert(Size > 0 && "zero blocks"); uint64_t CurSize = Size; + uint64_t Offset = 0; while (CurSize) { assert(LoadSize > 0 && "zero load size"); const uint64_t NumLoadsForThisSize = CurSize / LoadSize; + if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) { + // Do not expand if the total number of loads is larger than what the + // target allows. Note that it's important that we exit before completing + // the expansion to avoid using a ton of memory to store the expansion for + // large sizes. + LoadSequence.clear(); + return; + } if (NumLoadsForThisSize > 0) { - callback(LoadSize, NumLoadsForThisSize); + for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) { + LoadSequence.push_back({LoadSize, Offset}); + Offset += LoadSize; + } + if (LoadSize > 1) { + ++NumLoadsNonOneByte; + } CurSize = CurSize % LoadSize; } // FIXME: This can result in a non-native load size (e.g. X86-32+SSE can @@ -1827,21 +1829,7 @@ void MemCmpExpansion::getDecomposition(CallBackT callback) const { // 4). LoadSize /= 2; } -} - -void MemCmpExpansion::computeLoadSequence() { - uint64_t Offset = 0; - getDecomposition( - [this, &Offset](unsigned LoadSize, uint64_t NumLoadsForSize) { - for (uint64_t I = 0; I < NumLoadsForSize; ++I) { - LoadSequence.push_back({LoadSize, Offset}); - Offset += LoadSize; - } - if (LoadSize > 1) { - ++NumLoadsNonOneByte; - } - }); - assert(LoadSequence.size() == getNumLoads() && "mismatch in numbe rof loads"); + assert(LoadSequence.size() <= MaxNumLoads && "broken invariant"); } unsigned MemCmpExpansion::getNumBlocks() { @@ -2241,7 +2229,6 @@ Value *MemCmpExpansion::getMemCmpOneBlock() { // This function expands the memcmp call into an inline expansion and returns // the memcmp result. Value *MemCmpExpansion::getMemCmpExpansion() { - computeLoadSequence(); // A memcmp with zero-comparison with only one block of load and compare does // not need to set up any extra blocks. This case could be handled in the DAG, // but since we have all of the machinery to flexibly expand any memcpy here, @@ -2372,17 +2359,23 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, } const uint64_t SizeVal = SizeCast->getZExtValue(); + if (SizeVal == 0) { + return false; + } + // TTI call to check if target would like to expand memcmp. Also, get the // max LoadSize. unsigned MaxLoadSize; if (!TTI->enableMemCmpExpansion(MaxLoadSize)) return false; - MemCmpExpansion Expansion(CI, SizeVal, MaxLoadSize, MemCmpNumLoadsPerBlock, - *DL); + const unsigned MaxNumLoads = + TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize()); + + MemCmpExpansion Expansion(CI, SizeVal, MaxLoadSize, MaxNumLoads, + MemCmpNumLoadsPerBlock, *DL); // Don't expand if this will require more loads than desired by the target. - if (Expansion.getNumLoads() > - TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) { + if (Expansion.getNumLoads() == 0) { NumMemCmpGreaterThanMax++; return false; } |