diff options
| author | David L Kreitzer <david.l.kreitzer@intel.com> | 2016-04-13 14:31:06 +0000 |
|---|---|---|
| committer | David L Kreitzer <david.l.kreitzer@intel.com> | 2016-04-13 14:31:06 +0000 |
| commit | 752c1448fe9c61866393725704de259c9da096c6 (patch) | |
| tree | ca6b43e66128cb4b27e92a8decf54f920977fa98 /llvm/lib | |
| parent | 4fb7f509fd19297df047db074bbb2581c88408bc (diff) | |
| download | bcm5719-llvm-752c1448fe9c61866393725704de259c9da096c6.tar.gz bcm5719-llvm-752c1448fe9c61866393725704de259c9da096c6.zip | |
Simplify strlen to a subtraction for certain cases.
Patch by Li Huang (li1.huang@intel.com)
Differential Revision: http://reviews.llvm.org/D18230
llvm-svn: 266200
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Analysis/ValueTracking.cpp | 34 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 51 |
2 files changed, 72 insertions, 13 deletions
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 757c3b6fb0d..55aa24dab18 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2650,6 +2650,24 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, return Ptr; } +bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP) { + // Make sure the GEP has exactly three arguments. + if (GEP->getNumOperands() != 3) + return false; + + // Make sure the index-ee is a pointer to array of i8. + ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType()); + if (!AT || !AT->getElementType()->isIntegerTy(8)) + return false; + + // Check to make sure that the first operand of the GEP is an integer and + // has value 0 so that we are sure we're indexing into the initializer. + const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); + if (!FirstIdx || !FirstIdx->isZero()) + return false; + + return true; +} /// This function computes the length of a null-terminated C string pointed to /// by V. If successful, it returns true and returns the string in Str. @@ -2664,19 +2682,9 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, // If the value is a GEP instruction or constant expression, treat it as an // offset. if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { - // Make sure the GEP has exactly three arguments. - if (GEP->getNumOperands() != 3) - return false; - - // Make sure the index-ee is a pointer to array of i8. - ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType()); - if (!AT || !AT->getElementType()->isIntegerTy(8)) - return false; - - // Check to make sure that the first operand of the GEP is an integer and - // has value 0 so that we are sure we're indexing into the initializer. - const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); - if (!FirstIdx || !FirstIdx->isZero()) + // The GEP operator should be based on a pointer to string constant, and is + // indexing into the string constant. + if (!isGEPBasedOnPointerToString(GEP)) return false; // If the second index isn't a ConstantInt, then this is a variable index diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 35aa8eebeb0..b0c8fc7f997 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -535,6 +535,57 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { if (uint64_t Len = GetStringLength(Src)) return ConstantInt::get(CI->getType(), Len - 1); + // If s is a constant pointer pointing to a string literal, we can fold + // strlen(s + x) to strlen(s) - x, when x is known to be in the range + // [0, strlen(s)] or the string has a single null terminator '\0' at the end. + // We only try to simplify strlen when the pointer s points to an array + // of i8. Otherwise, we would need to scale the offset x before doing the + // subtraction. This will make the optimization more complex, and it's not + // very useful because calling strlen for a pointer of other types is + // very uncommon. + if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) { + if (!isGEPBasedOnPointerToString(GEP)) + return nullptr; + + StringRef Str; + if (getConstantStringInfo(GEP->getOperand(0), Str, 0, false)) { + size_t NullTermIdx = Str.find('\0'); + + // If the string does not have '\0', leave it to strlen to compute + // its length. + if (NullTermIdx == StringRef::npos) + return nullptr; + + Value *Offset = GEP->getOperand(2); + unsigned BitWidth = Offset->getType()->getIntegerBitWidth(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI, + nullptr); + KnownZero.flipAllBits(); + size_t ArrSize = + cast<ArrayType>(GEP->getSourceElementType())->getNumElements(); + + // KnownZero's bits are flipped, so zeros in KnownZero now represent + // bits known to be zeros in Offset, and ones in KnowZero represent + // bits unknown in Offset. Therefore, Offset is known to be in range + // [0, NullTermIdx] when the flipped KnownZero is non-negative and + // unsigned-less-than NullTermIdx. + // + // If Offset is not provably in the range [0, NullTermIdx], we can still + // optimize if we can prove that the program has undefined behavior when + // Offset is outside that range. That is the case when GEP->getOperand(0) + // is a pointer to an object whose memory extent is NullTermIdx+1. + if ((KnownZero.isNonNegative() && KnownZero.ule(NullTermIdx)) || + (GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) && + NullTermIdx == ArrSize - 1)) + return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx), + Offset); + } + + return nullptr; + } + // strlen(x?"foo":"bars") --> x ? 3 : 4 if (SelectInst *SI = dyn_cast<SelectInst>(Src)) { uint64_t LenTrue = GetStringLength(SI->getTrueValue()); |

