diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 323 |
1 files changed, 320 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index c0786afe965..f322e4ed7aa 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -13,6 +13,7 @@ #include "InstCombineInternal.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -595,6 +596,318 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC, return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset"); } +/// Returns true if we can rewrite Start as a GEP with pointer Base +/// and some integer offset. The nodes that need to be re-written +/// for this transformation will be added to Explored. +static bool canRewriteGEPAsOffset(Value *Start, Value *Base, + const DataLayout &DL, + SetVector<Value *> &Explored) { + SmallVector<Value *, 16> WorkList(1, Start); + Explored.insert(Base); + + // The following traversal gives us an order which can be used + // when doing the final transformation. Since in the final + // transformation we create the PHI replacement instructions first, + // we don't have to get them in any particular order. + // + // However, for other instructions we will have to traverse the + // operands of an instruction first, which means that we have to + // do a post-order traversal. + while (!WorkList.empty()) { + SetVector<PHINode *> PHIs; + + while (!WorkList.empty()) { + if (Explored.size() >= 100) + return false; + + Value *V = WorkList.back(); + + if (Explored.count(V) != 0) { + WorkList.pop_back(); + continue; + } + + if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) && + !isa<GEPOperator>(V) && !isa<PHINode>(V)) + // We've found some value that we can't explore which is different from + // the base. Therefore we can't do this transformation. + return false; + + if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) { + auto *CI = dyn_cast<CastInst>(V); + if (!CI->isNoopCast(DL)) + return false; + + if (Explored.count(CI->getOperand(0)) == 0) + WorkList.push_back(CI->getOperand(0)); + } + + if (auto *GEP = dyn_cast<GEPOperator>(V)) { + // We're limiting the GEP to having one index. This will preserve + // the original pointer type. We could handle more cases in the + // future. + if (GEP->getNumIndices() != 1 || !GEP->isInBounds() || + GEP->getType() != Start->getType()) + return false; + + if (Explored.count(GEP->getOperand(0)) == 0) + WorkList.push_back(GEP->getOperand(0)); + } + + if (WorkList.back() == V) { + WorkList.pop_back(); + // We've finished visiting this node, mark it as such. + Explored.insert(V); + } + + if (auto *PN = dyn_cast<PHINode>(V)) { + Explored.insert(PN); + PHIs.insert(PN); + } + } + + // Explore the PHI nodes further. + for (auto *PN : PHIs) + for (Value *Op : PN->incoming_values()) + if (Explored.count(Op) == 0) + WorkList.push_back(Op); + } + + // Make sure that we can do this. Since we can't insert GEPs in a basic + // block before a PHI node, we can't easily do this transformation if + // we have PHI node users of transformed instructions. + for (Value *Val : Explored) { + for (Value *Use : Val->uses()) { + + auto *PHI = dyn_cast<PHINode>(Use); + auto *Inst = dyn_cast<Instruction>(Val); + + if (Inst == Base || Inst == PHI || !Inst || !PHI || + Explored.count(PHI) == 0) + continue; + + if (PHI->getParent() == Inst->getParent()) + return false; + } + } + return true; +} + +// Sets the appropriate insert point on Builder where we can add +// a replacement Instruction for V (if that is possible). +static void setInsertionPoint(IRBuilder<> &Builder, Value *V, + bool Before = true) { + if (auto *PHI = dyn_cast<PHINode>(V)) { + Builder.SetInsertPoint(&*PHI->getParent()->getFirstInsertionPt()); + return; + } + if (auto *I = dyn_cast<Instruction>(V)) { + if (!Before) + I = &*std::next(I->getIterator()); + Builder.SetInsertPoint(I); + return; + } + if (auto *A = dyn_cast<Argument>(V)) { + // Set the insertion point in the entry block. + BasicBlock &Entry = A->getParent()->getEntryBlock(); + Builder.SetInsertPoint(&*Entry.getFirstInsertionPt()); + return; + } + // Otherwise, this is a constant and we don't need to set a new + // insertion point. + assert(isa<Constant>(V) && "Setting insertion point for unknown value!"); +} + +/// Returns a re-written value of Start as an indexed GEP using Base as a +/// pointer. +static Value *rewriteGEPAsOffset(Value *Start, Value *Base, + const DataLayout &DL, + SetVector<Value *> &Explored) { + // Perform all the substitutions. This is a bit tricky because we can + // have cycles in our use-def chains. + // 1. Create the PHI nodes without any incoming values. + // 2. Create all the other values. + // 3. Add the edges for the PHI nodes. + // 4. Emit GEPs to get the original pointers. + // 5. Remove the original instructions. + Type *IndexType = IntegerType::get( + Base->getContext(), DL.getPointerTypeSizeInBits(Start->getType())); + + DenseMap<Value *, Value *> NewInsts; + NewInsts[Base] = ConstantInt::getNullValue(IndexType); + + // Create the new PHI nodes, without adding any incoming values. + for (Value *Val : Explored) { + if (Val == Base) + continue; + // Create empty phi nodes. This avoids cyclic dependencies when creating + // the remaining instructions. + if (auto *PHI = dyn_cast<PHINode>(Val)) + NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(), + PHI->getName() + ".idx", PHI); + } + IRBuilder<> Builder(Base->getContext()); + + // Create all the other instructions. + for (Value *Val : Explored) { + + if (NewInsts.find(Val) != NewInsts.end()) + continue; + + if (auto *CI = dyn_cast<CastInst>(Val)) { + NewInsts[CI] = NewInsts[CI->getOperand(0)]; + continue; + } + if (auto *GEP = dyn_cast<GEPOperator>(Val)) { + Value *Index = NewInsts[GEP->getOperand(1)] ? NewInsts[GEP->getOperand(1)] + : GEP->getOperand(1); + setInsertionPoint(Builder, GEP); + // Indices might need to be sign extended. GEPs will magically do + // this, but we need to do it ourselves here. + if (Index->getType()->getScalarSizeInBits() != + NewInsts[GEP->getOperand(0)]->getType()->getScalarSizeInBits()) { + Index = Builder.CreateSExtOrTrunc( + Index, NewInsts[GEP->getOperand(0)]->getType(), + GEP->getOperand(0)->getName() + ".sext"); + } + + auto *Op = NewInsts[GEP->getOperand(0)]; + if (isa<ConstantInt>(Op) && dyn_cast<ConstantInt>(Op)->isZero()) + NewInsts[GEP] = Index; + else + NewInsts[GEP] = Builder.CreateNSWAdd( + Op, Index, GEP->getOperand(0)->getName() + ".add"); + continue; + } + if (isa<PHINode>(Val)) + continue; + + llvm_unreachable("Unexpected instruction type"); + } + + // Add the incoming values to the PHI nodes. + for (Value *Val : Explored) { + if (Val == Base) + continue; + // All the instructions have been created, we can now add edges to the + // phi nodes. + if (auto *PHI = dyn_cast<PHINode>(Val)) { + PHINode *NewPhi = static_cast<PHINode *>(NewInsts[PHI]); + for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) { + Value *NewIncoming = PHI->getIncomingValue(I); + + if (NewInsts.find(NewIncoming) != NewInsts.end()) + NewIncoming = NewInsts[NewIncoming]; + + NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I)); + } + } + } + + for (Value *Val : Explored) { + if (Val == Base) + continue; + + // Depending on the type, for external users we have to emit + // a GEP or a GEP + ptrtoint. + setInsertionPoint(Builder, Val, false); + + // If required, create an inttoptr instruction for Base. + Value *NewBase = Base; + if (!Base->getType()->isPointerTy()) + NewBase = Builder.CreateBitOrPointerCast(Base, Start->getType(), + Start->getName() + "to.ptr"); + + Value *GEP = Builder.CreateInBoundsGEP( + Start->getType()->getPointerElementType(), NewBase, + makeArrayRef(NewInsts[Val]), Val->getName() + ".ptr"); + + if (!Val->getType()->isPointerTy()) { + Value *Cast = Builder.CreatePointerCast(GEP, Val->getType(), + Val->getName() + ".conv"); + GEP = Cast; + } + Val->replaceAllUsesWith(GEP); + } + + return NewInsts[Start]; +} + +/// Looks through GEPs, IntToPtrInsts and PtrToIntInsts in order to express +/// the input Value as a constant indexed GEP. Returns a pair containing +/// the GEPs Pointer and Index. +static std::pair<Value *, Value *> +getAsConstantIndexedAddress(Value *V, const DataLayout &DL) { + Type *IndexType = IntegerType::get(V->getContext(), + DL.getPointerTypeSizeInBits(V->getType())); + + Constant *Index = ConstantInt::getNullValue(IndexType); + while (true) { + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + // We accept only inbouds GEPs here to exclude the possibility of + // overflow. + if (!GEP->isInBounds()) + break; + if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1 && + GEP->getType() == V->getType()) { + V = GEP->getOperand(0); + Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1)); + Index = ConstantExpr::getAdd( + Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType)); + continue; + } + break; + } + if (auto *CI = dyn_cast<IntToPtrInst>(V)) { + if (!CI->isNoopCast(DL)) + break; + V = CI->getOperand(0); + continue; + } + if (auto *CI = dyn_cast<PtrToIntInst>(V)) { + if (!CI->isNoopCast(DL)) + break; + V = CI->getOperand(0); + continue; + } + break; + } + return {V, Index}; +} + +// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant. +// We can look through PHIs, GEPs and casts in order to determine a +// common base between GEPLHS and RHS. +static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS, + ICmpInst::Predicate Cond, + const DataLayout &DL) { + if (!GEPLHS->hasAllConstantIndices()) + return nullptr; + + Value *PtrBase, *Index; + std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL); + + // The set of nodes that will take part in this transformation. + SetVector<Value *> Nodes; + + if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes)) + return nullptr; + + // We know we can re-write this as + // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) + // Since we've only looked through inbouds GEPs we know that we + // can't have overflow on either side. We can therefore re-write + // this as: + // OFFSET1 cmp OFFSET2 + Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes); + + // RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written + // GEP having PtrBase as the pointer base, and has returned in NewRHS the + // offset. Since Index is the offset of LHS to the base pointer, we will now + // compare the offsets instead of comparing the pointers. + return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Index, NewRHS); +} + /// FoldGEPICmp - Fold comparisons between a GEP instruction and something /// else. At this point we know that the GEP is on the LHS of the comparison. Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, @@ -674,8 +987,9 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, } // Otherwise, the base pointers are different and the indices are - // different, bail out. - return nullptr; + // different. Try convert this to an indexed compare by looking through + // PHIs/casts. + return transformToIndexedCompare(GEPLHS, RHS, Cond, DL); } // If one of the GEPs has all zero indices, recurse. @@ -727,7 +1041,10 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); } } - return nullptr; + + // Try convert this to an indexed compare by looking through PHIs/casts as a + // last resort. + return transformToIndexedCompare(GEPLHS, RHS, Cond, DL); } Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca, |