diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Analysis/LoopAccessAnalysis.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 93 |
2 files changed, 89 insertions, 12 deletions
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 6ba054c1e4d..27f5b12b4ca 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -866,7 +866,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, /// \brief Check whether the access through \p Ptr has a constant stride. int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap, - bool Assume, bool ShouldCheckWrap) { + bool Assume) { Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); @@ -905,9 +905,9 @@ int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, // to access the pointer value "0" which is undefined behavior in address // space 0, therefore we can also vectorize this case. bool IsInBoundsGEP = isInBoundsGep(Ptr); - bool IsNoWrapAddRec = !ShouldCheckWrap || - PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) || - isNoWrapAddRec(Ptr, AR, PSE, Lp); + bool IsNoWrapAddRec = + PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) || + isNoWrapAddRec(Ptr, AR, PSE, Lp); bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0; if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) { if (Assume) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 785ee68b555..fb0243542fc 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2242,13 +2242,87 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, } int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { + assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr"); + auto *SE = PSE.getSE(); + // Make sure that the pointer does not point to structs. + if (Ptr->getType()->getPointerElementType()->isAggregateType()) + return 0; + + // If this value is a pointer induction variable, we know it is consecutive. + PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr); + if (Phi && Inductions.count(Phi)) { + InductionDescriptor II = Inductions[Phi]; + return II.getConsecutiveDirection(); + } + + GetElementPtrInst *Gep = getGEPInstruction(Ptr); + if (!Gep) + return 0; + + unsigned NumOperands = Gep->getNumOperands(); + Value *GpPtr = Gep->getPointerOperand(); + // If this GEP value is a consecutive pointer induction variable and all of + // the indices are constant, then we know it is consecutive. + Phi = dyn_cast<PHINode>(GpPtr); + if (Phi && Inductions.count(Phi)) { + + // Make sure that the pointer does not point to structs. + PointerType *GepPtrType = cast<PointerType>(GpPtr->getType()); + if (GepPtrType->getElementType()->isAggregateType()) + return 0; + + // Make sure that all of the index operands are loop invariant. + for (unsigned i = 1; i < NumOperands; ++i) + if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) + return 0; + + InductionDescriptor II = Inductions[Phi]; + return II.getConsecutiveDirection(); + } + + unsigned InductionOperand = getGEPInductionOperand(Gep); + + // Check that all of the gep indices are uniform except for our induction + // operand. + for (unsigned i = 0; i != NumOperands; ++i) + if (i != InductionOperand && + !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) + return 0; - const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() : - ValueToValueMap(); + // We can emit wide load/stores only if the last non-zero index is the + // induction variable. + const SCEV *Last = nullptr; + if (!getSymbolicStrides() || !getSymbolicStrides()->count(Gep)) + Last = PSE.getSCEV(Gep->getOperand(InductionOperand)); + else { + // Because of the multiplication by a stride we can have a s/zext cast. + // We are going to replace this stride by 1 so the cast is safe to ignore. + // + // %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + // %0 = trunc i64 %indvars.iv to i32 + // %mul = mul i32 %0, %Stride1 + // %idxprom = zext i32 %mul to i64 << Safe cast. + // %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom + // + Last = replaceSymbolicStrideSCEV(PSE, *getSymbolicStrides(), + Gep->getOperand(InductionOperand), Gep); + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last)) + Last = + (C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend) + ? C->getOperand() + : Last; + } + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) { + const SCEV *Step = AR->getStepRecurrence(*SE); + + // The memory is consecutive because the last index is consecutive + // and all other indices are loop invariant. + if (Step->isOne()) + return 1; + if (Step->isAllOnesValue()) + return -1; + } - int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false); - if (Stride == 1 || Stride == -1) - return Stride; return 0; } @@ -2584,9 +2658,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { // Handle consecutive loads/stores. GetElementPtrInst *Gep = getGEPInstruction(Ptr); if (ConsecutiveStride) { - if (Gep && - !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()), - OrigLoop)) { + if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) { setDebugLocFromInst(Builder, Gep); Value *PtrOperand = Gep->getPointerOperand(); Value *FirstBasePtr = getVectorValue(PtrOperand)[0]; @@ -2599,6 +2671,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { Ptr = Builder.Insert(Gep2); } else if (Gep) { setDebugLocFromInst(Builder, Gep); + assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()), + OrigLoop) && + "Base ptr must be invariant"); // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; unsigned NumOperands = Gep->getNumOperands(); @@ -2627,6 +2702,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { } Ptr = Builder.Insert(Gep2); } else { // No GEP + // Use the induction element ptr. + assert(isa<PHINode>(Ptr) && "Invalid induction ptr"); setDebugLocFromInst(Builder, Ptr); VectorParts &PtrVal = getVectorValue(Ptr); Ptr = Builder.CreateExtractElement(PtrVal[0], Zero); |