diff options
-rw-r--r-- | llvm/include/llvm/Analysis/LoopAccessAnalysis.h | 4 | ||||
-rw-r--r-- | llvm/lib/Analysis/LoopAccessAnalysis.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 93 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/consec_no_gep.ll | 43 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/ptr-induction.ll | 1 |
5 files changed, 90 insertions, 59 deletions
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index d7ad818b731..3a6c88af78b 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -679,11 +679,9 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, /// to \p PtrToStride and therefore add further predicates to \p PSE. /// The \p Assume parameter indicates if we are allowed to make additional /// run-time assumptions. -/// The \p ShouldCheckWrap indicates that we should ensure that address -/// calculation does not wrap. int getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap = ValueToValueMap(), - bool Assume = false, bool ShouldCheckWrap = true); + bool Assume = false); /// \brief Returns true if the memory operations \p A and \p B are consecutive. /// This is a simple API that does not depend on the analysis pass. diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 6ba054c1e4d..27f5b12b4ca 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -866,7 +866,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, /// \brief Check whether the access through \p Ptr has a constant stride. int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap, - bool Assume, bool ShouldCheckWrap) { + bool Assume) { Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); @@ -905,9 +905,9 @@ int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, // to access the pointer value "0" which is undefined behavior in address // space 0, therefore we can also vectorize this case. bool IsInBoundsGEP = isInBoundsGep(Ptr); - bool IsNoWrapAddRec = !ShouldCheckWrap || - PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) || - isNoWrapAddRec(Ptr, AR, PSE, Lp); + bool IsNoWrapAddRec = + PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) || + isNoWrapAddRec(Ptr, AR, PSE, Lp); bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0; if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) { if (Assume) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 785ee68b555..fb0243542fc 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2242,13 +2242,87 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, } int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { + assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr"); + auto *SE = PSE.getSE(); + // Make sure that the pointer does not point to structs. + if (Ptr->getType()->getPointerElementType()->isAggregateType()) + return 0; + + // If this value is a pointer induction variable, we know it is consecutive. + PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr); + if (Phi && Inductions.count(Phi)) { + InductionDescriptor II = Inductions[Phi]; + return II.getConsecutiveDirection(); + } + + GetElementPtrInst *Gep = getGEPInstruction(Ptr); + if (!Gep) + return 0; + + unsigned NumOperands = Gep->getNumOperands(); + Value *GpPtr = Gep->getPointerOperand(); + // If this GEP value is a consecutive pointer induction variable and all of + // the indices are constant, then we know it is consecutive. + Phi = dyn_cast<PHINode>(GpPtr); + if (Phi && Inductions.count(Phi)) { + + // Make sure that the pointer does not point to structs. + PointerType *GepPtrType = cast<PointerType>(GpPtr->getType()); + if (GepPtrType->getElementType()->isAggregateType()) + return 0; + + // Make sure that all of the index operands are loop invariant. + for (unsigned i = 1; i < NumOperands; ++i) + if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) + return 0; + + InductionDescriptor II = Inductions[Phi]; + return II.getConsecutiveDirection(); + } + + unsigned InductionOperand = getGEPInductionOperand(Gep); + + // Check that all of the gep indices are uniform except for our induction + // operand. + for (unsigned i = 0; i != NumOperands; ++i) + if (i != InductionOperand && + !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) + return 0; - const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() : - ValueToValueMap(); + // We can emit wide load/stores only if the last non-zero index is the + // induction variable. + const SCEV *Last = nullptr; + if (!getSymbolicStrides() || !getSymbolicStrides()->count(Gep)) + Last = PSE.getSCEV(Gep->getOperand(InductionOperand)); + else { + // Because of the multiplication by a stride we can have a s/zext cast. + // We are going to replace this stride by 1 so the cast is safe to ignore. + // + // %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + // %0 = trunc i64 %indvars.iv to i32 + // %mul = mul i32 %0, %Stride1 + // %idxprom = zext i32 %mul to i64 << Safe cast. + // %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom + // + Last = replaceSymbolicStrideSCEV(PSE, *getSymbolicStrides(), + Gep->getOperand(InductionOperand), Gep); + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last)) + Last = + (C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend) + ? C->getOperand() + : Last; + } + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) { + const SCEV *Step = AR->getStepRecurrence(*SE); + + // The memory is consecutive because the last index is consecutive + // and all other indices are loop invariant. + if (Step->isOne()) + return 1; + if (Step->isAllOnesValue()) + return -1; + } - int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false); - if (Stride == 1 || Stride == -1) - return Stride; return 0; } @@ -2584,9 +2658,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { // Handle consecutive loads/stores. GetElementPtrInst *Gep = getGEPInstruction(Ptr); if (ConsecutiveStride) { - if (Gep && - !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()), - OrigLoop)) { + if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) { setDebugLocFromInst(Builder, Gep); Value *PtrOperand = Gep->getPointerOperand(); Value *FirstBasePtr = getVectorValue(PtrOperand)[0]; @@ -2599,6 +2671,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { Ptr = Builder.Insert(Gep2); } else if (Gep) { setDebugLocFromInst(Builder, Gep); + assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()), + OrigLoop) && + "Base ptr must be invariant"); // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; unsigned NumOperands = Gep->getNumOperands(); @@ -2627,6 +2702,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { } Ptr = Builder.Insert(Gep2); } else { // No GEP + // Use the induction element ptr. + assert(isa<PHINode>(Ptr) && "Invalid induction ptr"); setDebugLocFromInst(Builder, Ptr); VectorParts &PtrVal = getVectorValue(Ptr); Ptr = Builder.CreateExtractElement(PtrVal[0], Zero); diff --git a/llvm/test/Transforms/LoopVectorize/consec_no_gep.ll b/llvm/test/Transforms/LoopVectorize/consec_no_gep.ll deleted file mode 100644 index 4e906bb2659..00000000000 --- a/llvm/test/Transforms/LoopVectorize/consec_no_gep.ll +++ /dev/null @@ -1,43 +0,0 @@ -; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -S | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -;; Check consecutive memory access without preceding GEP instruction - -; for (int i=0; i<len; i++) { -; *to++ = *from++; -; } - -; CHECK-LABEL: @consecutive_no_gep( -; CHECK: vector.body -; CHECK: %[[index:.*]] = phi i64 [ 0, %vector.ph ] -; CHECK: getelementptr float, float* %{{.*}}, i64 %[[index]] -; CHECK: load <4 x float> - -define void @consecutive_no_gep(float* noalias nocapture readonly %from, float* noalias nocapture %to, i32 %len) #0 { -entry: - %cmp2 = icmp sgt i32 %len, 0 - br i1 %cmp2, label %for.body.preheader, label %for.end - -for.body.preheader: ; preds = %entry - br label %for.body - -for.body: ; preds = %for.body.preheader, %for.body - %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %from.addr.04 = phi float* [ %incdec.ptr, %for.body ], [ %from, %for.body.preheader ] - %to.addr.03 = phi float* [ %incdec.ptr1, %for.body ], [ %to, %for.body.preheader ] - %incdec.ptr = getelementptr inbounds float, float* %from.addr.04, i64 1 - %val = load float, float* %from.addr.04, align 4 - %incdec.ptr1 = getelementptr inbounds float, float* %to.addr.03, i64 1 - store float %val, float* %to.addr.03, align 4 - %inc = add nsw i32 %i.05, 1 - %cmp = icmp slt i32 %inc, %len - br i1 %cmp, label %for.body, label %for.end.loopexit - -for.end.loopexit: ; preds = %for.body - br label %for.end - -for.end: ; preds = %for.end.loopexit, %entry - ret void -} diff --git a/llvm/test/Transforms/LoopVectorize/ptr-induction.ll b/llvm/test/Transforms/LoopVectorize/ptr-induction.ll index e0e1139e0eb..47d33352763 100644 --- a/llvm/test/Transforms/LoopVectorize/ptr-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/ptr-induction.ll @@ -18,7 +18,6 @@ while.body.preheader: ; preds = %entry while.body: ; preds = %while.body.preheader, %while.body %a.pn = phi i32* [ %incdec.ptr8, %while.body ], [ %a, %while.body.preheader ] %acc.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ] - %a1.pn = getelementptr inbounds i32, i32* %a.pn, i64 0 %incdec.ptr8 = getelementptr inbounds i32, i32* %a.pn, i64 1 %0 = load i32, i32* %incdec.ptr8, align 1 %add = add nuw nsw i32 %0, %acc.07 |