diff options
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 34 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/X86/register-assumption.ll | 32 |
2 files changed, 56 insertions, 10 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 61d9aced7bb..d286dffdf0e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -314,13 +314,13 @@ public: InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, unsigned VecWidth, - unsigned UnrollFactor) + const TargetTransformInfo *TTI, AssumptionCache *AC, + unsigned VecWidth, unsigned UnrollFactor) : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), - VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()), - Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor), - TripCount(nullptr), VectorTripCount(nullptr), Legal(nullptr), - AddedSafetyChecks(false) {} + AC(AC), VF(VecWidth), UF(UnrollFactor), + Builder(PSE.getSE()->getContext()), Induction(nullptr), + OldInduction(nullptr), WidenMap(UnrollFactor), TripCount(nullptr), + VectorTripCount(nullptr), Legal(nullptr), AddedSafetyChecks(false) {} // Perform the actual loop widening (vectorization). // MinimumBitWidths maps scalar integer values to the smallest bitwidth they @@ -524,6 +524,8 @@ protected: const TargetLibraryInfo *TLI; /// Target Transform Info. const TargetTransformInfo *TTI; + /// Assumption Cache. + AssumptionCache *AC; /// \brief LoopVersioning. It's only set up (non-null) if memchecks were /// used. 
@@ -591,8 +593,10 @@ public: InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, unsigned UnrollFactor) - : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, 1, UnrollFactor) {} + const TargetTransformInfo *TTI, AssumptionCache *AC, + unsigned UnrollFactor) + : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, 1, + UnrollFactor) {} private: void scalarizeInstruction(Instruction *Instr, @@ -1957,7 +1961,7 @@ struct LoopVectorize : public FunctionPass { assert(IC > 1 && "interleave count should not be 1 or 0"); // If we decided that it is not legal to vectorize the loop then // interleave it. - InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, IC); + InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, IC); Unroller.vectorize(&LVL, CM.MinBWs); emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), @@ -1965,7 +1969,7 @@ struct LoopVectorize : public FunctionPass { Twine(IC) + ")"); } else { // If we decided that it is *legal* to vectorize the loop then do it. - InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, VF.Width, IC); + InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, VF.Width, IC); LB.vectorize(&LVL, CM.MinBWs); ++LoopsVectorized; @@ -2728,6 +2732,11 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, // Place the cloned scalar in the new loop. Builder.Insert(Cloned); + // If we just cloned a new assumption, add it to the assumption cache. + if (auto *II = dyn_cast<IntrinsicInst>(Cloned)) + if (II->getIntrinsicID() == Intrinsic::assume) + AC->registerAssumption(II); + // If the original scalar returns a value we need to place it in a vector // so that future users will be able to use it. if (!IsVoidRetTy) @@ -6096,6 +6105,11 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, // Place the cloned scalar in the new loop. 
Builder.Insert(Cloned); + // If we just cloned a new assumption, add it to the assumption cache. + if (auto *II = dyn_cast<IntrinsicInst>(Cloned)) + if (II->getIntrinsicID() == Intrinsic::assume) + AC->registerAssumption(II); + // If the original scalar returns a value we need to place it in a vector // so that future users will be able to use it. if (!IsVoidRetTy) diff --git a/llvm/test/Transforms/LoopVectorize/X86/register-assumption.ll b/llvm/test/Transforms/LoopVectorize/X86/register-assumption.ll new file mode 100644 index 00000000000..1add87db611 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/register-assumption.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -loop-vectorize -instcombine -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @test1() { +entry: + %alloca = alloca float, align 4 + br label %loop_exit.dim.11.critedge + +loop_exit.dim.11.critedge: ; preds = %loop_body.dim.0 + %ptrint = ptrtoint float* %alloca to i64 + %maskedptr = and i64 %ptrint, 4 + %maskcond = icmp eq i64 %maskedptr, 0 + br label %loop_header.dim.017.preheader + +loop_header.dim.017.preheader: ; preds = %loop_exit.dim.016, %loop_exit.dim.11.critedge + br label %loop_body.dim.018 + +loop_body.dim.018: ; preds = %loop_body.dim.018, %loop_header.dim.017.preheader + %invar_address.dim.019.0135 = phi i64 [ 0, %loop_header.dim.017.preheader ], [ %0, %loop_body.dim.018 ] + call void @llvm.assume(i1 %maskcond) +; CHECK: call void @llvm.assume( +; CHECK-NOT: call void @llvm.assume( + %0 = add nuw nsw i64 %invar_address.dim.019.0135, 1 + %1 = icmp eq i64 %0, 256 + br i1 %1, label %loop_header.dim.017.preheader, label %loop_body.dim.018 +} + +; Function Attrs: nounwind +declare void @llvm.assume(i1) #0 + +attributes #0 = { nounwind } |