diff options
author | Daniel Jasper <djasper@google.com> | 2016-12-19 08:22:17 +0000 |
---|---|---|
committer | Daniel Jasper <djasper@google.com> | 2016-12-19 08:22:17 +0000 |
commit | aec2fa352f533d230ab50b6c3002a1a664c9d6c2 (patch) | |
tree | 21fa530583cde5282092391e6891d959208f28d9 /llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | |
parent | e5f3eba9c31f4d00c73f4714df52ffced4532927 (diff) | |
download | bcm5719-llvm-aec2fa352f533d230ab50b6c3002a1a664c9d6c2.tar.gz bcm5719-llvm-aec2fa352f533d230ab50b6c3002a1a664c9d6c2.zip |
Revert @llvm.assume with operator bundles (r289755-r289757)
This creates non-linear behavior in the inliner (see more details in
r289755's commit thread).
llvm-svn: 290086
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 49 |
1 files changed, 35 insertions, 14 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index afcb50b2795..70269ed61f5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -369,12 +369,12 @@ public: InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, + const TargetTransformInfo *TTI, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, unsigned VecWidth, unsigned UnrollFactor, LoopVectorizationLegality *LVL, LoopVectorizationCostModel *CM) : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), - ORE(ORE), VF(VecWidth), UF(UnrollFactor), + AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()), Induction(nullptr), OldInduction(nullptr), VectorLoopValueMap(UnrollFactor, VecWidth), TripCount(nullptr), VectorTripCount(nullptr), Legal(LVL), Cost(CM), @@ -706,6 +706,8 @@ protected: const TargetLibraryInfo *TLI; /// Target Transform Info. const TargetTransformInfo *TTI; + /// Assumption Cache. + AssumptionCache *AC; /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; @@ -788,11 +790,11 @@ public: InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, + const TargetTransformInfo *TTI, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, unsigned UnrollFactor, LoopVectorizationLegality *LVL, LoopVectorizationCostModel *CM) - : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, ORE, 1, + : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, 1, UnrollFactor, LVL, CM) {} private: @@ -1848,10 +1850,11 @@ public: LoopInfo *LI, LoopVectorizationLegality *Legal, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI, DemandedBits *DB, + AssumptionCache *AC, OptimizationRemarkEmitter *ORE, const Function *F, const LoopVectorizeHints *Hints) : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), - ORE(ORE), TheFunction(F), Hints(Hints) {} + AC(AC), ORE(ORE), TheFunction(F), Hints(Hints) {} /// Information about vectorization costs struct VectorizationFactor { @@ -2004,6 +2007,8 @@ public: const TargetLibraryInfo *TLI; /// Demanded bits analysis. DemandedBits *DB; + /// Assumption cache. + AssumptionCache *AC; /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; @@ -2117,6 +2122,7 @@ struct LoopVectorize : public FunctionPass { auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); auto *TLI = TLIP ? &TLIP->getTLI() : nullptr; auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>(); auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits(); auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); @@ -2124,11 +2130,12 @@ struct LoopVectorize : public FunctionPass { std::function<const LoopAccessInfo &(Loop &)> GetLAA = [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); }; - return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, GetLAA, - *ORE); + return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC, + GetLAA, *ORE); } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addRequired<BlockFrequencyInfoWrapperPass>(); @@ -3056,6 +3063,11 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, // Add the cloned scalar to the scalar map entry. Entry[Part][Lane] = Cloned; + // If we just cloned a new assumption, add it the assumption cache. + if (auto *II = dyn_cast<IntrinsicInst>(Cloned)) + if (II->getIntrinsicID() == Intrinsic::assume) + AC->registerAssumption(II); + // End if-block. if (IfPredicateInstr) PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp)); @@ -7168,6 +7180,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) @@ -7196,7 +7209,7 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { void LoopVectorizationCostModel::collectValuesToIgnore() { // Ignore ephemeral values. - CodeMetrics::collectEphemeralValues(TheLoop, ValuesToIgnore); + CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore); // Ignore type-promoting instructions we identified during reduction // detection. @@ -7270,6 +7283,11 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, // Add the cloned scalar to the scalar map entry. Entry[Part][0] = Cloned; + // If we just cloned a new assumption, add it the assumption cache. + if (auto *II = dyn_cast<IntrinsicInst>(Cloned)) + if (II->getIntrinsicID() == Intrinsic::assume) + AC->registerAssumption(II); + // End if-block. if (IfPredicateInstr) PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp)); @@ -7409,7 +7427,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { } // Use the cost model. - LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, ORE, F, + LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F, &Hints); CM.collectValuesToIgnore(); @@ -7545,7 +7563,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { assert(IC > 1 && "interleave count should not be 1 or 0"); // If we decided that it is not legal to vectorize the loop, then // interleave it. - InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, ORE, IC, &LVL, &CM); + InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, + &CM); Unroller.vectorize(); ORE->emit(OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(), @@ -7554,8 +7573,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { << NV("InterleaveCount", IC) << ")"); } else { // If we decided that it is *legal* to vectorize the loop, then do it. - InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, ORE, VF.Width, IC, &LVL, - &CM); + InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC, + &LVL, &CM); LB.vectorize(); ++LoopsVectorized; @@ -7583,7 +7602,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { bool LoopVectorizePass::runImpl( Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_, DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_, - DemandedBits &DB_, AliasAnalysis &AA_, + DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_, std::function<const LoopAccessInfo &(Loop &)> &GetLAA_, OptimizationRemarkEmitter &ORE_) { @@ -7594,6 +7613,7 @@ bool LoopVectorizePass::runImpl( BFI = &BFI_; TLI = TLI_; AA = &AA_; + AC = &AC_; GetLAA = &GetLAA_; DB = &DB_; ORE = &ORE_; @@ -7643,6 +7663,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F, auto &BFI = AM.getResult<BlockFrequencyAnalysis>(F); auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F); auto &AA = AM.getResult<AAManager>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); auto &DB = AM.getResult<DemandedBitsAnalysis>(F); auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); @@ -7652,7 +7673,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F, return LAM.getResult<LoopAccessAnalysis>(L); }; bool Changed = - runImpl(F, SE, LI, TTI, DT, BFI, TLI, DB, AA, GetLAA, ORE); + runImpl(F, SE, LI, TTI, DT, BFI, TLI, DB, AA, AC, GetLAA, ORE); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; |