Revert @llvm.assume with operator bundles (r289755-r289757)

This creates non-linear behavior in the inliner (see more details in r289755's commit thread). llvm-svn: 290086
author: Daniel Jasper <djasper@google.com> 2016-12-19 08:22:17 +0000
committer: Daniel Jasper <djasper@google.com> 2016-12-19 08:22:17 +0000
commit: aec2fa352f533d230ab50b6c3002a1a664c9d6c2 (patch)
tree: 21fa530583cde5282092391e6891d959208f28d9 /llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
parent: e5f3eba9c31f4d00c73f4714df52ffced4532927 (diff)
download: bcm5719-llvm-aec2fa352f533d230ab50b6c3002a1a664c9d6c2.tar.gz
bcm5719-llvm-aec2fa352f533d230ab50b6c3002a1a664c9d6c2.zip
1 files changed, 35 insertions, 14 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index afcb50b2795..70269ed61f5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -369,12 +369,12 @@ public:
   InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
                       LoopInfo *LI, DominatorTree *DT,
                       const TargetLibraryInfo *TLI,
-                      const TargetTransformInfo *TTI,
+                      const TargetTransformInfo *TTI, AssumptionCache *AC,
                       OptimizationRemarkEmitter *ORE, unsigned VecWidth,
                       unsigned UnrollFactor, LoopVectorizationLegality *LVL,
                       LoopVectorizationCostModel *CM)
       : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
-        ORE(ORE), VF(VecWidth), UF(UnrollFactor),
+        AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
         Builder(PSE.getSE()->getContext()), Induction(nullptr),
         OldInduction(nullptr), VectorLoopValueMap(UnrollFactor, VecWidth),
         TripCount(nullptr), VectorTripCount(nullptr), Legal(LVL), Cost(CM),
@@ -706,6 +706,8 @@ protected:
   const TargetLibraryInfo *TLI;
   /// Target Transform Info.
   const TargetTransformInfo *TTI;
+  /// Assumption Cache.
+  AssumptionCache *AC;
   /// Interface to emit optimization remarks.
   OptimizationRemarkEmitter *ORE;
 
@@ -788,11 +790,11 @@ public:
   InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
                     LoopInfo *LI, DominatorTree *DT,
                     const TargetLibraryInfo *TLI,
-                    const TargetTransformInfo *TTI,
+                    const TargetTransformInfo *TTI, AssumptionCache *AC,
                     OptimizationRemarkEmitter *ORE, unsigned UnrollFactor,
                     LoopVectorizationLegality *LVL,
                     LoopVectorizationCostModel *CM)
-      : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, ORE, 1,
+      : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, 1,
                             UnrollFactor, LVL, CM) {}
 
 private:
@@ -1848,10 +1850,11 @@ public:
                              LoopInfo *LI, LoopVectorizationLegality *Legal,
                              const TargetTransformInfo &TTI,
                              const TargetLibraryInfo *TLI, DemandedBits *DB,
+                             AssumptionCache *AC,
                              OptimizationRemarkEmitter *ORE, const Function *F,
                              const LoopVectorizeHints *Hints)
       : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
-        ORE(ORE), TheFunction(F), Hints(Hints) {}
+        AC(AC), ORE(ORE), TheFunction(F), Hints(Hints) {}
 
   /// Information about vectorization costs
   struct VectorizationFactor {
@@ -2004,6 +2007,8 @@ public:
   const TargetLibraryInfo *TLI;
   /// Demanded bits analysis.
   DemandedBits *DB;
+  /// Assumption cache.
+  AssumptionCache *AC;
   /// Interface to emit optimization remarks.
   OptimizationRemarkEmitter *ORE;
 
@@ -2117,6 +2122,7 @@ struct LoopVectorize : public FunctionPass {
     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
     auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
     auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+    auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
     auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
     auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
     auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
@@ -2124,11 +2130,12 @@ struct LoopVectorize : public FunctionPass {
     std::function<const LoopAccessInfo &(Loop &)> GetLAA =
         [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
 
-    return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, GetLAA,
-                        *ORE);
+    return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
+                        GetLAA, *ORE);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AssumptionCacheTracker>();
     AU.addRequiredID(LoopSimplifyID);
     AU.addRequiredID(LCSSAID);
     AU.addRequired<BlockFrequencyInfoWrapperPass>();
@@ -3056,6 +3063,11 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
       // Add the cloned scalar to the scalar map entry.
       Entry[Part][Lane] = Cloned;
 
+      // If we just cloned a new assumption, add it the assumption cache.
+      if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
+        if (II->getIntrinsicID() == Intrinsic::assume)
+          AC->registerAssumption(II);
+
       // End if-block.
       if (IfPredicateInstr)
         PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp));
@@ -7168,6 +7180,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
@@ -7196,7 +7209,7 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
 
 void LoopVectorizationCostModel::collectValuesToIgnore() {
   // Ignore ephemeral values.
-  CodeMetrics::collectEphemeralValues(TheLoop, ValuesToIgnore);
+  CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
 
   // Ignore type-promoting instructions we identified during reduction
   // detection.
@@ -7270,6 +7283,11 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
     // Add the cloned scalar to the scalar map entry.
     Entry[Part][0] = Cloned;
 
+    // If we just cloned a new assumption, add it the assumption cache.
+    if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
+      if (II->getIntrinsicID() == Intrinsic::assume)
+        AC->registerAssumption(II);
+
     // End if-block.
     if (IfPredicateInstr)
       PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp));
@@ -7409,7 +7427,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   }
 
   // Use the cost model.
-  LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, ORE, F,
+  LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F,
                                 &Hints);
   CM.collectValuesToIgnore();
 
@@ -7545,7 +7563,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     assert(IC > 1 && "interleave count should not be 1 or 0");
     // If we decided that it is not legal to vectorize the loop, then
     // interleave it.
-    InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, ORE, IC, &LVL, &CM);
+    InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
+                               &CM);
     Unroller.vectorize();
 
     ORE->emit(OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -7554,8 +7573,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
               << NV("InterleaveCount", IC) << ")");
   } else {
     // If we decided that it is *legal* to vectorize the loop, then do it.
-    InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, ORE, VF.Width, IC, &LVL,
-                           &CM);
+    InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
+                           &LVL, &CM);
     LB.vectorize();
     ++LoopsVectorized;
 
@@ -7583,7 +7602,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
 bool LoopVectorizePass::runImpl(
     Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_,
     DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
-    DemandedBits &DB_, AliasAnalysis &AA_,
+    DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
     std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
     OptimizationRemarkEmitter &ORE_) {
 
@@ -7594,6 +7613,7 @@ bool LoopVectorizePass::runImpl(
   BFI = &BFI_;
   TLI = TLI_;
   AA = &AA_;
+  AC = &AC_;
   GetLAA = &GetLAA_;
   DB = &DB_;
   ORE = &ORE_;
@@ -7643,6 +7663,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
     auto &BFI = AM.getResult<BlockFrequencyAnalysis>(F);
     auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F);
     auto &AA = AM.getResult<AAManager>(F);
+    auto &AC = AM.getResult<AssumptionAnalysis>(F);
     auto &DB = AM.getResult<DemandedBitsAnalysis>(F);
     auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
 
@@ -7652,7 +7673,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
       return LAM.getResult<LoopAccessAnalysis>(L);
     };
     bool Changed =
-        runImpl(F, SE, LI, TTI, DT, BFI, TLI, DB, AA, GetLAA, ORE);
+        runImpl(F, SE, LI, TTI, DT, BFI, TLI, DB, AA, AC, GetLAA, ORE);
     if (!Changed)
       return PreservedAnalyses::all();
     PreservedAnalyses PA;
author	Daniel Jasper <djasper@google.com>	2016-12-19 08:22:17 +0000
committer	Daniel Jasper <djasper@google.com>	2016-12-19 08:22:17 +0000
commit	aec2fa352f533d230ab50b6c3002a1a664c9d6c2 (patch)
tree	21fa530583cde5282092391e6891d959208f28d9 /llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
parent	e5f3eba9c31f4d00c73f4714df52ffced4532927 (diff)
download	bcm5719-llvm-aec2fa352f533d230ab50b6c3002a1a664c9d6c2.tar.gz bcm5719-llvm-aec2fa352f533d230ab50b6c3002a1a664c9d6c2.zip