Revert "[LoopVectorize][PowerPC] Estimate int and float register pressure separately in loop-vectorize"

Also revert "[LoopVectorize] Fix non-debug builds after rL374017". This reverts commit 9f41deccc0e648a006c9f38e11919f181b6c7e0a. This reverts commit 18b6fe07bcf44294f200bd2b526cb737ed275c04. The patch is breaking the PowerPC internal build. After checking with the author, I am reverting it on his behalf for now because of the timezone difference. llvm-svn: 374091
author: Jinsong Ji <jji@us.ibm.com> 2019-10-08 17:32:56 +0000
committer: Jinsong Ji <jji@us.ibm.com> 2019-10-08 17:32:56 +0000
commit: 9912232b461ab76b08497021019084360b137060 (patch)
tree: f20f57231994f8c8ac2e4720b0270ed9457bcbf1 /llvm/lib/Transforms/Vectorize
parent: 796a58107ad5116d1da88871528ac8ec5195bd5e (diff)
download: bcm5719-llvm-9912232b461ab76b08497021019084360b137060.tar.gz
bcm5719-llvm-9912232b461ab76b08497021019084360b137060.zip
2 files changed, 48 insertions, 107 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 18cc61f7a2b..7e95038a5eb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -983,11 +983,10 @@ public:
   /// of a loop.
   struct RegisterUsage {
     /// Holds the number of loop invariant values that are used in the loop.
-    /// The key is ClassID of target-provided register class.
-    SmallMapVector<unsigned, unsigned, 4> LoopInvariantRegs;
+    unsigned LoopInvariantRegs;
+
     /// Holds the maximum number of concurrent live intervals in the loop.
-    /// The key is ClassID of target-provided register class.
-    SmallMapVector<unsigned, unsigned, 4> MaxLocalUsers;
+    unsigned MaxLocalUsers;
   };
 
   /// \return Returns information about the register usages of the loop for the
@@ -4963,14 +4962,9 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
 
     // Select the largest VF which doesn't require more registers than existing
     // ones.
+    unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true);
     for (int i = RUs.size() - 1; i >= 0; --i) {
-      bool Selected = true;
-      for (auto& pair : RUs[i].MaxLocalUsers) {
-        unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first);
-        if (pair.second > TargetNumRegisters)
-          Selected = false;
-      }
-      if (Selected) {
+      if (RUs[i].MaxLocalUsers <= TargetNumRegisters) {
         MaxVF = VFs[i];
         break;
       }
@@ -5121,12 +5115,22 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
   if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
     return 1;
 
+  unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1);
+  LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
+                    << " registers\n");
+
+  if (VF == 1) {
+    if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
+      TargetNumRegisters = ForceTargetNumScalarRegs;
+  } else {
+    if (ForceTargetNumVectorRegs.getNumOccurrences() > 0)
+      TargetNumRegisters = ForceTargetNumVectorRegs;
+  }
+
   RegisterUsage R = calculateRegisterUsage({VF})[0];
   // We divide by these constants so assume that we have at least one
   // instruction that uses at least one register.
-  for (auto& pair : R.MaxLocalUsers) {
-    pair.second = std::max(pair.second, 1U);
-  }
+  R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U);
 
   // We calculate the interleave count using the following formula.
   // Subtract the number of loop invariants from the number of available
@@ -5139,35 +5143,13 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
   // We also want power of two interleave counts to ensure that the induction
   // variable of the vector loop wraps to zero, when tail is folded by masking;
   // this currently happens when OptForSize, in which case IC is set to 1 above.
-  unsigned IC = UINT_MAX;
+  unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) /
+                              R.MaxLocalUsers);
 
-  for (auto& pair : R.MaxLocalUsers) {
-    unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first);
-    LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
-                      << " registers of "
-                      << TTI.getRegisterClassName(pair.first) << " register class\n");
-    if (VF == 1) {
-      if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
-        TargetNumRegisters = ForceTargetNumScalarRegs;
-    } else {
-      if (ForceTargetNumVectorRegs.getNumOccurrences() > 0)
-        TargetNumRegisters = ForceTargetNumVectorRegs;
-    }
-    unsigned MaxLocalUsers = pair.second;
-    unsigned LoopInvariantRegs = 0;
-    if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end())
-      LoopInvariantRegs = R.LoopInvariantRegs[pair.first];
-
-    unsigned TmpIC = PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers);
-    // Don't count the induction variable as interleaved.
-    if (EnableIndVarRegisterHeur) {
-      TmpIC =
-          PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs - 1) /
-                        std::max(1U, (MaxLocalUsers - 1)));
-    }
-
-    IC = std::min(IC, TmpIC);
-  }
+  // Don't count the induction variable as interleaved.
+  if (EnableIndVarRegisterHeur)
+    IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) /
+                       std::max(1U, (R.MaxLocalUsers - 1)));
 
   // Clamp the interleave ranges to reasonable counts.
   unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF);
@@ -5349,7 +5331,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
   const DataLayout &DL = TheFunction->getParent()->getDataLayout();
 
   SmallVector<RegisterUsage, 8> RUs(VFs.size());
-  SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());
+  SmallVector<unsigned, 8> MaxUsages(VFs.size(), 0);
 
   LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
 
@@ -5379,45 +5361,21 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
 
     // For each VF find the maximum usage of registers.
     for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
-      // Count the number of live intervals.
-      SmallMapVector<unsigned, unsigned, 4> RegUsage;
-
       if (VFs[j] == 1) {
-        for (auto Inst : OpenIntervals) {
-          unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType());
-          if (RegUsage.find(ClassID) == RegUsage.end())
-            RegUsage[ClassID] = 1;
-          else
-            RegUsage[ClassID] += 1;
-        }
-      } else {
-        collectUniformsAndScalars(VFs[j]);
-        for (auto Inst : OpenIntervals) {
-          // Skip ignored values for VF > 1.
-          if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end())
-            continue;
-          if (isScalarAfterVectorization(Inst, VFs[j])) {
-            unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType());
-            if (RegUsage.find(ClassID) == RegUsage.end())
-              RegUsage[ClassID] = 1;
-            else
-              RegUsage[ClassID] += 1;
-          } else {
-            unsigned ClassID = TTI.getRegisterClassForType(true, Inst->getType());
-            if (RegUsage.find(ClassID) == RegUsage.end())
-              RegUsage[ClassID] = GetRegUsage(Inst->getType(), VFs[j]);
-            else
-              RegUsage[ClassID] += GetRegUsage(Inst->getType(), VFs[j]);
-          }
-        }
+        MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size());
+        continue;
       }
-    
-      for (auto& pair : RegUsage) {
-        if (MaxUsages[j].find(pair.first) != MaxUsages[j].end())
-          MaxUsages[j][pair.first] = std::max(MaxUsages[j][pair.first], pair.second);
-        else
-          MaxUsages[j][pair.first] = pair.second;
+      collectUniformsAndScalars(VFs[j]);
+      // Count the number of live intervals.
+      unsigned RegUsage = 0;
+      for (auto Inst : OpenIntervals) {
+        // Skip ignored values for VF > 1.
+        if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end() ||
+            isScalarAfterVectorization(Inst, VFs[j]))
+          continue;
+        RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
       }
+      MaxUsages[j] = std::max(MaxUsages[j], RegUsage);
     }
 
     LLVM_DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # "
@@ -5428,34 +5386,18 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
   }
 
   for (unsigned i = 0, e = VFs.size(); i < e; ++i) {
-    SmallMapVector<unsigned, unsigned, 4> Invariant;
-  
-    for (auto Inst : LoopInvariants) {
-      unsigned Usage = VFs[i] == 1 ? 1 : GetRegUsage(Inst->getType(), VFs[i]);
-      unsigned ClassID = TTI.getRegisterClassForType(VFs[i] > 1, Inst->getType());
-      if (Invariant.find(ClassID) == Invariant.end())
-        Invariant[ClassID] = Usage;
-      else
-        Invariant[ClassID] += Usage;
+    unsigned Invariant = 0;
+    if (VFs[i] == 1)
+      Invariant = LoopInvariants.size();
+    else {
+      for (auto Inst : LoopInvariants)
+        Invariant += GetRegUsage(Inst->getType(), VFs[i]);
     }
 
     LLVM_DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n');
-    LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: "
-                      << MaxUsages[i].size() << " item\n");
-    for (const auto& Pair : MaxUsages[i]) {
-      (void)Pair;
-      LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: "
-                        << TTI.getRegisterClassName(Pair.first)
-                        << ", " << Pair.second << " registers \n");
-    }
-    LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: "
-                      << Invariant.size() << " item\n");
-    for (const auto& Pair : Invariant) {
-      (void)Pair;
-      LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: "
-                        << TTI.getRegisterClassName(Pair.first)
-                        << ", " << Pair.second << " registers \n");
-    }
+    LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n');
+    LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant
+                      << '\n');
 
     RU.LoopInvariantRegs = Invariant;
     RU.MaxLocalUsers = MaxUsages[i];
@@ -7820,8 +7762,7 @@ bool LoopVectorizePass::runImpl(
   // The second condition is necessary because, even if the target has no
   // vector registers, loop vectorization may still enable scalar
   // interleaving.
-  if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)) &&
-      TTI->getMaxInterleaveFactor(1) < 2)
+  if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2)
     return false;
 
   bool Changed = false;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a22153bbed1..99428c6c5de 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5237,7 +5237,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
 
   // If the target claims to have no vector registers don't attempt
   // vectorization.
-  if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)))
+  if (!TTI->getNumberOfRegisters(true))
     return false;
 
   // Don't vectorize when the attribute NoImplicitFloat is used.
author	Jinsong Ji <jji@us.ibm.com>	2019-10-08 17:32:56 +0000
committer	Jinsong Ji <jji@us.ibm.com>	2019-10-08 17:32:56 +0000
commit	9912232b461ab76b08497021019084360b137060 (patch)
tree	f20f57231994f8c8ac2e4720b0270ed9457bcbf1 /llvm/lib/Transforms/Vectorize
parent	796a58107ad5116d1da88871528ac8ec5195bd5e (diff)
download	bcm5719-llvm-9912232b461ab76b08497021019084360b137060.tar.gz bcm5719-llvm-9912232b461ab76b08497021019084360b137060.zip