2 files changed, 48 insertions, 107 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 18cc61f7a2b..7e95038a5eb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -983,11 +983,10 @@ public:
   /// of a loop.
   struct RegisterUsage {
     /// Holds the number of loop invariant values that are used in the loop.
-    /// The key is ClassID of target-provided register class.
-    SmallMapVector<unsigned, unsigned, 4> LoopInvariantRegs;
+    unsigned LoopInvariantRegs;
+
     /// Holds the maximum number of concurrent live intervals in the loop.
-    /// The key is ClassID of target-provided register class.
-    SmallMapVector<unsigned, unsigned, 4> MaxLocalUsers;
+    unsigned MaxLocalUsers;
   };
 
   /// \return Returns information about the register usages of the loop for the
@@ -4963,14 +4962,9 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
 
     // Select the largest VF which doesn't require more registers than existing
     // ones.
+    unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true);
     for (int i = RUs.size() - 1; i >= 0; --i) {
-      bool Selected = true;
-      for (auto& pair : RUs[i].MaxLocalUsers) {
-        unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first);
-        if (pair.second > TargetNumRegisters)
-          Selected = false;
-      }
-      if (Selected) {
+      if (RUs[i].MaxLocalUsers <= TargetNumRegisters) {
         MaxVF = VFs[i];
         break;
       }
@@ -5121,12 +5115,22 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
   if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
     return 1;
 
+  unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1);
+  LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
+                    << " registers\n");
+
+  if (VF == 1) {
+    if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
+      TargetNumRegisters = ForceTargetNumScalarRegs;
+  } else {
+    if (ForceTargetNumVectorRegs.getNumOccurrences() > 0)
+      TargetNumRegisters = ForceTargetNumVectorRegs;
+  }
+
   RegisterUsage R = calculateRegisterUsage({VF})[0];
   // We divide by these constants so assume that we have at least one
   // instruction that uses at least one register.
-  for (auto& pair : R.MaxLocalUsers) {
-    pair.second = std::max(pair.second, 1U);
-  }
+  R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U);
 
   // We calculate the interleave count using the following formula.
   // Subtract the number of loop invariants from the number of available
@@ -5139,35 +5143,13 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
   // We also want power of two interleave counts to ensure that the induction
   // variable of the vector loop wraps to zero, when tail is folded by masking;
   // this currently happens when OptForSize, in which case IC is set to 1 above.
-  unsigned IC = UINT_MAX;
+  unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) /
+                              R.MaxLocalUsers);
 
-  for (auto& pair : R.MaxLocalUsers) {
-    unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first);
-    LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
-                      << " registers of "
-                      << TTI.getRegisterClassName(pair.first) << " register class\n");
-    if (VF == 1) {
-      if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
-        TargetNumRegisters = ForceTargetNumScalarRegs;
-    } else {
-      if (ForceTargetNumVectorRegs.getNumOccurrences() > 0)
-        TargetNumRegisters = ForceTargetNumVectorRegs;
-    }
-    unsigned MaxLocalUsers = pair.second;
-    unsigned LoopInvariantRegs = 0;
-    if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end())
-      LoopInvariantRegs = R.LoopInvariantRegs[pair.first];
-
-    unsigned TmpIC = PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers);
-    // Don't count the induction variable as interleaved.
-    if (EnableIndVarRegisterHeur) {
-      TmpIC =
-          PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs - 1) /
-                        std::max(1U, (MaxLocalUsers - 1)));
-    }
-
-    IC = std::min(IC, TmpIC);
-  }
+  // Don't count the induction variable as interleaved.
+  if (EnableIndVarRegisterHeur)
+    IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) /
+                       std::max(1U, (R.MaxLocalUsers - 1)));
 
   // Clamp the interleave ranges to reasonable counts.
   unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF);
@@ -5349,7 +5331,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
   const DataLayout &DL = TheFunction->getParent()->getDataLayout();
 
   SmallVector<RegisterUsage, 8> RUs(VFs.size());
-  SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());
+  SmallVector<unsigned, 8> MaxUsages(VFs.size(), 0);
 
   LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
 
@@ -5379,45 +5361,21 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
 
     // For each VF find the maximum usage of registers.
     for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
-      // Count the number of live intervals.
-      SmallMapVector<unsigned, unsigned, 4> RegUsage;
-
       if (VFs[j] == 1) {
-        for (auto Inst : OpenIntervals) {
-          unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType());
-          if (RegUsage.find(ClassID) == RegUsage.end())
-            RegUsage[ClassID] = 1;
-          else
-            RegUsage[ClassID] += 1;
-        }
-      } else {
-        collectUniformsAndScalars(VFs[j]);
-        for (auto Inst : OpenIntervals) {
-          // Skip ignored values for VF > 1.
-          if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end())
-            continue;
-          if (isScalarAfterVectorization(Inst, VFs[j])) {
-            unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType());
-            if (RegUsage.find(ClassID) == RegUsage.end())
-              RegUsage[ClassID] = 1;
-            else
-              RegUsage[ClassID] += 1;
-          } else {
-            unsigned ClassID = TTI.getRegisterClassForType(true, Inst->getType());
-            if (RegUsage.find(ClassID) == RegUsage.end())
-              RegUsage[ClassID] = GetRegUsage(Inst->getType(), VFs[j]);
-            else
-              RegUsage[ClassID] += GetRegUsage(Inst->getType(), VFs[j]);
-          }
-        }
+        MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size());
+        continue;
       }
-    
-      for (auto& pair : RegUsage) {
-        if (MaxUsages[j].find(pair.first) != MaxUsages[j].end())
-          MaxUsages[j][pair.first] = std::max(MaxUsages[j][pair.first], pair.second);
-        else
-          MaxUsages[j][pair.first] = pair.second;
+      collectUniformsAndScalars(VFs[j]);
+      // Count the number of live intervals.
+      unsigned RegUsage = 0;
+      for (auto Inst : OpenIntervals) {
+        // Skip ignored values for VF > 1.
+        if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end() ||
+            isScalarAfterVectorization(Inst, VFs[j]))
+          continue;
+        RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
       }
+      MaxUsages[j] = std::max(MaxUsages[j], RegUsage);
     }
 
     LLVM_DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # "
@@ -5428,34 +5386,18 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
   }
 
   for (unsigned i = 0, e = VFs.size(); i < e; ++i) {
-    SmallMapVector<unsigned, unsigned, 4> Invariant;
-  
-    for (auto Inst : LoopInvariants) {
-      unsigned Usage = VFs[i] == 1 ? 1 : GetRegUsage(Inst->getType(), VFs[i]);
-      unsigned ClassID = TTI.getRegisterClassForType(VFs[i] > 1, Inst->getType());
-      if (Invariant.find(ClassID) == Invariant.end())
-        Invariant[ClassID] = Usage;
-      else
-        Invariant[ClassID] += Usage;
+    unsigned Invariant = 0;
+    if (VFs[i] == 1)
+      Invariant = LoopInvariants.size();
+    else {
+      for (auto Inst : LoopInvariants)
+        Invariant += GetRegUsage(Inst->getType(), VFs[i]);
     }
 
     LLVM_DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n');
-    LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: "
-                      << MaxUsages[i].size() << " item\n");
-    for (const auto& Pair : MaxUsages[i]) {
-      (void)Pair;
-      LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: "
-                        << TTI.getRegisterClassName(Pair.first)
-                        << ", " << Pair.second << " registers \n");
-    }
-    LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: "
-                      << Invariant.size() << " item\n");
-    for (const auto& Pair : Invariant) {
-      (void)Pair;
-      LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: "
-                        << TTI.getRegisterClassName(Pair.first)
-                        << ", " << Pair.second << " registers \n");
-    }
+    LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n');
+    LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant
+                      << '\n');
 
     RU.LoopInvariantRegs = Invariant;
     RU.MaxLocalUsers = MaxUsages[i];
@@ -7820,8 +7762,7 @@ bool LoopVectorizePass::runImpl(
   // The second condition is necessary because, even if the target has no
   // vector registers, loop vectorization may still enable scalar
   // interleaving.
-  if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)) &&
-      TTI->getMaxInterleaveFactor(1) < 2)
+  if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2)
     return false;
 
   bool Changed = false;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a22153bbed1..99428c6c5de 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5237,7 +5237,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
 
   // If the target claims to have no vector registers don't attempt
   // vectorization.
-  if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)))
+  if (!TTI->getNumberOfRegisters(true))
     return false;
 
   // Don't vectorize when the attribute NoImplicitFloat is used.