summary refs log tree commit diff stats
path: root/llvm/lib/Transforms/Vectorize
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 153
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 2
2 files changed, 48 insertions, 107 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 18cc61f7a2b..7e95038a5eb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -983,11 +983,10 @@ public:
/// of a loop.
struct RegisterUsage {
/// Holds the number of loop invariant values that are used in the loop.
- /// The key is ClassID of target-provided register class.
- SmallMapVector<unsigned, unsigned, 4> LoopInvariantRegs;
+ unsigned LoopInvariantRegs;
+
/// Holds the maximum number of concurrent live intervals in the loop.
- /// The key is ClassID of target-provided register class.
- SmallMapVector<unsigned, unsigned, 4> MaxLocalUsers;
+ unsigned MaxLocalUsers;
};
/// \return Returns information about the register usages of the loop for the
@@ -4963,14 +4962,9 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
// Select the largest VF which doesn't require more registers than existing
// ones.
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true);
for (int i = RUs.size() - 1; i >= 0; --i) {
- bool Selected = true;
- for (auto& pair : RUs[i].MaxLocalUsers) {
- unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first);
- if (pair.second > TargetNumRegisters)
- Selected = false;
- }
- if (Selected) {
+ if (RUs[i].MaxLocalUsers <= TargetNumRegisters) {
MaxVF = VFs[i];
break;
}
@@ -5121,12 +5115,22 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
return 1;
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1);
+ LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
+ << " registers\n");
+
+ if (VF == 1) {
+ if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
+ TargetNumRegisters = ForceTargetNumScalarRegs;
+ } else {
+ if (ForceTargetNumVectorRegs.getNumOccurrences() > 0)
+ TargetNumRegisters = ForceTargetNumVectorRegs;
+ }
+
RegisterUsage R = calculateRegisterUsage({VF})[0];
// We divide by these constants so assume that we have at least one
// instruction that uses at least one register.
- for (auto& pair : R.MaxLocalUsers) {
- pair.second = std::max(pair.second, 1U);
- }
+ R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U);
// We calculate the interleave count using the following formula.
// Subtract the number of loop invariants from the number of available
@@ -5139,35 +5143,13 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
// We also want power of two interleave counts to ensure that the induction
// variable of the vector loop wraps to zero, when tail is folded by masking;
// this currently happens when OptForSize, in which case IC is set to 1 above.
- unsigned IC = UINT_MAX;
+ unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) /
+ R.MaxLocalUsers);
- for (auto& pair : R.MaxLocalUsers) {
- unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first);
- LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
- << " registers of "
- << TTI.getRegisterClassName(pair.first) << " register class\n");
- if (VF == 1) {
- if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
- TargetNumRegisters = ForceTargetNumScalarRegs;
- } else {
- if (ForceTargetNumVectorRegs.getNumOccurrences() > 0)
- TargetNumRegisters = ForceTargetNumVectorRegs;
- }
- unsigned MaxLocalUsers = pair.second;
- unsigned LoopInvariantRegs = 0;
- if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end())
- LoopInvariantRegs = R.LoopInvariantRegs[pair.first];
-
- unsigned TmpIC = PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers);
- // Don't count the induction variable as interleaved.
- if (EnableIndVarRegisterHeur) {
- TmpIC =
- PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs - 1) /
- std::max(1U, (MaxLocalUsers - 1)));
- }
-
- IC = std::min(IC, TmpIC);
- }
+ // Don't count the induction variable as interleaved.
+ if (EnableIndVarRegisterHeur)
+ IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) /
+ std::max(1U, (R.MaxLocalUsers - 1)));
// Clamp the interleave ranges to reasonable counts.
unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF);
@@ -5349,7 +5331,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
const DataLayout &DL = TheFunction->getParent()->getDataLayout();
SmallVector<RegisterUsage, 8> RUs(VFs.size());
- SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());
+ SmallVector<unsigned, 8> MaxUsages(VFs.size(), 0);
LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
@@ -5379,45 +5361,21 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
// For each VF find the maximum usage of registers.
for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
- // Count the number of live intervals.
- SmallMapVector<unsigned, unsigned, 4> RegUsage;
-
if (VFs[j] == 1) {
- for (auto Inst : OpenIntervals) {
- unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType());
- if (RegUsage.find(ClassID) == RegUsage.end())
- RegUsage[ClassID] = 1;
- else
- RegUsage[ClassID] += 1;
- }
- } else {
- collectUniformsAndScalars(VFs[j]);
- for (auto Inst : OpenIntervals) {
- // Skip ignored values for VF > 1.
- if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end())
- continue;
- if (isScalarAfterVectorization(Inst, VFs[j])) {
- unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType());
- if (RegUsage.find(ClassID) == RegUsage.end())
- RegUsage[ClassID] = 1;
- else
- RegUsage[ClassID] += 1;
- } else {
- unsigned ClassID = TTI.getRegisterClassForType(true, Inst->getType());
- if (RegUsage.find(ClassID) == RegUsage.end())
- RegUsage[ClassID] = GetRegUsage(Inst->getType(), VFs[j]);
- else
- RegUsage[ClassID] += GetRegUsage(Inst->getType(), VFs[j]);
- }
- }
+ MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size());
+ continue;
}
-
- for (auto& pair : RegUsage) {
- if (MaxUsages[j].find(pair.first) != MaxUsages[j].end())
- MaxUsages[j][pair.first] = std::max(MaxUsages[j][pair.first], pair.second);
- else
- MaxUsages[j][pair.first] = pair.second;
+ collectUniformsAndScalars(VFs[j]);
+ // Count the number of live intervals.
+ unsigned RegUsage = 0;
+ for (auto Inst : OpenIntervals) {
+ // Skip ignored values for VF > 1.
+ if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end() ||
+ isScalarAfterVectorization(Inst, VFs[j]))
+ continue;
+ RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
}
+ MaxUsages[j] = std::max(MaxUsages[j], RegUsage);
}
LLVM_DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # "
@@ -5428,34 +5386,18 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
}
for (unsigned i = 0, e = VFs.size(); i < e; ++i) {
- SmallMapVector<unsigned, unsigned, 4> Invariant;
-
- for (auto Inst : LoopInvariants) {
- unsigned Usage = VFs[i] == 1 ? 1 : GetRegUsage(Inst->getType(), VFs[i]);
- unsigned ClassID = TTI.getRegisterClassForType(VFs[i] > 1, Inst->getType());
- if (Invariant.find(ClassID) == Invariant.end())
- Invariant[ClassID] = Usage;
- else
- Invariant[ClassID] += Usage;
+ unsigned Invariant = 0;
+ if (VFs[i] == 1)
+ Invariant = LoopInvariants.size();
+ else {
+ for (auto Inst : LoopInvariants)
+ Invariant += GetRegUsage(Inst->getType(), VFs[i]);
}
LLVM_DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n');
- LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: "
- << MaxUsages[i].size() << " item\n");
- for (const auto& Pair : MaxUsages[i]) {
- (void)Pair;
- LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: "
- << TTI.getRegisterClassName(Pair.first)
- << ", " << Pair.second << " registers \n");
- }
- LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: "
- << Invariant.size() << " item\n");
- for (const auto& Pair : Invariant) {
- (void)Pair;
- LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: "
- << TTI.getRegisterClassName(Pair.first)
- << ", " << Pair.second << " registers \n");
- }
+ LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n');
+ LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant
+ << '\n');
RU.LoopInvariantRegs = Invariant;
RU.MaxLocalUsers = MaxUsages[i];
@@ -7820,8 +7762,7 @@ bool LoopVectorizePass::runImpl(
// The second condition is necessary because, even if the target has no
// vector registers, loop vectorization may still enable scalar
// interleaving.
- if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)) &&
- TTI->getMaxInterleaveFactor(1) < 2)
+ if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2)
return false;
bool Changed = false;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a22153bbed1..99428c6c5de 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5237,7 +5237,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
// If the target claims to have no vector registers don't attempt
// vectorization.
- if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)))
+ if (!TTI->getNumberOfRegisters(true))
return false;
// Don't vectorize when the attribute NoImplicitFloat is used.
OpenPOWER on IntegriCloud