summary refs log tree commit diff stats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp 4
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 151
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 2
3 files changed, 108 insertions, 49 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 852bbefaf20..7f119175c4a 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1386,7 +1386,9 @@ void Cost::RateFormula(const Formula &F,
// Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
// additional instruction (at least fill).
- unsigned TTIRegNum = TTI->getNumberOfRegisters(false) - 1;
+ // TODO: Need distinguish register class?
+ unsigned TTIRegNum = TTI->getNumberOfRegisters(
+ TTI->getRegisterClassForType(false, F.getType())) - 1;
if (C.NumRegs > TTIRegNum) {
// Cost already exceeded TTIRegNum, then only newly added register can add
// new instructions.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7e95038a5eb..11e1cd003b4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -983,10 +983,11 @@ public:
/// of a loop.
struct RegisterUsage {
/// Holds the number of loop invariant values that are used in the loop.
- unsigned LoopInvariantRegs;
-
+ /// The key is ClassID of target-provided register class.
+ SmallMapVector<unsigned, unsigned, 4> LoopInvariantRegs;
/// Holds the maximum number of concurrent live intervals in the loop.
- unsigned MaxLocalUsers;
+ /// The key is ClassID of target-provided register class.
+ SmallMapVector<unsigned, unsigned, 4> MaxLocalUsers;
};
/// \return Returns information about the register usages of the loop for the
@@ -4962,9 +4963,14 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
// Select the largest VF which doesn't require more registers than existing
// ones.
- unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true);
for (int i = RUs.size() - 1; i >= 0; --i) {
- if (RUs[i].MaxLocalUsers <= TargetNumRegisters) {
+ bool Selected = true;
+ for (auto& pair : RUs[i].MaxLocalUsers) {
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first);
+ if (pair.second > TargetNumRegisters)
+ Selected = false;
+ }
+ if (Selected) {
MaxVF = VFs[i];
break;
}
@@ -5115,22 +5121,12 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
return 1;
- unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1);
- LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
- << " registers\n");
-
- if (VF == 1) {
- if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
- TargetNumRegisters = ForceTargetNumScalarRegs;
- } else {
- if (ForceTargetNumVectorRegs.getNumOccurrences() > 0)
- TargetNumRegisters = ForceTargetNumVectorRegs;
- }
-
RegisterUsage R = calculateRegisterUsage({VF})[0];
// We divide by these constants so assume that we have at least one
// instruction that uses at least one register.
- R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U);
+ for (auto& pair : R.MaxLocalUsers) {
+ pair.second = std::max(pair.second, 1U);
+ }
// We calculate the interleave count using the following formula.
// Subtract the number of loop invariants from the number of available
@@ -5143,13 +5139,35 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
// We also want power of two interleave counts to ensure that the induction
// variable of the vector loop wraps to zero, when tail is folded by masking;
// this currently happens when OptForSize, in which case IC is set to 1 above.
- unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) /
- R.MaxLocalUsers);
+ unsigned IC = UINT_MAX;
- // Don't count the induction variable as interleaved.
- if (EnableIndVarRegisterHeur)
- IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) /
- std::max(1U, (R.MaxLocalUsers - 1)));
+ for (auto& pair : R.MaxLocalUsers) {
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first);
+ LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
+ << " registers of "
+ << TTI.getRegisterClassName(pair.first) << " register class\n");
+ if (VF == 1) {
+ if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
+ TargetNumRegisters = ForceTargetNumScalarRegs;
+ } else {
+ if (ForceTargetNumVectorRegs.getNumOccurrences() > 0)
+ TargetNumRegisters = ForceTargetNumVectorRegs;
+ }
+ unsigned MaxLocalUsers = pair.second;
+ unsigned LoopInvariantRegs = 0;
+ if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end())
+ LoopInvariantRegs = R.LoopInvariantRegs[pair.first];
+
+ unsigned TmpIC = PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers);
+ // Don't count the induction variable as interleaved.
+ if (EnableIndVarRegisterHeur) {
+ TmpIC =
+ PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs - 1) /
+ std::max(1U, (MaxLocalUsers - 1)));
+ }
+
+ IC = std::min(IC, TmpIC);
+ }
// Clamp the interleave ranges to reasonable counts.
unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF);
@@ -5331,7 +5349,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
const DataLayout &DL = TheFunction->getParent()->getDataLayout();
SmallVector<RegisterUsage, 8> RUs(VFs.size());
- SmallVector<unsigned, 8> MaxUsages(VFs.size(), 0);
+ SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());
LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
@@ -5361,21 +5379,45 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
// For each VF find the maximum usage of registers.
for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
+ // Count the number of live intervals.
+ SmallMapVector<unsigned, unsigned, 4> RegUsage;
+
if (VFs[j] == 1) {
- MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size());
- continue;
+ for (auto Inst : OpenIntervals) {
+ unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType());
+ if (RegUsage.find(ClassID) == RegUsage.end())
+ RegUsage[ClassID] = 1;
+ else
+ RegUsage[ClassID] += 1;
+ }
+ } else {
+ collectUniformsAndScalars(VFs[j]);
+ for (auto Inst : OpenIntervals) {
+ // Skip ignored values for VF > 1.
+ if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end())
+ continue;
+ if (isScalarAfterVectorization(Inst, VFs[j])) {
+ unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType());
+ if (RegUsage.find(ClassID) == RegUsage.end())
+ RegUsage[ClassID] = 1;
+ else
+ RegUsage[ClassID] += 1;
+ } else {
+ unsigned ClassID = TTI.getRegisterClassForType(true, Inst->getType());
+ if (RegUsage.find(ClassID) == RegUsage.end())
+ RegUsage[ClassID] = GetRegUsage(Inst->getType(), VFs[j]);
+ else
+ RegUsage[ClassID] += GetRegUsage(Inst->getType(), VFs[j]);
+ }
+ }
}
- collectUniformsAndScalars(VFs[j]);
- // Count the number of live intervals.
- unsigned RegUsage = 0;
- for (auto Inst : OpenIntervals) {
- // Skip ignored values for VF > 1.
- if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end() ||
- isScalarAfterVectorization(Inst, VFs[j]))
- continue;
- RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
+
+ for (auto& pair : RegUsage) {
+ if (MaxUsages[j].find(pair.first) != MaxUsages[j].end())
+ MaxUsages[j][pair.first] = std::max(MaxUsages[j][pair.first], pair.second);
+ else
+ MaxUsages[j][pair.first] = pair.second;
}
- MaxUsages[j] = std::max(MaxUsages[j], RegUsage);
}
LLVM_DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # "
@@ -5386,18 +5428,32 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
}
for (unsigned i = 0, e = VFs.size(); i < e; ++i) {
- unsigned Invariant = 0;
- if (VFs[i] == 1)
- Invariant = LoopInvariants.size();
- else {
- for (auto Inst : LoopInvariants)
- Invariant += GetRegUsage(Inst->getType(), VFs[i]);
+ SmallMapVector<unsigned, unsigned, 4> Invariant;
+
+ for (auto Inst : LoopInvariants) {
+ unsigned Usage = VFs[i] == 1 ? 1 : GetRegUsage(Inst->getType(), VFs[i]);
+ unsigned ClassID = TTI.getRegisterClassForType(VFs[i] > 1, Inst->getType());
+ if (Invariant.find(ClassID) == Invariant.end())
+ Invariant[ClassID] = Usage;
+ else
+ Invariant[ClassID] += Usage;
}
LLVM_DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n');
- LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n');
- LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant
- << '\n');
+ LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: "
+ << MaxUsages[i].size() << " item\n");
+ for (const auto& pair : MaxUsages[i]) {
+ LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: "
+ << TTI.getRegisterClassName(pair.first)
+ << ", " << pair.second << " registers \n");
+ }
+ LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: "
+ << Invariant.size() << " item\n");
+ for (const auto& pair : Invariant) {
+ LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: "
+ << TTI.getRegisterClassName(pair.first)
+ << ", " << pair.second << " registers \n");
+ }
RU.LoopInvariantRegs = Invariant;
RU.MaxLocalUsers = MaxUsages[i];
@@ -7762,7 +7818,8 @@ bool LoopVectorizePass::runImpl(
// The second condition is necessary because, even if the target has no
// vector registers, loop vectorization may still enable scalar
// interleaving.
- if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2)
+ if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)) &&
+ TTI->getMaxInterleaveFactor(1) < 2)
return false;
bool Changed = false;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 99428c6c5de..a22153bbed1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5237,7 +5237,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
// If the target claims to have no vector registers don't attempt
// vectorization.
- if (!TTI->getNumberOfRegisters(true))
+ if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)))
return false;
// Don't vectorize when the attribute NoImplicitFloat is used.
OpenPOWER on IntegriCloud