diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize')
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 153 |
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 |
2 files changed, 48 insertions, 107 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 18cc61f7a2b..7e95038a5eb 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -983,11 +983,10 @@ public: /// of a loop. struct RegisterUsage { /// Holds the number of loop invariant values that are used in the loop. - /// The key is ClassID of target-provided register class. - SmallMapVector<unsigned, unsigned, 4> LoopInvariantRegs; + unsigned LoopInvariantRegs; + /// Holds the maximum number of concurrent live intervals in the loop. - /// The key is ClassID of target-provided register class. - SmallMapVector<unsigned, unsigned, 4> MaxLocalUsers; + unsigned MaxLocalUsers; }; /// \return Returns information about the register usages of the loop for the @@ -4963,14 +4962,9 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) { // Select the largest VF which doesn't require more registers than existing // ones. 
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true); for (int i = RUs.size() - 1; i >= 0; --i) { - bool Selected = true; - for (auto& pair : RUs[i].MaxLocalUsers) { - unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first); - if (pair.second > TargetNumRegisters) - Selected = false; - } - if (Selected) { + if (RUs[i].MaxLocalUsers <= TargetNumRegisters) { MaxVF = VFs[i]; break; } @@ -5121,12 +5115,22 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF, if (TC > 1 && TC < TinyTripCountInterleaveThreshold) return 1; + unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1); + LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters + << " registers\n"); + + if (VF == 1) { + if (ForceTargetNumScalarRegs.getNumOccurrences() > 0) + TargetNumRegisters = ForceTargetNumScalarRegs; + } else { + if (ForceTargetNumVectorRegs.getNumOccurrences() > 0) + TargetNumRegisters = ForceTargetNumVectorRegs; + } + RegisterUsage R = calculateRegisterUsage({VF})[0]; // We divide by these constants so assume that we have at least one // instruction that uses at least one register. - for (auto& pair : R.MaxLocalUsers) { - pair.second = std::max(pair.second, 1U); - } + R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U); // We calculate the interleave count using the following formula. // Subtract the number of loop invariants from the number of available @@ -5139,35 +5143,13 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF, // We also want power of two interleave counts to ensure that the induction // variable of the vector loop wraps to zero, when tail is folded by masking; // this currently happens when OptForSize, in which case IC is set to 1 above. 
- unsigned IC = UINT_MAX; + unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) / + R.MaxLocalUsers); - for (auto& pair : R.MaxLocalUsers) { - unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first); - LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters - << " registers of " - << TTI.getRegisterClassName(pair.first) << " register class\n"); - if (VF == 1) { - if (ForceTargetNumScalarRegs.getNumOccurrences() > 0) - TargetNumRegisters = ForceTargetNumScalarRegs; - } else { - if (ForceTargetNumVectorRegs.getNumOccurrences() > 0) - TargetNumRegisters = ForceTargetNumVectorRegs; - } - unsigned MaxLocalUsers = pair.second; - unsigned LoopInvariantRegs = 0; - if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end()) - LoopInvariantRegs = R.LoopInvariantRegs[pair.first]; - - unsigned TmpIC = PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers); - // Don't count the induction variable as interleaved. - if (EnableIndVarRegisterHeur) { - TmpIC = - PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs - 1) / - std::max(1U, (MaxLocalUsers - 1))); - } - - IC = std::min(IC, TmpIC); - } + // Don't count the induction variable as interleaved. + if (EnableIndVarRegisterHeur) + IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) / + std::max(1U, (R.MaxLocalUsers - 1))); // Clamp the interleave ranges to reasonable counts. 
unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF); @@ -5349,7 +5331,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) { const DataLayout &DL = TheFunction->getParent()->getDataLayout(); SmallVector<RegisterUsage, 8> RUs(VFs.size()); - SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size()); + SmallVector<unsigned, 8> MaxUsages(VFs.size(), 0); LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); @@ -5379,45 +5361,21 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) { // For each VF find the maximum usage of registers. for (unsigned j = 0, e = VFs.size(); j < e; ++j) { - // Count the number of live intervals. - SmallMapVector<unsigned, unsigned, 4> RegUsage; - if (VFs[j] == 1) { - for (auto Inst : OpenIntervals) { - unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType()); - if (RegUsage.find(ClassID) == RegUsage.end()) - RegUsage[ClassID] = 1; - else - RegUsage[ClassID] += 1; - } - } else { - collectUniformsAndScalars(VFs[j]); - for (auto Inst : OpenIntervals) { - // Skip ignored values for VF > 1. 
- if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end()) - continue; - if (isScalarAfterVectorization(Inst, VFs[j])) { - unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType()); - if (RegUsage.find(ClassID) == RegUsage.end()) - RegUsage[ClassID] = 1; - else - RegUsage[ClassID] += 1; - } else { - unsigned ClassID = TTI.getRegisterClassForType(true, Inst->getType()); - if (RegUsage.find(ClassID) == RegUsage.end()) - RegUsage[ClassID] = GetRegUsage(Inst->getType(), VFs[j]); - else - RegUsage[ClassID] += GetRegUsage(Inst->getType(), VFs[j]); - } - } + MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size()); + continue; } - - for (auto& pair : RegUsage) { - if (MaxUsages[j].find(pair.first) != MaxUsages[j].end()) - MaxUsages[j][pair.first] = std::max(MaxUsages[j][pair.first], pair.second); - else - MaxUsages[j][pair.first] = pair.second; + collectUniformsAndScalars(VFs[j]); + // Count the number of live intervals. + unsigned RegUsage = 0; + for (auto Inst : OpenIntervals) { + // Skip ignored values for VF > 1. + if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end() || + isScalarAfterVectorization(Inst, VFs[j])) + continue; + RegUsage += GetRegUsage(Inst->getType(), VFs[j]); } + MaxUsages[j] = std::max(MaxUsages[j], RegUsage); } LLVM_DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " @@ -5428,34 +5386,18 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) { } for (unsigned i = 0, e = VFs.size(); i < e; ++i) { - SmallMapVector<unsigned, unsigned, 4> Invariant; - - for (auto Inst : LoopInvariants) { - unsigned Usage = VFs[i] == 1 ? 
1 : GetRegUsage(Inst->getType(), VFs[i]); - unsigned ClassID = TTI.getRegisterClassForType(VFs[i] > 1, Inst->getType()); - if (Invariant.find(ClassID) == Invariant.end()) - Invariant[ClassID] = Usage; - else - Invariant[ClassID] += Usage; + unsigned Invariant = 0; + if (VFs[i] == 1) + Invariant = LoopInvariants.size(); + else { + for (auto Inst : LoopInvariants) + Invariant += GetRegUsage(Inst->getType(), VFs[i]); } LLVM_DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n'); - LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: " - << MaxUsages[i].size() << " item\n"); - for (const auto& Pair : MaxUsages[i]) { - (void)Pair; - LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: " - << TTI.getRegisterClassName(Pair.first) - << ", " << Pair.second << " registers \n"); - } - LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: " - << Invariant.size() << " item\n"); - for (const auto& Pair : Invariant) { - (void)Pair; - LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: " - << TTI.getRegisterClassName(Pair.first) - << ", " << Pair.second << " registers \n"); - } + LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n'); + LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant + << '\n'); RU.LoopInvariantRegs = Invariant; RU.MaxLocalUsers = MaxUsages[i]; @@ -7820,8 +7762,7 @@ bool LoopVectorizePass::runImpl( // The second condition is necessary because, even if the target has no // vector registers, loop vectorization may still enable scalar // interleaving. 
- if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)) && - TTI->getMaxInterleaveFactor(1) < 2) + if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2) return false; bool Changed = false; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index a22153bbed1..99428c6c5de 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5237,7 +5237,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, // If the target claims to have no vector registers don't attempt // vectorization. - if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true))) + if (!TTI->getNumberOfRegisters(true)) return false; // Don't vectorize when the attribute NoImplicitFloat is used. |

