diff options
author | Cong Hou <congh@google.com> | 2015-10-29 05:35:22 +0000 |
---|---|---|
committer | Cong Hou <congh@google.com> | 2015-10-29 05:35:22 +0000 |
commit | 45bd8ce64c53f86711f5a5ce61db751a3640d0d1 (patch) | |
tree | 7ec5dd3204e584b00880ce66fcc73b9dd3bcd723 /llvm/lib/Transforms | |
parent | 15c0fbaae150725cbf9c8d1a4cb2f2a90f23de86 (diff) | |
download | bcm5719-llvm-45bd8ce64c53f86711f5a5ce61db751a3640d0d1.tar.gz bcm5719-llvm-45bd8ce64c53f86711f5a5ce61db751a3640d0d1.zip |
Revert revision 251592, as it fails a test on some platforms.
llvm-svn: 251617
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 121 |
1 file changed, 28 insertions, 93 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7a28473c25c..ae5ec8cb88a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -126,11 +126,6 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), "trip count that is smaller than this " "value.")); -static cl::opt<bool> MaximizeBandwidth( - "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, - cl::desc("Maximize bandwidth when selecting vectorization factor which " - "will be determined by the smallest type in loop.")); - /// This enables versioning on the strides of symbolically striding memory /// accesses in code like the following. /// for (i = 0; i < N; ++i) @@ -1382,10 +1377,10 @@ public: /// possible. VectorizationFactor selectVectorizationFactor(bool OptForSize); - /// \return The size (in bits) of the smallest and widest types in the code - /// that needs to be vectorized. We ignore values that remain scalar such as + /// \return The size (in bits) of the widest type in the code that + /// needs to be vectorized. We ignore values that remain scalar such as /// 64 bit loop indices. - std::pair<unsigned, unsigned> getSmallestAndWidestTypes(); + unsigned getWidestType(); /// \return The desired interleave count. /// If interleave count has been specified by metadata it will be returned. @@ -1412,10 +1407,8 @@ public: unsigned NumInstructions; }; - /// \return Returns information about the register usages of the loop for the - /// given vectorization factors. - SmallVector<RegisterUsage, 8> - calculateRegisterUsage(const SmallVector<unsigned, 8> &VFs); + /// \return information about the register usage of the loop. + RegisterUsage calculateRegisterUsage(); private: /// Returns the expected execution cost. 
The unit of the cost does @@ -4714,8 +4707,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); - unsigned SmallestType, WidestType; - std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes(); + unsigned WidestType = getWidestType(); unsigned WidestRegister = TTI.getRegisterBitWidth(true); unsigned MaxSafeDepDist = -1U; if (Legal->getMaxSafeDepDistBytes() != -1U) @@ -4723,9 +4715,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { WidestRegister = ((WidestRegister < MaxSafeDepDist) ? WidestRegister : MaxSafeDepDist); unsigned MaxVectorSize = WidestRegister / WidestType; - - DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / " - << WidestType << " bits.\n"); + DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n"); DEBUG(dbgs() << "LV: The Widest register is: " << WidestRegister << " bits.\n"); @@ -4738,26 +4728,6 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { " into one vector!"); unsigned VF = MaxVectorSize; - if (MaximizeBandwidth && !OptForSize) { - // Collect all viable vectorization factors. - SmallVector<unsigned, 8> VFs; - unsigned NewMaxVectorSize = WidestRegister / SmallestType; - for (unsigned VS = MaxVectorSize; VS <= NewMaxVectorSize; VS *= 2) - VFs.push_back(VS); - - // For each VF calculate its register usage. - auto RUs = calculateRegisterUsage(VFs); - - // Select the largest VF which doesn't require more registers than existing - // ones. - unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true); - for (int i = RUs.size() - 1; i >= 0; --i) { - if (RUs[i].MaxLocalUsers <= TargetNumRegisters) { - VF = VFs[i]; - break; - } - } - } // If we optimize the program for size, avoid creating the tail loop. 
if (OptForSize) { @@ -4833,9 +4803,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { return Factor; } -std::pair<unsigned, unsigned> -LoopVectorizationCostModel::getSmallestAndWidestTypes() { - unsigned MinWidth = -1U; +unsigned LoopVectorizationCostModel::getWidestType() { unsigned MaxWidth = 8; const DataLayout &DL = TheFunction->getParent()->getDataLayout(); @@ -4875,14 +4843,12 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() { if (T->isPointerTy() && !isConsecutiveLoadOrStore(&*it)) continue; - MinWidth = std::min(MinWidth, - (unsigned)DL.getTypeSizeInBits(T->getScalarType())); MaxWidth = std::max(MaxWidth, (unsigned)DL.getTypeSizeInBits(T->getScalarType())); } } - return {MinWidth, MaxWidth}; + return MaxWidth; } unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, @@ -4928,7 +4894,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, TargetNumRegisters = ForceTargetNumVectorRegs; } - RegisterUsage R = calculateRegisterUsage({VF})[0]; + LoopVectorizationCostModel::RegisterUsage R = calculateRegisterUsage(); // We divide by these constants so assume that we have at least one // instruction that uses at least one register. R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U); @@ -5038,9 +5004,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, return 1; } -SmallVector<LoopVectorizationCostModel::RegisterUsage, 8> -LoopVectorizationCostModel::calculateRegisterUsage( - const SmallVector<unsigned, 8> &VFs) { +LoopVectorizationCostModel::RegisterUsage +LoopVectorizationCostModel::calculateRegisterUsage() { // This function calculates the register usage by measuring the highest number // of values that are alive at a single location. Obviously, this is a very // rough estimation. 
We scan the loop in a topological order in order and @@ -5061,8 +5026,8 @@ LoopVectorizationCostModel::calculateRegisterUsage( LoopBlocksDFS DFS(TheLoop); DFS.perform(LI); - RegisterUsage RU; - RU.NumInstructions = 0; + RegisterUsage R; + R.NumInstructions = 0; // Each 'key' in the map opens a new interval. The values // of the map are the index of the 'last seen' usage of the @@ -5081,7 +5046,7 @@ LoopVectorizationCostModel::calculateRegisterUsage( unsigned Index = 0; for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), be = DFS.endRPO(); bb != be; ++bb) { - RU.NumInstructions += (*bb)->size(); + R.NumInstructions += (*bb)->size(); for (Instruction &I : **bb) { IdxToInstr[Index++] = &I; @@ -5116,20 +5081,10 @@ LoopVectorizationCostModel::calculateRegisterUsage( TransposeEnds[it->second].push_back(it->first); SmallSet<Instruction*, 8> OpenIntervals; + unsigned MaxUsage = 0; - // Get the size of the widest register. - unsigned MaxSafeDepDist = -1U; - if (Legal->getMaxSafeDepDistBytes() != -1U) - MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8; - unsigned WidestRegister = - std::min(TTI.getRegisterBitWidth(true), MaxSafeDepDist); - const DataLayout &DL = TheFunction->getParent()->getDataLayout(); - - SmallVector<RegisterUsage, 8> RUs(VFs.size()); - SmallVector<unsigned, 8> MaxUsages(VFs.size(), 0); DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); - for (unsigned int i = 0; i < Index; ++i) { Instruction *I = IdxToInstr[i]; // Ignore instructions that are never used within the loop. @@ -5141,47 +5096,27 @@ LoopVectorizationCostModel::calculateRegisterUsage( // Remove all of the instructions that end at this location. InstrList &List = TransposeEnds[i]; - for (unsigned int j = 0, e = List.size(); j < e; ++j) + for (unsigned int j=0, e = List.size(); j < e; ++j) OpenIntervals.erase(List[j]); - // For each VF find the maximum usage of registers. - for (unsigned j = 0, e = VFs.size(); j < e; ++j) { - // Count the number of live interals. 
- unsigned RegUsage = 0; - for (auto Inst : OpenIntervals) { - unsigned TypeSize = - DL.getTypeSizeInBits(Inst->getType()->getScalarType()); - RegUsage += std::max<unsigned>(1, VFs[j] * TypeSize / WidestRegister); - } - MaxUsages[j] = std::max(MaxUsages[j], RegUsage); - } + // Count the number of live interals. + MaxUsage = std::max(MaxUsage, OpenIntervals.size()); - DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " - << OpenIntervals.size() << '\n'); + DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " << + OpenIntervals.size() << '\n'); // Add the current instruction to the list of open intervals. OpenIntervals.insert(I); } - for (unsigned i = 0, e = VFs.size(); i < e; ++i) { - unsigned Invariant = 0; - for (auto Inst : LoopInvariants) { - unsigned TypeSize = - DL.getTypeSizeInBits(Inst->getType()->getScalarType()); - Invariant += std::max<unsigned>(1, VFs[i] * TypeSize / WidestRegister); - } - - DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n'); - DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n'); - DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n'); - DEBUG(dbgs() << "LV(REG): LoopSize: " << RU.NumInstructions << '\n'); - - RU.LoopInvariantRegs = Invariant; - RU.MaxLocalUsers = MaxUsages[i]; - RUs[i] = RU; - } + unsigned Invariant = LoopInvariants.size(); + DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsage << '\n'); + DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n'); + DEBUG(dbgs() << "LV(REG): LoopSize: " << R.NumInstructions << '\n'); - return RUs; + R.LoopInvariantRegs = Invariant; + R.MaxLocalUsers = MaxUsage; + return R; } unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { |