summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
author: Cong Hou <congh@google.com> 2015-10-29 05:35:22 +0000
committer: Cong Hou <congh@google.com> 2015-10-29 05:35:22 +0000
commit: 45bd8ce64c53f86711f5a5ce61db751a3640d0d1 (patch)
tree: 7ec5dd3204e584b00880ce66fcc73b9dd3bcd723 /llvm/lib/Transforms
parent: 15c0fbaae150725cbf9c8d1a4cb2f2a90f23de86 (diff)
download: bcm5719-llvm-45bd8ce64c53f86711f5a5ce61db751a3640d0d1.tar.gz
download: bcm5719-llvm-45bd8ce64c53f86711f5a5ce61db751a3640d0d1.zip
Revert revision 251592, as it fails a test on some platforms.
llvm-svn: 251617
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 121
1 files changed, 28 insertions, 93 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7a28473c25c..ae5ec8cb88a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -126,11 +126,6 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
"trip count that is smaller than this "
"value."));
-static cl::opt<bool> MaximizeBandwidth(
- "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
- cl::desc("Maximize bandwidth when selecting vectorization factor which "
- "will be determined by the smallest type in loop."));
-
/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
/// for (i = 0; i < N; ++i)
@@ -1382,10 +1377,10 @@ public:
/// possible.
VectorizationFactor selectVectorizationFactor(bool OptForSize);
- /// \return The size (in bits) of the smallest and widest types in the code
- /// that needs to be vectorized. We ignore values that remain scalar such as
+ /// \return The size (in bits) of the widest type in the code that
+ /// needs to be vectorized. We ignore values that remain scalar such as
/// 64 bit loop indices.
- std::pair<unsigned, unsigned> getSmallestAndWidestTypes();
+ unsigned getWidestType();
/// \return The desired interleave count.
/// If interleave count has been specified by metadata it will be returned.
@@ -1412,10 +1407,8 @@ public:
unsigned NumInstructions;
};
- /// \return Returns information about the register usages of the loop for the
- /// given vectorization factors.
- SmallVector<RegisterUsage, 8>
- calculateRegisterUsage(const SmallVector<unsigned, 8> &VFs);
+ /// \return information about the register usage of the loop.
+ RegisterUsage calculateRegisterUsage();
private:
/// Returns the expected execution cost. The unit of the cost does
@@ -4714,8 +4707,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
- unsigned SmallestType, WidestType;
- std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
+ unsigned WidestType = getWidestType();
unsigned WidestRegister = TTI.getRegisterBitWidth(true);
unsigned MaxSafeDepDist = -1U;
if (Legal->getMaxSafeDepDistBytes() != -1U)
@@ -4723,9 +4715,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
WidestRegister = ((WidestRegister < MaxSafeDepDist) ?
WidestRegister : MaxSafeDepDist);
unsigned MaxVectorSize = WidestRegister / WidestType;
-
- DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / "
- << WidestType << " bits.\n");
+ DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n");
DEBUG(dbgs() << "LV: The Widest register is: "
<< WidestRegister << " bits.\n");
@@ -4738,26 +4728,6 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
" into one vector!");
unsigned VF = MaxVectorSize;
- if (MaximizeBandwidth && !OptForSize) {
- // Collect all viable vectorization factors.
- SmallVector<unsigned, 8> VFs;
- unsigned NewMaxVectorSize = WidestRegister / SmallestType;
- for (unsigned VS = MaxVectorSize; VS <= NewMaxVectorSize; VS *= 2)
- VFs.push_back(VS);
-
- // For each VF calculate its register usage.
- auto RUs = calculateRegisterUsage(VFs);
-
- // Select the largest VF which doesn't require more registers than existing
- // ones.
- unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true);
- for (int i = RUs.size() - 1; i >= 0; --i) {
- if (RUs[i].MaxLocalUsers <= TargetNumRegisters) {
- VF = VFs[i];
- break;
- }
- }
- }
// If we optimize the program for size, avoid creating the tail loop.
if (OptForSize) {
@@ -4833,9 +4803,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
return Factor;
}
-std::pair<unsigned, unsigned>
-LoopVectorizationCostModel::getSmallestAndWidestTypes() {
- unsigned MinWidth = -1U;
+unsigned LoopVectorizationCostModel::getWidestType() {
unsigned MaxWidth = 8;
const DataLayout &DL = TheFunction->getParent()->getDataLayout();
@@ -4875,14 +4843,12 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() {
if (T->isPointerTy() && !isConsecutiveLoadOrStore(&*it))
continue;
- MinWidth = std::min(MinWidth,
- (unsigned)DL.getTypeSizeInBits(T->getScalarType()));
MaxWidth = std::max(MaxWidth,
(unsigned)DL.getTypeSizeInBits(T->getScalarType()));
}
}
- return {MinWidth, MaxWidth};
+ return MaxWidth;
}
unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
@@ -4928,7 +4894,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
TargetNumRegisters = ForceTargetNumVectorRegs;
}
- RegisterUsage R = calculateRegisterUsage({VF})[0];
+ LoopVectorizationCostModel::RegisterUsage R = calculateRegisterUsage();
// We divide by these constants so assume that we have at least one
// instruction that uses at least one register.
R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U);
@@ -5038,9 +5004,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
return 1;
}
-SmallVector<LoopVectorizationCostModel::RegisterUsage, 8>
-LoopVectorizationCostModel::calculateRegisterUsage(
- const SmallVector<unsigned, 8> &VFs) {
+LoopVectorizationCostModel::RegisterUsage
+LoopVectorizationCostModel::calculateRegisterUsage() {
// This function calculates the register usage by measuring the highest number
// of values that are alive at a single location. Obviously, this is a very
// rough estimation. We scan the loop in a topological order in order and
@@ -5061,8 +5026,8 @@ LoopVectorizationCostModel::calculateRegisterUsage(
LoopBlocksDFS DFS(TheLoop);
DFS.perform(LI);
- RegisterUsage RU;
- RU.NumInstructions = 0;
+ RegisterUsage R;
+ R.NumInstructions = 0;
// Each 'key' in the map opens a new interval. The values
// of the map are the index of the 'last seen' usage of the
@@ -5081,7 +5046,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(
unsigned Index = 0;
for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
be = DFS.endRPO(); bb != be; ++bb) {
- RU.NumInstructions += (*bb)->size();
+ R.NumInstructions += (*bb)->size();
for (Instruction &I : **bb) {
IdxToInstr[Index++] = &I;
@@ -5116,20 +5081,10 @@ LoopVectorizationCostModel::calculateRegisterUsage(
TransposeEnds[it->second].push_back(it->first);
SmallSet<Instruction*, 8> OpenIntervals;
+ unsigned MaxUsage = 0;
- // Get the size of the widest register.
- unsigned MaxSafeDepDist = -1U;
- if (Legal->getMaxSafeDepDistBytes() != -1U)
- MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8;
- unsigned WidestRegister =
- std::min(TTI.getRegisterBitWidth(true), MaxSafeDepDist);
- const DataLayout &DL = TheFunction->getParent()->getDataLayout();
-
- SmallVector<RegisterUsage, 8> RUs(VFs.size());
- SmallVector<unsigned, 8> MaxUsages(VFs.size(), 0);
DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
-
for (unsigned int i = 0; i < Index; ++i) {
Instruction *I = IdxToInstr[i];
// Ignore instructions that are never used within the loop.
@@ -5141,47 +5096,27 @@ LoopVectorizationCostModel::calculateRegisterUsage(
// Remove all of the instructions that end at this location.
InstrList &List = TransposeEnds[i];
- for (unsigned int j = 0, e = List.size(); j < e; ++j)
+ for (unsigned int j=0, e = List.size(); j < e; ++j)
OpenIntervals.erase(List[j]);
- // For each VF find the maximum usage of registers.
- for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
- // Count the number of live interals.
- unsigned RegUsage = 0;
- for (auto Inst : OpenIntervals) {
- unsigned TypeSize =
- DL.getTypeSizeInBits(Inst->getType()->getScalarType());
- RegUsage += std::max<unsigned>(1, VFs[j] * TypeSize / WidestRegister);
- }
- MaxUsages[j] = std::max(MaxUsages[j], RegUsage);
- }
+ // Count the number of live interals.
+ MaxUsage = std::max(MaxUsage, OpenIntervals.size());
- DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # "
- << OpenIntervals.size() << '\n');
+ DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " <<
+ OpenIntervals.size() << '\n');
// Add the current instruction to the list of open intervals.
OpenIntervals.insert(I);
}
- for (unsigned i = 0, e = VFs.size(); i < e; ++i) {
- unsigned Invariant = 0;
- for (auto Inst : LoopInvariants) {
- unsigned TypeSize =
- DL.getTypeSizeInBits(Inst->getType()->getScalarType());
- Invariant += std::max<unsigned>(1, VFs[i] * TypeSize / WidestRegister);
- }
-
- DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n');
- DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n');
- DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n');
- DEBUG(dbgs() << "LV(REG): LoopSize: " << RU.NumInstructions << '\n');
-
- RU.LoopInvariantRegs = Invariant;
- RU.MaxLocalUsers = MaxUsages[i];
- RUs[i] = RU;
- }
+ unsigned Invariant = LoopInvariants.size();
+ DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsage << '\n');
+ DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n');
+ DEBUG(dbgs() << "LV(REG): LoopSize: " << R.NumInstructions << '\n');
- return RUs;
+ R.LoopInvariantRegs = Invariant;
+ R.MaxLocalUsers = MaxUsage;
+ return R;
}
unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
OpenPOWER on IntegriCloud