summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize
diff options
context:
space:
mode:
authorMatthew Simpson <mssimpso@codeaurora.org>2016-12-12 21:11:04 +0000
committerMatthew Simpson <mssimpso@codeaurora.org>2016-12-12 21:11:04 +0000
commit92ce0230b54de190e3b5f3dfedb0ed043b59a612 (patch)
tree03c32cabb42a1243870e29a5cd36327e04b1c0d7 /llvm/lib/Transforms/Vectorize
parent035af9b346556edbb116dae63836f8b69c99c57c (diff)
downloadbcm5719-llvm-92ce0230b54de190e3b5f3dfedb0ed043b59a612.tar.gz
bcm5719-llvm-92ce0230b54de190e3b5f3dfedb0ed043b59a612.zip
[SLP] Fix sign-extends for type-shrinking
This patch ensures the correct minimum bit width during type-shrinking. Previously when type-shrinking, we always sign-extended values back to their original width. However, if we are going to sign-extend, and the sign bit is unknown, we have to increase the minimum bit width by one bit so the sign-extend will fill the upper bits correctly. If the sign bit is known to be zero, we can perform a zero-extend instead. This should fix PR31243. Reference: https://llvm.org/bugs/show_bug.cgi?id=31243 Differential Revision: https://reviews.llvm.org/D27466 llvm-svn: 289470
Diffstat (limited to 'llvm/lib/Transforms/Vectorize')
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp77
1 files changed, 62 insertions, 15 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 867de25841b..bcaa8439cff 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -910,8 +910,11 @@ private:
IRBuilder<> Builder;
/// A map of scalar integer values to the smallest bit width with which they
- /// can legally be represented.
- MapVector<Value *, uint64_t> MinBWs;
+ /// can legally be represented. The values map to (width, signed) pairs,
+ /// where "width" indicates the minimum bit width and "signed" is True if the
+ /// value must be signed-extended, rather than zero-extended, back to its
+ /// original width.
+ MapVector<Value *, std::pair<uint64_t, bool>> MinBWs;
};
} // end namespace llvm
@@ -1572,8 +1575,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// If we have computed a smaller type for the expression, update VecTy so
// that the costs will be accurate.
if (MinBWs.count(VL[0]))
- VecTy = VectorType::get(IntegerType::get(F->getContext(), MinBWs[VL[0]]),
- VL.size());
+ VecTy = VectorType::get(
+ IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
if (E->NeedToGather) {
if (allConstant(VL))
@@ -1929,10 +1932,12 @@ int BoUpSLP::getTreeCost() {
auto *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth);
auto *ScalarRoot = VectorizableTree[0].Scalars[0];
if (MinBWs.count(ScalarRoot)) {
- auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot]);
+ auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
+ auto Extend =
+ MinBWs[ScalarRoot].second ? Instruction::SExt : Instruction::ZExt;
VecTy = VectorType::get(MinTy, BundleWidth);
- ExtractCost += TTI->getExtractWithExtendCost(
- Instruction::SExt, EU.Scalar->getType(), VecTy, EU.Lane);
+ ExtractCost += TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
+ VecTy, EU.Lane);
} else {
ExtractCost +=
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, EU.Lane);
@@ -2718,7 +2723,7 @@ Value *BoUpSLP::vectorizeTree() {
if (auto *I = dyn_cast<Instruction>(VectorRoot))
Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
auto BundleWidth = VectorizableTree[0].Scalars.size();
- auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot]);
+ auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
auto *VecTy = VectorType::get(MinTy, BundleWidth);
auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy);
VectorizableTree[0].VectorizedValue = Trunc;
@@ -2726,6 +2731,16 @@ Value *BoUpSLP::vectorizeTree() {
DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n");
+ // If necessary, sign-extend or zero-extend ScalarRoot to the larger type
+ // specified by ScalarType.
+ auto extend = [&](Value *ScalarRoot, Value *Ex, Type *ScalarType) {
+ if (!MinBWs.count(ScalarRoot))
+ return Ex;
+ if (MinBWs[ScalarRoot].second)
+ return Builder.CreateSExt(Ex, ScalarType);
+ return Builder.CreateZExt(Ex, ScalarType);
+ };
+
// Extract all of the elements with the external uses.
for (const auto &ExternalUse : ExternalUses) {
Value *Scalar = ExternalUse.Scalar;
@@ -2760,8 +2775,7 @@ Value *BoUpSLP::vectorizeTree() {
Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
}
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
- if (MinBWs.count(ScalarRoot))
- Ex = Builder.CreateSExt(Ex, Scalar->getType());
+ Ex = extend(ScalarRoot, Ex, Scalar->getType());
CSEBlocks.insert(PH->getIncomingBlock(i));
PH->setOperand(i, Ex);
}
@@ -2769,16 +2783,14 @@ Value *BoUpSLP::vectorizeTree() {
} else {
Builder.SetInsertPoint(cast<Instruction>(User));
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
- if (MinBWs.count(ScalarRoot))
- Ex = Builder.CreateSExt(Ex, Scalar->getType());
+ Ex = extend(ScalarRoot, Ex, Scalar->getType());
CSEBlocks.insert(cast<Instruction>(User)->getParent());
User->replaceUsesOfWith(Scalar, Ex);
}
} else {
Builder.SetInsertPoint(&F->getEntryBlock().front());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
- if (MinBWs.count(ScalarRoot))
- Ex = Builder.CreateSExt(Ex, Scalar->getType());
+ Ex = extend(ScalarRoot, Ex, Scalar->getType());
CSEBlocks.insert(&F->getEntryBlock());
User->replaceUsesOfWith(Scalar, Ex);
}
@@ -3499,6 +3511,11 @@ void BoUpSLP::computeMinimumValueSizes() {
Mask.getBitWidth() - Mask.countLeadingZeros(), MaxBitWidth);
}
+ // True if the roots can be zero-extended back to their original type, rather
+ // than sign-extended. We know that if the leading bits are not demanded, we
+ // can safely zero-extend. So we initialize IsKnownPositive to True.
+ bool IsKnownPositive = true;
+
// If all the bits of the roots are demanded, we can try a little harder to
// compute a narrower type. This can happen, for example, if the roots are
// getelementptr indices. InstCombine promotes these indices to the pointer
@@ -3510,11 +3527,41 @@ void BoUpSLP::computeMinimumValueSizes() {
// compute the number of high-order bits we can truncate.
if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType())) {
MaxBitWidth = 8u;
+
+ // Determine if the sign bit of all the roots is known to be zero. If not,
+ // IsKnownPositive is set to False.
+ IsKnownPositive = all_of(TreeRoot, [&](Value *R) {
+ bool KnownZero = false;
+ bool KnownOne = false;
+ ComputeSignBit(R, KnownZero, KnownOne, *DL);
+ return KnownZero;
+ });
+
+ // Determine the maximum number of bits required to store the scalar
+ // values.
for (auto *Scalar : ToDemote) {
auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, 0, DT);
auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType());
MaxBitWidth = std::max<unsigned>(NumTypeBits - NumSignBits, MaxBitWidth);
}
+
+ // If we can't prove that the sign bit is zero, we must add one to the
+ // maximum bit width to account for the unknown sign bit. This preserves
+ // the existing sign bit so we can safely sign-extend the root back to the
+ // original type. Otherwise, if we know the sign bit is zero, we will
+ // zero-extend the root instead.
+ //
+ // FIXME: This is somewhat suboptimal, as there will be cases where adding
+ // one to the maximum bit width will yield a larger-than-necessary
+ // type. In general, we need to add an extra bit only if we can't
+ // prove that the upper bit of the original type is equal to the
+ // upper bit of the proposed smaller type. If these two bits are the
+ // same (either zero or one) we know that sign-extending from the
+ // smaller type will result in the same value. Here, since we can't
+ // yet prove this, we are just making the proposed smaller type
+ // larger to ensure correctness.
+ if (!IsKnownPositive)
+ ++MaxBitWidth;
}
// Round MaxBitWidth up to the next power-of-two.
@@ -3534,7 +3581,7 @@ void BoUpSLP::computeMinimumValueSizes() {
// Finally, map the values we can demote to the maximum bit with we computed.
for (auto *Scalar : ToDemote)
- MinBWs[Scalar] = MaxBitWidth;
+ MinBWs[Scalar] = std::make_pair(MaxBitWidth, !IsKnownPositive);
}
namespace {
OpenPOWER on IntegriCloud