[LV] Don't attempt to type-shrink scalarized instructions

After r288909, instructions feeding predicated instructions may be scalarized if profitable. Since these instructions will remain scalar, we shouldn't attempt to type-shrink them. We should only truncate vector types to their minimal bit widths. This bug was exposed by enabling the vectorization of loops containing conditional stores by default.

llvm-svn: 289958
author: Matthew Simpson <mssimpso@codeaurora.org> 2016-12-16 16:52:35 +0000
committer: Matthew Simpson <mssimpso@codeaurora.org> 2016-12-16 16:52:35 +0000
commit: 099af810de82cb7502319ea03e68883be516b5e0 (patch)
tree: 1734bebadf17909862338d4a2c22387af685c0a5 /llvm/lib/Transforms
parent: 27978005955754bf11b71766c08b768e9fb5c040 (diff)
download: bcm5719-llvm-099af810de82cb7502319ea03e68883be516b5e0.tar.gz
bcm5719-llvm-099af810de82cb7502319ea03e68883be516b5e0.zip
1 file changed, 21 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e434ca2e9db..f52b27ae5af 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1917,6 +1917,13 @@ public:
     return Scalars->second.count(I);
   }
 
+  /// \returns True if instruction \p I can be truncated to a smaller bitwidth
+  /// for vectorization factor \p VF.
+  bool canTruncateToMinimalBitwidth(Instruction *I, unsigned VF) const {
+    return VF > 1 && MinBWs.count(I) && !isProfitableToScalarize(I, VF) &&
+           !Legal->isScalarAfterVectorization(I);
+  }
+
 private:
   /// The vectorization cost is a combination of the cost itself and a boolean
   /// indicating whether any of the contributing operations will actually
@@ -3725,6 +3732,11 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
   //
   SmallPtrSet<Value *, 4> Erased;
   for (const auto &KV : Cost->getMinimalBitwidths()) {
+    // If the value wasn't vectorized, we must maintain the original scalar
+    // type. The absence of the value from VectorLoopValueMap indicates that it
+    // wasn't vectorized.
+    if (!VectorLoopValueMap.hasVector(KV.first))
+      continue;
     VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
     for (Value *&I : Parts) {
       if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))
@@ -3817,6 +3829,11 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
 
   // We'll have created a bunch of ZExts that are now parentless. Clean up.
   for (const auto &KV : Cost->getMinimalBitwidths()) {
+    // If the value wasn't vectorized, we must maintain the original scalar
+    // type. The absence of the value from VectorLoopValueMap indicates that it
+    // wasn't vectorized.
+    if (!VectorLoopValueMap.hasVector(KV.first))
+      continue;
     VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
     for (Value *&I : Parts) {
       ZExtInst *Inst = dyn_cast<ZExtInst>(I);
@@ -6837,7 +6854,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
                                                         unsigned VF,
                                                         Type *&VectorTy) {
   Type *RetTy = I->getType();
-  if (VF > 1 && MinBWs.count(I))
+  if (canTruncateToMinimalBitwidth(I, VF))
     RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
   VectorTy = ToVectorTy(RetTy, VF);
   auto SE = PSE.getSE();
@@ -6958,9 +6975,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   case Instruction::FCmp: {
     Type *ValTy = I->getOperand(0)->getType();
     Instruction *Op0AsInstruction = dyn_cast<Instruction>(I->getOperand(0));
-    auto It = MinBWs.find(Op0AsInstruction);
-    if (VF > 1 && It != MinBWs.end())
-      ValTy = IntegerType::get(ValTy->getContext(), It->second);
+    if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF))
+      ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]);
     VectorTy = ToVectorTy(ValTy, VF);
     return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
   }
@@ -7108,7 +7124,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
 
     Type *SrcScalarTy = I->getOperand(0)->getType();
     Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF);
-    if (VF > 1 && MinBWs.count(I)) {
+    if (canTruncateToMinimalBitwidth(I, VF)) {
       // This cast is going to be shrunk. This may remove the cast or it might
       // turn it into slightly different cast. For example, if MinBW == 16,
       // "zext i8 %1 to i32" becomes "zext i8 %1 to i16".
author	Matthew Simpson <mssimpso@codeaurora.org>	2016-12-16 16:52:35 +0000
committer	Matthew Simpson <mssimpso@codeaurora.org>	2016-12-16 16:52:35 +0000
commit	099af810de82cb7502319ea03e68883be516b5e0 (patch)
tree	1734bebadf17909862338d4a2c22387af685c0a5 /llvm/lib/Transforms
parent	27978005955754bf11b71766c08b768e9fb5c040 (diff)
download	bcm5719-llvm-099af810de82cb7502319ea03e68883be516b5e0.tar.gz bcm5719-llvm-099af810de82cb7502319ea03e68883be516b5e0.zip