diff options
author | Matthew Simpson <mssimpso@codeaurora.org> | 2016-12-16 16:52:35 +0000 |
---|---|---|
committer | Matthew Simpson <mssimpso@codeaurora.org> | 2016-12-16 16:52:35 +0000 |
commit | 099af810de82cb7502319ea03e68883be516b5e0 (patch) | |
tree | 1734bebadf17909862338d4a2c22387af685c0a5 /llvm/lib/Transforms | |
parent | 27978005955754bf11b71766c08b768e9fb5c040 (diff) | |
download | bcm5719-llvm-099af810de82cb7502319ea03e68883be516b5e0.tar.gz bcm5719-llvm-099af810de82cb7502319ea03e68883be516b5e0.zip |
[LV] Don't attempt to type-shrink scalarized instructions
After r288909, instructions feeding predicated instructions may be scalarized
if profitable. Since these instructions will remain scalar, we shouldn't
attempt to type-shrink them. We should only truncate vector types to their
minimal bit widths. This bug was exposed by enabling the vectorization of loops
containing conditional stores by default.
llvm-svn: 289958
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 26 |
1 file changed, 21 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e434ca2e9db..f52b27ae5af 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1917,6 +1917,13 @@ public:
     return Scalars->second.count(I);
   }
 
+  /// \returns True if instruction \p I can be truncated to a smaller bitwidth
+  /// for vectorization factor \p VF.
+  bool canTruncateToMinimalBitwidth(Instruction *I, unsigned VF) const {
+    return VF > 1 && MinBWs.count(I) && !isProfitableToScalarize(I, VF) &&
+           !Legal->isScalarAfterVectorization(I);
+  }
+
 private:
   /// The vectorization cost is a combination of the cost itself and a boolean
   /// indicating whether any of the contributing operations will actually
@@ -3725,6 +3732,11 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
   //
   SmallPtrSet<Value *, 4> Erased;
   for (const auto &KV : Cost->getMinimalBitwidths()) {
+    // If the value wasn't vectorized, we must maintain the original scalar
+    // type. The absence of the value from VectorLoopValueMap indicates that it
+    // wasn't vectorized.
+    if (!VectorLoopValueMap.hasVector(KV.first))
+      continue;
     VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
     for (Value *&I : Parts) {
       if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))
@@ -3817,6 +3829,11 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
 
   // We'll have created a bunch of ZExts that are now parentless. Clean up.
   for (const auto &KV : Cost->getMinimalBitwidths()) {
+    // If the value wasn't vectorized, we must maintain the original scalar
+    // type. The absence of the value from VectorLoopValueMap indicates that it
+    // wasn't vectorized.
+    if (!VectorLoopValueMap.hasVector(KV.first))
+      continue;
     VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
     for (Value *&I : Parts) {
       ZExtInst *Inst = dyn_cast<ZExtInst>(I);
@@ -6837,7 +6854,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
                                                         unsigned VF,
                                                         Type *&VectorTy) {
   Type *RetTy = I->getType();
-  if (VF > 1 && MinBWs.count(I))
+  if (canTruncateToMinimalBitwidth(I, VF))
     RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
   VectorTy = ToVectorTy(RetTy, VF);
   auto SE = PSE.getSE();
@@ -6958,9 +6975,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   case Instruction::FCmp: {
     Type *ValTy = I->getOperand(0)->getType();
     Instruction *Op0AsInstruction = dyn_cast<Instruction>(I->getOperand(0));
-    auto It = MinBWs.find(Op0AsInstruction);
-    if (VF > 1 && It != MinBWs.end())
-      ValTy = IntegerType::get(ValTy->getContext(), It->second);
+    if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF))
+      ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]);
     VectorTy = ToVectorTy(ValTy, VF);
     return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
   }
@@ -7108,7 +7124,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
 
     Type *SrcScalarTy = I->getOperand(0)->getType();
     Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF);
-    if (VF > 1 && MinBWs.count(I)) {
+    if (canTruncateToMinimalBitwidth(I, VF)) {
       // This cast is going to be shrunk. This may remove the cast or it might
       // turn it into slightly different cast. For example, if MinBW == 16,
       // "zext i8 %1 to i32" becomes "zext i8 %1 to i16".