summary refs log tree commit diff stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 26
1 files changed, 21 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e434ca2e9db..f52b27ae5af 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1917,6 +1917,13 @@ public:
return Scalars->second.count(I);
}
+ /// \returns True if \p I can be truncated to a smaller bitwidth for \p VF:
+ /// VF > 1, in MinBWs, not profitable to scalarize, not scalar once vectorized.
+ bool canTruncateToMinimalBitwidth(Instruction *I, unsigned VF) const {
+ return VF > 1 && MinBWs.count(I) && !isProfitableToScalarize(I, VF) &&
+ !Legal->isScalarAfterVectorization(I);
+ }
+
private:
/// The vectorization cost is a combination of the cost itself and a boolean
/// indicating whether any of the contributing operations will actually
@@ -3725,6 +3732,11 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
//
SmallPtrSet<Value *, 4> Erased;
for (const auto &KV : Cost->getMinimalBitwidths()) {
+ // If the value wasn't vectorized, we must maintain the original scalar
+ // type. The absence of the value from VectorLoopValueMap indicates that it
+ // wasn't vectorized.
+ if (!VectorLoopValueMap.hasVector(KV.first))
+ continue;
VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
for (Value *&I : Parts) {
if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))
@@ -3817,6 +3829,11 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
// We'll have created a bunch of ZExts that are now parentless. Clean up.
for (const auto &KV : Cost->getMinimalBitwidths()) {
+ // If the value wasn't vectorized, we must maintain the original scalar
+ // type. The absence of the value from VectorLoopValueMap indicates that it
+ // wasn't vectorized.
+ if (!VectorLoopValueMap.hasVector(KV.first))
+ continue;
VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
for (Value *&I : Parts) {
ZExtInst *Inst = dyn_cast<ZExtInst>(I);
@@ -6837,7 +6854,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
unsigned VF,
Type *&VectorTy) {
Type *RetTy = I->getType();
- if (VF > 1 && MinBWs.count(I))
+ if (canTruncateToMinimalBitwidth(I, VF))
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
VectorTy = ToVectorTy(RetTy, VF);
auto SE = PSE.getSE();
@@ -6958,9 +6975,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
case Instruction::FCmp: {
Type *ValTy = I->getOperand(0)->getType();
Instruction *Op0AsInstruction = dyn_cast<Instruction>(I->getOperand(0));
- auto It = MinBWs.find(Op0AsInstruction);
- if (VF > 1 && It != MinBWs.end())
- ValTy = IntegerType::get(ValTy->getContext(), It->second);
+ if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF))
+ ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]);
VectorTy = ToVectorTy(ValTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
}
@@ -7108,7 +7124,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
Type *SrcScalarTy = I->getOperand(0)->getType();
Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF);
- if (VF > 1 && MinBWs.count(I)) {
+ if (canTruncateToMinimalBitwidth(I, VF)) {
// This cast is going to be shrunk. This may remove the cast or it might
// turn it into slightly different cast. For example, if MinBW == 16,
// "zext i8 %1 to i32" becomes "zext i8 %1 to i16".
OpenPOWER on IntegriCloud