[LV] Exclude loop-invariant inputs from scalar cost computation.

Loop-invariant operands do not need to be scalarized, because their values are
defined outside the loop. We should ignore them when computing the
scalarization overhead.

Fixes PR41294.

Reviewers: hsaito, rengolin, dcaballe, Ayal

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D59995

llvm-svn: 366030
author: Florian Hahn <flo@fhahn.com> 2019-07-14 20:12:36 +0000
committer: Florian Hahn <flo@fhahn.com> 2019-07-14 20:12:36 +0000
commit: 9428d95ce7f84844a076fe13219db96a78e3bd44 (patch)
tree: e447793846bef77b8186d9b38544ed00168491e1 /llvm/lib
parent: 8111807a03c7ecc340fe2d8497b422b09e111fe9 (diff)
download: bcm5719-llvm-9428d95ce7f84844a076fe13219db96a78e3bd44.tar.gz
bcm5719-llvm-9428d95ce7f84844a076fe13219db96a78e3bd44.zip
1 files changed, 42 insertions, 22 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c1bb43bc5bd..22cf9c7db94 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1179,7 +1179,7 @@ public:
   /// VF. Return the cost of the instruction, including scalarization overhead
   /// if it's needed. The flag NeedToScalarize shows if the call needs to be
   /// scalarized -
-  // i.e. either vector version isn't available, or is too expensive.
+  /// i.e. either vector version isn't available, or is too expensive.
   unsigned getVectorCallCost(CallInst *CI, unsigned VF, bool &NeedToScalarize);
 
 private:
@@ -1332,6 +1332,30 @@ private:
 
   DecisionList WideningDecisions;
 
+  /// Returns true if \p V is expected to be vectorized and it needs to be
+  /// extracted.
+  bool needsExtract(Value *V, unsigned VF) const {
+    Instruction *I = dyn_cast<Instruction>(V);
+    if (VF == 1 || !I || !TheLoop->contains(I) || TheLoop->isLoopInvariant(I))
+      return false;
+
+    // Assume we can vectorize V (and hence we need extraction) if the
+    // scalars are not computed yet. This can happen, because it is called
+    // via getScalarizationOverhead from setCostBasedWideningDecision, before
+    // the scalars are collected. That should be a safe assumption in most
+    // cases, because we check if the operands have vectorizable types
+    // beforehand in LoopVectorizationLegality.
+    return Scalars.find(VF) == Scalars.end() ||
+           !isScalarAfterVectorization(I, VF);
+  };
+
+  /// Returns a vector containing only the operands that need to be extracted.
+  SmallVector<Value *, 4> filterExtractingOperands(Instruction::op_range Ops,
+                                                   unsigned VF) {
+    return SmallVector<Value *, 4>(make_filter_range(
+        Ops, [this, VF](Value *V) { return this->needsExtract(V, VF); }));
+  }
+
 public:
   /// The loop that we evaluate.
   Loop *TheLoop;
@@ -3125,8 +3149,11 @@ unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
   if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
     FMF = FPMO->getFastMathFlags();
 
-  SmallVector<Value *, 4> Operands(CI->arg_operands());
-  return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF);
+  // Skip operands that do not require extraction/scalarization and do not incur
+  // any overhead.
+  return TTI.getIntrinsicInstrCost(
+      ID, CI->getType(), filterExtractingOperands(CI->arg_operands(), VF), FMF,
+      VF);
 }
 
 static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
@@ -5346,15 +5373,6 @@ int LoopVectorizationCostModel::computePredInstDiscount(
     return true;
   };
 
-  // Returns true if an operand that cannot be scalarized must be extracted
-  // from a vector. We will account for this scalarization overhead below. Note
-  // that the non-void predicated instructions are placed in their own blocks,
-  // and their return values are inserted into vectors. Thus, an extract would
-  // still be required.
-  auto needsExtract = [&](Instruction *I) -> bool {
-    return TheLoop->contains(I) && !isScalarAfterVectorization(I, VF);
-  };
-
   // Compute the expected cost discount from scalarizing the entire expression
   // feeding the predicated instruction. We currently only consider expressions
   // that are single-use instruction chains.
@@ -5394,7 +5412,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
                "Instruction has non-scalar type");
         if (canBeScalarized(J))
           Worklist.push_back(J);
-        else if (needsExtract(J))
+        else if (needsExtract(J, VF))
           ScalarCost += TTI.getScalarizationOverhead(
                               ToVectorTy(J->getType(),VF), false, true);
       }
@@ -5684,16 +5702,18 @@ unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
   if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
     return Cost;
 
-  if (CallInst *CI = dyn_cast<CallInst>(I)) {
-    SmallVector<const Value *, 4> Operands(CI->arg_operands());
-    Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
-  } else if (!isa<StoreInst>(I) ||
-             !TTI.supportsEfficientVectorElementLoadStore()) {
-    SmallVector<const Value *, 4> Operands(I->operand_values());
-    Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
-  }
+  // Some targets support efficient element stores.
+  if (isa<StoreInst>(I) && TTI.supportsEfficientVectorElementLoadStore())
+    return Cost;
 
-  return Cost;
+  // Collect operands to consider.
+  CallInst *CI = dyn_cast<CallInst>(I);
+  Instruction::op_range Ops = CI ? CI->arg_operands() : I->operands();
+
+  // Skip operands that do not require extraction/scalarization and do not incur
+  // any overhead.
+  return Cost + TTI.getOperandsScalarizationOverhead(
+                    filterExtractingOperands(Ops, VF), VF);
 }
 
 void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
author	Florian Hahn <flo@fhahn.com>	2019-07-14 20:12:36 +0000
committer	Florian Hahn <flo@fhahn.com>	2019-07-14 20:12:36 +0000
commit	9428d95ce7f84844a076fe13219db96a78e3bd44 (patch)
tree	e447793846bef77b8186d9b38544ed00168491e1 /llvm/lib
parent	8111807a03c7ecc340fe2d8497b422b09e111fe9 (diff)
download	bcm5719-llvm-9428d95ce7f84844a076fe13219db96a78e3bd44.tar.gz bcm5719-llvm-9428d95ce7f84844a076fe13219db96a78e3bd44.zip