summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Florian Hahn <flo@fhahn.com> 2019-03-28 10:37:12 +0000
committer: Florian Hahn <flo@fhahn.com> 2019-03-28 10:37:12 +0000
commit: e21ed594d8ab9decec31a13cf791579e2ded682c (patch)
tree: 5df12ebebb3a725fe387ddcb4f5bae649d06690b /llvm/lib
parent: 22be913ac00bd220af46f7e1f4f5ea59027c0b11 (diff)
downloadbcm5719-llvm-e21ed594d8ab9decec31a13cf791579e2ded682c.tar.gz
bcm5719-llvm-e21ed594d8ab9decec31a13cf791579e2ded682c.zip
[VPlan] Determine Vector Width programmatically.
With this change, the VPlan native path is triggered with the directive:

  #pragma clang loop vectorize(enable)

There is no need to specify the vectorize_width(N) clause.

Patch by Francesco Petrogalli <francesco.petrogalli@arm.com>

Differential Revision: https://reviews.llvm.org/D57598
llvm-svn: 357156
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h | 4
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 53
2 files changed, 37 insertions, 20 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 7264eb97e1b..8b7cf300eef 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -174,6 +174,10 @@ struct VectorizationFactor {
// Width 1 means no vectorization, cost 0 means uncomputed cost.
static VectorizationFactor Disabled() { return {1, 0}; }
+
+ bool operator==(const VectorizationFactor &rhs) const {
+ return Width == rhs.Width && Cost == rhs.Cost; // Both fields must match.
+ }
};
/// Planner drives the vectorization process after having passed
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b57012291df..c59743c9ff9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1383,12 +1383,6 @@ static bool isExplicitVecOuterLoop(Loop *OuterLp,
return false;
}
- if (!Hints.getWidth()) {
- LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No user vector width.\n");
- Hints.emitRemarkWithHints();
- return false;
- }
-
if (Hints.getInterleave() > 1) {
// TODO: Interleave support is future work.
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Interleave is not supported for "
@@ -6081,31 +6075,48 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
}
}
+// Returns WidestVectorRegBits divided by the widest-type value, i.e. the
+// second result of CM.getSmallestAndWidestTypes(). TODO: we could return a
+// (MinVF, MaxVF) pair for `buildVPlans(MinVF, MaxVF)` instead of
+// `buildVPlans(VF, VF)`, but VPlan currently has no cost model that can
+// choose which plan to execute if more than one is generated.
+unsigned determineVPlanVF(const unsigned WidestVectorRegBits,
+ LoopVectorizationCostModel &CM) {
+ unsigned WidestType;
+ std::tie(std::ignore, WidestType) = CM.getSmallestAndWidestTypes();
+ return WidestVectorRegBits / WidestType;
+}
+
VectorizationFactor
LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize,
unsigned UserVF) {
+ unsigned VF = UserVF;
// Outer loop handling: They may require CFG and instruction level
// transformations before even evaluating whether vectorization is profitable.
// Since we cannot modify the incoming IR, we need to build VPlan upfront in
// the vectorization pipeline.
if (!OrigLoop->empty()) {
- // TODO: If UserVF is not provided, we set UserVF to 4 for stress testing.
- // This won't be necessary when UserVF is not required in the VPlan-native
- // path.
- if (VPlanBuildStressTest && !UserVF)
- UserVF = 4;
+ // If the user doesn't provide a vectorization factor, determine a
+ // reasonable one.
+ if (!UserVF) {
+ // We set VF to 4 for stress testing.
+ if (VPlanBuildStressTest)
+ VF = 4;
+ else
+ VF = determineVPlanVF(TTI->getRegisterBitWidth(true /* Vector*/), CM);
+ }
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
- assert(UserVF && "Expected UserVF for outer loop vectorization.");
- assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
- LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
- buildVPlans(UserVF, UserVF);
+ assert(isPowerOf2_32(VF) && "VF needs to be a power of two");
+ LLVM_DEBUG(dbgs() << "LV: Using " << (UserVF ? "user VF " : "computed VF ")
+ << VF << " to build VPlans.\n");
+ buildVPlans(VF, VF);
// For VPlan build stress testing, we bail out after VPlan construction.
if (VPlanBuildStressTest)
return VectorizationFactor::Disabled();
- return {UserVF, 0};
+ return {VF, 0};
}
LLVM_DEBUG(
@@ -7128,7 +7139,7 @@ static bool processLoopInVPlanNativePath(
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM);
// Get user vectorization factor.
- unsigned UserVF = Hints.getWidth();
+ const unsigned UserVF = Hints.getWidth();
// Check the function attributes to find out if this function should be
// optimized for size.
@@ -7136,16 +7147,18 @@ static bool processLoopInVPlanNativePath(
Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
// Plan how to best vectorize, return the best VF and its cost.
- VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
+ const VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
// If we are stress testing VPlan builds, do not attempt to generate vector
// code. Masked vector code generation support will follow soon.
- if (VPlanBuildStressTest || EnableVPlanPredication)
+ // Also, do not attempt to vectorize if no vector code will be produced.
+ if (VPlanBuildStressTest || EnableVPlanPredication ||
+ VectorizationFactor::Disabled() == VF)
return false;
LVP.setBestPlan(VF.Width, 1);
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, UserVF, 1, LVL,
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL,
&CM);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
OpenPOWER on IntegriCloud