summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize
diff options
context:
space:
mode:
authorMircea Trofin <mtrofin@google.com>2018-02-07 23:29:52 +0000
committerMircea Trofin <mtrofin@google.com>2018-02-07 23:29:52 +0000
commit06ac8cfbd103b2a024d6c1c01ae8912ebce523a0 (patch)
tree3eb50f8e1067ec2f2c10261508820f598a0ea5ca /llvm/lib/Transforms/Vectorize
parent8e6107a0e49940646d08e5d703d2d128000bcdea (diff)
downloadbcm5719-llvm-06ac8cfbd103b2a024d6c1c01ae8912ebce523a0.tar.gz
bcm5719-llvm-06ac8cfbd103b2a024d6c1c01ae8912ebce523a0.zip
Verify profile data confirms large loop trip counts.
Summary: Loops with inequality comparers, such as: // unsigned bound for (unsigned i = 1; i < bound; ++i) {...} have getSmallConstantMaxTripCount report a large maximum static trip count - in this case, 0xffff fffe. However, profiling info may show that the trip count is much smaller, and thus counter-recommend vectorization. This change: - flips loop-vectorize-with-block-frequency on by default. - validates profiled loop frequency data supports vectorization, when static info appears to not counter-recommend it. Absence of profile data means we rely on static data, just as we've done so far. Reviewers: twoh, mkuper, davidxl, tejohnson, Ayal Reviewed By: davidxl Subscribers: bkramer, llvm-commits Differential Revision: https://reviews.llvm.org/D42946 llvm-svn: 324543
Diffstat (limited to 'llvm/lib/Transforms/Vectorize')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp24
1 files changed, 20 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f5ec20fda85..973617cc0c7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -208,7 +208,7 @@ static cl::opt<unsigned> SmallLoopCost(
"The cost of a loop that is considered 'small' by the interleaver."));
static cl::opt<bool> LoopVectorizeWithBlockFrequency(
- "loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden,
+ "loop-vectorize-with-block-frequency", cl::init(true), cl::Hidden,
cl::desc("Enable the use of the block frequency analysis to access PGO "
"heuristics minimizing code growth in cold regions and being more "
"aggressive in hot regions."));
@@ -8347,9 +8347,21 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Check the loop for a trip count threshold: vectorize loops with a tiny trip
// count by optimizing for size, to minimize overheads.
- unsigned ExpectedTC = SE->getSmallConstantMaxTripCount(L);
- bool HasExpectedTC = (ExpectedTC > 0);
-
+ // Prefer constant trip counts over profile data, over upper bound estimate.
+ unsigned ExpectedTC = 0;
+ bool HasExpectedTC = false;
+ if (const SCEVConstant *ConstExits =
+ dyn_cast<SCEVConstant>(SE->getBackedgeTakenCount(L))) {
+ const APInt &ExitsCount = ConstExits->getAPInt();
+ // We are interested in small values for ExpectedTC. Skip over those that
+ // can't fit an unsigned.
+ if (ExitsCount.ult(std::numeric_limits<unsigned>::max())) {
+ ExpectedTC = static_cast<unsigned>(ExitsCount.getZExtValue()) + 1;
+ HasExpectedTC = true;
+ }
+ }
+ // ExpectedTC may be large because it's bound by a variable. Check
+ // profiling information to validate we should vectorize.
if (!HasExpectedTC && LoopVectorizeWithBlockFrequency) {
auto EstimatedTC = getLoopEstimatedTripCount(L);
if (EstimatedTC) {
@@ -8357,6 +8369,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
HasExpectedTC = true;
}
}
+ if (!HasExpectedTC) {
+ ExpectedTC = SE->getSmallConstantMaxTripCount(L);
+ HasExpectedTC = (ExpectedTC > 0);
+ }
if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) {
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
OpenPOWER on IntegriCloud