diff options
author | Nadav Rotem <nrotem@apple.com> | 2013-01-07 21:54:51 +0000 |
---|---|---|
committer | Nadav Rotem <nrotem@apple.com> | 2013-01-07 21:54:51 +0000 |
commit | c60d7d96f5c8f7200fca9dc2193fa115f51d8561 (patch) | |
tree | 2d02065a4061fde19dc3777432c3fca2ec63f4de /llvm/lib | |
parent | 802b62871eaff81980aeaafebd639319cc93c051 (diff) | |
download | bcm5719-llvm-c60d7d96f5c8f7200fca9dc2193fa115f51d8561.tar.gz bcm5719-llvm-c60d7d96f5c8f7200fca9dc2193fa115f51d8561.zip |
LoopVectorizer: When we vectorize and widen loops, we process many elements at once. This is a good thing, except for
small loops. On small loops, the post-loop that handles scalars (and runs slower) can take more time to execute than the
rest of the loop. This patch disables widening of loops with a small static trip count.
llvm-svn: 171798
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 18 |
1 files changed, 15 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 2c1af1d8d23..b266d9dc09a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -101,7 +101,13 @@ EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")); /// We don't vectorize loops with a known constant trip count below this number. -static const unsigned TinyTripCountThreshold = 16; +static const unsigned TinyTripCountVectorThreshold = 16; + +/// We don't unroll loops with a known constant trip count below this number. +static const unsigned TinyTripCountUnrollThreshold = 128; + +/// We don't unroll loops that are larget than this threshold. +static const unsigned MaxLoopSizeThreshold = 32; /// When performing a runtime memory check, do not check more than this /// number of pointers. Notice that the check is quadratic! @@ -2016,7 +2022,7 @@ bool LoopVectorizationLegality::canVectorize() { // Do not loop-vectorize loops with a tiny trip count. unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch); - if (TC > 0u && TC < TinyTripCountThreshold) { + if (TC > 0u && TC < TinyTripCountVectorThreshold) { DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " << "This loop is not worth vectorizing.\n"); return false; @@ -2678,6 +2684,12 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, if (OptForSize) return 1; + // Do not unroll loops with a relatively small trip count. 
+ unsigned TC = SE->getSmallConstantTripCount(TheLoop, + TheLoop->getLoopLatch()); + if (TC > 1 && TC < TinyTripCountUnrollThreshold) + return 1; + unsigned TargetVectorRegisters = TTI.getNumberOfRegisters(true); DEBUG(dbgs() << "LV: The target has " << TargetVectorRegisters << " vector registers\n"); @@ -2698,7 +2710,7 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, // We don't want to unroll the loops to the point where they do not fit into // the decoded cache. Assume that we only allow 32 IR instructions. - UF = std::min(UF, (32 / R.NumInstructions)); + UF = std::min(UF, (MaxLoopSizeThreshold / R.NumInstructions)); // Clamp the unroll factor ranges to reasonable factors. if (UF > MaxUnrollSize) |