summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp24
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll2
2 files changed, 13 insertions, 13 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e6c481f81ee..44623dd0ebc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -200,9 +200,10 @@ static cl::opt<bool> EnableMaskedInterleavedMemAccesses(
"enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden,
cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"));
-/// We don't interleave loops with a known constant trip count below this
-/// number.
-static const unsigned TinyTripCountInterleaveThreshold = 128;
+static cl::opt<unsigned> TinyTripCountInterleaveThreshold(
+ "tiny-trip-count-interleave-threshold", cl::init(128), cl::Hidden,
+ cl::desc("We don't interleave loops with a estimated constant trip count "
+ "below this number"));
static cl::opt<unsigned> ForceTargetNumScalarRegs(
"force-target-num-scalar-regs", cl::init(0), cl::Hidden,
@@ -5143,9 +5144,10 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
if (Legal->getMaxSafeDepDistBytes() != -1U)
return 1;
- // Do not interleave loops with a relatively small trip count.
- unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
- if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
+ // Do not interleave loops with a relatively small known or estimated trip
+ // count.
+ auto BestKnownTC = getSmallBestKnownTC(*PSE.getSE(), TheLoop);
+ if (BestKnownTC && *BestKnownTC < TinyTripCountInterleaveThreshold)
return 1;
RegisterUsage R = calculateRegisterUsage({VF})[0];
@@ -5208,12 +5210,10 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
MaxInterleaveCount = ForceTargetMaxVectorInterleaveFactor;
}
- // If the trip count is constant, limit the interleave count to be less than
- // the trip count divided by VF.
- if (TC > 0) {
- assert(TC >= VF && "VF exceeds trip count?");
- if ((TC / VF) < MaxInterleaveCount)
- MaxInterleaveCount = (TC / VF);
+ // If trip count is known or estimated compile time constant, limit the
+ // interleave count to be less than the trip count divided by VF.
+ if (BestKnownTC) {
+ MaxInterleaveCount = std::min(*BestKnownTC / VF, MaxInterleaveCount);
}
// If we did not calculate the cost for VF (because the user selected the VF)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll
index fbc4ab2aa86..f593b9423e3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll
@@ -3,7 +3,7 @@
; CHECK: remark: no_fpmath.c:6:11: loop not vectorized: cannot prove it is safe to reorder floating-point operations (hotness: 300)
; CHECK: remark: no_fpmath.c:6:14: loop not vectorized
-; CHECK: remark: no_fpmath.c:17:14: vectorized loop (vectorization width: 2, interleaved count: 2) (hotness: 300)
+; CHECK: remark: no_fpmath.c:17:14: vectorized loop (vectorization width: 2, interleaved count: 1) (hotness: 300)
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
OpenPOWER on IntegriCloud