diff options
author | Ayal Zaks <ayal.zaks@intel.com> | 2017-06-30 08:02:35 +0000 |
---|---|---|
committer | Ayal Zaks <ayal.zaks@intel.com> | 2017-06-30 08:02:35 +0000 |
commit | 8d26f0a602f8f21f99d38d12fd0f2fb21da1409f (patch) | |
tree | 3b7d574d9c900ee3620820778230ae72dce516de /llvm/test/Transforms/LoopVectorize/small-loop.ll | |
parent | cc78ea6985707d8be08be362d859aeb4044b39c2 (diff) | |
download | bcm5719-llvm-8d26f0a602f8f21f99d38d12fd0f2fb21da1409f.tar.gz bcm5719-llvm-8d26f0a602f8f21f99d38d12fd0f2fb21da1409f.zip |
[LV] Optimize for size when vectorizing loops with tiny trip count
It may be detrimental to vectorize loops with very small trip counts, as various
costs of the vectorized loop body as well as enclosing overheads including
runtime tests and scalar iterations may outweigh the gains of vectorizing. The
current cost model measures the cost of the vectorized loop body only, expecting
it will amortize other costs, and loops with known or expected very small trip
counts are not vectorized at all. This patch allows loops with very small trip
counts to be vectorized, but under OptForSize constraints, which ensure the cost
of the loop body is dominant, with neither runtime guards nor scalar iterations.
Patch inspired by D32451.
Differential Revision: https://reviews.llvm.org/D34373
llvm-svn: 306803
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize/small-loop.ll')
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/small-loop.ll | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/small-loop.ll b/llvm/test/Transforms/LoopVectorize/small-loop.ll
index 9a5dc4aa1b7..378283b464b 100644
--- a/llvm/test/Transforms/LoopVectorize/small-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/small-loop.ll
@@ -7,7 +7,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 @c = common global [2048 x i32] zeroinitializer, align 16
 
 ;CHECK-LABEL: @example1(
-;CHECK-NOT: load <4 x i32>
+;CHECK: load <4 x i32>
 ;CHECK: ret void
 define void @example1() nounwind uwtable ssp {
   br label %1
@@ -23,8 +23,8 @@ define void @example1() nounwind uwtable ssp {
   store i32 %6, i32* %7, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, 8 ; <----- A really small trip count.
-  br i1 %exitcond, label %8, label %1
+  %exitcond = icmp eq i32 %lftr.wideiv, 8 ; <----- A really small trip count
+  br i1 %exitcond, label %8, label %1 ; w/o scalar iteration overhead.
 
 ; <label>:8 ; preds = %1
   ret void