summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
author: Nadav Rotem <nrotem@apple.com> 2013-01-04 17:48:25 +0000
committer: Nadav Rotem <nrotem@apple.com> 2013-01-04 17:48:25 +0000
commit: e1d5c4b8b9cc0070d0793b7865f6d3125eecdd94 (patch)
tree: d4f057408319f5efe879901916e439da1b6ca452 /llvm/test
parent: c616a5408a0dbcc19ed19cc39d454bf7c2276d55 (diff)
downloadbcm5719-llvm-e1d5c4b8b9cc0070d0793b7865f6d3125eecdd94.tar.gz
bcm5719-llvm-e1d5c4b8b9cc0070d0793b7865f6d3125eecdd94.zip
LoopVectorizer:
1. Add code to estimate register pressure.
2. Add code to select the unroll factor based on register pressure.
3. Add bits to TargetTransformInfo to provide the number of registers.

llvm-svn: 171469
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll | 21
-rw-r--r-- llvm/test/Transforms/LoopVectorize/gcc-examples.ll | 39
2 files changed, 58 insertions, 2 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll
index 156e7454a4c..0f21ba678c3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-unroll=0 -dce -instcombine -licm -S | FileCheck %s -check-prefix=UNROLL
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -13,6 +14,15 @@ target triple = "x86_64-apple-macosx10.8.0"
;CHECK: add nsw <4 x i32>
;CHECK: store <4 x i32>
;CHECK: ret void
+
+;UNROLL: @example1
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
define void @example1() nounwind uwtable ssp {
br label %1
@@ -34,13 +44,20 @@ define void @example1() nounwind uwtable ssp {
ret void
}
-
-; Select VF=4 because sext <8 x i1> to <8 x i32> is expensive.
+; Select VF=4 because sext <8 x i1> to <8 x i32> is expensive.
;CHECK: @example10b
;CHECK: load <4 x i16>
;CHECK: sext <4 x i16>
;CHECK: store <4 x i32>
;CHECK: ret void
+;UNROLL: @example10b
+;UNROLL: load <4 x i16>
+;UNROLL: load <4 x i16>
+;UNROLL: load <4 x i16>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
br label %1
diff --git a/llvm/test/Transforms/LoopVectorize/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/gcc-examples.ll
index b0f5a80d1e6..652c2a0d025 100644
--- a/llvm/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/llvm/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=4 -dce -instcombine -licm -S | FileCheck %s -check-prefix=UNROLL
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -24,6 +25,20 @@ target triple = "x86_64-apple-macosx10.8.0"
;CHECK: add nsw <4 x i32>
;CHECK: store <4 x i32>
;CHECK: ret void
+;UNROLL: @example1
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
define void @example1() nounwind uwtable ssp {
br label %1
@@ -48,6 +63,12 @@ define void @example1() nounwind uwtable ssp {
;CHECK: @example2
;CHECK: store <4 x i32>
;CHECK: ret void
+;UNROLL: @example2
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph5, label %.preheader
@@ -92,6 +113,12 @@ define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
;CHECK: @example3
;CHECK: <4 x i32>
;CHECK: ret void
+;UNROLL: @example3
+;UNROLL: <4 x i32>
+;UNROLL: <4 x i32>
+;UNROLL: <4 x i32>
+;UNROLL: <4 x i32>
+;UNROLL: ret void
define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
%1 = icmp eq i32 %n, 0
br i1 %1, label %._crit_edge, label %.lr.ph
@@ -115,6 +142,12 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
;CHECK: @example4
;CHECK: load <4 x i32>
;CHECK: ret void
+;UNROLL: @example4
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: ret void
define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
%1 = add nsw i32 %n, -1
%2 = icmp eq i32 %n, 0
@@ -175,6 +208,12 @@ define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
;CHECK: @example8
;CHECK: store <4 x i32>
;CHECK: ret void
+;UNROLL: @example8
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
define void @example8(i32 %x) nounwind uwtable ssp {
br label %.preheader
OpenPOWER on IntegriCloud