diff options
author | Nadav Rotem <nrotem@apple.com> | 2013-01-04 17:48:25 +0000 |
---|---|---|
committer | Nadav Rotem <nrotem@apple.com> | 2013-01-04 17:48:25 +0000 |
commit | e1d5c4b8b9cc0070d0793b7865f6d3125eecdd94 (patch) | |
tree | d4f057408319f5efe879901916e439da1b6ca452 /llvm/test/Transforms | |
parent | c616a5408a0dbcc19ed19cc39d454bf7c2276d55 (diff) | |
download | bcm5719-llvm-e1d5c4b8b9cc0070d0793b7865f6d3125eecdd94.tar.gz bcm5719-llvm-e1d5c4b8b9cc0070d0793b7865f6d3125eecdd94.zip |
LoopVectorizer:
1. Add code to estimate register pressure.
2. Add code to select the unroll factor based on register pressure.
3. Add bits to TargetTransformInfo to provide the number of registers.
llvm-svn: 171469
Diffstat (limited to 'llvm/test/Transforms')
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll | 21 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/gcc-examples.ll | 39 |
2 files changed, 58 insertions, 2 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll index 156e7454a4c..0f21ba678c3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -licm -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-unroll=0 -dce -instcombine -licm -S | FileCheck %s -check-prefix=UNROLL target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -13,6 +14,15 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK: add nsw <4 x i32> ;CHECK: store <4 x i32> ;CHECK: ret void + +;UNROLL: @example1 +;UNROLL: load <4 x i32> +;UNROLL: load <4 x i32> +;UNROLL: add nsw <4 x i32> +;UNROLL: add nsw <4 x i32> +;UNROLL: store <4 x i32> +;UNROLL: store <4 x i32> +;UNROLL: ret void define void @example1() nounwind uwtable ssp { br label %1 @@ -34,13 +44,20 @@ define void @example1() nounwind uwtable ssp { ret void } - -; Select VF=4 because sext <8 x i1> to <8 x i32> is expensive. +; Select VF=4 because sext <8 x i1> to <8 x i32> is expensive. 
;CHECK: @example10b ;CHECK: load <4 x i16> ;CHECK: sext <4 x i16> ;CHECK: store <4 x i32> ;CHECK: ret void +;UNROLL: @example10b +;UNROLL: load <4 x i16> +;UNROLL: load <4 x i16> +;UNROLL: load <4 x i16> +;UNROLL: store <4 x i32> +;UNROLL: store <4 x i32> +;UNROLL: store <4 x i32> +;UNROLL: ret void define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp { br label %1 diff --git a/llvm/test/Transforms/LoopVectorize/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/gcc-examples.ll index b0f5a80d1e6..652c2a0d025 100644 --- a/llvm/test/Transforms/LoopVectorize/gcc-examples.ll +++ b/llvm/test/Transforms/LoopVectorize/gcc-examples.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=4 -dce -instcombine -licm -S | FileCheck %s -check-prefix=UNROLL target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -24,6 +25,20 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK: add nsw <4 x i32> ;CHECK: store <4 x i32> ;CHECK: ret void +;UNROLL: @example1 +;UNROLL: load <4 x i32> +;UNROLL: load <4 x i32> +;UNROLL: load <4 x i32> +;UNROLL: load <4 x i32> +;UNROLL: add nsw <4 x i32> +;UNROLL: add nsw <4 x i32> +;UNROLL: add nsw <4 x i32> +;UNROLL: add nsw <4 x i32> +;UNROLL: store <4 x i32> +;UNROLL: store <4 x i32> +;UNROLL: store <4 x i32> +;UNROLL: store <4 x i32> +;UNROLL: ret void define void @example1() nounwind uwtable ssp { br label %1 @@ -48,6 +63,12 @@ define void @example1() nounwind uwtable ssp { ;CHECK: @example2 ;CHECK: store <4 x i32> ;CHECK: ret void +;UNROLL: @example2 +;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32> +;UNROLL: store <4 x i32> +;UNROLL: store <4 x i32> +;UNROLL: ret void define void @example2(i32 %n, i32 %x) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph5, label %.preheader @@ -92,6 +113,12 @@ define void @example2(i32 %n, i32 %x) nounwind uwtable ssp { ;CHECK: @example3 ;CHECK: <4 x i32> ;CHECK: ret void +;UNROLL: @example3 +;UNROLL: <4 x i32> +;UNROLL: <4 x i32> +;UNROLL: <4 x i32> +;UNROLL: <4 x i32> +;UNROLL: ret void define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp { %1 = icmp eq i32 %n, 0 br i1 %1, label %._crit_edge, label %.lr.ph @@ -115,6 +142,12 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture ;CHECK: @example4 ;CHECK: load <4 x i32> ;CHECK: ret void +;UNROLL: @example4 +;UNROLL: load <4 x i32> +;UNROLL: load <4 x i32> +;UNROLL: load <4 x i32> +;UNROLL: load <4 x i32> +;UNROLL: ret void define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp { %1 = add nsw i32 %n, -1 %2 = icmp eq i32 %n, 0 @@ -175,6 +208,12 @@ define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture ;CHECK: @example8 ;CHECK: store <4 x i32> ;CHECK: ret void +;UNROLL: @example8 +;UNROLL: store <4 x i32> +;UNROLL: store <4 x i32> +;UNROLL: store <4 x i32> +;UNROLL: store <4 x i32> +;UNROLL: ret void define void @example8(i32 %x) nounwind uwtable ssp { br label %.preheader |