summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/Transforms/LoopVectorize
diff options
context:
space:
mode:
author: James Molloy <james.molloy@arm.com> 2015-09-09 12:51:06 +0000
committer: James Molloy <james.molloy@arm.com> 2015-09-09 12:51:06 +0000
commit: 89eccee4db2601d720c7dabf89cb86f916d12e9c (patch)
tree: fa5c7231902c9857ef51d2f31d711b9b34479225 /llvm/test/Transforms/LoopVectorize
parent: 3834d2ca087cd80a106127f4dc4b5ed90b2084dd (diff)
download: bcm5719-llvm-89eccee4db2601d720c7dabf89cb86f916d12e9c.tar.gz
          bcm5719-llvm-89eccee4db2601d720c7dabf89cb86f916d12e9c.zip
Delay predication of stores until near the end of vector code generation
Predicating stores requires creating extra blocks. It is much cleaner to do this in one pass than to mutate the CFG while writing vector instructions. Besides, we can then use helper functions to update the dominator tree for us, reducing the work we need to do.

llvm-svn: 247139
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
-rw-r--r-- llvm/test/Transforms/LoopVectorize/if-pred-stores.ll | 43
1 file changed, 34 insertions, 9 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index 991d027ada5..0d70f557f83 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -1,5 +1,8 @@
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -simplifycfg < %s | FileCheck %s --check-prefix=UNROLL
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL-NOSIMPLIFY
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -simplifycfg < %s | FileCheck %s --check-prefix=VEC
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -simplifycfg -instcombine < %s | FileCheck %s --check-prefix=VEC-IC
+
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
@@ -14,27 +17,49 @@ entry:
; VEC: %[[v10:.+]] = and <2 x i1> %[[v8]], <i1 true, i1 true>
; VEC: %[[v11:.+]] = extractelement <2 x i1> %[[v10]], i32 0
; VEC: %[[v12:.+]] = icmp eq i1 %[[v11]], true
+; VEC: %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0
+; VEC: %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0
; VEC: br i1 %[[v12]], label %[[cond:.+]], label %[[else:.+]]
;
; VEC: [[cond]]:
-; VEC: %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0
-; VEC: %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0
; VEC: store i32 %[[v13]], i32* %[[v14]], align 4
; VEC: br label %[[else:.+]]
;
; VEC: [[else]]:
; VEC: %[[v15:.+]] = extractelement <2 x i1> %[[v10]], i32 1
; VEC: %[[v16:.+]] = icmp eq i1 %[[v15]], true
+; VEC: %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1
+; VEC: %[[v18:.+]] = extractelement <2 x i32*> %{{.+}} i32 1
; VEC: br i1 %[[v16]], label %[[cond2:.+]], label %[[else2:.+]]
;
; VEC: [[cond2]]:
-; VEC: %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1
-; VEC: %[[v18:.+]] = extractelement <2 x i32*> %{{.+}} i32 1
; VEC: store i32 %[[v17]], i32* %[[v18]], align 4
; VEC: br label %[[else2:.+]]
;
; VEC: [[else2]]:
+; VEC-IC-LABEL: test
+; VEC-IC: %[[v1:.+]] = icmp sgt <2 x i32> %{{.*}}, <i32 100, i32 100>
+; VEC-IC: %[[v2:.+]] = add nsw <2 x i32> %{{.*}}, <i32 20, i32 20>
+; VEC-IC: %[[v3:.+]] = extractelement <2 x i1> %[[v1]], i32 0
+; VEC-IC: br i1 %[[v3]], label %[[cond:.+]], label %[[else:.+]]
+;
+; VEC-IC: [[cond]]:
+; VEC-IC: %[[v4:.+]] = extractelement <2 x i32> %[[v2]], i32 0
+; VEC-IC: store i32 %[[v4]], i32* %{{.*}}, align 4
+; VEC-IC: br label %[[else:.+]]
+;
+; VEC-IC: [[else]]:
+; VEC-IC: %[[v5:.+]] = extractelement <2 x i1> %[[v1]], i32 1
+; VEC-IC: br i1 %[[v5]], label %[[cond2:.+]], label %[[else2:.+]]
+;
+; VEC-IC: [[cond2]]:
+; VEC-IC: %[[v6:.+]] = extractelement <2 x i32> %[[v2]], i32 1
+; VEC-IC: store i32 %[[v6]], i32* %{{.*}}, align 4
+; VEC-IC: br label %[[else2:.+]]
+;
+; VEC-IC: [[else2]]:
+
; UNROLL-LABEL: test
; UNROLL: vector.body:
; UNROLL: %[[IND:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 0
@@ -90,9 +115,9 @@ for.end:
; vectorized loop body.
; PR18724
-; UNROLL-LABEL: bug18724
-; UNROLL: store i32
-; UNROLL: store i32
+; UNROLL-NOSIMPLIFY-LABEL: bug18724
+; UNROLL-NOSIMPLIFY: store i32
+; UNROLL-NOSIMPLIFY: store i32
define void @bug18724() {
entry:
OpenPOWER on IntegriCloud