diff options
author | James Molloy <james.molloy@arm.com> | 2015-09-09 12:51:06 +0000 |
---|---|---|
committer | James Molloy <james.molloy@arm.com> | 2015-09-09 12:51:06 +0000 |
commit | 89eccee4db2601d720c7dabf89cb86f916d12e9c (patch) | |
tree | fa5c7231902c9857ef51d2f31d711b9b34479225 /llvm/test/Transforms | |
parent | 3834d2ca087cd80a106127f4dc4b5ed90b2084dd (diff) | |
download | bcm5719-llvm-89eccee4db2601d720c7dabf89cb86f916d12e9c.tar.gz bcm5719-llvm-89eccee4db2601d720c7dabf89cb86f916d12e9c.zip |
Delay predication of stores until near the end of vector code generation
Predicating stores requires creating extra blocks. It's much cleaner if we do this in one pass instead of mutating the CFG while writing vector instructions.
Besides, doing it this way lets us use helper functions to update the dominator tree for us, reducing the work we need to do.
llvm-svn: 247139
Diffstat (limited to 'llvm/test/Transforms')
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/if-pred-stores.ll | 43 |
1 file changed, 34 insertions, 9 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index 991d027ada5..0d70f557f83 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -1,5 +1,8 @@ -; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL -; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC +; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -simplifycfg < %s | FileCheck %s --check-prefix=UNROLL +; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL-NOSIMPLIFY +; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -simplifycfg < %s | FileCheck %s --check-prefix=VEC +; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -simplifycfg -instcombine < %s | FileCheck %s --check-prefix=VEC-IC + target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" @@ -14,27 +17,49 @@ entry: ; VEC: %[[v10:.+]] = and <2 x i1> %[[v8]], <i1 true, i1 true> ; VEC: %[[v11:.+]] = extractelement <2 x i1> %[[v10]], i32 0 ; VEC: %[[v12:.+]] = icmp eq i1 %[[v11]], true +; VEC: %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0 +; VEC: %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0 ; VEC: br i1 %[[v12]], label %[[cond:.+]], label %[[else:.+]] ; ; VEC: [[cond]]: -; VEC: %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0 -; VEC: %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0 ; VEC: store i32 %[[v13]], i32* 
%[[v14]], align 4 ; VEC: br label %[[else:.+]] ; ; VEC: [[else]]: ; VEC: %[[v15:.+]] = extractelement <2 x i1> %[[v10]], i32 1 ; VEC: %[[v16:.+]] = icmp eq i1 %[[v15]], true +; VEC: %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1 +; VEC: %[[v18:.+]] = extractelement <2 x i32*> %{{.+}} i32 1 ; VEC: br i1 %[[v16]], label %[[cond2:.+]], label %[[else2:.+]] ; ; VEC: [[cond2]]: -; VEC: %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1 -; VEC: %[[v18:.+]] = extractelement <2 x i32*> %{{.+}} i32 1 ; VEC: store i32 %[[v17]], i32* %[[v18]], align 4 ; VEC: br label %[[else2:.+]] ; ; VEC: [[else2]]: +; VEC-IC-LABEL: test +; VEC-IC: %[[v1:.+]] = icmp sgt <2 x i32> %{{.*}}, <i32 100, i32 100> +; VEC-IC: %[[v2:.+]] = add nsw <2 x i32> %{{.*}}, <i32 20, i32 20> +; VEC-IC: %[[v3:.+]] = extractelement <2 x i1> %[[v1]], i32 0 +; VEC-IC: br i1 %[[v3]], label %[[cond:.+]], label %[[else:.+]] +; +; VEC-IC: [[cond]]: +; VEC-IC: %[[v4:.+]] = extractelement <2 x i32> %[[v2]], i32 0 +; VEC-IC: store i32 %[[v4]], i32* %{{.*}}, align 4 +; VEC-IC: br label %[[else:.+]] +; +; VEC-IC: [[else]]: +; VEC-IC: %[[v5:.+]] = extractelement <2 x i1> %[[v1]], i32 1 +; VEC-IC: br i1 %[[v5]], label %[[cond2:.+]], label %[[else2:.+]] +; +; VEC-IC: [[cond2]]: +; VEC-IC: %[[v6:.+]] = extractelement <2 x i32> %[[v2]], i32 1 +; VEC-IC: store i32 %[[v6]], i32* %{{.*}}, align 4 +; VEC-IC: br label %[[else2:.+]] +; +; VEC-IC: [[else2]]: + ; UNROLL-LABEL: test ; UNROLL: vector.body: ; UNROLL: %[[IND:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 0 @@ -90,9 +115,9 @@ for.end: ; vectorized loop body. ; PR18724 -; UNROLL-LABEL: bug18724 -; UNROLL: store i32 -; UNROLL: store i32 +; UNROLL-NOSIMPLIFY-LABEL: bug18724 +; UNROLL-NOSIMPLIFY: store i32 +; UNROLL-NOSIMPLIFY: store i32 define void @bug18724() { entry: |