summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
diff options
context:
space:
mode:
authorHal Finkel <hfinkel@anl.gov>2014-10-15 17:35:01 +0000
committerHal Finkel <hfinkel@anl.gov>2014-10-15 17:35:01 +0000
commit3b7fc86677aa17fa00ada1db6e33e6e81948b3dc (patch)
tree40c03e4494bab4917241a6c4985335d53e534653 /llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
parent8683d2b0d2deb319d15438d052fe9b6a07f8e65a (diff)
downloadbcm5719-llvm-3b7fc86677aa17fa00ada1db6e33e6e81948b3dc.tar.gz
bcm5719-llvm-3b7fc86677aa17fa00ada1db6e33e6e81948b3dc.zip
[SLPVectorize] Basic ephemeral-value awareness
The SLP vectorizer should not vectorize ephemeral values. These are used to express information to the optimizer, and vectorizing them does not lead to faster code (because the ephemeral values are dropped prior to code generation, vectorized or not), and obscures the information the instructions are attempting to communicate (the logic that interprets the arguments to @llvm.assume generically does not understand vectorized conditions). Also, uses by ephemeral values are free (because they, and the necessary extractelement instructions, will be dropped prior to code generation). llvm-svn: 219816
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll')
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll43
1 files changed, 43 insertions, 0 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
index 9eda29f101a..0221613b970 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -35,6 +35,49 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c)
ret <4 x float> %rd
}
+declare void @llvm.assume(i1) nounwind
+
+; This entire tree is ephemeral, don't vectorize any of it.
+define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select_eph(
+; CHECK-NOT: icmp ne <4 x i32>
+; CHECK-NOT: select <4 x i1>
+ %c0 = extractelement <4 x i32> %c, i32 0
+ %c1 = extractelement <4 x i32> %c, i32 1
+ %c2 = extractelement <4 x i32> %c, i32 2
+ %c3 = extractelement <4 x i32> %c, i32 3
+ %a0 = extractelement <4 x float> %a, i32 0
+ %a1 = extractelement <4 x float> %a, i32 1
+ %a2 = extractelement <4 x float> %a, i32 2
+ %a3 = extractelement <4 x float> %a, i32 3
+ %b0 = extractelement <4 x float> %b, i32 0
+ %b1 = extractelement <4 x float> %b, i32 1
+ %b2 = extractelement <4 x float> %b, i32 2
+ %b3 = extractelement <4 x float> %b, i32 3
+ %cmp0 = icmp ne i32 %c0, 0
+ %cmp1 = icmp ne i32 %c1, 0
+ %cmp2 = icmp ne i32 %c2, 0
+ %cmp3 = icmp ne i32 %c3, 0
+ %s0 = select i1 %cmp0, float %a0, float %b0
+ %s1 = select i1 %cmp1, float %a1, float %b1
+ %s2 = select i1 %cmp2, float %a2, float %b2
+ %s3 = select i1 %cmp3, float %a3, float %b3
+ %ra = insertelement <4 x float> undef, float %s0, i32 0
+ %rb = insertelement <4 x float> %ra, float %s1, i32 1
+ %rc = insertelement <4 x float> %rb, float %s2, i32 2
+ %rd = insertelement <4 x float> %rc, float %s3, i32 3
+ %q0 = extractelement <4 x float> %rd, i32 0
+ %q1 = extractelement <4 x float> %rd, i32 1
+ %q2 = extractelement <4 x float> %rd, i32 2
+ %q3 = extractelement <4 x float> %rd, i32 3
+ %q4 = fadd float %q0, %q1
+ %q5 = fadd float %q2, %q3
+ %q6 = fadd float %q4, %q5
+ %qi = fcmp olt float %q6, %q5
+ call void @llvm.assume(i1 %qi)
+ ret <4 x float> undef
+}
+
; Insert in an order different from the vector indices to make sure it
; doesn't matter
define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
OpenPOWER on IntegriCloud