summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2015-01-10 19:45:33 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2015-01-10 19:45:33 +0000
commit94a4cc027ab111f6055c81605b3a47caae46cbe6 (patch)
tree74d042f49c41355e385be63621f7e5df8d2e771a /llvm/test
parent9be98b6bef38904c95c1508aee3e901620681388 (diff)
downloadbcm5719-llvm-94a4cc027ab111f6055c81605b3a47caae46cbe6.tar.gz
bcm5719-llvm-94a4cc027ab111f6055c81605b3a47caae46cbe6.zip
[X86][SSE] Improved (v)insertps shuffle matching
In the current code we only attempt to match against insertps if we have exactly one element from the second input vector, irrespective of how much of the shuffle result is zeroable. This patch checks to see if there is a single non-zeroable element from either input that requires insertion. It also supports matching of cases where only one of the inputs need to be referenced. We also split insertps shuffle matching off into a new lowerVectorShuffleAsInsertPS function. Differential Revision: http://reviews.llvm.org/D6879 llvm-svn: 225589
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/X86/combine-or.ll10
-rw-r--r--llvm/test/CodeGen/X86/masked_memop.ll10
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining.ll28
3 files changed, 29 insertions, 19 deletions
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index c70067f389a..280fcbc7a3a 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -240,12 +240,10 @@ define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test19:
; CHECK: # BB#0:
; CHECK-NEXT: xorps %xmm2, %xmm2
-; CHECK-NEXT: xorps %xmm3, %xmm3
-; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm0[0,3]
-; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2,1,3]
-; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[0,0]
-; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,2]
-; CHECK-NEXT: orps %xmm3, %xmm2
+; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,3]
+; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; CHECK-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],zero,xmm1[2,2]
+; CHECK-NEXT: orps %xmm1, %xmm2
; CHECK-NEXT: movaps %xmm2, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3>
diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll
index cce2d909120..726d7125a63 100644
--- a/llvm/test/CodeGen/X86/masked_memop.ll
+++ b/llvm/test/CodeGen/X86/masked_memop.ll
@@ -71,7 +71,7 @@ define <16 x float> @test4(<16 x i32> %trigger, <16 x float>* %addr, <16 x float
; AVX2-LABEL: test5
; AVX2: vmaskmovpd
; AVX2: vblendvpd
-; AVX2: vmaskmovpd
+; AVX2: vmaskmovpd
; AVX2: vblendvpd
define <8 x double> @test5(<8 x i32> %trigger, <8 x double>* %addr, <8 x double> %dst) {
%mask = icmp eq <8 x i32> %trigger, zeroinitializer
@@ -150,7 +150,7 @@ define void @test13(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %val)
}
; AVX2-LABEL: test14
-; AVX2: vshufps $-24
+; AVX2: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; AVX2: vmaskmovps
define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
@@ -194,7 +194,7 @@ define <2 x float> @test18(<2 x i32> %trigger, <2 x float>* %addr) {
}
-declare <16 x i32> @llvm.masked.load.v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
+declare <16 x i32> @llvm.masked.load.v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
declare <2 x i32> @llvm.masked.load.v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
declare void @llvm.masked.store.v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
@@ -202,8 +202,8 @@ declare void @llvm.masked.store.v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
declare void @llvm.masked.store.v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
declare void @llvm.masked.store.v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
declare void @llvm.masked.store.v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
-declare void @llvm.masked.store.v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
-declare void @llvm.masked.store.v16f32p(<16 x float>*, <16 x float>**, i32, <16 x i1>)
+declare void @llvm.masked.store.v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
+declare void @llvm.masked.store.v16f32p(<16 x float>*, <16 x float>**, i32, <16 x i1>)
declare <16 x float> @llvm.masked.load.v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
declare <8 x float> @llvm.masked.load.v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index 897a69a5496..4e2bf87fdf6 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -553,18 +553,30 @@ define <4 x i32> @combine_bitwise_ops_test2c(<4 x i32> %a, <4 x i32> %b, <4 x i3
}
define <4 x i32> @combine_bitwise_ops_test3c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-; SSE-LABEL: combine_bitwise_ops_test3c:
-; SSE: # BB#0:
-; SSE-NEXT: xorps %xmm1, %xmm0
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSE-NEXT: retq
+; SSE2-LABEL: combine_bitwise_ops_test3c:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm0
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: combine_bitwise_ops_test3c:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm0
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test3c:
+; SSE41: # BB#0:
+; SSE41-NEXT: xorps %xmm1, %xmm0
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; SSE41-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test3c:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
OpenPOWER on IntegriCloud