| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-01-10 19:45:33 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-01-10 19:45:33 +0000 |
| commit | 94a4cc027ab111f6055c81605b3a47caae46cbe6 | |
| tree | 74d042f49c41355e385be63621f7e5df8d2e771a /llvm/test | |
| parent | 9be98b6bef38904c95c1508aee3e901620681388 | |
[X86][SSE] Improved (v)insertps shuffle matching
In the current code, we only attempt to match against insertps if we have exactly one element from the second input vector, irrespective of how much of the shuffle result is zeroable.
This patch instead checks whether there is a single non-zeroable element from either input that requires insertion. It also supports matching cases where only one of the inputs needs to be referenced.
We also split insertps shuffle matching off into a new lowerVectorShuffleAsInsertPS function.
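As a concrete illustration of the two new cases, consider the following hypothetical IR (these functions are illustrative and not taken from this patch's tests; the lowering described in the comments assumes SSE4.1, i.e. that insertps is available):

```llvm
; Case 1: a single non-zeroable element from the second input. Lane 2
; takes b[2], lanes 1 and 3 are zeroable, and lane 0 keeps a[0] in place,
; so inserting b[2] into lane 2 with a zero-mask over lanes 1 and 3
; covers the whole shuffle with one insertps.
define <4 x float> @single_insertion(<4 x float> %a, <4 x float> %b) {
  %z = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
  %s = shufflevector <4 x float> %z, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x float> %s
}

; Case 2: only one input referenced. Every non-zero lane of the result is
; an in-place element of %a, so insertps only has to perform a no-op
; insert plus its zero-mask, e.g. xmm0 = xmm0[0],zero,xmm0[2],zero.
define <4 x float> @single_input(<4 x float> %a) {
  %s = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
  ret <4 x float> %s
}
```

Under the old criterion, patterns like the first could be missed once zeroable lanes entered the picture, and the second kind, which references only one input, was not considered for insertps at all.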
Differential Revision: http://reviews.llvm.org/D6879
llvm-svn: 225589
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/combine-or.ll | 10 |
| -rw-r--r-- | llvm/test/CodeGen/X86/masked_memop.ll | 10 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining.ll | 28 |
3 files changed, 29 insertions, 19 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index c70067f389a..280fcbc7a3a 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -240,12 +240,10 @@ define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test19:
 ; CHECK: # BB#0:
 ; CHECK-NEXT: xorps %xmm2, %xmm2
-; CHECK-NEXT: xorps %xmm3, %xmm3
-; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm0[0,3]
-; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2,1,3]
-; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[0,0]
-; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,2]
-; CHECK-NEXT: orps %xmm3, %xmm2
+; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,3]
+; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; CHECK-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],zero,xmm1[2,2]
+; CHECK-NEXT: orps %xmm1, %xmm2
 ; CHECK-NEXT: movaps %xmm2, %xmm0
 ; CHECK-NEXT: retq
   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3>
diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll
index cce2d909120..726d7125a63 100644
--- a/llvm/test/CodeGen/X86/masked_memop.ll
+++ b/llvm/test/CodeGen/X86/masked_memop.ll
@@ -71,7 +71,7 @@ define <16 x float> @test4(<16 x i32> %trigger, <16 x float>* %addr, <16 x float
 ; AVX2-LABEL: test5
 ; AVX2: vmaskmovpd
 ; AVX2: vblendvpd
-; AVX2: vmaskmovpd
+; AVX2: vmaskmovpd
 ; AVX2: vblendvpd
 define <8 x double> @test5(<8 x i32> %trigger, <8 x double>* %addr, <8 x double> %dst) {
   %mask = icmp eq <8 x i32> %trigger, zeroinitializer
@@ -150,7 +150,7 @@ define void @test13(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %val)
 }
 
 ; AVX2-LABEL: test14
-; AVX2: vshufps $-24
+; AVX2: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
 ; AVX2: vmaskmovps
 define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
   %mask = icmp eq <2 x i32> %trigger, zeroinitializer
@@ -194,7 +194,7 @@ define <2 x float> @test18(<2 x i32> %trigger, <2 x float>* %addr) {
 }
 
-declare <16 x i32> @llvm.masked.load.v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
+declare <16 x i32> @llvm.masked.load.v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
 declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
 declare <2 x i32> @llvm.masked.load.v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
 declare void @llvm.masked.store.v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
@@ -202,8 +202,8 @@ declare void @llvm.masked.store.v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
 declare void @llvm.masked.store.v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
 declare void @llvm.masked.store.v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
 declare void @llvm.masked.store.v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
-declare void @llvm.masked.store.v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
+declare void @llvm.masked.store.v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
-declare void @llvm.masked.store.v16f32p(<16 x float>*, <16 x float>**, i32, <16 x i1>)
+declare void @llvm.masked.store.v16f32p(<16 x float>*, <16 x float>**, i32, <16 x i1>)
 declare <16 x float> @llvm.masked.load.v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
 declare <8 x float> @llvm.masked.load.v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
 declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index 897a69a5496..4e2bf87fdf6 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -553,18 +553,30 @@ define <4 x i32> @combine_bitwise_ops_test2c(<4 x i32> %a, <4 x i32> %b, <4 x i3
 }
 
 define <4 x i32> @combine_bitwise_ops_test3c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-; SSE-LABEL: combine_bitwise_ops_test3c:
-; SSE: # BB#0:
-; SSE-NEXT: xorps %xmm1, %xmm0
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSE-NEXT: retq
+; SSE2-LABEL: combine_bitwise_ops_test3c:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm0
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: combine_bitwise_ops_test3c:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm0
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test3c:
+; SSE41: # BB#0:
+; SSE41-NEXT: xorps %xmm1, %xmm0
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: combine_bitwise_ops_test3c:
 ; AVX: # BB#0:
 ; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
 ; AVX-NEXT: retq
   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
```

