diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-04-24 13:45:30 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-04-24 13:45:30 +0000 |
| commit | 7eedee938f839680aad4fcb831bd0c77b7025f76 (patch) | |
| tree | b935b046c2b3b81100a13f4a307d9b58664b38bc | |
| parent | 1de59c5d92b96eda920394bfa0799cd2800201fa (diff) | |
| download | bcm5719-llvm-7eedee938f839680aad4fcb831bd0c77b7025f76.tar.gz bcm5719-llvm-7eedee938f839680aad4fcb831bd0c77b7025f76.zip | |
[X86][SSE] Demonstrate issue with decoding shuffle masks that have been lowered as rematerialized constants on scalar unit
Found whilst investigating PR27472
llvm-svn: 267339
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll | 22 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll | 15 |
2 files changed, 37 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll index f66f0d4dc0a..cee102cb6c8 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll @@ -9,6 +9,28 @@ declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) +define <16 x i8> @combine_vpshufb_zero(<16 x i8> %a0) { +; SSE-LABEL: combine_vpshufb_zero: +; SSE: # BB#0: +; SSE-NEXT: movl $128, %eax +; SSE-NEXT: movd %eax, %xmm1 +; SSE-NEXT: pshufb %xmm1, %xmm0 +; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; SSE-NEXT: retq +; +; AVX-LABEL: combine_vpshufb_zero: +; AVX: # BB#0: +; AVX-NEXT: movl $128, %eax +; AVX-NEXT: vmovd %eax, %xmm1 +; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX-NEXT: retq + %res0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) + %res1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res0, <16 x i8> <i8 0, i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) + %res2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %res1, <16 x i8> <i8 0, i8 1, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>) + ret <16 x i8> %res2 +} + define <4 x float> @combine_pshufb_movddup(<4 x float> %a0) { ; SSE-LABEL: combine_pshufb_movddup: ; SSE: # BB#0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll index d0f14675a14..a6376dc48cf 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll @@ -20,6 +20,21 @@ define <16 x i8> @combine_vpperm_identity(<16 x i8> %a0, <16 x i8> %a1) { ret <16 x i8> %res1 } +define <16 x i8> @combine_vpperm_zero(<16 x i8> %a0, <16 x i8> %a1) { +; CHECK-LABEL: combine_vpperm_zero: +; CHECK: # BB#0: +; CHECK-NEXT: movl $128, %eax +; CHECK-NEXT: vmovd %eax, %xmm2 +; CHECK-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0] +; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: retq + %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) + %res1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res0, <16 x i8> undef, <16 x i8> <i8 0, i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) + %res2 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res1, <16 x i8> undef, <16 x i8> <i8 0, i8 1, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>) + ret <16 x i8> %res2 +} + define <16 x i8> @combine_vpperm_identity_bitcast(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: combine_vpperm_identity_bitcast: ; CHECK: # BB#0: |

