diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-10-01 17:30:44 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-10-01 17:30:44 +0000 |
| commit | d25c200cd69b11381a8e5879df234387fb55a95e (patch) | |
| tree | 72d91d8fee6546fe1da7c82e67df5f71233627da /llvm/test | |
| parent | c7076a3ba948dcb276836c00d94378e76bd065e3 (diff) | |
| download | bcm5719-llvm-d25c200cd69b11381a8e5879df234387fb55a95e.tar.gz bcm5719-llvm-d25c200cd69b11381a8e5879df234387fb55a95e.zip | |
[X86][SSE] Add shuffle combining tests with PACKSS/PACKUS
llvm-svn: 314628
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll | 90 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll | 66 |
2 files changed, 156 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll index 671d191b5dd..0e3c4e402a8 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll @@ -804,6 +804,96 @@ define <32 x i8> @combine_unpack_unpack_pshufb(<32 x i8> %a0) { ret <32 x i8> %6 } +define <16 x i16> @shuffle_combine_packssdw_pshufb(<8 x i32> %a0) { +; X32-LABEL: shuffle_combine_packssdw_pshufb: +; X32: # BB#0: +; X32-NEXT: vpsrad $31, %ymm0, %ymm0 +; X32-NEXT: vpackssdw %ymm0, %ymm0, %ymm0 +; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17] +; X32-NEXT: retl +; +; X64-LABEL: shuffle_combine_packssdw_pshufb: +; X64: # BB#0: +; X64-NEXT: vpsrad $31, %ymm0, %ymm0 +; X64-NEXT: vpackssdw %ymm0, %ymm0, %ymm0 +; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17] +; X64-NEXT: retq + %1 = ashr <8 x i32> %a0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + %2 = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %1) + %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0, i32 8, i32 9, i32 10, i32 11, i32 11, i32 10, i32 9, i32 8> + ret <16 x i16> %3 +} +declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone + +define <32 x i8> @shuffle_combine_packsswb_pshufb(<16 x i16> %a0, <16 x i16> %a1) { +; X32-LABEL: shuffle_combine_packsswb_pshufb: +; X32: # BB#0: +; X32-NEXT: vpsraw $15, %ymm0, %ymm0 +; X32-NEXT: vpsraw $15, %ymm1, %ymm1 +; X32-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 +; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16] +; X32-NEXT: retl +; +; X64-LABEL: shuffle_combine_packsswb_pshufb: +; X64: # BB#0: +; X64-NEXT: vpsraw $15, %ymm0, %ymm0 +; X64-NEXT: vpsraw $15, %ymm1, %ymm1 +; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 +; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16] +; X64-NEXT: retq + %1 = ashr <16 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + %2 = ashr <16 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + %3 = tail call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2) + %4 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %3, <32 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>) + ret <32 x i8> %4 +} +declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone + +define <16 x i16> @shuffle_combine_packusdw_pshufb(<8 x i32> %a0, <8 x i32> %a1) { +; X32-LABEL: shuffle_combine_packusdw_pshufb: +; X32: # BB#0: +; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0 +; X32-NEXT: vpackusdw %ymm0, %ymm0, %ymm0 +; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17] +; X32-NEXT: retl +; +; X64-LABEL: shuffle_combine_packusdw_pshufb: +; X64: # BB#0: +; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; X64-NEXT: vpackusdw %ymm0, %ymm0, %ymm0 +; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17] +; X64-NEXT: retq + %1 = and <8 x i32> %a0, <i32 255, i32 65535, i32 255, i32 65535, i32 255, i32 255, i32 255, i32 65535> + %2 = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %1) + %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0, i32 8, i32 9, i32 10, i32 11, i32 11, i32 10, i32 9, i32 8> + ret <16 x i16> %3 +} +declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone + +define <32 x i8> @shuffle_combine_packuswb_pshufb(<16 x i16> %a0, <16 x i16> %a1) { +; X32-LABEL: shuffle_combine_packuswb_pshufb: +; X32: # BB#0: +; X32-NEXT: vpsrlw $8, %ymm0, %ymm0 +; X32-NEXT: vpsrlw $8, %ymm1, %ymm1 +; X32-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16] +; X32-NEXT: retl +; +; X64-LABEL: shuffle_combine_packuswb_pshufb: +; X64: # BB#0: +; X64-NEXT: vpsrlw $8, %ymm0, %ymm0 +; X64-NEXT: vpsrlw $8, %ymm1, %ymm1 +; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16] +; X64-NEXT: retq + %1 = lshr <16 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %2 = lshr <16 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %3 = tail call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2) + %4 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %3, <32 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>) + ret <32 x i8> %4 +} +declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone + define <16 x i8> @combine_pshufb_insertion_as_broadcast_v2i64(i64 %a0) { ; X32-LABEL: combine_pshufb_insertion_as_broadcast_v2i64: ; X32: # BB#0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll index a11cac0b9bd..6a88bf010e7 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll @@ -640,6 +640,72 @@ define <8 x i16> @shuffle_combine_unpack_insert(<8 x i16> %a0) { ret <8 x i16> %8 } +define <16 x i8> @shuffle_combine_packssdw_pshufb(<4 x i32> %a0) { +; SSE-LABEL: shuffle_combine_packssdw_pshufb: +; SSE: # BB#0: +; SSE-NEXT: psrad $31, %xmm0 +; SSE-NEXT: packssdw %xmm0, %xmm0 +; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_combine_packssdw_pshufb: +; AVX: # BB#0: +; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 +; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; AVX-NEXT: retq + %1 = ashr <4 x i32> %a0, <i32 31, i32 31, i32 31, i32 31> + %2 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %1) + %3 = bitcast <8 x i16> %2 to <16 x i8> + %4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %3, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>) + ret <16 x i8> %4 +} +declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone + +define <16 x i8> @shuffle_combine_packsswb_pshufb(<8 x i16> %a0, <8 x i16> %a1) { +; SSE-LABEL: shuffle_combine_packsswb_pshufb: +; SSE: # BB#0: +; SSE-NEXT: psraw $15, %xmm0 +; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,12,10,8,6,4,2,0,14,12,10,8,6,4,2,0] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_combine_packsswb_pshufb: +; AVX: # BB#0: +; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,12,10,8,6,4,2,0,14,12,10,8,6,4,2,0] +; AVX-NEXT: retq + %1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + %2 = ashr <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + %3 = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2) + %4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %3, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>) + ret <16 x i8> %4 +} +declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone + +define <16 x i8> @shuffle_combine_packuswb_pshufb(<8 x i16> %a0, <8 x i16> %a1) { +; SSE-LABEL: shuffle_combine_packuswb_pshufb: +; SSE: # BB#0: +; SSE-NEXT: psrlw $8, %xmm0 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: packuswb %xmm1, %xmm0 +; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0] +; SSE-NEXT: retq +; +; AVX-LABEL: shuffle_combine_packuswb_pshufb: +; AVX: # BB#0: +; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1 +; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0] +; AVX-NEXT: retq + %1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %2 = lshr <8 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %3 = tail call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2) + %4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %3, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>) + ret <16 x i8> %4 +} +declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone + define <16 x i8> @constant_fold_pshufb() { ; SSE-LABEL: constant_fold_pshufb: ; SSE: # BB#0: |

