author    Simon Pilgrim <llvm-dev@redking.me.uk>  2017-10-01 17:30:44 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk>  2017-10-01 17:30:44 +0000
commit    d25c200cd69b11381a8e5879df234387fb55a95e (patch)
tree      72d91d8fee6546fe1da7c82e67df5f71233627da /llvm/test
parent    c7076a3ba948dcb276836c00d94378e76bd065e3 (diff)
[X86][SSE] Add shuffle combining tests with PACKSS/PACKUS
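
These tests pre-condition the pack inputs so that saturation is a no-op
(arithmetic shifts fill the signed inputs with sign bits; masks and logical
shifts keep the unsigned inputs in byte/word range), then shuffle the packed
result. A minimal hand-written sketch of the kind of fold this sets up for
(an assumption about the intended combine, not part of this commit): the
trailing shuffle could instead be applied to the pack's wider inputs, e.g.

  %s = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %p = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %s, <4 x i32> %s)
  ; ...yields the same result as reversing the words in each half of
  ; packssdw(%x, %x), since each output word comes from exactly one dword.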
llvm-svn: 314628
Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll   | 90
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll  | 66
2 files changed, 156 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 671d191b5dd..0e3c4e402a8 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -804,6 +804,96 @@ define <32 x i8> @combine_unpack_unpack_pshufb(<32 x i8> %a0) {
ret <32 x i8> %6
}
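+; The arithmetic shift leaves every i32 lane all-sign-bits, so PACKSSDW
+; saturation cannot change any value; the word shuffle then reverses and
+; duplicates words within each 128-bit lane of the packed result.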
+define <16 x i16> @shuffle_combine_packssdw_pshufb(<8 x i32> %a0) {
+; X32-LABEL: shuffle_combine_packssdw_pshufb:
+; X32: # BB#0:
+; X32-NEXT: vpsrad $31, %ymm0, %ymm0
+; X32-NEXT: vpackssdw %ymm0, %ymm0, %ymm0
+; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X32-NEXT: retl
+;
+; X64-LABEL: shuffle_combine_packssdw_pshufb:
+; X64: # BB#0:
+; X64-NEXT: vpsrad $31, %ymm0, %ymm0
+; X64-NEXT: vpackssdw %ymm0, %ymm0, %ymm0
+; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X64-NEXT: retq
+ %1 = ashr <8 x i32> %a0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+ %2 = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %1)
+ %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0, i32 8, i32 9, i32 10, i32 11, i32 11, i32 10, i32 9, i32 8>
+ ret <16 x i16> %3
+}
+declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
+
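+; Word-to-byte variant: vpsraw fills both inputs with sign bits, so PACKSSWB
+; saturation is a no-op before the byte shuffle.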
+define <32 x i8> @shuffle_combine_packsswb_pshufb(<16 x i16> %a0, <16 x i16> %a1) {
+; X32-LABEL: shuffle_combine_packsswb_pshufb:
+; X32: # BB#0:
+; X32-NEXT: vpsraw $15, %ymm0, %ymm0
+; X32-NEXT: vpsraw $15, %ymm1, %ymm1
+; X32-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
+; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X32-NEXT: retl
+;
+; X64-LABEL: shuffle_combine_packsswb_pshufb:
+; X64: # BB#0:
+; X64-NEXT: vpsraw $15, %ymm0, %ymm0
+; X64-NEXT: vpsraw $15, %ymm1, %ymm1
+; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
+; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X64-NEXT: retq
+ %1 = ashr <16 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %2 = ashr <16 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %3 = tail call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
+ %4 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %3, <32 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <32 x i8> %4
+}
+declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
+
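+; Unsigned variant: the mask keeps every dword within [0,65535], so PACKUSDW
+; saturation is a no-op before the word shuffle.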
+define <16 x i16> @shuffle_combine_packusdw_pshufb(<8 x i32> %a0, <8 x i32> %a1) {
+; X32-LABEL: shuffle_combine_packusdw_pshufb:
+; X32: # BB#0:
+; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
+; X32-NEXT: vpackusdw %ymm0, %ymm0, %ymm0
+; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X32-NEXT: retl
+;
+; X64-LABEL: shuffle_combine_packusdw_pshufb:
+; X64: # BB#0:
+; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vpackusdw %ymm0, %ymm0, %ymm0
+; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X64-NEXT: retq
+ %1 = and <8 x i32> %a0, <i32 255, i32 65535, i32 255, i32 65535, i32 255, i32 255, i32 255, i32 65535>
+ %2 = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %1)
+ %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0, i32 8, i32 9, i32 10, i32 11, i32 11, i32 10, i32 9, i32 8>
+ ret <16 x i16> %3
+}
+declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
+
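+; The logical shift right by 8 keeps every word within [0,255], so PACKUSWB
+; saturation is a no-op before the byte shuffle.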
+define <32 x i8> @shuffle_combine_packuswb_pshufb(<16 x i16> %a0, <16 x i16> %a1) {
+; X32-LABEL: shuffle_combine_packuswb_pshufb:
+; X32: # BB#0:
+; X32-NEXT: vpsrlw $8, %ymm0, %ymm0
+; X32-NEXT: vpsrlw $8, %ymm1, %ymm1
+; X32-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X32-NEXT: retl
+;
+; X64-LABEL: shuffle_combine_packuswb_pshufb:
+; X64: # BB#0:
+; X64-NEXT: vpsrlw $8, %ymm0, %ymm0
+; X64-NEXT: vpsrlw $8, %ymm1, %ymm1
+; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X64-NEXT: retq
+ %1 = lshr <16 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %2 = lshr <16 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %3 = tail call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
+ %4 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %3, <32 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <32 x i8> %4
+}
+declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
+
define <16 x i8> @combine_pshufb_insertion_as_broadcast_v2i64(i64 %a0) {
; X32-LABEL: combine_pshufb_insertion_as_broadcast_v2i64:
; X32: # BB#0:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
index a11cac0b9bd..6a88bf010e7 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -640,6 +640,72 @@ define <8 x i16> @shuffle_combine_unpack_insert(<8 x i16> %a0) {
ret <8 x i16> %8
}
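+; 128-bit versions of the same patterns: each pack's inputs are pre-shifted
+; or masked so saturation is a no-op before the trailing pshufb.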
+define <16 x i8> @shuffle_combine_packssdw_pshufb(<4 x i32> %a0) {
+; SSE-LABEL: shuffle_combine_packssdw_pshufb:
+; SSE: # BB#0:
+; SSE-NEXT: psrad $31, %xmm0
+; SSE-NEXT: packssdw %xmm0, %xmm0
+; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_combine_packssdw_pshufb:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; AVX-NEXT: retq
+ %1 = ashr <4 x i32> %a0, <i32 31, i32 31, i32 31, i32 31>
+ %2 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %1)
+ %3 = bitcast <8 x i16> %2 to <16 x i8>
+ %4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %3, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>)
+ ret <16 x i8> %4
+}
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
+
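+; The pshufb mask only reads bytes from the first pack operand, so the
+; PACKSSWB (and the dead shift of %a1) already fold away in the checks.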
+define <16 x i8> @shuffle_combine_packsswb_pshufb(<8 x i16> %a0, <8 x i16> %a1) {
+; SSE-LABEL: shuffle_combine_packsswb_pshufb:
+; SSE: # BB#0:
+; SSE-NEXT: psraw $15, %xmm0
+; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,12,10,8,6,4,2,0,14,12,10,8,6,4,2,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_combine_packsswb_pshufb:
+; AVX: # BB#0:
+; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,12,10,8,6,4,2,0,14,12,10,8,6,4,2,0]
+; AVX-NEXT: retq
+ %1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %2 = ashr <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %3 = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
+ %4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %3, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <16 x i8> %4
+}
+declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
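+; Unsigned 128-bit case: vpsrlw $8 keeps every word within [0,255], so
+; PACKUSWB saturation is a no-op before the byte shuffle.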
+define <16 x i8> @shuffle_combine_packuswb_pshufb(<8 x i16> %a0, <8 x i16> %a1) {
+; SSE-LABEL: shuffle_combine_packuswb_pshufb:
+; SSE: # BB#0:
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_combine_packuswb_pshufb:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
+; AVX-NEXT: retq
+ %1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %2 = lshr <8 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %3 = tail call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
+ %4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %3, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <16 x i8> %4
+}
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
define <16 x i8> @constant_fold_pshufb() {
; SSE-LABEL: constant_fold_pshufb:
; SSE: # BB#0: