summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2016-11-28 16:25:01 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2016-11-28 16:25:01 +0000
commit3f10e669817f174cb99da771e1cd6ecbbfb44fa1 (patch)
tree3944b493e1be9c23aee12fb82b3b5fd81dc18b95 /llvm/test
parent7fcacd8e0e9fd5dc937da96c77b7b2239e022f69 (diff)
downloadbcm5719-llvm-3f10e669817f174cb99da771e1cd6ecbbfb44fa1.tar.gz
bcm5719-llvm-3f10e669817f174cb99da771e1cd6ecbbfb44fa1.zip
[X86][SSE] Added support for combining bit-shifts with shuffles.
Bit-shifts by a whole number of bytes can be represented as a shuffle mask suitable for combining. Added a 'getFauxShuffleMask' function to allow us to create shuffle masks from other suitable operations. llvm-svn: 288040
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/X86/vector-sext.ll63
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll18
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll18
3 files changed, 41 insertions, 58 deletions
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index de946c04ea0..0cb6c52e90d 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -442,20 +442,18 @@ define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp
;
; SSSE3-LABEL: sext_16i8_to_4i64:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSSE3-NEXT: movdqa %xmm2, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
-; SSSE3-NEXT: psrad $24, %xmm2
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSSE3-NEXT: psrld $16, %xmm0
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
-; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: movdqa %xmm0, %xmm2
+; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: psrad $24, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,2,u,u,u,3,u,u,u],zero,xmm1[u,u,u],zero
+; SSSE3-NEXT: movdqa %xmm1, %xmm2
+; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: psrad $24, %xmm1
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_16i8_to_4i64:
@@ -532,34 +530,31 @@ define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp
;
; SSSE3-LABEL: sext_16i8_to_8i64:
; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <u,u,u,2,u,u,u,3,u,u,u,255,u,u,u,255>
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pshufb %xmm2, %xmm1
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: psrad $24, %xmm1
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; SSSE3-NEXT: movdqa %xmm0, %xmm4
+; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: psrad $24, %xmm0
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
-; SSSE3-NEXT: psrld $16, %xmm1
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: movdqa %xmm1, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSSE3-NEXT: pshufb %xmm2, %xmm3
+; SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSSE3-NEXT: psrad $31, %xmm2
-; SSSE3-NEXT: psrad $24, %xmm1
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: psrad $24, %xmm3
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSSE3-NEXT: movdqa %xmm2, %xmm4
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: psrad $24, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; SSSE3-NEXT: psrld $16, %xmm3
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: movdqa %xmm3, %xmm4
-; SSSE3-NEXT: psrad $31, %xmm4
-; SSSE3-NEXT: psrad $24, %xmm3
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_16i8_to_8i64:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 2be4a06363e..396d6a34156 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -599,14 +599,12 @@ define <32 x i8> @combine_pshufb_not_as_pshufw(<32 x i8> %a0) {
define <32 x i8> @combine_psrlw_pshufb(<16 x i16> %a0) {
; X32-LABEL: combine_psrlw_pshufb:
; X32: # BB#0:
-; X32-NEXT: vpsrlw $8, %ymm0, %ymm0
-; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30]
+; X32-NEXT: vpshufb {{.*#+}} ymm0 = zero,ymm0[1],zero,ymm0[3],zero,ymm0[5],zero,ymm0[7],zero,ymm0[9],zero,ymm0[11],zero,ymm0[13],zero,ymm0[15],zero,ymm0[17],zero,ymm0[19],zero,ymm0[21],zero,ymm0[23],zero,ymm0[25],zero,ymm0[27],zero,ymm0[29],zero,ymm0[31]
; X32-NEXT: retl
;
; X64-LABEL: combine_psrlw_pshufb:
; X64: # BB#0:
-; X64-NEXT: vpsrlw $8, %ymm0, %ymm0
-; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30]
+; X64-NEXT: vpshufb {{.*#+}} ymm0 = zero,ymm0[1],zero,ymm0[3],zero,ymm0[5],zero,ymm0[7],zero,ymm0[9],zero,ymm0[11],zero,ymm0[13],zero,ymm0[15],zero,ymm0[17],zero,ymm0[19],zero,ymm0[21],zero,ymm0[23],zero,ymm0[25],zero,ymm0[27],zero,ymm0[29],zero,ymm0[31]
; X64-NEXT: retq
%1 = lshr <16 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%2 = bitcast <16 x i16> %1 to <32 x i8>
@@ -617,14 +615,12 @@ define <32 x i8> @combine_psrlw_pshufb(<16 x i16> %a0) {
define <32 x i8> @combine_pslld_pshufb(<8 x i32> %a0) {
; X32-LABEL: combine_pslld_pshufb:
; X32: # BB#0:
-; X32-NEXT: vpslld $24, %ymm0, %ymm0
-; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28]
+; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0],zero,zero,zero,ymm0[4],zero,zero,zero,ymm0[8],zero,zero,zero,ymm0[12],zero,zero,zero,ymm0[16],zero,zero,zero,ymm0[20],zero,zero,zero,ymm0[24],zero,zero,zero,ymm0[28],zero,zero,zero
; X32-NEXT: retl
;
; X64-LABEL: combine_pslld_pshufb:
; X64: # BB#0:
-; X64-NEXT: vpslld $24, %ymm0, %ymm0
-; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28]
+; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0],zero,zero,zero,ymm0[4],zero,zero,zero,ymm0[8],zero,zero,zero,ymm0[12],zero,zero,zero,ymm0[16],zero,zero,zero,ymm0[20],zero,zero,zero,ymm0[24],zero,zero,zero,ymm0[28],zero,zero,zero
; X64-NEXT: retq
%1 = shl <8 x i32> %a0, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
%2 = bitcast <8 x i32> %1 to <32 x i8>
@@ -635,14 +631,12 @@ define <32 x i8> @combine_pslld_pshufb(<8 x i32> %a0) {
define <32 x i8> @combine_psrlq_pshufb(<4 x i64> %a0) {
; X32-LABEL: combine_psrlq_pshufb:
; X32: # BB#0:
-; X32-NEXT: vpsrlq $32, %ymm0, %ymm0
-; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,31,30,29,28,27,26,25,24,23]
+; X32-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[7,6,5,4],zero,zero,zero,zero,ymm0[15,14,13,12],zero,zero,zero,zero,ymm0[23,22,21],zero,zero,zero,zero,ymm0[31,30,29,28],zero
; X32-NEXT: retl
;
; X64-LABEL: combine_psrlq_pshufb:
; X64: # BB#0:
-; X64-NEXT: vpsrlq $32, %ymm0, %ymm0
-; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,31,30,29,28,27,26,25,24,23]
+; X64-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[7,6,5,4],zero,zero,zero,zero,ymm0[15,14,13,12],zero,zero,zero,zero,ymm0[23,22,21],zero,zero,zero,zero,ymm0[31,30,29,28],zero
; X64-NEXT: retq
%1 = lshr <4 x i64> %a0, <i64 32, i64 32, i64 32, i64 32>
%2 = bitcast <4 x i64> %1 to <32 x i8>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
index 277d5413f36..4e93c6e3b0c 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -412,14 +412,12 @@ define <16 x i8> @combine_pshufb_as_unary_unpckhwd(<16 x i8> %a0) {
define <16 x i8> @combine_psrlw_pshufb(<8 x i16> %a0) {
; SSE-LABEL: combine_psrlw_pshufb:
; SSE: # BB#0:
-; SSE-NEXT: psrlw $8, %xmm0
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
+; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: combine_psrlw_pshufb:
; AVX: # BB#0:
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
%1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%2 = bitcast <8 x i16> %1 to <16 x i8>
@@ -430,14 +428,12 @@ define <16 x i8> @combine_psrlw_pshufb(<8 x i16> %a0) {
define <16 x i8> @combine_pslld_pshufb(<4 x i32> %a0) {
; SSE-LABEL: combine_pslld_pshufb:
; SSE: # BB#0:
-; SSE-NEXT: pslld $8, %xmm0
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,1,0],zero,xmm0[6,5,4],zero,xmm0[10,9,8],zero,xmm0[14,13,12],zero
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pslld_pshufb:
; AVX: # BB#0:
-; AVX-NEXT: vpslld $8, %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,1,0],zero,xmm0[6,5,4],zero,xmm0[10,9,8],zero,xmm0[14,13,12],zero
; AVX-NEXT: retq
%1 = shl <4 x i32> %a0, <i32 8, i32 8, i32 8, i32 8>
%2 = bitcast <4 x i32> %1 to <16 x i8>
@@ -448,14 +444,12 @@ define <16 x i8> @combine_pslld_pshufb(<4 x i32> %a0) {
define <16 x i8> @combine_psrlq_pshufb(<2 x i64> %a0) {
; SSE-LABEL: combine_psrlq_pshufb:
; SSE: # BB#0:
-; SSE-NEXT: psrlq $48, %xmm0
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; SSE-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[7,6],zero,zero,zero,zero,zero,zero,xmm0[15,14]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_psrlq_pshufb:
; AVX: # BB#0:
-; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[7,6],zero,zero,zero,zero,zero,zero,xmm0[15,14]
; AVX-NEXT: retq
%1 = lshr <2 x i64> %a0, <i64 48, i64 48>
%2 = bitcast <2 x i64> %1 to <16 x i8>
OpenPOWER on IntegriCloud