summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/X86
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2017-07-04 18:11:02 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2017-07-04 18:11:02 +0000
commit ac3e7f3f57f143514d13263889ca655185ea2e3c (patch)
tree ac7f4f02f5024e801c4e9a4cbe91bff4cdac196b /llvm/test/CodeGen/X86
parent f809c5f11ccd5bb245d1dd1cb3ea9e87c9bd1f69 (diff)
download bcm5719-llvm-ac3e7f3f57f143514d13263889ca655185ea2e3c.tar.gz
download bcm5719-llvm-ac3e7f3f57f143514d13263889ca655185ea2e3c.zip
[X86][SSE4A] Add support for combining from non-v16i8 EXTRQI/INSERTQI shuffles
With the improved shuffle decoding we can now combine EXTRQI/INSERTQI shuffles from non-v16i8 vector types.

llvm-svn: 307099
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll 29
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll 48
2 files changed, 41 insertions, 36 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
index 0f2fe73912d..1669317f4aa 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
@@ -24,22 +24,14 @@ define <16 x i8> @combine_extrqi_pshufb_16i8(<16 x i8> %a0) {
}
define <8 x i16> @combine_extrqi_pshufb_8i16(<8 x i16> %a0) {
-; SSSE3-LABEL: combine_extrqi_pshufb_8i16:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3,4,5],zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: retq
-;
-; SSE42-LABEL: combine_extrqi_pshufb_8i16:
-; SSE42: # BB#0:
-; SSE42-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3,4,5],zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; SSE42-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; SSE42-NEXT: retq
+; SSE-LABEL: combine_extrqi_pshufb_8i16:
+; SSE: # BB#0:
+; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; SSE-NEXT: retq
;
; AVX-LABEL: combine_extrqi_pshufb_8i16:
; AVX: # BB#0:
-; AVX-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3,4,5],zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = bitcast <8 x i16> %1 to <16 x i8>
@@ -73,20 +65,19 @@ define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) {
define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) {
; SSSE3-LABEL: combine_insertqi_pshufb_8i16:
; SSSE3: # BB#0:
-; SSSE3-NEXT: insertq {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE42-LABEL: combine_insertqi_pshufb_8i16:
; SSE42: # BB#0:
-; SSE42-NEXT: insertq {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7,u,u,u,u,u,u,u,u]
-; SSE42-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; SSE42-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE42-NEXT: retq
;
; AVX-LABEL: combine_insertqi_pshufb_8i16:
; AVX: # BB#0:
-; AVX-NEXT: insertq {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7,u,u,u,u,u,u,u,u]
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = bitcast <8 x i16> %1 to <16 x i8>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll b/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll
index 72bacd1f783..e458bb6fa52 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll
@@ -11,7 +11,6 @@
define <2 x i64> @extrqi_len0_idx0(<2 x i64> %a) {
; ALL-LABEL: extrqi_len0_idx0:
; ALL: # BB#0:
-; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
; ALL-NEXT: retq
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 0, i8 0)
ret <2 x i64> %1
@@ -130,10 +129,7 @@ define <16 x i8> @shuf_01zzzzzz23zzzzzz(<16 x i8> %a0) {
;
; BTVER1-LABEL: shuf_01zzzzzz23zzzzzz:
; BTVER1: # BB#0:
-; BTVER1-NEXT: movdqa %xmm0, %xmm1
-; BTVER1-NEXT: extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
-; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; BTVER1-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_01zzzzzz23zzzzzz:
@@ -172,10 +168,21 @@ define <8 x i16> @shuf_12zzuuuu(<8 x i16> %a0) {
}
define <8 x i16> @shuf_012zuuuu(<8 x i16> %a0) {
-; ALL-LABEL: shuf_012zuuuu:
-; ALL: # BB#0:
-; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; ALL-NEXT: retq
+; AMD10H-LABEL: shuf_012zuuuu:
+; AMD10H: # BB#0:
+; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; AMD10H-NEXT: retq
+;
+; BTVER1-LABEL: shuf_012zuuuu:
+; BTVER1: # BB#0:
+; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; BTVER1-NEXT: retq
+;
+; BTVER2-LABEL: shuf_012zuuuu:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; BTVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
+; BTVER2-NEXT: retq
%s = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %s
}
@@ -191,10 +198,7 @@ define <8 x i16> @shuf_0zzz1zzz(<8 x i16> %a0) {
;
; BTVER1-LABEL: shuf_0zzz1zzz:
; BTVER1: # BB#0:
-; BTVER1-NEXT: movdqa %xmm0, %xmm1
-; BTVER1-NEXT: extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
-; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; BTVER1-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_0zzz1zzz:
@@ -232,10 +236,20 @@ define <4 x i32> @shuf_0z1z(<4 x i32> %a0) {
; A length of zero is equivalent to a bit length of 64.
define <2 x i64> @insertqi_len0_idx0(<2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: insertqi_len0_idx0:
-; ALL: # BB#0:
-; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6,7],xmm0[u,u,u,u,u,u,u,u]
-; ALL-NEXT: retq
+; AMD10H-LABEL: insertqi_len0_idx0:
+; AMD10H: # BB#0:
+; AMD10H-NEXT: movaps %xmm1, %xmm0
+; AMD10H-NEXT: retq
+;
+; BTVER1-LABEL: insertqi_len0_idx0:
+; BTVER1: # BB#0:
+; BTVER1-NEXT: movaps %xmm1, %xmm0
+; BTVER1-NEXT: retq
+;
+; BTVER2-LABEL: insertqi_len0_idx0:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovaps %xmm1, %xmm0
+; BTVER2-NEXT: retq
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 0, i8 0)
ret <2 x i64> %1
}
OpenPOWER on IntegriCloud