Diffstat (limited to 'llvm/test/CodeGen/X86/masked_gather_scatter.ll')
-rw-r--r--  llvm/test/CodeGen/X86/masked_gather_scatter.ll  |  203
 1 file changed, 126 insertions(+), 77 deletions(-)
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index df88e179ba9..83309afd1d0 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -915,12 +915,13 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
; KNL_64-LABEL: test17:
; KNL_64: # %bb.0:
; KNL_64-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0
; KNL_64-NEXT: kshiftlw $14, %k0, %k0
; KNL_64-NEXT: kshiftrw $14, %k0, %k1
-; KNL_64-NEXT: vgatherdpd (%rdi,%ymm0,8), %zmm2 {%k1}
+; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
; KNL_64-NEXT: vmovapd %xmm2, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
@@ -928,31 +929,36 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
; KNL_32-LABEL: test17:
; KNL_32: # %bb.0:
; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0
; KNL_32-NEXT: kshiftlw $14, %k0, %k0
; KNL_32-NEXT: kshiftrw $14, %k0, %k1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vgatherdpd (%eax,%ymm0,8), %zmm2 {%k1}
+; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
; KNL_32-NEXT: vmovapd %xmm2, %xmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test17:
; SKX: # %bb.0:
+; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vpmovq2m %xmm1, %k1
-; SKX-NEXT: vgatherdpd (%rdi,%xmm0,8), %xmm2 {%k1}
+; SKX-NEXT: vgatherqpd (%rdi,%xmm0,8), %xmm2 {%k1}
; SKX-NEXT: vmovapd %xmm2, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test17:
; SKX_32: # %bb.0:
+; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vpmovq2m %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vgatherdpd (%eax,%xmm0,8), %xmm2 {%k1}
+; SKX_32-NEXT: vgatherqpd (%eax,%xmm0,8), %xmm2 {%k1}
; SKX_32-NEXT: vmovapd %xmm2, %xmm0
; SKX_32-NEXT: retl
@@ -1074,8 +1080,8 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
;
; KNL_32-LABEL: test20:
; KNL_32: # %bb.0:
-; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_32-NEXT: vpsllq $63, %xmm2, %xmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k0
; KNL_32-NEXT: kshiftlw $14, %k0, %k0
@@ -1093,6 +1099,7 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
;
; SKX_32-LABEL: test20:
; SKX_32: # %bb.0:
+; SKX_32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX_32-NEXT: vpmovq2m %xmm2, %k1
; SKX_32-NEXT: vscatterdps %xmm0, (,%xmm1) {%k1}
@@ -1106,9 +1113,9 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
; KNL_64-LABEL: test21:
; KNL_64: # %bb.0:
; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL_64-NEXT: vpsllq $63, %xmm2, %xmm2
; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k0
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: kshiftlw $14, %k0, %k0
; KNL_64-NEXT: kshiftrw $14, %k0, %k1
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
@@ -1117,10 +1124,10 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
;
; KNL_32-LABEL: test21:
; KNL_32: # %bb.0:
-; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT: vpsllq $63, %xmm2, %xmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k0
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_32-NEXT: kshiftlw $14, %k0, %k0
; KNL_32-NEXT: kshiftrw $14, %k0, %k1
; KNL_32-NEXT: vpscatterdd %zmm0, (,%zmm1) {%k1}
@@ -1131,6 +1138,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
; SKX: # %bb.0:
; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX-NEXT: vpmovq2m %xmm2, %k1
+; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: vpscatterqd %xmm0, (,%xmm1) {%k1}
; SKX-NEXT: retq
;
@@ -1138,6 +1146,8 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
; SKX_32: # %bb.0:
; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX_32-NEXT: vpmovq2m %xmm2, %k1
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SKX_32-NEXT: vpscatterdd %xmm0, (,%xmm1) {%k1}
; SKX_32-NEXT: retl
call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask)
@@ -1151,7 +1161,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
; KNL_64-LABEL: test22:
; KNL_64: # %bb.0:
; KNL_64-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0
; KNL_64-NEXT: kshiftlw $14, %k0, %k0
@@ -1164,7 +1174,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
; KNL_32-LABEL: test22:
; KNL_32: # %bb.0:
; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0
; KNL_32-NEXT: kshiftlw $14, %k0, %k0
@@ -1177,6 +1187,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
;
; SKX-LABEL: test22:
; SKX: # %bb.0:
+; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vpmovq2m %xmm1, %k1
; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm2 {%k1}
@@ -1185,6 +1196,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
;
; SKX_32-LABEL: test22:
; SKX_32: # %bb.0:
+; SKX_32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vpmovq2m %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1252,28 +1264,28 @@ declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <
define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %src0) {
; KNL_64-LABEL: test23:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; KNL_64-NEXT: kshiftlw $14, %k0, %k0
; KNL_64-NEXT: kshiftrw $14, %k0, %k1
-; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1}
-; KNL_64-NEXT: vmovdqa %xmm2, %xmm0
+; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
+; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test23:
; KNL_32: # %bb.0:
-; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; KNL_32-NEXT: kshiftlw $14, %k0, %k0
; KNL_32-NEXT: kshiftrw $14, %k0, %k1
-; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1}
-; KNL_32-NEXT: vmovdqa %xmm2, %xmm0
+; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1}
+; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
@@ -1281,8 +1293,10 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
; SKX: # %bb.0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vpmovq2m %xmm1, %k1
-; SKX-NEXT: vpgatherdd (%rdi,%xmm0,4), %xmm2 {%k1}
-; SKX-NEXT: vmovdqa %xmm2, %xmm0
+; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SKX-NEXT: vpgatherdd (%rdi,%xmm0,4), %xmm1 {%k1}
+; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SKX-NEXT: retq
;
; SKX_32-LABEL: test23:
@@ -1290,8 +1304,10 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vpmovq2m %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpgatherdd (%eax,%xmm0,4), %xmm2 {%k1}
-; SKX_32-NEXT: vmovdqa %xmm2, %xmm0
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SKX_32-NEXT: vpgatherdd (%eax,%xmm0,4), %xmm1 {%k1}
+; SKX_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
%gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
@@ -1302,28 +1318,28 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
define <2 x i32> @test23b(i32* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x i32> %src0) {
; KNL_64-LABEL: test23b:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; KNL_64-NEXT: kshiftlw $14, %k0, %k0
; KNL_64-NEXT: kshiftrw $14, %k0, %k1
-; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1}
-; KNL_64-NEXT: vmovdqa %xmm2, %xmm0
+; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm1 {%k1}
+; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test23b:
; KNL_32: # %bb.0:
-; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; KNL_32-NEXT: kshiftlw $14, %k0, %k0
; KNL_32-NEXT: kshiftrw $14, %k0, %k1
-; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1}
-; KNL_32-NEXT: vmovdqa %xmm2, %xmm0
+; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1}
+; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
@@ -1331,8 +1347,9 @@ define <2 x i32> @test23b(i32* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x i32>
; SKX: # %bb.0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vpmovq2m %xmm1, %k1
-; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm2 {%k1}
-; SKX-NEXT: vmovdqa %xmm2, %xmm0
+; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1}
+; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SKX-NEXT: retq
;
; SKX_32-LABEL: test23b:
@@ -1340,8 +1357,9 @@ define <2 x i32> @test23b(i32* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x i32>
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vpmovq2m %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm2 {%k1}
-; SKX_32-NEXT: vmovdqa %xmm2, %xmm0
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1}
+; SKX_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SKX_32-NEXT: retl
%gep.random = getelementptr i32, i32* %base, <2 x i64> %ind
%res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0)
@@ -1351,22 +1369,22 @@ define <2 x i32> @test23b(i32* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x i32>
define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
; KNL_64-LABEL: test24:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: movw $3, %ax
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
-; KNL_64-NEXT: vmovdqa %xmm1, %xmm0
+; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test24:
; KNL_32: # %bb.0:
-; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: movw $3, %cx
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1}
-; KNL_32-NEXT: vmovdqa %xmm1, %xmm0
+; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
@@ -1374,8 +1392,9 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
; SKX: # %bb.0:
; SKX-NEXT: movb $3, %al
; SKX-NEXT: kmovw %eax, %k1
+; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: vpgatherdd (%rdi,%xmm0,4), %xmm1 {%k1}
-; SKX-NEXT: vmovdqa %xmm1, %xmm0
+; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SKX-NEXT: retq
;
; SKX_32-LABEL: test24:
@@ -1383,8 +1402,9 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: movb $3, %cl
; SKX_32-NEXT: kmovw %ecx, %k1
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32-NEXT: vpgatherdd (%eax,%xmm0,4), %xmm1 {%k1}
-; SKX_32-NEXT: vmovdqa %xmm1, %xmm0
+; SKX_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
%gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
@@ -1396,12 +1416,13 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
; KNL_64-LABEL: test25:
; KNL_64: # %bb.0:
; KNL_64-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0
; KNL_64-NEXT: kshiftlw $14, %k0, %k0
; KNL_64-NEXT: kshiftrw $14, %k0, %k1
-; KNL_64-NEXT: vpgatherdq (%rdi,%ymm0,8), %zmm2 {%k1}
+; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
; KNL_64-NEXT: vmovdqa %xmm2, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
@@ -1409,31 +1430,36 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
; KNL_32-LABEL: test25:
; KNL_32: # %bb.0:
; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0
; KNL_32-NEXT: kshiftlw $14, %k0, %k0
; KNL_32-NEXT: kshiftrw $14, %k0, %k1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpgatherdq (%eax,%ymm0,8), %zmm2 {%k1}
+; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
; KNL_32-NEXT: vmovdqa %xmm2, %xmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test25:
; SKX: # %bb.0:
+; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vpmovq2m %xmm1, %k1
-; SKX-NEXT: vpgatherdq (%rdi,%xmm0,8), %xmm2 {%k1}
+; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1}
; SKX-NEXT: vmovdqa %xmm2, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test25:
; SKX_32: # %bb.0:
+; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT: vpmovq2m %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpgatherdq (%eax,%xmm0,8), %xmm2 {%k1}
+; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1}
; SKX_32-NEXT: vmovdqa %xmm2, %xmm0
; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
@@ -1446,10 +1472,11 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
; KNL_64-LABEL: test26:
; KNL_64: # %bb.0:
; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
-; KNL_64-NEXT: vpgatherdq (%rdi,%ymm0,8), %zmm1 {%k1}
+; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
; KNL_64-NEXT: vmovdqa %xmm1, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
@@ -1457,27 +1484,32 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
; KNL_32-LABEL: test26:
; KNL_32: # %bb.0:
; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: movb $3, %cl
; KNL_32-NEXT: kmovw %ecx, %k1
-; KNL_32-NEXT: vpgatherdq (%eax,%ymm0,8), %zmm1 {%k1}
+; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
; KNL_32-NEXT: vmovdqa %xmm1, %xmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test26:
; SKX: # %bb.0:
+; SKX-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vpgatherdq (%rdi,%xmm0,8), %xmm1 {%k1}
+; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1}
; SKX-NEXT: vmovdqa %xmm1, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test26:
; SKX_32: # %bb.0:
+; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0
+; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
-; SKX_32-NEXT: vpgatherdq (%eax,%xmm0,8), %xmm1 {%k1}
+; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm1 {%k1}
; SKX_32-NEXT: vmovdqa %xmm1, %xmm0
; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
@@ -1490,40 +1522,40 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
define <2 x float> @test27(float* %base, <2 x i32> %ind) {
; KNL_64-LABEL: test27:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
; KNL_64-NEXT: movw $3, %ax
; KNL_64-NEXT: kmovw %eax, %k1
-; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
-; KNL_64-NEXT: vmovaps %xmm1, %xmm0
+; KNL_64-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
+; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test27:
; KNL_32: # %bb.0:
-; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: movw $3, %cx
; KNL_32-NEXT: kmovw %ecx, %k1
-; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
-; KNL_32-NEXT: vmovaps %xmm1, %xmm0
+; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
+; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test27:
; SKX: # %bb.0:
+; SKX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
; SKX-NEXT: movb $3, %al
; SKX-NEXT: kmovw %eax, %k1
-; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm1 {%k1}
-; SKX-NEXT: vmovaps %xmm1, %xmm0
+; SKX-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm0 {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: test27:
; SKX_32: # %bb.0:
+; SKX_32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: movb $3, %cl
; SKX_32-NEXT: kmovw %ecx, %k1
-; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm1 {%k1}
-; SKX_32-NEXT: vmovaps %xmm1, %xmm0
+; SKX_32-NEXT: vgatherdps (%eax,%xmm1,4), %xmm0 {%k1}
; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
%gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
@@ -1536,7 +1568,7 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
; KNL_64-LABEL: test28:
; KNL_64: # %bb.0:
; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
@@ -1545,8 +1577,8 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
;
; KNL_32-LABEL: test28:
; KNL_32: # %bb.0:
-; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_32-NEXT: movw $3, %ax
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: vpscatterdd %zmm0, (,%zmm1) {%k1}
@@ -1555,6 +1587,7 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
;
; SKX-LABEL: test28:
; SKX: # %bb.0:
+; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpscatterqd %xmm0, (,%xmm1) {%k1}
; SKX-NEXT: retq
@@ -1563,6 +1596,8 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
; SKX_32: # %bb.0:
; SKX_32-NEXT: movb $3, %al
; SKX_32-NEXT: kmovw %eax, %k1
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SKX_32-NEXT: vpscatterdd %xmm0, (,%xmm1) {%k1}
; SKX_32-NEXT: retl
call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> <i1 true, i1 true>)
@@ -2638,7 +2673,9 @@ define <16 x float> @sext_i8_index(float* %base, <16 x i8> %ind) {
define <8 x float> @sext_v8i8_index(float* %base, <8 x i8> %ind) {
; KNL_64-LABEL: sext_v8i8_index:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: vpmovsxbd %xmm0, %ymm1
+; KNL_64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; KNL_64-NEXT: vpslld $24, %ymm0, %ymm0
+; KNL_64-NEXT: vpsrad $24, %ymm0, %ymm1
; KNL_64-NEXT: movw $255, %ax
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
@@ -2647,8 +2684,10 @@ define <8 x float> @sext_v8i8_index(float* %base, <8 x i8> %ind) {
;
; KNL_32-LABEL: sext_v8i8_index:
; KNL_32: # %bb.0:
+; KNL_32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpmovsxbd %xmm0, %ymm1
+; KNL_32-NEXT: vpslld $24, %ymm0, %ymm0
+; KNL_32-NEXT: vpsrad $24, %ymm0, %ymm1
; KNL_32-NEXT: movw $255, %cx
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
@@ -2657,16 +2696,20 @@ define <8 x float> @sext_v8i8_index(float* %base, <8 x i8> %ind) {
;
; SKX-LABEL: sext_v8i8_index:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbd %xmm0, %ymm1
+; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: vpslld $24, %ymm0, %ymm0
+; SKX-NEXT: vpsrad $24, %ymm0, %ymm1
; SKX-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: sext_v8i8_index:
; SKX_32: # %bb.0:
+; SKX_32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vpmovsxbd %xmm0, %ymm1
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vpslld $24, %ymm0, %ymm0
+; SKX_32-NEXT: vpsrad $24, %ymm0, %ymm1
; SKX_32-NEXT: vgatherdps (%eax,%ymm1,4), %ymm0 {%k1}
; SKX_32-NEXT: retl
@@ -2682,26 +2725,28 @@ declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <8 x i1
define void @test_scatter_2i32_index(<2 x double> %a1, double* %base, <2 x i32> %ind, <2 x i1> %mask) {
; KNL_64-LABEL: test_scatter_2i32_index:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_64-NEXT: vpsllq $32, %xmm1, %xmm1
+; KNL_64-NEXT: vpsraq $32, %zmm1, %zmm1
; KNL_64-NEXT: vpsllq $63, %xmm2, %xmm2
; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k0
; KNL_64-NEXT: kshiftlw $14, %k0, %k0
; KNL_64-NEXT: kshiftrw $14, %k0, %k1
-; KNL_64-NEXT: vscatterdpd %zmm0, (%rdi,%ymm1,8) {%k1}
+; KNL_64-NEXT: vscatterqpd %zmm0, (%rdi,%zmm1,8) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_scatter_2i32_index:
; KNL_32: # %bb.0:
-; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_32-NEXT: vpsllq $32, %xmm1, %xmm1
+; KNL_32-NEXT: vpsraq $32, %zmm1, %zmm1
; KNL_32-NEXT: vpsllq $63, %xmm2, %xmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k0
; KNL_32-NEXT: kshiftlw $14, %k0, %k0
; KNL_32-NEXT: kshiftrw $14, %k0, %k1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vscatterdpd %zmm0, (%eax,%ymm1,8) {%k1}
+; KNL_32-NEXT: vscatterqpd %zmm0, (%eax,%zmm1,8) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
@@ -2709,15 +2754,19 @@ define void @test_scatter_2i32_index(<2 x double> %a1, double* %base, <2 x i32>
; SKX: # %bb.0:
; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX-NEXT: vpmovq2m %xmm2, %k1
-; SKX-NEXT: vscatterdpd %xmm0, (%rdi,%xmm1,8) {%k1}
+; SKX-NEXT: vpsllq $32, %xmm1, %xmm1
+; SKX-NEXT: vpsraq $32, %xmm1, %xmm1
+; SKX-NEXT: vscatterqpd %xmm0, (%rdi,%xmm1,8) {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_scatter_2i32_index:
; SKX_32: # %bb.0:
; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX_32-NEXT: vpmovq2m %xmm2, %k1
+; SKX_32-NEXT: vpsllq $32, %xmm1, %xmm1
+; SKX_32-NEXT: vpsraq $32, %xmm1, %xmm1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vscatterdpd %xmm0, (%eax,%xmm1,8) {%k1}
+; SKX_32-NEXT: vscatterqpd %xmm0, (%eax,%xmm1,8) {%k1}
; SKX_32-NEXT: retl
%gep = getelementptr double, double *%base, <2 x i32> %ind
call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %a1, <2 x double*> %gep, i32 4, <2 x i1> %mask)