Diffstat (limited to 'llvm/test/CodeGen/X86'):

-rw-r--r--  llvm/test/CodeGen/X86/avx512-bugfix-26264.ll       | 47
-rw-r--r--  llvm/test/CodeGen/X86/avx512-calling-conv.ll       |  7
-rw-r--r--  llvm/test/CodeGen/X86/avx512-ext.ll                | 58
-rw-r--r--  llvm/test/CodeGen/X86/avx512-mask-op.ll            | 10
-rw-r--r--  llvm/test/CodeGen/X86/avx512-skx-insert-subvec.ll  | 18
-rw-r--r--  llvm/test/CodeGen/X86/masked_gather_scatter.ll     | 84
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-v1.ll         | 32

7 files changed, 143 insertions(+), 113 deletions(-)
diff --git a/llvm/test/CodeGen/X86/avx512-bugfix-26264.ll b/llvm/test/CodeGen/X86/avx512-bugfix-26264.ll
new file mode 100644
index 00000000000..e37aa701e2d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512-bugfix-26264.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw < %s | FileCheck %s --check-prefix=AVX512BW
+
+define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 x double> %src0) {
+; AVX512BW-LABEL: test_load_32f64:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovb2m %zmm0, %k1
+; AVX512BW-NEXT: vmovupd (%rdi), %zmm1 {%k1}
+; AVX512BW-NEXT: kshiftrd $16, %k1, %k2
+; AVX512BW-NEXT: vmovupd 128(%rdi), %zmm3 {%k2}
+; AVX512BW-NEXT: kshiftrw $8, %k1, %k1
+; AVX512BW-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
+; AVX512BW-NEXT: kshiftrw $8, %k2, %k1
+; AVX512BW-NEXT: vmovupd 192(%rdi), %zmm4 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: vmovaps %zmm2, %zmm1
+; AVX512BW-NEXT: vmovaps %zmm3, %zmm2
+; AVX512BW-NEXT: vmovaps %zmm4, %zmm3
+; AVX512BW-NEXT: retq
+ %res = call <32 x double> @llvm.masked.load.v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
+ ret <32 x double> %res
+}
+
+define <32 x i64> @test_load_32i64(<32 x i64>* %ptrs, <32 x i1> %mask, <32 x i64> %src0) {
+; AVX512BW-LABEL: test_load_32i64:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovb2m %zmm0, %k1
+; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm1 {%k1}
+; AVX512BW-NEXT: kshiftrd $16, %k1, %k2
+; AVX512BW-NEXT: vmovdqu64 128(%rdi), %zmm3 {%k2}
+; AVX512BW-NEXT: kshiftrw $8, %k1, %k1
+; AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm2 {%k1}
+; AVX512BW-NEXT: kshiftrw $8, %k2, %k1
+; AVX512BW-NEXT: vmovdqu64 192(%rdi), %zmm4 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: vmovaps %zmm2, %zmm1
+; AVX512BW-NEXT: vmovaps %zmm3, %zmm2
+; AVX512BW-NEXT: vmovaps %zmm4, %zmm3
+; AVX512BW-NEXT: retq
+ %res = call <32 x i64> @llvm.masked.load.v32i64(<32 x i64>* %ptrs, i32 4, <32 x i1> %mask, <32 x i64> %src0)
+ ret <32 x i64> %res
+}
+
+declare <32 x i64> @llvm.masked.load.v32i64(<32 x i64>* %ptrs, i32, <32 x i1> %mask, <32 x i64> %src0)
+declare <32 x double> @llvm.masked.load.v32f64(<32 x double>* %ptrs, i32, <32 x i1> %mask, <32 x double> %src0)
diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
index 27bd19fac4c..518d32267d1 100644
--- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
@@ -102,11 +102,10 @@ define <4 x i1> @test4(<4 x i1>%a, <4 x i1>%b) {
;
; SKX-LABEL: test4:
; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm1, %xmm1
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k0
-; SKX-NEXT: vpslld $31, %xmm1, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k1
-; SKX-NEXT: kandw %k1, %k0, %k0
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
+; SKX-NEXT: vptestmd %xmm1, %xmm1, %k0 {%k1}
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index 90bba6c287b..ac4f429d247 100644
--- a/llvm/test/CodeGen/X86/avx512-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -314,7 +314,7 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re
; SKX-LABEL: zext_4x8mem_to_4x32:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovzxbd (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <4 x i8>,<4 x i8> *%i,align 1
@@ -335,7 +335,7 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re
; SKX-LABEL: sext_4x8mem_to_4x32:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <4 x i8>,<4 x i8> *%i,align 1
@@ -503,7 +503,7 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind re
; SKX-LABEL: zext_2x8mem_to_2x64:
; SKX: ## BB#0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT: vpmovq2m %xmm0, %k1
+; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovzxbq (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <2 x i8>,<2 x i8> *%i,align 1
@@ -524,7 +524,7 @@ define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwin
; SKX-LABEL: sext_2x8mem_to_2x64mask:
; SKX: ## BB#0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT: vpmovq2m %xmm0, %k1
+; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <2 x i8>,<2 x i8> *%i,align 1
@@ -555,7 +555,7 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind re
; SKX-LABEL: zext_4x8mem_to_4x64:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovzxbq (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <4 x i8>,<4 x i8> *%i,align 1
@@ -577,7 +577,7 @@ define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwin
; SKX-LABEL: sext_4x8mem_to_4x64mask:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <4 x i8>,<4 x i8> *%i,align 1
@@ -660,7 +660,7 @@ define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind
; SKX-LABEL: zext_4x16mem_to_4x32:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovzxwd (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <4 x i16>,<4 x i16> *%i,align 1
@@ -681,7 +681,7 @@ define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounw
; SKX-LABEL: sext_4x16mem_to_4x32mask:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <4 x i16>,<4 x i16> *%i,align 1
@@ -886,7 +886,7 @@ define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind
; SKX-LABEL: zext_2x16mem_to_2x64:
; SKX: ## BB#0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT: vpmovq2m %xmm0, %k1
+; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovzxwq (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <2 x i16>,<2 x i16> *%i,align 1
@@ -908,7 +908,7 @@ define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounw
; SKX-LABEL: sext_2x16mem_to_2x64mask:
; SKX: ## BB#0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT: vpmovq2m %xmm0, %k1
+; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <2 x i16>,<2 x i16> *%i,align 1
@@ -940,7 +940,7 @@ define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind
; SKX-LABEL: zext_4x16mem_to_4x64:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovzxwq (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <4 x i16>,<4 x i16> *%i,align 1
@@ -962,7 +962,7 @@ define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounw
; SKX-LABEL: sext_4x16mem_to_4x64mask:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <4 x i16>,<4 x i16> *%i,align 1
@@ -1075,7 +1075,7 @@ define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind
; SKX-LABEL: zext_2x32mem_to_2x64:
; SKX: ## BB#0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT: vpmovq2m %xmm0, %k1
+; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovzxdq (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <2 x i32>,<2 x i32> *%i,align 1
@@ -1097,7 +1097,7 @@ define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounw
; SKX-LABEL: sext_2x32mem_to_2x64mask:
; SKX: ## BB#0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT: vpmovq2m %xmm0, %k1
+; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <2 x i32>,<2 x i32> *%i,align 1
@@ -1129,7 +1129,7 @@ define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind
; SKX-LABEL: zext_4x32mem_to_4x64:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovzxdq (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <4 x i32>,<4 x i32> *%i,align 1
@@ -1151,7 +1151,7 @@ define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounw
; SKX-LABEL: sext_4x32mem_to_4x64mask:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
%a = load <4 x i32>,<4 x i32> *%i,align 1
@@ -1192,7 +1192,7 @@ define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind
; SKX-LABEL: zext_4x32_to_4x64mask:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
-; SKX-NEXT: vpmovd2m %xmm1, %k1
+; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z}
; SKX-NEXT: retq
%x = zext <4 x i32> %a to <4 x i64>
@@ -1347,19 +1347,12 @@ define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
}
define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
-; KNL-LABEL: trunc_16i32_to_16i1:
-; KNL: ## BB#0:
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: retq
-;
-; SKX-LABEL: trunc_16i32_to_16i1:
-; SKX: ## BB#0:
-; SKX-NEXT: vpslld $31, %zmm0, %zmm0
-; SKX-NEXT: vpmovd2m %zmm0, %k0
-; SKX-NEXT: kmovw %k0, %eax
-; SKX-NEXT: retq
+; ALL-LABEL: trunc_16i32_to_16i1:
+; ALL: ## BB#0:
+; ALL-NEXT: vpslld $31, %zmm0, %zmm0
+; ALL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; ALL-NEXT: kmovw %k0, %eax
+; ALL-NEXT: retq
%mask_b = trunc <16 x i32>%a to <16 x i1>
%mask = bitcast <16 x i1> %mask_b to i16
ret i16 %mask
@@ -1376,10 +1369,9 @@ define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
; SKX-LABEL: trunc_4i32_to_4i1:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k0
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
; SKX-NEXT: vpslld $31, %xmm1, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k1
-; SKX-NEXT: kandw %k1, %k0, %k0
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 {%k1}
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: retq
%mask_a = trunc <4 x i32>%a to <4 x i1>
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 6a3d1c20170..7ae6c8a0efa 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -354,7 +354,7 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
; SKX-NEXT: LBB17_1:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: LBB17_3:
-; SKX-NEXT: vpmovd2m %xmm0, %k0
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: retq
%mask = icmp sgt i32 %a1, %b1
@@ -1415,7 +1415,7 @@ define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
; SKX-LABEL: test22:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k0
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
store <4 x i1> %a, <4 x i1>* %addr
@@ -1436,7 +1436,7 @@ define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
; SKX-LABEL: test23:
; SKX: ## BB#0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT: vpmovq2m %xmm0, %k0
+; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
store <2 x i1> %a, <2 x i1>* %addr
@@ -1484,7 +1484,7 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
; SKX-LABEL: store_v2i1:
; SKX: ## BB#0:
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
-; SKX-NEXT: vpmovq2m %xmm0, %k0
+; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0
; SKX-NEXT: knotw %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
@@ -1515,7 +1515,7 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
; SKX-LABEL: store_v4i1:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vpmovd2m %xmm0, %k0
+; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT: knotw %k0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512-skx-insert-subvec.ll b/llvm/test/CodeGen/X86/avx512-skx-insert-subvec.ll
index c54010cd91b..8241b07d342 100644
--- a/llvm/test/CodeGen/X86/avx512-skx-insert-subvec.ll
+++ b/llvm/test/CodeGen/X86/avx512-skx-insert-subvec.ll
@@ -5,7 +5,7 @@ define <8 x i1> @test(<2 x i1> %a) {
; CHECK-LABEL: test:
; CHECK: # BB#0:
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-NEXT: vpmovq2m %xmm0, %k0
+; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0
; CHECK-NEXT: kshiftlb $2, %k0, %k0
; CHECK-NEXT: vpmovm2w %k0, %xmm0
; CHECK-NEXT: retq
@@ -17,7 +17,7 @@ define <8 x i1> @test1(<2 x i1> %a) {
; CHECK-LABEL: test1:
; CHECK: # BB#0:
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-NEXT: vpmovq2m %xmm0, %k0
+; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0
; CHECK-NEXT: kshiftlb $4, %k0, %k0
; CHECK-NEXT: vpmovm2w %k0, %xmm0
; CHECK-NEXT: retq
@@ -29,12 +29,12 @@ define <8 x i1> @test2(<2 x i1> %a) {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-NEXT: vpmovq2m %xmm0, %k0
+; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0
; CHECK-NEXT: vpmovm2q %k0, %zmm0
; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,0,1],zmm0[0,1,0,1]
; CHECK-NEXT: vpsllq $63, %zmm0, %zmm0
-; CHECK-NEXT: vpmovq2m %zmm0, %k0
+; CHECK-NEXT: vptestmq %zmm0, %zmm0, %k0
; CHECK-NEXT: vpmovm2w %k0, %xmm0
; CHECK-NEXT: retq
%res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <8 x i32> <i32 3, i32 3, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
@@ -45,7 +45,7 @@ define <8 x i1> @test3(<4 x i1> %a) {
; CHECK-LABEL: test3:
; CHECK: # BB#0:
; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT: vpmovd2m %xmm0, %k0
+; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0
; CHECK-NEXT: kshiftlb $4, %k0, %k0
; CHECK-NEXT: kshiftrb $4, %k0, %k0
; CHECK-NEXT: vpmovm2w %k0, %xmm0
@@ -59,7 +59,7 @@ define <8 x i1> @test4(<4 x i1> %a, <4 x i1>%b) {
; CHECK-LABEL: test4:
; CHECK: # BB#0:
; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT: vpmovd2m %xmm0, %k0
+; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0
; CHECK-NEXT: kshiftlb $4, %k0, %k0
; CHECK-NEXT: kshiftrb $4, %k0, %k1
; CHECK-NEXT: korb %k0, %k1, %k0
@@ -74,7 +74,7 @@ define <4 x i1> @test5(<2 x i1> %a, <2 x i1>%b) {
; CHECK-LABEL: test5:
; CHECK: # BB#0:
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-NEXT: vpmovq2m %xmm0, %k0
+; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0
; CHECK-NEXT: kshiftlw $2, %k0, %k0
; CHECK-NEXT: kshiftrw $2, %k0, %k1
; CHECK-NEXT: korw %k0, %k1, %k0
@@ -89,7 +89,7 @@ define <16 x i1> @test6(<2 x i1> %a, <2 x i1>%b) {
; CHECK-LABEL: test6:
; CHECK: # BB#0:
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
-; CHECK-NEXT: vpmovq2m %xmm0, %k0
+; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0
; CHECK-NEXT: kshiftlw $2, %k0, %k0
; CHECK-NEXT: kshiftrw $2, %k0, %k1
; CHECK-NEXT: korw %k0, %k1, %k0
@@ -105,7 +105,7 @@ define <32 x i1> @test7(<4 x i1> %a, <4 x i1>%b) {
; CHECK-LABEL: test7:
; CHECK: # BB#0:
; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT: vpmovd2m %xmm0, %k0
+; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0
; CHECK-NEXT: kshiftlb $4, %k0, %k0
; CHECK-NEXT: kshiftrb $4, %k0, %k1
; CHECK-NEXT: korb %k0, %k1, %k0
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index e08fb156378..9f8e819cad5 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -679,9 +679,8 @@ define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) {
; KNL_64-NEXT: vpxor %ymm2, %ymm2, %ymm2
; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm2
-; KNL_64-NEXT: vpmovsxdq %ymm1, %zmm0
-; KNL_64-NEXT: vpsllq $63, %zmm0, %zmm0
-; KNL_64-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL_64-NEXT: vpslld $31, %ymm1, %ymm0
+; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm0 {%k1}
; KNL_64-NEXT: retq
;
@@ -691,16 +690,15 @@ define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) {
; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm2
-; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm0
-; KNL_32-NEXT: vpsllvq .LCPI14_0, %zmm0, %zmm0
-; KNL_32-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL_32-NEXT: vpslld $31, %ymm1, %ymm0
+; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm0 {%k1}
; KNL_32-NEXT: retl
;
; SKX-LABEL: test15:
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
-; SKX-NEXT: vpmovd2m %xmm1, %k1
+; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
@@ -708,7 +706,7 @@ define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) {
; SKX_32-LABEL: test15:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1
-; SKX_32-NEXT: vpmovd2m %xmm1, %k1
+; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm1 {%k1}
; SKX_32-NEXT: vmovaps %zmm1, %zmm0
@@ -755,7 +753,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x
; SKX-LABEL: test16:
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
-; SKX-NEXT: vpmovd2m %xmm1, %k1
+; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX-NEXT: vgatherdpd (%rdi,%xmm0,8), %ymm2 {%k1}
; SKX-NEXT: vmovaps %zmm2, %zmm0
; SKX-NEXT: retq
@@ -763,7 +761,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x
; SKX_32-LABEL: test16:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1
-; SKX_32-NEXT: vpmovd2m %xmm1, %k1
+; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vgatherdpd (%eax,%xmm0,8), %ymm2 {%k1}
; SKX_32-NEXT: vmovaps %zmm2, %zmm0
@@ -801,7 +799,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
; SKX-LABEL: test17:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
-; SKX-NEXT: vpmovq2m %xmm1, %k1
+; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX-NEXT: vgatherqpd (%rdi,%xmm0,8), %xmm2 {%k1}
; SKX-NEXT: vmovaps %zmm2, %zmm0
; SKX-NEXT: retq
@@ -809,7 +807,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
; SKX_32-LABEL: test17:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
-; SKX_32-NEXT: vpmovq2m %xmm1, %k1
+; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vgatherqpd (%eax,%xmm0,8), %xmm2 {%k1}
; SKX_32-NEXT: vmovaps %zmm2, %zmm0
@@ -833,9 +831,8 @@ define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
; KNL_64: # BB#0:
; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
-; KNL_64-NEXT: vpmovsxdq %ymm2, %zmm2
-; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2
-; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k1
+; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2
+; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_64-NEXT: retq
;
@@ -844,23 +841,22 @@ define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1
-; KNL_32-NEXT: vpmovsxdq %ymm2, %zmm2
-; KNL_32-NEXT: vpsllvq .LCPI17_0, %zmm2, %zmm2
-; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
+; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2
+; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_32-NEXT: retl
;
; SKX-LABEL: test18:
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm2, %xmm2
-; SKX-NEXT: vpmovd2m %xmm2, %k1
+; SKX-NEXT: vptestmd %xmm2, %xmm2, %k1
; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: test18:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2
-; SKX_32-NEXT: vpmovd2m %xmm2, %k1
+; SKX_32-NEXT: vptestmd %xmm2, %xmm2, %k1
; SKX_32-NEXT: vpscatterdd %xmm0, (,%xmm1) {%k1}
; SKX_32-NEXT: retl
call void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
@@ -897,14 +893,14 @@ define void @test19(<4 x double>%a1, double* %ptr, <4 x i1>%mask, <4 x i64> %ind
; SKX-LABEL: test19:
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
-; SKX-NEXT: vpmovd2m %xmm1, %k1
+; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX-NEXT: vscatterqpd %ymm0, (%rdi,%ymm2,8) {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: test19:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1
-; SKX_32-NEXT: vpmovd2m %xmm1, %k1
+; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vscatterqpd %ymm0, (%eax,%ymm2,8) {%k1}
; SKX_32-NEXT: retl
@@ -922,9 +918,8 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
; KNL_64-NEXT: vmovq {{.*#+}} xmm2 = xmm2[0],zero
; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
-; KNL_64-NEXT: vpmovsxdq %ymm2, %zmm2
-; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2
-; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k1
+; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2
+; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: vscatterqps %ymm0, (,%zmm1) {%k1}
; KNL_64-NEXT: retq
;
@@ -936,16 +931,15 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1
-; KNL_32-NEXT: vpmovsxdq %ymm2, %zmm2
-; KNL_32-NEXT: vpsllvq .LCPI19_0, %zmm2, %zmm2
-; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
+; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2
+; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_32-NEXT: vscatterqps %ymm0, (,%zmm1) {%k1}
; KNL_32-NEXT: retl
;
; SKX-LABEL: test20:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
-; SKX-NEXT: vpmovq2m %xmm2, %k0
+; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX-NEXT: kshiftlw $2, %k0, %k0
; SKX-NEXT: kshiftrw $2, %k0, %k1
; SKX-NEXT: vscatterqps %xmm0, (,%ymm1) {%k1}
@@ -955,7 +949,7 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
; SKX_32: # BB#0:
; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
-; SKX_32-NEXT: vpmovq2m %xmm2, %k0
+; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX_32-NEXT: kshiftlw $2, %k0, %k0
; SKX_32-NEXT: kshiftrw $2, %k0, %k1
; SKX_32-NEXT: vscatterdps %xmm0, (,%xmm1) {%k1}
@@ -990,7 +984,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
; SKX-LABEL: test21:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
-; SKX-NEXT: vpmovq2m %xmm2, %k0
+; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX-NEXT: kshiftlw $2, %k0, %k0
; SKX-NEXT: kshiftrw $2, %k0, %k1
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -1000,7 +994,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
; SKX_32-LABEL: test21:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
-; SKX_32-NEXT: vpmovq2m %xmm2, %k0
+; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX_32-NEXT: kshiftlw $2, %k0, %k0
; SKX_32-NEXT: kshiftrw $2, %k0, %k1
; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -1024,9 +1018,8 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
-; KNL_64-NEXT: vpmovsxdq %ymm1, %zmm1
-; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
-; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1
+; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
; KNL_64-NEXT: vmovaps %zmm2, %zmm0
; KNL_64-NEXT: retq
@@ -1040,9 +1033,8 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
-; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1
-; KNL_32-NEXT: vpsllvq .LCPI21_0, %zmm1, %zmm1
-; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1
+; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
; KNL_32-NEXT: vmovaps %zmm2, %zmm0
; KNL_32-NEXT: retl
@@ -1051,7 +1043,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
; SKX: # BB#0:
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
-; SKX-NEXT: vpmovq2m %xmm1, %k0
+; SKX-NEXT: vptestmq %xmm1, %xmm1, %k0
; SKX-NEXT: kshiftlw $2, %k0, %k0
; SKX-NEXT: kshiftrw $2, %k0, %k1
; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm2 {%k1}
@@ -1062,7 +1054,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
; SKX_32: # BB#0:
; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
-; SKX_32-NEXT: vpmovq2m %xmm1, %k0
+; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k0
; SKX_32-NEXT: kshiftlw $2, %k0, %k0
; SKX_32-NEXT: kshiftrw $2, %k0, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1104,7 +1096,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
; SKX-LABEL: test23:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
-; SKX-NEXT: vpmovq2m %xmm1, %k1
+; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1}
; SKX-NEXT: vmovaps %zmm2, %zmm0
; SKX-NEXT: retq
@@ -1112,7 +1104,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
; SKX_32-LABEL: test23:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
-; SKX_32-NEXT: vpmovq2m %xmm1, %k1
+; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1}
; SKX_32-NEXT: vmovaps %zmm2, %zmm0
@@ -1189,7 +1181,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
; SKX-LABEL: test25:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
-; SKX-NEXT: vpmovq2m %xmm1, %k1
+; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1}
; SKX-NEXT: vmovaps %zmm2, %zmm0
; SKX-NEXT: retq
@@ -1197,7 +1189,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
; SKX_32-LABEL: test25:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
-; SKX_32-NEXT: vpmovq2m %xmm1, %k1
+; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1}
; SKX_32-NEXT: vmovaps %zmm2, %zmm0
@@ -1468,7 +1460,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; SKX-LABEL: test30:
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm2, %xmm2
-; SKX-NEXT: vpmovd2m %xmm2, %k1
+; SKX-NEXT: vptestmd %xmm2, %xmm2, %k1
; SKX-NEXT: kmovb %k1, -{{[0-9]+}}(%rsp)
; SKX-NEXT: vpmovsxdq %xmm1, %ymm1
; SKX-NEXT: vpsllq $2, %ymm1, %ymm1
@@ -1508,7 +1500,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; SKX_32-NEXT: .Ltmp0:
; SKX_32-NEXT: .cfi_def_cfa_offset 16
; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2
-; SKX_32-NEXT: vpmovd2m %xmm2, %k1
+; SKX_32-NEXT: vptestmd %xmm2, %xmm2, %k1
; SKX_32-NEXT: kmovb %k1, {{[0-9]+}}(%esp)
; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
index b18d7829e9a..f9ad5a4cc45 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -13,11 +13,11 @@ define <2 x i1> @shuf2i1_1_0(<2 x i1> %a) {
; VL_BW_DQ-LABEL: shuf2i1_1_0:
; VL_BW_DQ: # BB#0:
; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0
-; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0
+; VL_BW_DQ-NEXT: vptestmq %xmm0, %xmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0
; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0
-; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0
+; VL_BW_DQ-NEXT: vptestmq %xmm0, %xmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0
; VL_BW_DQ-NEXT: retq
%b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 0>
@@ -35,14 +35,14 @@ define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) {
; VL_BW_DQ-LABEL: shuf2i1_1_2:
; VL_BW_DQ: # BB#0:
; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0
-; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0
+; VL_BW_DQ-NEXT: vptestmq %xmm0, %xmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0
; VL_BW_DQ-NEXT: movb $1, %al
; VL_BW_DQ-NEXT: kmovb %eax, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm1
; VL_BW_DQ-NEXT: vpalignr $8, %xmm0, %xmm1, %xmm0
; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0
-; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0
+; VL_BW_DQ-NEXT: vptestmq %xmm0, %xmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0
; VL_BW_DQ-NEXT: retq
%b = shufflevector <2 x i1> %a, <2 x i1> <i1 1, i1 0>, <2 x i32> <i32 1, i32 2>
@@ -59,11 +59,11 @@ define <4 x i1> @shuf4i1_3_2_10(<4 x i1> %a) {
; VL_BW_DQ-LABEL: shuf4i1_3_2_10:
; VL_BW_DQ: # BB#0:
; VL_BW_DQ-NEXT: vpslld $31, %xmm0, %xmm0
-; VL_BW_DQ-NEXT: vpmovd2m %xmm0, %k0
+; VL_BW_DQ-NEXT: vptestmd %xmm0, %xmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %xmm0
; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; VL_BW_DQ-NEXT: vpslld $31, %xmm0, %xmm0
-; VL_BW_DQ-NEXT: vpmovd2m %xmm0, %k0
+; VL_BW_DQ-NEXT: vptestmd %xmm0, %xmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %xmm0
; VL_BW_DQ-NEXT: retq
%b = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -91,7 +91,7 @@ define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0(<8 x i64> %a, <8 x i64> %b, <8 x i64> %
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
; VL_BW_DQ-NEXT: vpermq %zmm0, %zmm1, %zmm0
; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
-; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %xmm0
; VL_BW_DQ-NEXT: retq
%a2 = icmp eq <8 x i64> %a, %a1
@@ -125,7 +125,7 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<16 x i32> %a, <1
; VL_BW_DQ-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
; VL_BW_DQ-NEXT: vpermt2d %zmm0, %zmm2, %zmm1
; VL_BW_DQ-NEXT: vpslld $31, %zmm1, %zmm0
-; VL_BW_DQ-NEXT: vpmovd2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2b %k0, %xmm0
; VL_BW_DQ-NEXT: retq
%a2 = icmp eq <16 x i32> %a, %a1
@@ -180,7 +180,7 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u(i8 %a) {
; VL_BW_DQ-NEXT: vextracti64x2 $1, %zmm0, %xmm0
; VL_BW_DQ-NEXT: vpbroadcastq %xmm0, %zmm0
; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
-; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %xmm0
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
@@ -209,7 +209,7 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %a) {
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
; VL_BW_DQ-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
-; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
@@ -235,7 +235,7 @@ define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a) {
; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
; VL_BW_DQ-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5,0,1,0,1]
; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
-; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
@@ -265,7 +265,7 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a) {
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
; VL_BW_DQ-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
-; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
@@ -295,7 +295,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %a) {
; VL_BW_DQ-NEXT: vpxord %zmm2, %zmm2, %zmm2
; VL_BW_DQ-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
; VL_BW_DQ-NEXT: vpsllq $63, %zmm2, %zmm0
-; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
@@ -330,7 +330,7 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
; VL_BW_DQ-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
-; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
@@ -363,7 +363,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
; VL_BW_DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm2
; VL_BW_DQ-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
; VL_BW_DQ-NEXT: vpsllq $63, %zmm2, %zmm0
-; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: retq
%c = shufflevector <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1> %a, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 0>
@@ -389,7 +389,7 @@ define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0(i16 %a) {
; VL_BW_DQ-NEXT: vpmovm2d %k0, %zmm0
; VL_BW_DQ-NEXT: vpbroadcastd %xmm0, %zmm0
; VL_BW_DQ-NEXT: vpslld $31, %zmm0, %zmm0
-; VL_BW_DQ-NEXT: vpmovd2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovw %k0, %eax
; VL_BW_DQ-NEXT: retq
%b = bitcast i16 %a to <16 x i1>