summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/X86/avx512-ext.ll (renamed from llvm/test/CodeGen/X86/avx512-trunc-ext.ll)26
-rw-r--r--llvm/test/CodeGen/X86/avx512-intrinsics.ll390
-rw-r--r--llvm/test/CodeGen/X86/avx512-trunc.ll364
-rw-r--r--llvm/test/CodeGen/X86/avx512bw-intrinsics.ll78
-rw-r--r--llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll156
-rw-r--r--llvm/test/CodeGen/X86/avx512vl-intrinsics.ll780
-rw-r--r--llvm/test/CodeGen/X86/masked_memop.ll11
-rw-r--r--llvm/test/MC/X86/x86-64-avx512bw.s120
-rw-r--r--llvm/test/MC/X86/x86-64-avx512bw_vl.s480
-rw-r--r--llvm/test/MC/X86/x86-64-avx512f_vl.s1200
10 files changed, 3576 insertions, 29 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-trunc-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index f25458972e4..aa1dd4928c3 100644
--- a/llvm/test/CodeGen/X86/avx512-trunc-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -1,24 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
-
-; KNL-LABEL: trunc_16x32_to_16x8
-; KNL: vpmovdb
-; KNL: ret
-define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone {
- %x = trunc <16 x i32> %i to <16 x i8>
- ret <16 x i8> %x
-}
-
-; KNL-LABEL: trunc_8x64_to_8x16
-; KNL: vpmovqw
-; KNL: ret
-define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone {
- %x = trunc <8 x i64> %i to <8 x i16>
- ret <8 x i16> %x
-}
-
-;SKX-LABEL: zext_8x8mem_to_8x16:
+ ;SKX-LABEL: zext_8x8mem_to_8x16:
;SKX: ## BB#0:
;SKX-NEXT: vpmovw2m %xmm0, %k1
;SKX-NEXT: vpmovzxbw (%rdi), %xmm0 {%k1} {z}
@@ -895,13 +878,6 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
ret <8 x i32> %y
}
-; KNL-LABEL: trunc_v16i32_to_v16i16
-; KNL: vpmovdw
-; KNL: ret
-define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) {
- %1 = trunc <16 x i32> %x to <16 x i16>
- ret <16 x i16> %1
-}
; KNL-LABEL: trunc_i32_to_i1
; KNL: movw $-4, %ax
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 6e50fda7467..7c30063ce28 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -3119,6 +3119,396 @@ define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16
ret <16 x float> %res2
}
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512:
+; CHECK: vpmovqb %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqb %zmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512:
+; CHECK: vpmovqb %zmm0, (%rdi)
+; CHECK: vpmovqb %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512:
+; CHECK: vpmovsqb %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqb %zmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512:
+; CHECK: vpmovsqb %zmm0, (%rdi)
+; CHECK: vpmovsqb %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512:
+; CHECK: vpmovusqb %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqb %zmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512:
+; CHECK: vpmovusqb %zmm0, (%rdi)
+; CHECK: vpmovusqb %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512:
+; CHECK: vpmovqw %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqw %zmm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512:
+; CHECK: vpmovqw %zmm0, (%rdi)
+; CHECK: vpmovqw %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512:
+; CHECK: vpmovsqw %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqw %zmm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512:
+; CHECK: vpmovsqw %zmm0, (%rdi)
+; CHECK: vpmovsqw %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512:
+; CHECK: vpmovusqw %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqw %zmm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
+; CHECK: vpmovusqw %zmm0, (%rdi)
+; CHECK: vpmovusqw %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
+; CHECK: vpmovqd %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovqd %zmm0, %ymm0
+ %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i32> %res0, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512:
+; CHECK: vpmovqd %zmm0, (%rdi)
+; CHECK: vpmovqd %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512:
+; CHECK: vpmovsqd %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqd %zmm0, %ymm0
+ %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i32> %res0, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
+; CHECK: vpmovsqd %zmm0, (%rdi)
+; CHECK: vpmovsqd %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512:
+; CHECK: vpmovusqd %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqd %zmm0, %ymm0
+ %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i32> %res0, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
+; CHECK: vpmovusqd %zmm0, (%rdi)
+; CHECK: vpmovusqd %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512:
+; CHECK: vpmovdb %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovdb %zmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512:
+; CHECK: vpmovdb %zmm0, (%rdi)
+; CHECK: vpmovdb %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512:
+; CHECK: vpmovsdb %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsdb %zmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
+; CHECK: vpmovsdb %zmm0, (%rdi)
+; CHECK: vpmovsdb %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512:
+; CHECK: vpmovusdb %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
+; CHECK: vpmovusdb %zmm0, (%rdi)
+; CHECK: vpmovusdb %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512:
+; CHECK: vpmovdw %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovdw %zmm0, %ymm0
+ %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i16> %res0, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512:
+; CHECK: vpmovdw %zmm0, (%rdi)
+; CHECK: vpmovdw %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512:
+; CHECK: vpmovsdw %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsdw %zmm0, %ymm0
+ %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i16> %res0, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
+; CHECK: vpmovsdw %zmm0, (%rdi)
+; CHECK: vpmovsdw %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512:
+; CHECK: vpmovusdw %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovusdw %zmm0, %ymm0
+ %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i16> %res0, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
+; CHECK: vpmovusdw %zmm0, (%rdi)
+; CHECK: vpmovusdw %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
diff --git a/llvm/test/CodeGen/X86/avx512-trunc.ll b/llvm/test/CodeGen/X86/avx512-trunc.ll
new file mode 100644
index 00000000000..9205feda7eb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512-trunc.ll
@@ -0,0 +1,364 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
+
+ attributes #0 = { nounwind }
+
+; KNL-LABEL: trunc_16x32_to_16x8
+; KNL: vpmovdb
+; KNL: ret
+define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) #0 {
+ %x = trunc <16 x i32> %i to <16 x i8>
+ ret <16 x i8> %x
+}
+
+; KNL-LABEL: trunc_8x64_to_8x16
+; KNL: vpmovqw
+; KNL: ret
+define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) #0 {
+ %x = trunc <8 x i64> %i to <8 x i16>
+ ret <8 x i16> %x
+}
+
+; KNL-LABEL: trunc_v16i32_to_v16i16
+; KNL: vpmovdw
+; KNL: ret
+define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 {
+ %1 = trunc <16 x i32> %x to <16 x i16>
+ ret <16 x i16> %1
+}
+
+define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 {
+; SKX-LABEL: trunc_qb_512:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqw %zmm0, %xmm0
+; SKX-NEXT: retq
+ %x = trunc <8 x i64> %i to <8 x i8>
+ ret <8 x i8> %x
+}
+
+define void @trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) #0 {
+; SKX-LABEL: trunc_qb_512_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqb %zmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <8 x i64> %i to <8 x i8>
+ store <8 x i8> %x, <8 x i8>* %res
+ ret void
+}
+
+define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
+; SKX-LABEL: trunc_qb_256:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqd %ymm0, %xmm0
+; SKX-NEXT: retq
+ %x = trunc <4 x i64> %i to <4 x i8>
+ ret <4 x i8> %x
+}
+
+define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
+; SKX-LABEL: trunc_qb_256_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqb %ymm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <4 x i64> %i to <4 x i8>
+ store <4 x i8> %x, <4 x i8>* %res
+ ret void
+}
+
+define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 {
+; SKX-LABEL: trunc_qb_128:
+; SKX: ## BB#0:
+; SKX-NEXT: retq
+ %x = trunc <2 x i64> %i to <2 x i8>
+ ret <2 x i8> %x
+}
+
+define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 {
+; SKX-LABEL: trunc_qb_128_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqb %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <2 x i64> %i to <2 x i8>
+ store <2 x i8> %x, <2 x i8>* %res
+ ret void
+}
+
+define <8 x i16> @trunc_qw_512(<8 x i64> %i) #0 {
+; SKX-LABEL: trunc_qw_512:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqw %zmm0, %xmm0
+; SKX-NEXT: retq
+ %x = trunc <8 x i64> %i to <8 x i16>
+ ret <8 x i16> %x
+}
+
+define void @trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) #0 {
+; SKX-LABEL: trunc_qw_512_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqw %zmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <8 x i64> %i to <8 x i16>
+ store <8 x i16> %x, <8 x i16>* %res
+ ret void
+}
+
+define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
+; SKX-LABEL: trunc_qw_256:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqd %ymm0, %xmm0
+; SKX-NEXT: retq
+ %x = trunc <4 x i64> %i to <4 x i16>
+ ret <4 x i16> %x
+}
+
+define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
+; SKX-LABEL: trunc_qw_256_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqw %ymm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <4 x i64> %i to <4 x i16>
+ store <4 x i16> %x, <4 x i16>* %res
+ ret void
+}
+
+define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 {
+; SKX-LABEL: trunc_qw_128:
+; SKX: ## BB#0:
+; SKX-NEXT: retq
+ %x = trunc <2 x i64> %i to <2 x i16>
+ ret <2 x i16> %x
+}
+
+define void @trunc_qw_128_mem(<2 x i64> %i, <2 x i16>* %res) #0 {
+; SKX-LABEL: trunc_qw_128_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqw %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <2 x i64> %i to <2 x i16>
+ store <2 x i16> %x, <2 x i16>* %res
+ ret void
+}
+
+define <8 x i32> @trunc_qd_512(<8 x i64> %i) #0 {
+; SKX-LABEL: trunc_qd_512:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqd %zmm0, %ymm0
+; SKX-NEXT: retq
+ %x = trunc <8 x i64> %i to <8 x i32>
+ ret <8 x i32> %x
+}
+
+define void @trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) #0 {
+; SKX-LABEL: trunc_qd_512_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqd %zmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <8 x i64> %i to <8 x i32>
+ store <8 x i32> %x, <8 x i32>* %res
+ ret void
+}
+
+define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
+; SKX-LABEL: trunc_qd_256:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqd %ymm0, %xmm0
+; SKX-NEXT: retq
+ %x = trunc <4 x i64> %i to <4 x i32>
+ ret <4 x i32> %x
+}
+
+define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 {
+; SKX-LABEL: trunc_qd_256_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqd %ymm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <4 x i64> %i to <4 x i32>
+ store <4 x i32> %x, <4 x i32>* %res
+ ret void
+}
+
+define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
+; SKX-LABEL: trunc_qd_128:
+; SKX: ## BB#0:
+; SKX-NEXT: retq
+ %x = trunc <2 x i64> %i to <2 x i32>
+ ret <2 x i32> %x
+}
+
+define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 {
+; SKX-LABEL: trunc_qd_128_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqd %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <2 x i64> %i to <2 x i32>
+ store <2 x i32> %x, <2 x i32>* %res
+ ret void
+}
+
+define <16 x i8> @trunc_db_512(<16 x i32> %i) #0 {
+; SKX-LABEL: trunc_db_512:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdb %zmm0, %xmm0
+; SKX-NEXT: retq
+ %x = trunc <16 x i32> %i to <16 x i8>
+ ret <16 x i8> %x
+}
+
+define void @trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) #0 {
+; SKX-LABEL: trunc_db_512_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdb %zmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <16 x i32> %i to <16 x i8>
+ store <16 x i8> %x, <16 x i8>* %res
+ ret void
+}
+
+define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
+; SKX-LABEL: trunc_db_256:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdw %ymm0, %xmm0
+; SKX-NEXT: retq
+ %x = trunc <8 x i32> %i to <8 x i8>
+ ret <8 x i8> %x
+}
+
+define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
+; SKX-LABEL: trunc_db_256_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdb %ymm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <8 x i32> %i to <8 x i8>
+ store <8 x i8> %x, <8 x i8>* %res
+ ret void
+}
+
+define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 {
+; SKX-LABEL: trunc_db_128:
+; SKX: ## BB#0:
+; SKX-NEXT: retq
+ %x = trunc <4 x i32> %i to <4 x i8>
+ ret <4 x i8> %x
+}
+
+define void @trunc_db_128_mem(<4 x i32> %i, <4 x i8>* %res) #0 {
+; SKX-LABEL: trunc_db_128_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdb %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <4 x i32> %i to <4 x i8>
+ store <4 x i8> %x, <4 x i8>* %res
+ ret void
+}
+
+define <16 x i16> @trunc_dw_512(<16 x i32> %i) #0 {
+; SKX-LABEL: trunc_dw_512:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdw %zmm0, %ymm0
+; SKX-NEXT: retq
+ %x = trunc <16 x i32> %i to <16 x i16>
+ ret <16 x i16> %x
+}
+
+define void @trunc_dw_512_mem(<16 x i32> %i, <16 x i16>* %res) #0 {
+; SKX-LABEL: trunc_dw_512_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdw %zmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <16 x i32> %i to <16 x i16>
+ store <16 x i16> %x, <16 x i16>* %res
+ ret void
+}
+
+define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
+; SKX-LABEL: trunc_dw_256:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdw %ymm0, %xmm0
+; SKX-NEXT: retq
+ %x = trunc <8 x i32> %i to <8 x i16>
+ ret <8 x i16> %x
+}
+
+define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 {
+; SKX-LABEL: trunc_dw_256_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdw %ymm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <8 x i32> %i to <8 x i16>
+ store <8 x i16> %x, <8 x i16>* %res
+ ret void
+}
+
+define <4 x i16> @trunc_dw_128(<4 x i32> %i) #0 {
+; SKX-LABEL: trunc_dw_128:
+; SKX: ## BB#0:
+; SKX-NEXT: retq
+ %x = trunc <4 x i32> %i to <4 x i16>
+ ret <4 x i16> %x
+}
+
+define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 {
+; SKX-LABEL: trunc_dw_128_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdw %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <4 x i32> %i to <4 x i16>
+ store <4 x i16> %x, <4 x i16>* %res
+ ret void
+}
+
+define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
+; SKX-LABEL: trunc_wb_512:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovwb %zmm0, %ymm0
+; SKX-NEXT: retq
+ %x = trunc <32 x i16> %i to <32 x i8>
+ ret <32 x i8> %x
+}
+
+define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
+; SKX-LABEL: trunc_wb_512_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovwb %zmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <32 x i16> %i to <32 x i8>
+ store <32 x i8> %x, <32 x i8>* %res
+ ret void
+}
+
+define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 {
+; SKX-LABEL: trunc_wb_256:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovwb %ymm0, %xmm0
+; SKX-NEXT: retq
+ %x = trunc <16 x i16> %i to <16 x i8>
+ ret <16 x i8> %x
+}
+
+define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 {
+; SKX-LABEL: trunc_wb_256_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovwb %ymm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <16 x i16> %i to <16 x i8>
+ store <16 x i8> %x, <16 x i8>* %res
+ ret void
+}
+
+define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 {
+; SKX-LABEL: trunc_wb_128:
+; SKX: ## BB#0:
+; SKX-NEXT: retq
+ %x = trunc <8 x i16> %i to <8 x i8>
+ ret <8 x i8> %x
+}
+
+define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 {
+; SKX-LABEL: trunc_wb_128_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovwb %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <8 x i16> %i to <8 x i8>
+ store <8 x i8> %x, <8 x i8>* %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
index b2b417df2f1..5ad28ab5ab5 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -1008,6 +1008,84 @@ define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i
ret <32 x i16> %res2
}
+declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
+; CHECK: vpmovwb %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovwb %zmm0, %ymm0
+ %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
+ %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
+ %res3 = add <32 x i8> %res0, %res1
+ %res4 = add <32 x i8> %res3, %res2
+ ret <32 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)
+
+define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
+; CHECK: vpmovwb %zmm0, (%rdi)
+; CHECK: vpmovwb %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
+ call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
+ ret void
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
+; CHECK: vpmovswb %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovswb %zmm0, %ymm0
+ %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
+ %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
+ %res3 = add <32 x i8> %res0, %res1
+ %res4 = add <32 x i8> %res3, %res2
+ ret <32 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
+
+define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
+; CHECK: vpmovswb %zmm0, (%rdi)
+; CHECK: vpmovswb %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
+ call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
+ ret void
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
+; CHECK: vpmovuswb %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovuswb %zmm0, %ymm0
+ %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
+ %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
+ %res3 = add <32 x i8> %res0, %res1
+ %res4 = add <32 x i8> %res3, %res2
+ ret <32 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
+
+define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
+; CHECK: vpmovuswb %zmm0, (%rdi)
+; CHECK: vpmovuswb %zmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
+ call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
+ ret void
+}
+
declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index 2373dc089ae..ee76ae2a8a3 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -3876,6 +3876,162 @@ define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i
ret <16 x i16> %res2
}
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_128:
+; CHECK: vpmovwb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovwb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128:
+; CHECK: vpmovwb %xmm0, (%rdi)
+; CHECK: vpmovwb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_128:
+; CHECK: vpmovswb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovswb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128:
+; CHECK: vpmovswb %xmm0, (%rdi)
+; CHECK: vpmovswb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_128:
+; CHECK: vpmovuswb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovuswb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128:
+; CHECK: vpmovuswb %xmm0, (%rdi)
+; CHECK: vpmovuswb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
+; CHECK: vpmovwb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovwb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovwb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16>, i16)
+
+define void @test_int_x86_avx512_mask_pmov_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256:
+; CHECK: vpmovwb %ymm0, (%rdi)
+; CHECK: vpmovwb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_256:
+; CHECK: vpmovswb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovswb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovswb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16)
+
+define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256:
+; CHECK: vpmovswb %ymm0, (%rdi)
+; CHECK: vpmovswb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_256:
+; CHECK: vpmovuswb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovuswb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovuswb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16)
+
+define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
+; CHECK: vpmovuswb %ymm0, (%rdi)
+; CHECK: vpmovuswb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
+ ret void
+}
+
declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8)
define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index 46ee51f47b6..7812148de1c 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -3005,6 +3005,786 @@ define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x
ret <8 x float> %res2
}
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
+; CHECK: vpmovqb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128:
+; CHECK: vpmovqb %xmm0, (%rdi)
+; CHECK: vpmovqb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
+; CHECK: vpmovsqb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128:
+; CHECK: vpmovsqb %xmm0, (%rdi)
+; CHECK: vpmovsqb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
+; CHECK: vpmovusqb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128:
+; CHECK: vpmovusqb %xmm0, (%rdi)
+; CHECK: vpmovusqb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
+; CHECK: vpmovqb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256:
+; CHECK: vpmovqb %ymm0, (%rdi)
+; CHECK: vpmovqb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
+; CHECK: vpmovsqb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256:
+; CHECK: vpmovsqb %ymm0, (%rdi)
+; CHECK: vpmovsqb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
+; CHECK: vpmovusqb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256:
+; CHECK: vpmovusqb %ymm0, (%rdi)
+; CHECK: vpmovusqb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_128:
+; CHECK: vpmovqw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqw %xmm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128:
+; CHECK: vpmovqw %xmm0, (%rdi)
+; CHECK: vpmovqw %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_128:
+; CHECK: vpmovsqw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqw %xmm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128:
+; CHECK: vpmovsqw %xmm0, (%rdi)
+; CHECK: vpmovsqw %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_128:
+; CHECK: vpmovusqw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqw %xmm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128:
+; CHECK: vpmovusqw %xmm0, (%rdi)
+; CHECK: vpmovusqw %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_256:
+; CHECK: vpmovqw %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqw %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqw %ymm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256:
+; CHECK: vpmovqw %ymm0, (%rdi)
+; CHECK: vpmovqw %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_256:
+; CHECK: vpmovsqw %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqw %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqw %ymm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256:
+; CHECK: vpmovsqw %ymm0, (%rdi)
+; CHECK: vpmovsqw %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_256:
+; CHECK: vpmovusqw %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqw %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqw %ymm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256:
+; CHECK: vpmovusqw %ymm0, (%rdi)
+; CHECK: vpmovusqw %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_128:
+; CHECK: vpmovqd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqd %xmm0, %xmm0
+ %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <4 x i32> %res0, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128:
+; CHECK: vpmovqd %xmm0, (%rdi)
+; CHECK: vpmovqd %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_128:
+; CHECK: vpmovsqd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqd %xmm0, %xmm0
+ %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <4 x i32> %res0, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128:
+; CHECK: vpmovsqd %xmm0, (%rdi)
+; CHECK: vpmovsqd %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_128:
+; CHECK: vpmovusqd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqd %xmm0, %xmm0
+ %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <4 x i32> %res0, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128:
+; CHECK: vpmovusqd %xmm0, (%rdi)
+; CHECK: vpmovusqd %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
+; CHECK: vpmovqd %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqd %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqd %ymm0, %xmm0
+ %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <4 x i32> %res0, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256:
+; CHECK: vpmovqd %ymm0, (%rdi)
+; CHECK: vpmovqd %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_256:
+; CHECK: vpmovsqd %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqd %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqd %ymm0, %xmm0
+ %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <4 x i32> %res0, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256:
+; CHECK: vpmovsqd %ymm0, (%rdi)
+; CHECK: vpmovsqd %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_256:
+; CHECK: vpmovusqd %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqd %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqd %ymm0, %xmm0
+ %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <4 x i32> %res0, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256:
+; CHECK: vpmovusqd %ymm0, (%rdi)
+; CHECK: vpmovusqd %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_128:
+; CHECK: vpmovdb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovdb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128:
+; CHECK: vpmovdb %xmm0, (%rdi)
+; CHECK: vpmovdb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
+; CHECK: vpmovsdb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsdb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128:
+; CHECK: vpmovsdb %xmm0, (%rdi)
+; CHECK: vpmovsdb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
+; CHECK: vpmovusdb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusdb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128:
+; CHECK: vpmovusdb %xmm0, (%rdi)
+; CHECK: vpmovusdb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_256:
+; CHECK: vpmovdb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovdb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256:
+; CHECK: vpmovdb %ymm0, (%rdi)
+; CHECK: vpmovdb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
+; CHECK: vpmovsdb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsdb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256:
+; CHECK: vpmovsdb %ymm0, (%rdi)
+; CHECK: vpmovsdb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
+; CHECK: vpmovusdb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusdb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256:
+; CHECK: vpmovusdb %ymm0, (%rdi)
+; CHECK: vpmovusdb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_128:
+; CHECK: vpmovdw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovdw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovdw %xmm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128:
+; CHECK: vpmovdw %xmm0, (%rdi)
+; CHECK: vpmovdw %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_128:
+; CHECK: vpmovsdw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsdw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsdw %xmm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128:
+; CHECK: vpmovsdw %xmm0, (%rdi)
+; CHECK: vpmovsdw %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_128:
+; CHECK: vpmovusdw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusdw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusdw %xmm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128:
+; CHECK: vpmovusdw %xmm0, (%rdi)
+; CHECK: vpmovusdw %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_256:
+; CHECK: vpmovdw %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovdw %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovdw %ymm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256:
+; CHECK: vpmovdw %ymm0, (%rdi)
+; CHECK: vpmovdw %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_256:
+; CHECK: vpmovsdw %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsdw %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsdw %ymm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256:
+; CHECK: vpmovsdw %ymm0, (%rdi)
+; CHECK: vpmovsdw %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_256:
+; CHECK: vpmovusdw %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusdw %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusdw %ymm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256:
+; CHECK: vpmovusdw %ymm0, (%rdi)
+; CHECK: vpmovusdw %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
+ ret void
+}
+
declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8)
define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll
index 6c16e634a59..f51d4fa103e 100644
--- a/llvm/test/CodeGen/X86/masked_memop.ll
+++ b/llvm/test/CodeGen/X86/masked_memop.ll
@@ -190,10 +190,13 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
; AVX2-LABEL: test15
; AVX2: vpmaskmovd
-; SKX-LABEL: test15
-; SKX: kshiftl
-; SKX: kshiftr
-; SKX: vmovdqu32 {{.*}}{%k1}
+; SKX-LABEL: test15:
+; SKX: ## BB#0:
+; SKX-NEXT: vpandq {{.*}}(%rip), %xmm0, %xmm0
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
+; SKX-NEXT: vpmovqd %xmm1, (%rdi) {%k1}
+; SKX-NEXT: retq
define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
diff --git a/llvm/test/MC/X86/x86-64-avx512bw.s b/llvm/test/MC/X86/x86-64-avx512bw.s
index 8d72b086a6c..9e10ca42c3b 100644
--- a/llvm/test/MC/X86/x86-64-avx512bw.s
+++ b/llvm/test/MC/X86/x86-64-avx512bw.s
@@ -3668,6 +3668,126 @@
// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x1d,0xb2,0xc0,0xdf,0xff,0xff]
vpabsw -8256(%rdx), %zmm30
+// CHECK: vpmovwb %zmm27, %ymm22
+// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x30,0xde]
+ vpmovwb %zmm27, %ymm22
+
+// CHECK: vpmovwb %zmm27, %ymm22 {%k1}
+// CHECK: encoding: [0x62,0x22,0x7e,0x49,0x30,0xde]
+ vpmovwb %zmm27, %ymm22 {%k1}
+
+// CHECK: vpmovwb %zmm27, %ymm22 {%k1} {z}
+// CHECK: encoding: [0x62,0x22,0x7e,0xc9,0x30,0xde]
+ vpmovwb %zmm27, %ymm22 {%k1} {z}
+
+// CHECK: vpmovwb %zmm22, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0x31]
+ vpmovwb %zmm22, (%rcx)
+
+// CHECK: vpmovwb %zmm22, (%rcx) {%k4}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x4c,0x30,0x31]
+ vpmovwb %zmm22, (%rcx) {%k4}
+
+// CHECK: vpmovwb %zmm22, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x48,0x30,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovwb %zmm22, 291(%rax,%r14,8)
+
+// CHECK: vpmovwb %zmm22, 4064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0x72,0x7f]
+ vpmovwb %zmm22, 4064(%rdx)
+
+// CHECK: vpmovwb %zmm22, 4096(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0xb2,0x00,0x10,0x00,0x00]
+ vpmovwb %zmm22, 4096(%rdx)
+
+// CHECK: vpmovwb %zmm22, -4096(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0x72,0x80]
+ vpmovwb %zmm22, -4096(%rdx)
+
+// CHECK: vpmovwb %zmm22, -4128(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0xb2,0xe0,0xef,0xff,0xff]
+ vpmovwb %zmm22, -4128(%rdx)
+
+// CHECK: vpmovswb %zmm18, %ymm23
+// CHECK: encoding: [0x62,0xa2,0x7e,0x48,0x20,0xd7]
+ vpmovswb %zmm18, %ymm23
+
+// CHECK: vpmovswb %zmm18, %ymm23 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x4a,0x20,0xd7]
+ vpmovswb %zmm18, %ymm23 {%k2}
+
+// CHECK: vpmovswb %zmm18, %ymm23 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xca,0x20,0xd7]
+ vpmovswb %zmm18, %ymm23 {%k2} {z}
+
+// CHECK: vpmovswb %zmm24, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x01]
+ vpmovswb %zmm24, (%rcx)
+
+// CHECK: vpmovswb %zmm24, (%rcx) {%k7}
+// CHECK: encoding: [0x62,0x62,0x7e,0x4f,0x20,0x01]
+ vpmovswb %zmm24, (%rcx) {%k7}
+
+// CHECK: vpmovswb %zmm24, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x20,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vpmovswb %zmm24, 291(%rax,%r14,8)
+
+// CHECK: vpmovswb %zmm24, 4064(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x42,0x7f]
+ vpmovswb %zmm24, 4064(%rdx)
+
+// CHECK: vpmovswb %zmm24, 4096(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x82,0x00,0x10,0x00,0x00]
+ vpmovswb %zmm24, 4096(%rdx)
+
+// CHECK: vpmovswb %zmm24, -4096(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x42,0x80]
+ vpmovswb %zmm24, -4096(%rdx)
+
+// CHECK: vpmovswb %zmm24, -4128(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x82,0xe0,0xef,0xff,0xff]
+ vpmovswb %zmm24, -4128(%rdx)
+
+// CHECK: vpmovuswb %zmm22, %ymm28
+// CHECK: encoding: [0x62,0x82,0x7e,0x48,0x10,0xf4]
+ vpmovuswb %zmm22, %ymm28
+
+// CHECK: vpmovuswb %zmm22, %ymm28 {%k3}
+// CHECK: encoding: [0x62,0x82,0x7e,0x4b,0x10,0xf4]
+ vpmovuswb %zmm22, %ymm28 {%k3}
+
+// CHECK: vpmovuswb %zmm22, %ymm28 {%k3} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0xcb,0x10,0xf4]
+ vpmovuswb %zmm22, %ymm28 {%k3} {z}
+
+// CHECK: vpmovuswb %zmm27, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x19]
+ vpmovuswb %zmm27, (%rcx)
+
+// CHECK: vpmovuswb %zmm27, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0x62,0x7e,0x4a,0x10,0x19]
+ vpmovuswb %zmm27, (%rcx) {%k2}
+
+// CHECK: vpmovuswb %zmm27, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x10,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovuswb %zmm27, 291(%rax,%r14,8)
+
+// CHECK: vpmovuswb %zmm27, 4064(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x5a,0x7f]
+ vpmovuswb %zmm27, 4064(%rdx)
+
+// CHECK: vpmovuswb %zmm27, 4096(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x9a,0x00,0x10,0x00,0x00]
+ vpmovuswb %zmm27, 4096(%rdx)
+
+// CHECK: vpmovuswb %zmm27, -4096(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x5a,0x80]
+ vpmovuswb %zmm27, -4096(%rdx)
+
+// CHECK: vpmovuswb %zmm27, -4128(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x9a,0xe0,0xef,0xff,0xff]
+ vpmovuswb %zmm27, -4128(%rdx)
+
// CHECK: vpmulhuw %zmm21, %zmm24, %zmm21
// CHECK: encoding: [0x62,0xa1,0x3d,0x40,0xe4,0xed]
vpmulhuw %zmm21, %zmm24, %zmm21
diff --git a/llvm/test/MC/X86/x86-64-avx512bw_vl.s b/llvm/test/MC/X86/x86-64-avx512bw_vl.s
index dcc9db577eb..24b8a56efe6 100644
--- a/llvm/test/MC/X86/x86-64-avx512bw_vl.s
+++ b/llvm/test/MC/X86/x86-64-avx512bw_vl.s
@@ -6583,6 +6583,486 @@
// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x9a,0xe0,0xef,0xff,0xff]
vpshufb -4128(%rdx), %ymm18, %ymm19
+// CHECK: vpmovwb %xmm28, %xmm27
+// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x30,0xe3]
+ vpmovwb %xmm28, %xmm27
+
+// CHECK: vpmovwb %xmm28, %xmm27 {%k2}
+// CHECK: encoding: [0x62,0x02,0x7e,0x0a,0x30,0xe3]
+ vpmovwb %xmm28, %xmm27 {%k2}
+
+// CHECK: vpmovwb %xmm28, %xmm27 {%k2} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0x8a,0x30,0xe3]
+ vpmovwb %xmm28, %xmm27 {%k2} {z}
+
+// CHECK: vpmovwb %ymm26, %xmm26
+// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x30,0xd2]
+ vpmovwb %ymm26, %xmm26
+
+// CHECK: vpmovwb %ymm26, %xmm26 {%k4}
+// CHECK: encoding: [0x62,0x02,0x7e,0x2c,0x30,0xd2]
+ vpmovwb %ymm26, %xmm26 {%k4}
+
+// CHECK: vpmovwb %ymm26, %xmm26 {%k4} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0xac,0x30,0xd2]
+ vpmovwb %ymm26, %xmm26 {%k4} {z}
+
+// CHECK: vpmovwb %xmm23, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x39]
+ vpmovwb %xmm23,(%rcx)
+
+// CHECK: vpmovwb %xmm23, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x0e,0x30,0x39]
+ vpmovwb %xmm23,(%rcx) {%k6}
+
+// CHECK: vpmovwb %xmm23, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x30,0xbc,0xf0,0x34,0x12,0x00,0x00]
+ vpmovwb %xmm23,4660(%rax,%r14,8)
+
+// CHECK: vpmovwb %xmm23, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x7a,0x7f]
+ vpmovwb %xmm23, 1016(%rdx)
+
+// CHECK: vpmovwb %xmm23, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xba,0x00,0x04,0x00,0x00]
+ vpmovwb %xmm23, 1024(%rdx)
+
+// CHECK: vpmovwb %xmm23, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x7a,0x80]
+ vpmovwb %xmm23,-1024(%rdx)
+
+// CHECK: vpmovwb %xmm23, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xba,0xf8,0xfb,0xff,0xff]
+ vpmovwb %xmm23,-1032(%rdx)
+
+// CHECK: vpmovwb %ymm21, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x29]
+ vpmovwb %ymm21,(%rcx)
+
+// CHECK: vpmovwb %ymm21, (%rcx) {%k5}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2d,0x30,0x29]
+ vpmovwb %ymm21,(%rcx) {%k5}
+
+// CHECK: vpmovwb %ymm21, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x30,0xac,0xf0,0x34,0x12,0x00,0x00]
+ vpmovwb %ymm21, 4660(%rax,%r14,8)
+
+// CHECK: vpmovwb %ymm21, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x6a,0x7f]
+ vpmovwb %ymm21, 2032(%rdx)
+
+// CHECK: vpmovwb %ymm21, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xaa,0x00,0x08,0x00,0x00]
+ vpmovwb %ymm21, 2048(%rdx)
+
+// CHECK: vpmovwb %ymm21, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x6a,0x80]
+ vpmovwb %ymm21,-2048(%rdx)
+
+// CHECK: vpmovwb %ymm21, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xaa,0xf0,0xf7,0xff,0xff]
+ vpmovwb %ymm21, -2064(%rdx)
+
+// CHECK: vpmovswb %xmm19, %xmm17
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x20,0xd9]
+ vpmovswb %xmm19, %xmm17
+
+// CHECK: vpmovswb %xmm19, %xmm17 {%k1}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x09,0x20,0xd9]
+ vpmovswb %xmm19, %xmm17 {%k1}
+
+// CHECK: vpmovswb %xmm19, %xmm17 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x89,0x20,0xd9]
+ vpmovswb %xmm19, %xmm17 {%k1} {z}
+
+// CHECK: vpmovswb %ymm19, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x20,0xdd]
+ vpmovswb %ymm19, %xmm21
+
+// CHECK: vpmovswb %ymm19, %xmm21 {%k4}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2c,0x20,0xdd]
+ vpmovswb %ymm19, %xmm21 {%k4}
+
+// CHECK: vpmovswb %ymm19, %xmm21 {%k4} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xac,0x20,0xdd]
+ vpmovswb %ymm19, %xmm21 {%k4} {z}
+
+// CHECK: vpmovswb %xmm18, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x11]
+ vpmovswb %xmm18,(%rcx)
+
+// CHECK: vpmovswb %xmm18, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x20,0x11]
+ vpmovswb %xmm18,(%rcx) {%k2}
+
+// CHECK: vpmovswb %xmm18, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x20,0x94,0xf0,0x34,0x12,0x00,0x00]
+ vpmovswb %xmm18, 4660(%rax,%r14,8)
+
+// CHECK: vpmovswb %xmm18, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x52,0x7f]
+ vpmovswb %xmm18, 1016(%rdx)
+
+// CHECK: vpmovswb %xmm18, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x92,0x00,0x04,0x00,0x00]
+ vpmovswb %xmm18, 1024(%rdx)
+
+// CHECK: vpmovswb %xmm18, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x52,0x80]
+ vpmovswb %xmm18, -1024(%rdx)
+
+// CHECK: vpmovswb %xmm18, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x92,0xf8,0xfb,0xff,0xff]
+ vpmovswb %xmm18, -1032(%rdx)
+
+// CHECK: vpmovswb %ymm23, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0x39]
+ vpmovswb %ymm23,(%rcx)
+
+// CHECK: vpmovswb %ymm23, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2a,0x20,0x39]
+ vpmovswb %ymm23,(%rcx) {%k2}
+
+// CHECK: vpmovswb %ymm23, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x20,0xbc,0xf0,0x34,0x12,0x00,0x00]
+ vpmovswb %ymm23, 4660(%rax,%r14,8)
+
+// CHECK: vpmovswb %ymm23, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0x7a,0x7f]
+ vpmovswb %ymm23, 2032(%rdx)
+
+// CHECK: vpmovswb %ymm23, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0xba,0x00,0x08,0x00,0x00]
+ vpmovswb %ymm23, 2048(%rdx)
+
+// CHECK: vpmovswb %ymm23, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0x7a,0x80]
+ vpmovswb %ymm23, -2048(%rdx)
+
+// CHECK: vpmovswb %ymm23, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0xba,0xf0,0xf7,0xff,0xff]
+ vpmovswb %ymm23, -2064(%rdx)
+
+// CHECK: vpmovuswb %xmm17, %xmm26
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x10,0xca]
+ vpmovuswb %xmm17, %xmm26
+
+// CHECK: vpmovuswb %xmm17, %xmm26 {%k6}
+// CHECK: encoding: [0x62,0x82,0x7e,0x0e,0x10,0xca]
+ vpmovuswb %xmm17, %xmm26 {%k6}
+
+// CHECK: vpmovuswb %xmm17, %xmm26 {%k6} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x8e,0x10,0xca]
+ vpmovuswb %xmm17, %xmm26 {%k6} {z}
+
+// CHECK: vpmovuswb %ymm26, %xmm17
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x10,0xd1]
+ vpmovuswb %ymm26, %xmm17
+
+// CHECK: vpmovuswb %ymm26, %xmm17 {%k2}
+// CHECK: encoding: [0x62,0x22,0x7e,0x2a,0x10,0xd1]
+ vpmovuswb %ymm26, %xmm17 {%k2}
+
+// CHECK: vpmovuswb %ymm26, %xmm17 {%k2} {z}
+// CHECK: encoding: [0x62,0x22,0x7e,0xaa,0x10,0xd1]
+ vpmovuswb %ymm26, %xmm17 {%k2} {z}
+
+// CHECK: vpmovuswb %xmm19, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x19]
+ vpmovuswb %xmm19,(%rcx)
+
+// CHECK: vpmovuswb %xmm19, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x09,0x10,0x19]
+ vpmovuswb %xmm19,(%rcx) {%k1}
+
+// CHECK: vpmovuswb %xmm19, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x10,0x9c,0xf0,0x34,0x12,0x00,0x00]
+ vpmovuswb %xmm19, 4660(%rax,%r14,8)
+
+// CHECK: vpmovuswb %xmm19, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x5a,0x7f]
+ vpmovuswb %xmm19, 1016(%rdx)
+
+// CHECK: vpmovuswb %xmm19, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x9a,0x00,0x04,0x00,0x00]
+ vpmovuswb %xmm19, 1024(%rdx)
+
+// CHECK: vpmovuswb %xmm19, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x5a,0x80]
+ vpmovuswb %xmm19, -1024(%rdx)
+
+// CHECK: vpmovuswb %xmm19, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x9a,0xf8,0xfb,0xff,0xff]
+ vpmovuswb %xmm19, -1032(%rdx)
+
+// CHECK: vpmovuswb %ymm23, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0x39]
+ vpmovuswb %ymm23,(%rcx)
+
+// CHECK: vpmovuswb %ymm23, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2e,0x10,0x39]
+ vpmovuswb %ymm23,(%rcx) {%k6}
+
+// CHECK: vpmovuswb %ymm23, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x10,0xbc,0xf0,0x34,0x12,0x00,0x00]
+ vpmovuswb %ymm23, 4660(%rax,%r14,8)
+
+// CHECK: vpmovuswb %ymm23, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0x7a,0x7f]
+ vpmovuswb %ymm23, 2032(%rdx)
+
+// CHECK: vpmovuswb %ymm23, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0xba,0x00,0x08,0x00,0x00]
+ vpmovuswb %ymm23, 2048(%rdx)
+
+// CHECK: vpmovuswb %ymm23, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0x7a,0x80]
+ vpmovuswb %ymm23, -2048(%rdx)
+
+// CHECK: vpmovuswb %ymm23, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0xba,0xf0,0xf7,0xff,0xff]
+ vpmovuswb %ymm23, -2064(%rdx)
+
+// CHECK: vpmovwb %xmm17, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x30,0xcd]
+ vpmovwb %xmm17, %xmm21
+
+// CHECK: vpmovwb %xmm17, %xmm21 {%k1}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x09,0x30,0xcd]
+ vpmovwb %xmm17, %xmm21 {%k1}
+
+// CHECK: vpmovwb %xmm17, %xmm21 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x89,0x30,0xcd]
+ vpmovwb %xmm17, %xmm21 {%k1} {z}
+
+// CHECK: vpmovwb %ymm23, %xmm26
+// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x30,0xfa]
+ vpmovwb %ymm23, %xmm26
+
+// CHECK: vpmovwb %ymm23, %xmm26 {%k7}
+// CHECK: encoding: [0x62,0x82,0x7e,0x2f,0x30,0xfa]
+ vpmovwb %ymm23, %xmm26 {%k7}
+
+// CHECK: vpmovwb %ymm23, %xmm26 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0xaf,0x30,0xfa]
+ vpmovwb %ymm23, %xmm26 {%k7} {z}
+
+// CHECK: vpmovwb %xmm21, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x29]
+ vpmovwb %xmm21, (%rcx)
+
+// CHECK: vpmovwb %xmm21, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x30,0x29]
+ vpmovwb %xmm21, (%rcx) {%k2}
+
+// CHECK: vpmovwb %xmm21, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x30,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovwb %xmm21, 291(%rax,%r14,8)
+
+// CHECK: vpmovwb %xmm21, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x6a,0x7f]
+ vpmovwb %xmm21, 1016(%rdx)
+
+// CHECK: vpmovwb %xmm21, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xaa,0x00,0x04,0x00,0x00]
+ vpmovwb %xmm21, 1024(%rdx)
+
+// CHECK: vpmovwb %xmm21, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x6a,0x80]
+ vpmovwb %xmm21, -1024(%rdx)
+
+// CHECK: vpmovwb %xmm21, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xaa,0xf8,0xfb,0xff,0xff]
+ vpmovwb %xmm21, -1032(%rdx)
+
+// CHECK: vpmovwb %ymm20, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x21]
+ vpmovwb %ymm20, (%rcx)
+
+// CHECK: vpmovwb %ymm20, (%rcx) {%k4}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2c,0x30,0x21]
+ vpmovwb %ymm20, (%rcx) {%k4}
+
+// CHECK: vpmovwb %ymm20, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x30,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovwb %ymm20, 291(%rax,%r14,8)
+
+// CHECK: vpmovwb %ymm20, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x62,0x7f]
+ vpmovwb %ymm20, 2032(%rdx)
+
+// CHECK: vpmovwb %ymm20, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xa2,0x00,0x08,0x00,0x00]
+ vpmovwb %ymm20, 2048(%rdx)
+
+// CHECK: vpmovwb %ymm20, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x62,0x80]
+ vpmovwb %ymm20, -2048(%rdx)
+
+// CHECK: vpmovwb %ymm20, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xa2,0xf0,0xf7,0xff,0xff]
+ vpmovwb %ymm20, -2064(%rdx)
+
+// CHECK: vpmovswb %xmm20, %xmm24
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x20,0xe0]
+ vpmovswb %xmm20, %xmm24
+
+// CHECK: vpmovswb %xmm20, %xmm24 {%k4}
+// CHECK: encoding: [0x62,0x82,0x7e,0x0c,0x20,0xe0]
+ vpmovswb %xmm20, %xmm24 {%k4}
+
+// CHECK: vpmovswb %xmm20, %xmm24 {%k4} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x8c,0x20,0xe0]
+ vpmovswb %xmm20, %xmm24 {%k4} {z}
+
+// CHECK: vpmovswb %ymm18, %xmm27
+// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x20,0xd3]
+ vpmovswb %ymm18, %xmm27
+
+// CHECK: vpmovswb %ymm18, %xmm27 {%k1}
+// CHECK: encoding: [0x62,0x82,0x7e,0x29,0x20,0xd3]
+ vpmovswb %ymm18, %xmm27 {%k1}
+
+// CHECK: vpmovswb %ymm18, %xmm27 {%k1} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0xa9,0x20,0xd3]
+ vpmovswb %ymm18, %xmm27 {%k1} {z}
+
+// CHECK: vpmovswb %xmm24, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x01]
+ vpmovswb %xmm24, (%rcx)
+
+// CHECK: vpmovswb %xmm24, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x20,0x01]
+ vpmovswb %xmm24, (%rcx) {%k3}
+
+// CHECK: vpmovswb %xmm24, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x20,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vpmovswb %xmm24, 291(%rax,%r14,8)
+
+// CHECK: vpmovswb %xmm24, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x42,0x7f]
+ vpmovswb %xmm24, 1016(%rdx)
+
+// CHECK: vpmovswb %xmm24, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x82,0x00,0x04,0x00,0x00]
+ vpmovswb %xmm24, 1024(%rdx)
+
+// CHECK: vpmovswb %xmm24, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x42,0x80]
+ vpmovswb %xmm24, -1024(%rdx)
+
+// CHECK: vpmovswb %xmm24, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x82,0xf8,0xfb,0xff,0xff]
+ vpmovswb %xmm24, -1032(%rdx)
+
+// CHECK: vpmovswb %ymm27, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x19]
+ vpmovswb %ymm27, (%rcx)
+
+// CHECK: vpmovswb %ymm27, (%rcx) {%k7}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2f,0x20,0x19]
+ vpmovswb %ymm27, (%rcx) {%k7}
+
+// CHECK: vpmovswb %ymm27, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x20,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovswb %ymm27, 291(%rax,%r14,8)
+
+// CHECK: vpmovswb %ymm27, 2032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x5a,0x7f]
+ vpmovswb %ymm27, 2032(%rdx)
+
+// CHECK: vpmovswb %ymm27, 2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x9a,0x00,0x08,0x00,0x00]
+ vpmovswb %ymm27, 2048(%rdx)
+
+// CHECK: vpmovswb %ymm27, -2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x5a,0x80]
+ vpmovswb %ymm27, -2048(%rdx)
+
+// CHECK: vpmovswb %ymm27, -2064(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x9a,0xf0,0xf7,0xff,0xff]
+ vpmovswb %ymm27, -2064(%rdx)
+
+// CHECK: vpmovuswb %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x10,0xdf]
+ vpmovuswb %xmm19, %xmm23
+
+// CHECK: vpmovuswb %xmm19, %xmm23 {%k4}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x0c,0x10,0xdf]
+ vpmovuswb %xmm19, %xmm23 {%k4}
+
+// CHECK: vpmovuswb %xmm19, %xmm23 {%k4} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x8c,0x10,0xdf]
+ vpmovuswb %xmm19, %xmm23 {%k4} {z}
+
+// CHECK: vpmovuswb %ymm23, %xmm28
+// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x10,0xfc]
+ vpmovuswb %ymm23, %xmm28
+
+// CHECK: vpmovuswb %ymm23, %xmm28 {%k6}
+// CHECK: encoding: [0x62,0x82,0x7e,0x2e,0x10,0xfc]
+ vpmovuswb %ymm23, %xmm28 {%k6}
+
+// CHECK: vpmovuswb %ymm23, %xmm28 {%k6} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0xae,0x10,0xfc]
+ vpmovuswb %ymm23, %xmm28 {%k6} {z}
+
+// CHECK: vpmovuswb %xmm25, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x09]
+ vpmovuswb %xmm25, (%rcx)
+
+// CHECK: vpmovuswb %xmm25, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x10,0x09]
+ vpmovuswb %xmm25, (%rcx) {%k3}
+
+// CHECK: vpmovuswb %xmm25, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x10,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovuswb %xmm25, 291(%rax,%r14,8)
+
+// CHECK: vpmovuswb %xmm25, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x4a,0x7f]
+ vpmovuswb %xmm25, 1016(%rdx)
+
+// CHECK: vpmovuswb %xmm25, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x8a,0x00,0x04,0x00,0x00]
+ vpmovuswb %xmm25, 1024(%rdx)
+
+// CHECK: vpmovuswb %xmm25, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x4a,0x80]
+ vpmovuswb %xmm25, -1024(%rdx)
+
+// CHECK: vpmovuswb %xmm25, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x8a,0xf8,0xfb,0xff,0xff]
+ vpmovuswb %xmm25, -1032(%rdx)
+
+// CHECK: vpmovuswb %ymm28, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0x21]
+ vpmovuswb %ymm28, (%rcx)
+
+// CHECK: vpmovuswb %ymm28, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2a,0x10,0x21]
+ vpmovuswb %ymm28, (%rcx) {%k2}
+
+// CHECK: vpmovuswb %ymm28, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x10,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovuswb %ymm28, 291(%rax,%r14,8)
+
+// CHECK: vpmovuswb %ymm28, 2032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0x62,0x7f]
+ vpmovuswb %ymm28, 2032(%rdx)
+
+// CHECK: vpmovuswb %ymm28, 2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0xa2,0x00,0x08,0x00,0x00]
+ vpmovuswb %ymm28, 2048(%rdx)
+
+// CHECK: vpmovuswb %ymm28, -2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0x62,0x80]
+ vpmovuswb %ymm28, -2048(%rdx)
+
+// CHECK: vpmovuswb %ymm28, -2064(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0xa2,0xf0,0xf7,0xff,0xff]
+ vpmovuswb %ymm28, -2064(%rdx)
+
// CHECK: vpmulhuw %xmm18, %xmm21, %xmm24
// CHECK: encoding: [0x62,0x21,0x55,0x00,0xe4,0xc2]
vpmulhuw %xmm18, %xmm21, %xmm24
diff --git a/llvm/test/MC/X86/x86-64-avx512f_vl.s b/llvm/test/MC/X86/x86-64-avx512f_vl.s
index eca2ffbfc09..9280be94716 100644
--- a/llvm/test/MC/X86/x86-64-avx512f_vl.s
+++ b/llvm/test/MC/X86/x86-64-avx512f_vl.s
@@ -16285,6 +16285,1206 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
// CHECK: encoding: [0x62,0x62,0x4d,0x30,0x2c,0x8a,0xfc,0xfd,0xff,0xff]
vscalefps -516(%rdx){1to8}, %ymm22, %ymm25
+// CHECK: vpmovqb %xmm29, %xmm24
+// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x32,0xe8]
+ vpmovqb %xmm29, %xmm24
+
+// CHECK: vpmovqb %xmm29, %xmm24 {%k4}
+// CHECK: encoding: [0x62,0x02,0x7e,0x0c,0x32,0xe8]
+ vpmovqb %xmm29, %xmm24 {%k4}
+
+// CHECK: vpmovqb %xmm29, %xmm24 {%k4} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0x8c,0x32,0xe8]
+ vpmovqb %xmm29, %xmm24 {%k4} {z}
+
+// CHECK: vpmovqb %ymm29, %xmm17
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x32,0xe9]
+ vpmovqb %ymm29, %xmm17
+
+// CHECK: vpmovqb %ymm29, %xmm17 {%k3}
+// CHECK: encoding: [0x62,0x22,0x7e,0x2b,0x32,0xe9]
+ vpmovqb %ymm29, %xmm17 {%k3}
+
+// CHECK: vpmovqb %ymm29, %xmm17 {%k3} {z}
+// CHECK: encoding: [0x62,0x22,0x7e,0xab,0x32,0xe9]
+ vpmovqb %ymm29, %xmm17 {%k3} {z}
+
+// CHECK: vpmovqb %xmm27, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x19]
+ vpmovqb %xmm27, (%rcx)
+
+// CHECK: vpmovqb %xmm27, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0a,0x32,0x19]
+ vpmovqb %xmm27, (%rcx) {%k2}
+
+// CHECK: vpmovqb %xmm27, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x32,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovqb %xmm27, 291(%rax,%r14,8)
+
+// CHECK: vpmovqb %xmm27, 254(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x5a,0x7f]
+ vpmovqb %xmm27, 254(%rdx)
+
+// CHECK: vpmovqb %xmm27, 256(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x9a,0x00,0x01,0x00,0x00]
+ vpmovqb %xmm27, 256(%rdx)
+
+// CHECK: vpmovqb %xmm27, -256(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x5a,0x80]
+ vpmovqb %xmm27, -256(%rdx)
+
+// CHECK: vpmovqb %xmm27, -258(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x9a,0xfe,0xfe,0xff,0xff]
+ vpmovqb %xmm27, -258(%rdx)
+
+// CHECK: vpmovqb %ymm28, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0x21]
+ vpmovqb %ymm28, (%rcx)
+
+// CHECK: vpmovqb %ymm28, (%rcx) {%k7}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2f,0x32,0x21]
+ vpmovqb %ymm28, (%rcx) {%k7}
+
+// CHECK: vpmovqb %ymm28, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x32,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovqb %ymm28, 291(%rax,%r14,8)
+
+// CHECK: vpmovqb %ymm28, 508(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0x62,0x7f]
+ vpmovqb %ymm28, 508(%rdx)
+
+// CHECK: vpmovqb %ymm28, 512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0xa2,0x00,0x02,0x00,0x00]
+ vpmovqb %ymm28, 512(%rdx)
+
+// CHECK: vpmovqb %ymm28, -512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0x62,0x80]
+ vpmovqb %ymm28, -512(%rdx)
+
+// CHECK: vpmovqb %ymm28, -516(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0xa2,0xfc,0xfd,0xff,0xff]
+ vpmovqb %ymm28, -516(%rdx)
+
+// CHECK: vpmovsqb %xmm19, %xmm26
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x22,0xda]
+ vpmovsqb %xmm19, %xmm26
+
+// CHECK: vpmovsqb %xmm19, %xmm26 {%k1}
+// CHECK: encoding: [0x62,0x82,0x7e,0x09,0x22,0xda]
+ vpmovsqb %xmm19, %xmm26 {%k1}
+
+// CHECK: vpmovsqb %xmm19, %xmm26 {%k1} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x89,0x22,0xda]
+ vpmovsqb %xmm19, %xmm26 {%k1} {z}
+
+// CHECK: vpmovsqb %ymm20, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x22,0xe4]
+ vpmovsqb %ymm20, %xmm20
+
+// CHECK: vpmovsqb %ymm20, %xmm20 {%k6}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2e,0x22,0xe4]
+ vpmovsqb %ymm20, %xmm20 {%k6}
+
+// CHECK: vpmovsqb %ymm20, %xmm20 {%k6} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xae,0x22,0xe4]
+ vpmovsqb %ymm20, %xmm20 {%k6} {z}
+
+// CHECK: vpmovsqb %xmm25, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x09]
+ vpmovsqb %xmm25, (%rcx)
+
+// CHECK: vpmovsqb %xmm25, (%rcx) {%k7}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0f,0x22,0x09]
+ vpmovsqb %xmm25, (%rcx) {%k7}
+
+// CHECK: vpmovsqb %xmm25, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x22,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsqb %xmm25, 291(%rax,%r14,8)
+
+// CHECK: vpmovsqb %xmm25, 254(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x4a,0x7f]
+ vpmovsqb %xmm25, 254(%rdx)
+
+// CHECK: vpmovsqb %xmm25, 256(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x8a,0x00,0x01,0x00,0x00]
+ vpmovsqb %xmm25, 256(%rdx)
+
+// CHECK: vpmovsqb %xmm25, -256(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x4a,0x80]
+ vpmovsqb %xmm25, -256(%rdx)
+
+// CHECK: vpmovsqb %xmm25, -258(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x8a,0xfe,0xfe,0xff,0xff]
+ vpmovsqb %xmm25, -258(%rdx)
+
+// CHECK: vpmovsqb %ymm17, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x09]
+ vpmovsqb %ymm17, (%rcx)
+
+// CHECK: vpmovsqb %ymm17, (%rcx) {%k4}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2c,0x22,0x09]
+ vpmovsqb %ymm17, (%rcx) {%k4}
+
+// CHECK: vpmovsqb %ymm17, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x22,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsqb %ymm17, 291(%rax,%r14,8)
+
+// CHECK: vpmovsqb %ymm17, 508(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x4a,0x7f]
+ vpmovsqb %ymm17, 508(%rdx)
+
+// CHECK: vpmovsqb %ymm17, 512(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x8a,0x00,0x02,0x00,0x00]
+ vpmovsqb %ymm17, 512(%rdx)
+
+// CHECK: vpmovsqb %ymm17, -512(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x4a,0x80]
+ vpmovsqb %ymm17, -512(%rdx)
+
+// CHECK: vpmovsqb %ymm17, -516(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x8a,0xfc,0xfd,0xff,0xff]
+ vpmovsqb %ymm17, -516(%rdx)
+
+// CHECK: vpmovusqb %xmm22, %xmm28
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x12,0xf4]
+ vpmovusqb %xmm22, %xmm28
+
+// CHECK: vpmovusqb %xmm22, %xmm28 {%k2}
+// CHECK: encoding: [0x62,0x82,0x7e,0x0a,0x12,0xf4]
+ vpmovusqb %xmm22, %xmm28 {%k2}
+
+// CHECK: vpmovusqb %xmm22, %xmm28 {%k2} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x8a,0x12,0xf4]
+ vpmovusqb %xmm22, %xmm28 {%k2} {z}
+
+// CHECK: vpmovusqb %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x12,0xfe]
+ vpmovusqb %ymm23, %xmm22
+
+// CHECK: vpmovusqb %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2f,0x12,0xfe]
+ vpmovusqb %ymm23, %xmm22 {%k7}
+
+// CHECK: vpmovusqb %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xaf,0x12,0xfe]
+ vpmovusqb %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vpmovusqb %xmm26, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x11]
+ vpmovusqb %xmm26, (%rcx)
+
+// CHECK: vpmovusqb %xmm26, (%rcx) {%k5}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0d,0x12,0x11]
+ vpmovusqb %xmm26, (%rcx) {%k5}
+
+// CHECK: vpmovusqb %xmm26, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x12,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusqb %xmm26, 291(%rax,%r14,8)
+
+// CHECK: vpmovusqb %xmm26, 254(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x52,0x7f]
+ vpmovusqb %xmm26, 254(%rdx)
+
+// CHECK: vpmovusqb %xmm26, 256(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x92,0x00,0x01,0x00,0x00]
+ vpmovusqb %xmm26, 256(%rdx)
+
+// CHECK: vpmovusqb %xmm26, -256(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x52,0x80]
+ vpmovusqb %xmm26, -256(%rdx)
+
+// CHECK: vpmovusqb %xmm26, -258(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x92,0xfe,0xfe,0xff,0xff]
+ vpmovusqb %xmm26, -258(%rdx)
+
+// CHECK: vpmovusqb %ymm30, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0x31]
+ vpmovusqb %ymm30, (%rcx)
+
+// CHECK: vpmovusqb %ymm30, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2a,0x12,0x31]
+ vpmovusqb %ymm30, (%rcx) {%k2}
+
+// CHECK: vpmovusqb %ymm30, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x12,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusqb %ymm30, 291(%rax,%r14,8)
+
+// CHECK: vpmovusqb %ymm30, 508(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0x72,0x7f]
+ vpmovusqb %ymm30, 508(%rdx)
+
+// CHECK: vpmovusqb %ymm30, 512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0xb2,0x00,0x02,0x00,0x00]
+ vpmovusqb %ymm30, 512(%rdx)
+
+// CHECK: vpmovusqb %ymm30, -512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0x72,0x80]
+ vpmovusqb %ymm30, -512(%rdx)
+
+// CHECK: vpmovusqb %ymm30, -516(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0xb2,0xfc,0xfd,0xff,0xff]
+ vpmovusqb %ymm30, -516(%rdx)
+
+// CHECK: vpmovqw %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x34,0xd3]
+ vpmovqw %xmm18, %xmm19
+
+// CHECK: vpmovqw %xmm18, %xmm19 {%k4}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x0c,0x34,0xd3]
+ vpmovqw %xmm18, %xmm19 {%k4}
+
+// CHECK: vpmovqw %xmm18, %xmm19 {%k4} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x8c,0x34,0xd3]
+ vpmovqw %xmm18, %xmm19 {%k4} {z}
+
+// CHECK: vpmovqw %ymm22, %xmm19
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x34,0xf3]
+ vpmovqw %ymm22, %xmm19
+
+// CHECK: vpmovqw %ymm22, %xmm19 {%k5}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2d,0x34,0xf3]
+ vpmovqw %ymm22, %xmm19 {%k5}
+
+// CHECK: vpmovqw %ymm22, %xmm19 {%k5} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xad,0x34,0xf3]
+ vpmovqw %ymm22, %xmm19 {%k5} {z}
+
+// CHECK: vpmovqw %xmm21, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0x29]
+ vpmovqw %xmm21, (%rcx)
+
+// CHECK: vpmovqw %xmm21, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x0b,0x34,0x29]
+ vpmovqw %xmm21, (%rcx) {%k3}
+
+// CHECK: vpmovqw %xmm21, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x34,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovqw %xmm21, 291(%rax,%r14,8)
+
+// CHECK: vpmovqw %xmm21, 508(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0x6a,0x7f]
+ vpmovqw %xmm21, 508(%rdx)
+
+// CHECK: vpmovqw %xmm21, 512(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0xaa,0x00,0x02,0x00,0x00]
+ vpmovqw %xmm21, 512(%rdx)
+
+// CHECK: vpmovqw %xmm21, -512(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0x6a,0x80]
+ vpmovqw %xmm21, -512(%rdx)
+
+// CHECK: vpmovqw %xmm21, -516(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0xaa,0xfc,0xfd,0xff,0xff]
+ vpmovqw %xmm21, -516(%rdx)
+
+// CHECK: vpmovqw %ymm28, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0x21]
+ vpmovqw %ymm28, (%rcx)
+
+// CHECK: vpmovqw %ymm28, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x34,0x21]
+ vpmovqw %ymm28, (%rcx) {%k6}
+
+// CHECK: vpmovqw %ymm28, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x34,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovqw %ymm28, 291(%rax,%r14,8)
+
+// CHECK: vpmovqw %ymm28, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0x62,0x7f]
+ vpmovqw %ymm28, 1016(%rdx)
+
+// CHECK: vpmovqw %ymm28, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0xa2,0x00,0x04,0x00,0x00]
+ vpmovqw %ymm28, 1024(%rdx)
+
+// CHECK: vpmovqw %ymm28, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0x62,0x80]
+ vpmovqw %ymm28, -1024(%rdx)
+
+// CHECK: vpmovqw %ymm28, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0xa2,0xf8,0xfb,0xff,0xff]
+ vpmovqw %ymm28, -1032(%rdx)
+
+// CHECK: vpmovsqw %xmm18, %xmm26
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x24,0xd2]
+ vpmovsqw %xmm18, %xmm26
+
+// CHECK: vpmovsqw %xmm18, %xmm26 {%k7}
+// CHECK: encoding: [0x62,0x82,0x7e,0x0f,0x24,0xd2]
+ vpmovsqw %xmm18, %xmm26 {%k7}
+
+// CHECK: vpmovsqw %xmm18, %xmm26 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x8f,0x24,0xd2]
+ vpmovsqw %xmm18, %xmm26 {%k7} {z}
+
+// CHECK: vpmovsqw %ymm20, %xmm28
+// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x24,0xe4]
+ vpmovsqw %ymm20, %xmm28
+
+// CHECK: vpmovsqw %ymm20, %xmm28 {%k4}
+// CHECK: encoding: [0x62,0x82,0x7e,0x2c,0x24,0xe4]
+ vpmovsqw %ymm20, %xmm28 {%k4}
+
+// CHECK: vpmovsqw %ymm20, %xmm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0xac,0x24,0xe4]
+ vpmovsqw %ymm20, %xmm28 {%k4} {z}
+
+// CHECK: vpmovsqw %xmm30, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0x31]
+ vpmovsqw %xmm30, (%rcx)
+
+// CHECK: vpmovsqw %xmm30, (%rcx) {%k4}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0c,0x24,0x31]
+ vpmovsqw %xmm30, (%rcx) {%k4}
+
+// CHECK: vpmovsqw %xmm30, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x24,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsqw %xmm30, 291(%rax,%r14,8)
+
+// CHECK: vpmovsqw %xmm30, 508(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0x72,0x7f]
+ vpmovsqw %xmm30, 508(%rdx)
+
+// CHECK: vpmovsqw %xmm30, 512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0xb2,0x00,0x02,0x00,0x00]
+ vpmovsqw %xmm30, 512(%rdx)
+
+// CHECK: vpmovsqw %xmm30, -512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0x72,0x80]
+ vpmovsqw %xmm30, -512(%rdx)
+
+// CHECK: vpmovsqw %xmm30, -516(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0xb2,0xfc,0xfd,0xff,0xff]
+ vpmovsqw %xmm30, -516(%rdx)
+
+// CHECK: vpmovsqw %ymm21, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0x29]
+ vpmovsqw %ymm21, (%rcx)
+
+// CHECK: vpmovsqw %ymm21, (%rcx) {%k5}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2d,0x24,0x29]
+ vpmovsqw %ymm21, (%rcx) {%k5}
+
+// CHECK: vpmovsqw %ymm21, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x24,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsqw %ymm21, 291(%rax,%r14,8)
+
+// CHECK: vpmovsqw %ymm21, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0x6a,0x7f]
+ vpmovsqw %ymm21, 1016(%rdx)
+
+// CHECK: vpmovsqw %ymm21, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0xaa,0x00,0x04,0x00,0x00]
+ vpmovsqw %ymm21, 1024(%rdx)
+
+// CHECK: vpmovsqw %ymm21, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0x6a,0x80]
+ vpmovsqw %ymm21, -1024(%rdx)
+
+// CHECK: vpmovsqw %ymm21, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0xaa,0xf8,0xfb,0xff,0xff]
+ vpmovsqw %ymm21, -1032(%rdx)
+
+// CHECK: vpmovusqw %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x14,0xe5]
+ vpmovusqw %xmm20, %xmm29
+
+// CHECK: vpmovusqw %xmm20, %xmm29 {%k1}
+// CHECK: encoding: [0x62,0x82,0x7e,0x09,0x14,0xe5]
+ vpmovusqw %xmm20, %xmm29 {%k1}
+
+// CHECK: vpmovusqw %xmm20, %xmm29 {%k1} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x89,0x14,0xe5]
+ vpmovusqw %xmm20, %xmm29 {%k1} {z}
+
+// CHECK: vpmovusqw %ymm21, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x14,0xec]
+ vpmovusqw %ymm21, %xmm20
+
+// CHECK: vpmovusqw %ymm21, %xmm20 {%k5}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2d,0x14,0xec]
+ vpmovusqw %ymm21, %xmm20 {%k5}
+
+// CHECK: vpmovusqw %ymm21, %xmm20 {%k5} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xad,0x14,0xec]
+ vpmovusqw %ymm21, %xmm20 {%k5} {z}
+
+// CHECK: vpmovusqw %xmm18, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x11]
+ vpmovusqw %xmm18, (%rcx)
+
+// CHECK: vpmovusqw %xmm18, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x09,0x14,0x11]
+ vpmovusqw %xmm18, (%rcx) {%k1}
+
+// CHECK: vpmovusqw %xmm18, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x14,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusqw %xmm18, 291(%rax,%r14,8)
+
+// CHECK: vpmovusqw %xmm18, 508(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x52,0x7f]
+ vpmovusqw %xmm18, 508(%rdx)
+
+// CHECK: vpmovusqw %xmm18, 512(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x92,0x00,0x02,0x00,0x00]
+ vpmovusqw %xmm18, 512(%rdx)
+
+// CHECK: vpmovusqw %xmm18, -512(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x52,0x80]
+ vpmovusqw %xmm18, -512(%rdx)
+
+// CHECK: vpmovusqw %xmm18, -516(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x92,0xfc,0xfd,0xff,0xff]
+ vpmovusqw %xmm18, -516(%rdx)
+
+// CHECK: vpmovusqw %ymm18, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x11]
+ vpmovusqw %ymm18, (%rcx)
+
+// CHECK: vpmovusqw %ymm18, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2a,0x14,0x11]
+ vpmovusqw %ymm18, (%rcx) {%k2}
+
+// CHECK: vpmovusqw %ymm18, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x14,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusqw %ymm18, 291(%rax,%r14,8)
+
+// CHECK: vpmovusqw %ymm18, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x52,0x7f]
+ vpmovusqw %ymm18, 1016(%rdx)
+
+// CHECK: vpmovusqw %ymm18, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x92,0x00,0x04,0x00,0x00]
+ vpmovusqw %ymm18, 1024(%rdx)
+
+// CHECK: vpmovusqw %ymm18, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x52,0x80]
+ vpmovusqw %ymm18, -1024(%rdx)
+
+// CHECK: vpmovusqw %ymm18, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x92,0xf8,0xfb,0xff,0xff]
+ vpmovusqw %ymm18, -1032(%rdx)
+
+// CHECK: vpmovqd %xmm25, %xmm21
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x35,0xcd]
+ vpmovqd %xmm25, %xmm21
+
+// CHECK: vpmovqd %xmm25, %xmm21 {%k5}
+// CHECK: encoding: [0x62,0x22,0x7e,0x0d,0x35,0xcd]
+ vpmovqd %xmm25, %xmm21 {%k5}
+
+// CHECK: vpmovqd %xmm25, %xmm21 {%k5} {z}
+// CHECK: encoding: [0x62,0x22,0x7e,0x8d,0x35,0xcd]
+ vpmovqd %xmm25, %xmm21 {%k5} {z}
+
+// CHECK: vpmovqd %ymm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x35,0xf5]
+ vpmovqd %ymm22, %xmm21
+
+// CHECK: vpmovqd %ymm22, %xmm21 {%k6}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2e,0x35,0xf5]
+ vpmovqd %ymm22, %xmm21 {%k6}
+
+// CHECK: vpmovqd %ymm22, %xmm21 {%k6} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xae,0x35,0xf5]
+ vpmovqd %ymm22, %xmm21 {%k6} {z}
+
+// CHECK: vpmovqd %xmm29, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0x29]
+ vpmovqd %xmm29, (%rcx)
+
+// CHECK: vpmovqd %xmm29, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0e,0x35,0x29]
+ vpmovqd %xmm29, (%rcx) {%k6}
+
+// CHECK: vpmovqd %xmm29, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x35,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovqd %xmm29, 291(%rax,%r14,8)
+
+// CHECK: vpmovqd %xmm29, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0x6a,0x7f]
+ vpmovqd %xmm29, 1016(%rdx)
+
+// CHECK: vpmovqd %xmm29, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0xaa,0x00,0x04,0x00,0x00]
+ vpmovqd %xmm29, 1024(%rdx)
+
+// CHECK: vpmovqd %xmm29, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0x6a,0x80]
+ vpmovqd %xmm29, -1024(%rdx)
+
+// CHECK: vpmovqd %xmm29, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0xaa,0xf8,0xfb,0xff,0xff]
+ vpmovqd %xmm29, -1032(%rdx)
+
+// CHECK: vpmovqd %ymm30, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0x31]
+ vpmovqd %ymm30, (%rcx)
+
+// CHECK: vpmovqd %ymm30, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2a,0x35,0x31]
+ vpmovqd %ymm30, (%rcx) {%k2}
+
+// CHECK: vpmovqd %ymm30, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x35,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovqd %ymm30, 291(%rax,%r14,8)
+
+// CHECK: vpmovqd %ymm30, 2032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0x72,0x7f]
+ vpmovqd %ymm30, 2032(%rdx)
+
+// CHECK: vpmovqd %ymm30, 2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0xb2,0x00,0x08,0x00,0x00]
+ vpmovqd %ymm30, 2048(%rdx)
+
+// CHECK: vpmovqd %ymm30, -2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0x72,0x80]
+ vpmovqd %ymm30, -2048(%rdx)
+
+// CHECK: vpmovqd %ymm30, -2064(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0xb2,0xf0,0xf7,0xff,0xff]
+ vpmovqd %ymm30, -2064(%rdx)
+
+// CHECK: vpmovsqd %xmm21, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x25,0xed]
+ vpmovsqd %xmm21, %xmm21
+
+// CHECK: vpmovsqd %xmm21, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x0a,0x25,0xed]
+ vpmovsqd %xmm21, %xmm21 {%k2}
+
+// CHECK: vpmovsqd %xmm21, %xmm21 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x8a,0x25,0xed]
+ vpmovsqd %xmm21, %xmm21 {%k2} {z}
+
+// CHECK: vpmovsqd %ymm29, %xmm29
+// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x25,0xed]
+ vpmovsqd %ymm29, %xmm29
+
+// CHECK: vpmovsqd %ymm29, %xmm29 {%k4}
+// CHECK: encoding: [0x62,0x02,0x7e,0x2c,0x25,0xed]
+ vpmovsqd %ymm29, %xmm29 {%k4}
+
+// CHECK: vpmovsqd %ymm29, %xmm29 {%k4} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0xac,0x25,0xed]
+ vpmovsqd %ymm29, %xmm29 {%k4} {z}
+
+// CHECK: vpmovsqd %xmm17, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x09]
+ vpmovsqd %xmm17, (%rcx)
+
+// CHECK: vpmovsqd %xmm17, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x25,0x09]
+ vpmovsqd %xmm17, (%rcx) {%k2}
+
+// CHECK: vpmovsqd %xmm17, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x25,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsqd %xmm17, 291(%rax,%r14,8)
+
+// CHECK: vpmovsqd %xmm17, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x4a,0x7f]
+ vpmovsqd %xmm17, 1016(%rdx)
+
+// CHECK: vpmovsqd %xmm17, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x8a,0x00,0x04,0x00,0x00]
+ vpmovsqd %xmm17, 1024(%rdx)
+
+// CHECK: vpmovsqd %xmm17, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x4a,0x80]
+ vpmovsqd %xmm17, -1024(%rdx)
+
+// CHECK: vpmovsqd %xmm17, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x8a,0xf8,0xfb,0xff,0xff]
+ vpmovsqd %xmm17, -1032(%rdx)
+
+// CHECK: vpmovsqd %ymm23, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0x39]
+ vpmovsqd %ymm23, (%rcx)
+
+// CHECK: vpmovsqd %ymm23, (%rcx) {%k5}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2d,0x25,0x39]
+ vpmovsqd %ymm23, (%rcx) {%k5}
+
+// CHECK: vpmovsqd %ymm23, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x25,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsqd %ymm23, 291(%rax,%r14,8)
+
+// CHECK: vpmovsqd %ymm23, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0x7a,0x7f]
+ vpmovsqd %ymm23, 2032(%rdx)
+
+// CHECK: vpmovsqd %ymm23, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0xba,0x00,0x08,0x00,0x00]
+ vpmovsqd %ymm23, 2048(%rdx)
+
+// CHECK: vpmovsqd %ymm23, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0x7a,0x80]
+ vpmovsqd %ymm23, -2048(%rdx)
+
+// CHECK: vpmovsqd %ymm23, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0xba,0xf0,0xf7,0xff,0xff]
+ vpmovsqd %ymm23, -2064(%rdx)
+
+// CHECK: vpmovusqd %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x15,0xe9]
+ vpmovusqd %xmm21, %xmm25
+
+// CHECK: vpmovusqd %xmm21, %xmm25 {%k5}
+// CHECK: encoding: [0x62,0x82,0x7e,0x0d,0x15,0xe9]
+ vpmovusqd %xmm21, %xmm25 {%k5}
+
+// CHECK: vpmovusqd %xmm21, %xmm25 {%k5} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x8d,0x15,0xe9]
+ vpmovusqd %xmm21, %xmm25 {%k5} {z}
+
+// VPMOVUSQD — down-convert packed quadwords to doublewords with unsigned
+// saturation (AVX512F + VL forms). llvm-mc/FileCheck vectors covering
+// reg-reg with merge ({%kN}) and zero ({%kN} {z}) masking, plus stores that
+// straddle the EVEX disp8*N compression boundary: e.g. 1016 = 127*8 still
+// encodes as a single compressed disp8 (0x7f), while 1024 needs a full disp32.
+// CHECK: vpmovusqd %ymm21, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x15,0xec]
+ vpmovusqd %ymm21, %xmm20
+
+// CHECK: vpmovusqd %ymm21, %xmm20 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2a,0x15,0xec]
+ vpmovusqd %ymm21, %xmm20 {%k2}
+
+// CHECK: vpmovusqd %ymm21, %xmm20 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xaa,0x15,0xec]
+ vpmovusqd %ymm21, %xmm20 {%k2} {z}
+
+// CHECK: vpmovusqd %xmm18, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x11]
+ vpmovusqd %xmm18, (%rcx)
+
+// CHECK: vpmovusqd %xmm18, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x09,0x15,0x11]
+ vpmovusqd %xmm18, (%rcx) {%k1}
+
+// CHECK: vpmovusqd %xmm18, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x15,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusqd %xmm18, 291(%rax,%r14,8)
+
+// CHECK: vpmovusqd %xmm18, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x52,0x7f]
+ vpmovusqd %xmm18, 1016(%rdx)
+
+// CHECK: vpmovusqd %xmm18, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x92,0x00,0x04,0x00,0x00]
+ vpmovusqd %xmm18, 1024(%rdx)
+
+// CHECK: vpmovusqd %xmm18, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x52,0x80]
+ vpmovusqd %xmm18, -1024(%rdx)
+
+// CHECK: vpmovusqd %xmm18, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x92,0xf8,0xfb,0xff,0xff]
+ vpmovusqd %xmm18, -1032(%rdx)
+
+// CHECK: vpmovusqd %ymm29, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0x29]
+ vpmovusqd %ymm29, (%rcx)
+
+// CHECK: vpmovusqd %ymm29, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x15,0x29]
+ vpmovusqd %ymm29, (%rcx) {%k6}
+
+// CHECK: vpmovusqd %ymm29, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x15,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusqd %ymm29, 291(%rax,%r14,8)
+
+// CHECK: vpmovusqd %ymm29, 2032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0x6a,0x7f]
+ vpmovusqd %ymm29, 2032(%rdx)
+
+// CHECK: vpmovusqd %ymm29, 2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0xaa,0x00,0x08,0x00,0x00]
+ vpmovusqd %ymm29, 2048(%rdx)
+
+// CHECK: vpmovusqd %ymm29, -2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0x6a,0x80]
+ vpmovusqd %ymm29, -2048(%rdx)
+
+// CHECK: vpmovusqd %ymm29, -2064(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0xaa,0xf0,0xf7,0xff,0xff]
+ vpmovusqd %ymm29, -2064(%rdx)
+
+// VPMOVDB — truncate packed doublewords to bytes (AVX512F + VL forms).
+// Reg-reg forms with merge/zero masking, then xmm/ymm stores. The xmm store
+// writes 4 bytes (disp8*N with N=4: 508 = 127*4 compresses, 512 does not);
+// the ymm store writes 8 bytes (N=8: 1016 compresses, 1024 does not).
+// CHECK: vpmovdb %xmm21, %xmm30
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x31,0xee]
+ vpmovdb %xmm21, %xmm30
+
+// CHECK: vpmovdb %xmm21, %xmm30 {%k3}
+// CHECK: encoding: [0x62,0x82,0x7e,0x0b,0x31,0xee]
+ vpmovdb %xmm21, %xmm30 {%k3}
+
+// CHECK: vpmovdb %xmm21, %xmm30 {%k3} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x8b,0x31,0xee]
+ vpmovdb %xmm21, %xmm30 {%k3} {z}
+
+// CHECK: vpmovdb %ymm21, %xmm23
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x31,0xef]
+ vpmovdb %ymm21, %xmm23
+
+// CHECK: vpmovdb %ymm21, %xmm23 {%k4}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2c,0x31,0xef]
+ vpmovdb %ymm21, %xmm23 {%k4}
+
+// CHECK: vpmovdb %ymm21, %xmm23 {%k4} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xac,0x31,0xef]
+ vpmovdb %ymm21, %xmm23 {%k4} {z}
+
+// CHECK: vpmovdb %xmm29, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0x29]
+ vpmovdb %xmm29, (%rcx)
+
+// CHECK: vpmovdb %xmm29, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x31,0x29]
+ vpmovdb %xmm29, (%rcx) {%k3}
+
+// CHECK: vpmovdb %xmm29, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x31,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovdb %xmm29, 291(%rax,%r14,8)
+
+// CHECK: vpmovdb %xmm29, 508(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0x6a,0x7f]
+ vpmovdb %xmm29, 508(%rdx)
+
+// CHECK: vpmovdb %xmm29, 512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0xaa,0x00,0x02,0x00,0x00]
+ vpmovdb %xmm29, 512(%rdx)
+
+// CHECK: vpmovdb %xmm29, -512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0x6a,0x80]
+ vpmovdb %xmm29, -512(%rdx)
+
+// CHECK: vpmovdb %xmm29, -516(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0xaa,0xfc,0xfd,0xff,0xff]
+ vpmovdb %xmm29, -516(%rdx)
+
+// CHECK: vpmovdb %ymm26, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x11]
+ vpmovdb %ymm26, (%rcx)
+
+// CHECK: vpmovdb %ymm26, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x31,0x11]
+ vpmovdb %ymm26, (%rcx) {%k6}
+
+// CHECK: vpmovdb %ymm26, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x31,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovdb %ymm26, 291(%rax,%r14,8)
+
+// CHECK: vpmovdb %ymm26, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x52,0x7f]
+ vpmovdb %ymm26, 1016(%rdx)
+
+// CHECK: vpmovdb %ymm26, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x92,0x00,0x04,0x00,0x00]
+ vpmovdb %ymm26, 1024(%rdx)
+
+// CHECK: vpmovdb %ymm26, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x52,0x80]
+ vpmovdb %ymm26, -1024(%rdx)
+
+// CHECK: vpmovdb %ymm26, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x92,0xf8,0xfb,0xff,0xff]
+ vpmovdb %ymm26, -1032(%rdx)
+
+// VPMOVSDB — truncate packed doublewords to bytes with signed saturation
+// (AVX512F + VL forms). Same coverage pattern as VPMOVDB above: masked
+// reg-reg forms, then xmm (4-byte, disp8*4) and ymm (8-byte, disp8*8) stores
+// on both sides of the disp8 compression limit.
+// CHECK: vpmovsdb %xmm27, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x21,0xde]
+ vpmovsdb %xmm27, %xmm30
+
+// CHECK: vpmovsdb %xmm27, %xmm30 {%k1}
+// CHECK: encoding: [0x62,0x02,0x7e,0x09,0x21,0xde]
+ vpmovsdb %xmm27, %xmm30 {%k1}
+
+// CHECK: vpmovsdb %xmm27, %xmm30 {%k1} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0x89,0x21,0xde]
+ vpmovsdb %xmm27, %xmm30 {%k1} {z}
+
+// CHECK: vpmovsdb %ymm27, %xmm26
+// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x21,0xda]
+ vpmovsdb %ymm27, %xmm26
+
+// CHECK: vpmovsdb %ymm27, %xmm26 {%k3}
+// CHECK: encoding: [0x62,0x02,0x7e,0x2b,0x21,0xda]
+ vpmovsdb %ymm27, %xmm26 {%k3}
+
+// CHECK: vpmovsdb %ymm27, %xmm26 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0xab,0x21,0xda]
+ vpmovsdb %ymm27, %xmm26 {%k3} {z}
+
+// CHECK: vpmovsdb %xmm30, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0x31]
+ vpmovsdb %xmm30, (%rcx)
+
+// CHECK: vpmovsdb %xmm30, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x21,0x31]
+ vpmovsdb %xmm30, (%rcx) {%k3}
+
+// CHECK: vpmovsdb %xmm30, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x21,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsdb %xmm30, 291(%rax,%r14,8)
+
+// CHECK: vpmovsdb %xmm30, 508(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0x72,0x7f]
+ vpmovsdb %xmm30, 508(%rdx)
+
+// CHECK: vpmovsdb %xmm30, 512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0xb2,0x00,0x02,0x00,0x00]
+ vpmovsdb %xmm30, 512(%rdx)
+
+// CHECK: vpmovsdb %xmm30, -512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0x72,0x80]
+ vpmovsdb %xmm30, -512(%rdx)
+
+// CHECK: vpmovsdb %xmm30, -516(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0xb2,0xfc,0xfd,0xff,0xff]
+ vpmovsdb %xmm30, -516(%rdx)
+
+// CHECK: vpmovsdb %ymm25, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x09]
+ vpmovsdb %ymm25, (%rcx)
+
+// CHECK: vpmovsdb %ymm25, (%rcx) {%k5}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2d,0x21,0x09]
+ vpmovsdb %ymm25, (%rcx) {%k5}
+
+// CHECK: vpmovsdb %ymm25, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x21,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsdb %ymm25, 291(%rax,%r14,8)
+
+// CHECK: vpmovsdb %ymm25, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x4a,0x7f]
+ vpmovsdb %ymm25, 1016(%rdx)
+
+// CHECK: vpmovsdb %ymm25, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x8a,0x00,0x04,0x00,0x00]
+ vpmovsdb %ymm25, 1024(%rdx)
+
+// CHECK: vpmovsdb %ymm25, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x4a,0x80]
+ vpmovsdb %ymm25, -1024(%rdx)
+
+// CHECK: vpmovsdb %ymm25, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x8a,0xf8,0xfb,0xff,0xff]
+ vpmovsdb %ymm25, -1032(%rdx)
+
+// VPMOVUSDB — truncate packed doublewords to bytes with unsigned saturation
+// (AVX512F + VL forms). Masked reg-reg forms, then xmm (disp8*4) and
+// ymm (disp8*8) stores bracketing the compressed-displacement limit.
+// CHECK: vpmovusdb %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x11,0xee]
+ vpmovusdb %xmm29, %xmm30
+
+// CHECK: vpmovusdb %xmm29, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x02,0x7e,0x0f,0x11,0xee]
+ vpmovusdb %xmm29, %xmm30 {%k7}
+
+// CHECK: vpmovusdb %xmm29, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0x8f,0x11,0xee]
+ vpmovusdb %xmm29, %xmm30 {%k7} {z}
+
+// CHECK: vpmovusdb %ymm17, %xmm23
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x11,0xcf]
+ vpmovusdb %ymm17, %xmm23
+
+// CHECK: vpmovusdb %ymm17, %xmm23 {%k6}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2e,0x11,0xcf]
+ vpmovusdb %ymm17, %xmm23 {%k6}
+
+// CHECK: vpmovusdb %ymm17, %xmm23 {%k6} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xae,0x11,0xcf]
+ vpmovusdb %ymm17, %xmm23 {%k6} {z}
+
+// CHECK: vpmovusdb %xmm26, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x11]
+ vpmovusdb %xmm26, (%rcx)
+
+// CHECK: vpmovusdb %xmm26, (%rcx) {%k7}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0f,0x11,0x11]
+ vpmovusdb %xmm26, (%rcx) {%k7}
+
+// CHECK: vpmovusdb %xmm26, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x11,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusdb %xmm26, 291(%rax,%r14,8)
+
+// CHECK: vpmovusdb %xmm26, 508(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x52,0x7f]
+ vpmovusdb %xmm26, 508(%rdx)
+
+// CHECK: vpmovusdb %xmm26, 512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x92,0x00,0x02,0x00,0x00]
+ vpmovusdb %xmm26, 512(%rdx)
+
+// CHECK: vpmovusdb %xmm26, -512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x52,0x80]
+ vpmovusdb %xmm26, -512(%rdx)
+
+// CHECK: vpmovusdb %xmm26, -516(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x92,0xfc,0xfd,0xff,0xff]
+ vpmovusdb %xmm26, -516(%rdx)
+
+// CHECK: vpmovusdb %ymm25, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x09]
+ vpmovusdb %ymm25, (%rcx)
+
+// CHECK: vpmovusdb %ymm25, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x11,0x09]
+ vpmovusdb %ymm25, (%rcx) {%k6}
+
+// CHECK: vpmovusdb %ymm25, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x11,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusdb %ymm25, 291(%rax,%r14,8)
+
+// CHECK: vpmovusdb %ymm25, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x4a,0x7f]
+ vpmovusdb %ymm25, 1016(%rdx)
+
+// CHECK: vpmovusdb %ymm25, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x8a,0x00,0x04,0x00,0x00]
+ vpmovusdb %ymm25, 1024(%rdx)
+
+// CHECK: vpmovusdb %ymm25, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x4a,0x80]
+ vpmovusdb %ymm25, -1024(%rdx)
+
+// CHECK: vpmovusdb %ymm25, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x8a,0xf8,0xfb,0xff,0xff]
+ vpmovusdb %ymm25, -1032(%rdx)
+
+// VPMOVDW — truncate packed doublewords to words (AVX512F + VL forms).
+// Masked reg-reg forms, then stores: the xmm form writes 8 bytes
+// (disp8*8: 1016 compresses, 1024 needs disp32) and the ymm form writes
+// 16 bytes (disp8*16: 2032 compresses, 2048 does not).
+// CHECK: vpmovdw %xmm25, %xmm17
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x33,0xc9]
+ vpmovdw %xmm25, %xmm17
+
+// CHECK: vpmovdw %xmm25, %xmm17 {%k5}
+// CHECK: encoding: [0x62,0x22,0x7e,0x0d,0x33,0xc9]
+ vpmovdw %xmm25, %xmm17 {%k5}
+
+// CHECK: vpmovdw %xmm25, %xmm17 {%k5} {z}
+// CHECK: encoding: [0x62,0x22,0x7e,0x8d,0x33,0xc9]
+ vpmovdw %xmm25, %xmm17 {%k5} {z}
+
+// CHECK: vpmovdw %ymm19, %xmm25
+// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x33,0xd9]
+ vpmovdw %ymm19, %xmm25
+
+// CHECK: vpmovdw %ymm19, %xmm25 {%k4}
+// CHECK: encoding: [0x62,0x82,0x7e,0x2c,0x33,0xd9]
+ vpmovdw %ymm19, %xmm25 {%k4}
+
+// CHECK: vpmovdw %ymm19, %xmm25 {%k4} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0xac,0x33,0xd9]
+ vpmovdw %ymm19, %xmm25 {%k4} {z}
+
+// CHECK: vpmovdw %xmm21, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0x29]
+ vpmovdw %xmm21, (%rcx)
+
+// CHECK: vpmovdw %xmm21, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x33,0x29]
+ vpmovdw %xmm21, (%rcx) {%k2}
+
+// CHECK: vpmovdw %xmm21, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x33,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovdw %xmm21, 291(%rax,%r14,8)
+
+// CHECK: vpmovdw %xmm21, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0x6a,0x7f]
+ vpmovdw %xmm21, 1016(%rdx)
+
+// CHECK: vpmovdw %xmm21, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0xaa,0x00,0x04,0x00,0x00]
+ vpmovdw %xmm21, 1024(%rdx)
+
+// CHECK: vpmovdw %xmm21, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0x6a,0x80]
+ vpmovdw %xmm21, -1024(%rdx)
+
+// CHECK: vpmovdw %xmm21, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0xaa,0xf8,0xfb,0xff,0xff]
+ vpmovdw %xmm21, -1032(%rdx)
+
+// CHECK: vpmovdw %ymm22, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0x31]
+ vpmovdw %ymm22, (%rcx)
+
+// CHECK: vpmovdw %ymm22, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2e,0x33,0x31]
+ vpmovdw %ymm22, (%rcx) {%k6}
+
+// CHECK: vpmovdw %ymm22, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x33,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovdw %ymm22, 291(%rax,%r14,8)
+
+// CHECK: vpmovdw %ymm22, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0x72,0x7f]
+ vpmovdw %ymm22, 2032(%rdx)
+
+// CHECK: vpmovdw %ymm22, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0xb2,0x00,0x08,0x00,0x00]
+ vpmovdw %ymm22, 2048(%rdx)
+
+// CHECK: vpmovdw %ymm22, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0x72,0x80]
+ vpmovdw %ymm22, -2048(%rdx)
+
+// CHECK: vpmovdw %ymm22, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0xb2,0xf0,0xf7,0xff,0xff]
+ vpmovdw %ymm22, -2064(%rdx)
+
+// VPMOVSDW — truncate packed doublewords to words with signed saturation
+// (AVX512F + VL forms). Includes a same-register src/dst case (%xmm18,%xmm18)
+// plus the usual masked reg-reg and disp8*N-boundary store coverage
+// (xmm: disp8*8; ymm: disp8*16).
+// CHECK: vpmovsdw %xmm18, %xmm18
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x23,0xd2]
+ vpmovsdw %xmm18, %xmm18
+
+// CHECK: vpmovsdw %xmm18, %xmm18 {%k6}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x0e,0x23,0xd2]
+ vpmovsdw %xmm18, %xmm18 {%k6}
+
+// CHECK: vpmovsdw %xmm18, %xmm18 {%k6} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x8e,0x23,0xd2]
+ vpmovsdw %xmm18, %xmm18 {%k6} {z}
+
+// CHECK: vpmovsdw %ymm18, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x23,0xd4]
+ vpmovsdw %ymm18, %xmm20
+
+// CHECK: vpmovsdw %ymm18, %xmm20 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2a,0x23,0xd4]
+ vpmovsdw %ymm18, %xmm20 {%k2}
+
+// CHECK: vpmovsdw %ymm18, %xmm20 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xaa,0x23,0xd4]
+ vpmovsdw %ymm18, %xmm20 {%k2} {z}
+
+// CHECK: vpmovsdw %xmm29, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0x29]
+ vpmovsdw %xmm29, (%rcx)
+
+// CHECK: vpmovsdw %xmm29, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0x62,0x7e,0x09,0x23,0x29]
+ vpmovsdw %xmm29, (%rcx) {%k1}
+
+// CHECK: vpmovsdw %xmm29, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x23,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsdw %xmm29, 291(%rax,%r14,8)
+
+// CHECK: vpmovsdw %xmm29, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0x6a,0x7f]
+ vpmovsdw %xmm29, 1016(%rdx)
+
+// CHECK: vpmovsdw %xmm29, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0xaa,0x00,0x04,0x00,0x00]
+ vpmovsdw %xmm29, 1024(%rdx)
+
+// CHECK: vpmovsdw %xmm29, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0x6a,0x80]
+ vpmovsdw %xmm29, -1024(%rdx)
+
+// CHECK: vpmovsdw %xmm29, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0xaa,0xf8,0xfb,0xff,0xff]
+ vpmovsdw %xmm29, -1032(%rdx)
+
+// CHECK: vpmovsdw %ymm19, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x19]
+ vpmovsdw %ymm19, (%rcx)
+
+// CHECK: vpmovsdw %ymm19, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2e,0x23,0x19]
+ vpmovsdw %ymm19, (%rcx) {%k6}
+
+// CHECK: vpmovsdw %ymm19, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x23,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsdw %ymm19, 291(%rax,%r14,8)
+
+// CHECK: vpmovsdw %ymm19, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x5a,0x7f]
+ vpmovsdw %ymm19, 2032(%rdx)
+
+// CHECK: vpmovsdw %ymm19, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x9a,0x00,0x08,0x00,0x00]
+ vpmovsdw %ymm19, 2048(%rdx)
+
+// CHECK: vpmovsdw %ymm19, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x5a,0x80]
+ vpmovsdw %ymm19, -2048(%rdx)
+
+// CHECK: vpmovsdw %ymm19, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x9a,0xf0,0xf7,0xff,0xff]
+ vpmovsdw %ymm19, -2064(%rdx)
+
+// VPMOVUSDW — truncate packed doublewords to words with unsigned saturation
+// (AVX512F + VL forms). Mirrors the VPMOVSDW coverage: same-register case,
+// merge/zero masking, and stores on both sides of the disp8*N compression
+// limit (xmm: disp8*8; ymm: disp8*16).
+// CHECK: vpmovusdw %xmm18, %xmm18
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x13,0xd2]
+ vpmovusdw %xmm18, %xmm18
+
+// CHECK: vpmovusdw %xmm18, %xmm18 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x0a,0x13,0xd2]
+ vpmovusdw %xmm18, %xmm18 {%k2}
+
+// CHECK: vpmovusdw %xmm18, %xmm18 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x8a,0x13,0xd2]
+ vpmovusdw %xmm18, %xmm18 {%k2} {z}
+
+// CHECK: vpmovusdw %ymm25, %xmm28
+// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x13,0xcc]
+ vpmovusdw %ymm25, %xmm28
+
+// CHECK: vpmovusdw %ymm25, %xmm28 {%k4}
+// CHECK: encoding: [0x62,0x02,0x7e,0x2c,0x13,0xcc]
+ vpmovusdw %ymm25, %xmm28 {%k4}
+
+// CHECK: vpmovusdw %ymm25, %xmm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0xac,0x13,0xcc]
+ vpmovusdw %ymm25, %xmm28 {%k4} {z}
+
+// CHECK: vpmovusdw %xmm20, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0x21]
+ vpmovusdw %xmm20, (%rcx)
+
+// CHECK: vpmovusdw %xmm20, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x0e,0x13,0x21]
+ vpmovusdw %xmm20, (%rcx) {%k6}
+
+// CHECK: vpmovusdw %xmm20, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x13,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusdw %xmm20, 291(%rax,%r14,8)
+
+// CHECK: vpmovusdw %xmm20, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0x62,0x7f]
+ vpmovusdw %xmm20, 1016(%rdx)
+
+// CHECK: vpmovusdw %xmm20, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0xa2,0x00,0x04,0x00,0x00]
+ vpmovusdw %xmm20, 1024(%rdx)
+
+// CHECK: vpmovusdw %xmm20, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0x62,0x80]
+ vpmovusdw %xmm20, -1024(%rdx)
+
+// CHECK: vpmovusdw %xmm20, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0xa2,0xf8,0xfb,0xff,0xff]
+ vpmovusdw %xmm20, -1032(%rdx)
+
+// CHECK: vpmovusdw %ymm23, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0x39]
+ vpmovusdw %ymm23, (%rcx)
+
+// CHECK: vpmovusdw %ymm23, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x29,0x13,0x39]
+ vpmovusdw %ymm23, (%rcx) {%k1}
+
+// CHECK: vpmovusdw %ymm23, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x13,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusdw %ymm23, 291(%rax,%r14,8)
+
+// CHECK: vpmovusdw %ymm23, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0x7a,0x7f]
+ vpmovusdw %ymm23, 2032(%rdx)
+
+// CHECK: vpmovusdw %ymm23, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0xba,0x00,0x08,0x00,0x00]
+ vpmovusdw %ymm23, 2048(%rdx)
+
+// CHECK: vpmovusdw %ymm23, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0x7a,0x80]
+ vpmovusdw %ymm23, -2048(%rdx)
+
+// CHECK: vpmovusdw %ymm23, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0xba,0xf0,0xf7,0xff,0xff]
+ vpmovusdw %ymm23, -2064(%rdx)
+
// CHECK: vrndscalepd $171, %xmm28, %xmm29
// CHECK: encoding: [0x62,0x03,0xfd,0x08,0x09,0xec,0xab]
vrndscalepd $0xab, %xmm28, %xmm29
OpenPOWER on IntegriCloud